00001
00002
00003 #include <sys/stat.h>
00004
00005
00006 #include "TSystem.h"
00007
00008
00009 #include "RunFinder.h"
00010
00011 using namespace std;
00012
00013
00014 bool Anp::Finder::operator==(const std::string &lhs, const File &rhs)
00015 {
00016 return (lhs == rhs.name);
00017 }
00018
00019
00020 bool Anp::Finder::operator==(const File &lhs, const std::string &rhs)
00021 {
00022 return (lhs.name == rhs);
00023 }
00024
00025
00026 bool Anp::Finder::operator<(const File &lhs, const File &rhs)
00027 {
00028 return (lhs.name < rhs.name);
00029 }
00030
00031
00032 double Anp::Finder::file_size(const std::string &path)
00033 {
00034 struct stat result;
00035 if(stat(path.c_str(), &result) != 0)
00036 {
00037 return 0.0;
00038 }
00039
00040 return double(result.st_size)/1038336;
00041 }
00042
00043
00044 const std::vector<std::string> Anp::Finder::read_file(const std::string &file)
00045 {
00046 std::vector<std::string> svec;
00047
00048 std::ifstream infile(file.c_str());
00049 if(!infile || !infile.is_open())
00050 {
00051 std::cout << "Error! Cannot open file:" << file << std::endl;
00052 return svec;
00053 }
00054
00055 while(infile.good())
00056 {
00057 std::string instr;
00058 std::getline(infile, instr);
00059
00060 if(instr.size() < 6)
00061 {
00062 continue;
00063 }
00064
00065 svec.push_back(instr);
00066 }
00067
00068 infile.close();
00069
00070 return svec;
00071 }
00072
00073
00074 Anp::RunFinder::RunFinder()
00075 {
00076 }
00077
00078
00079 Anp::RunFinder::~RunFinder()
00080 {
00081 }
00082
00083
00084 int Anp::RunFinder::AddDir(const std::string &path, const std::string &key)
00085 {
00086 void *dir_ptr = gSystem -> OpenDirectory(path.c_str());
00087 if(!dir_ptr)
00088 {
00089 cout << "RunFinder::AddDir - directory does not exist:" << endl
00090 << " " << path << endl;
00091 return 0;
00092 }
00093
00094 unsigned int count_pass = 0, count_miss = 0;
00095
00096 const char *entry = gSystem -> GetDirEntry(dir_ptr);
00097 while(entry)
00098 {
00099 const string file = entry;
00100 entry = gSystem -> GetDirEntry(dir_ptr);
00101
00102 if(!(file.size() > 1))
00103 {
00104 continue;
00105 }
00106
00107 const Finder::File newfile = GetFile(path, file);
00108
00109 if(newfile.name.empty() || newfile.path.empty())
00110 {
00111 cerr << "RunFinder::Add() - invalid file: " << file << endl;
00112 continue;
00113 }
00114
00115 if(!FindMatch(newfile.name))
00116 {
00117 continue;
00118 }
00119
00120 if(!key.empty() && newfile.name.find(key, 0) == std::string::npos)
00121 {
00122 continue;
00123 }
00124
00125 if(!(Finder::file_size(newfile.fullpath()) > 0.0))
00126 {
00127 ++count_miss;
00128 }
00129
00130 if(std::find(fFiles.begin(), fFiles.end(), newfile.name) == fFiles.end())
00131 {
00132 fFiles.push_back(newfile);
00133 ++count_pass;
00134 }
00135 else
00136 {
00137 std::cerr << "RunFinder::AddFileName(): duplicate file " << file << std::endl;
00138 }
00139 }
00140
00141 if(count_miss > 0)
00142 {
00143 std::cerr << "RunFinder: skipped " << count_miss << " non existing files" << std::endl;
00144 }
00145
00146 std::sort(fFiles.begin(), fFiles.end());
00147
00148 return count_pass;
00149 }
00150
00151
00152 int Anp::RunFinder::Add(const std::string &path, const std::string &ifile, const std::string &key)
00153 {
00154 const std::vector<std::string> ivec = Anp::Finder::read_file(ifile);
00155 if(ivec.empty())
00156 {
00157 std::cerr << "RunFinder::Add() - failed to read index file" << std::endl;
00158 return 0;
00159 }
00160
00161 unsigned int count_pass = 0, count_miss = 0;
00162 for(std::vector<std::string>::const_iterator sit = ivec.begin(); sit != ivec.end(); ++sit)
00163 {
00164 std::string filename = *sit;
00165
00166 const Finder::File newfile = GetFile(path, *sit);
00167
00168 if(newfile.name.empty() || newfile.path.empty())
00169 {
00170 std::cerr << "RunFinder::Add() - invalid file: " << *sit << std::endl;
00171 continue;
00172 }
00173
00174 if(!FindMatch(newfile.name))
00175 {
00176 continue;
00177 }
00178
00179 if(!key.empty() && newfile.name.find(key, 0) == std::string::npos)
00180 {
00181 continue;
00182 }
00183
00184 if(!(Finder::file_size(newfile.fullpath()) > 0.0))
00185 {
00186 ++count_miss;
00187 }
00188
00189 if(std::find(fFiles.begin(), fFiles.end(), newfile.name) == fFiles.end())
00190 {
00191 fFiles.push_back(newfile);
00192 ++count_pass;
00193 }
00194 else
00195 {
00196 std::cerr << "RunFinder::AddFileName(): duplicate file " << filename << std::endl;
00197 }
00198 }
00199
00200 if(count_miss > 0)
00201 {
00202 std::cerr << "RunFinder: skipped " << count_miss << " non existing files" << std::endl;
00203 }
00204
00205 std::sort(fFiles.begin(), fFiles.end());
00206
00207 return count_pass;
00208 }
00209
00210
00211 int Anp::RunFinder::AddIndex(const std::string &ifile, const std::string &key)
00212 {
00213 const std::vector<std::string> ivec = Anp::Finder::read_file(ifile);
00214 if(ivec.empty())
00215 {
00216 std::cerr << "RunFinder::AddIndex() - failed to read index file" << std::endl;
00217 return 0;
00218 }
00219
00220 unsigned int count_pass = 0, count_miss = 0;
00221 for(std::vector<std::string>::const_iterator sit = ivec.begin(); sit != ivec.end(); ++sit)
00222 {
00223 std::string filename = *sit;
00224
00225 const Finder::File newfile("", filename);
00226
00227 if(newfile.name.empty())
00228 {
00229 std::cerr << "RunFinder::Add() - invalid file: " << *sit << std::endl;
00230 continue;
00231 }
00232
00233 if(!FindMatch(newfile.name))
00234 {
00235 continue;
00236 }
00237
00238 if(!key.empty() && newfile.name.find(key, 0) == std::string::npos)
00239 {
00240 continue;
00241 }
00242
00243 if(std::find(fFiles.begin(), fFiles.end(), newfile.name) == fFiles.end())
00244 {
00245 fFiles.push_back(newfile);
00246 ++count_pass;
00247 }
00248 else
00249 {
00250 ++count_miss;
00251 std::cerr << "RunFinder::AddFileName(): duplicate file " << filename << std::endl;
00252 }
00253 }
00254
00255 if(count_miss > 0)
00256 {
00257 std::cerr << "RunFinder: skipped " << count_miss << " non existing files" << std::endl;
00258 }
00259
00260 std::sort(fFiles.begin(), fFiles.end());
00261
00262 return count_pass;
00263 }
00264
00265
00266 void Anp::RunFinder::Compress()
00267 {
00268 std::vector<unsigned int> svec;
00269
00270 for(std::vector<std::string>::const_iterator kit = fKeys.begin(); kit != fKeys.end(); ++kit)
00271 {
00272 unsigned int count = 0;
00273 for(std::vector<Anp::Finder::File>::const_iterator fit = fFiles.begin(); fit != fFiles.end(); ++fit)
00274 {
00275 if((fit -> name).find(*kit, 0) != std::string::npos)
00276 {
00277 ++count;
00278 }
00279 }
00280
00281 if(std::find(svec.begin(), svec.end(), count) == svec.end())
00282 {
00283 svec.push_back(count);
00284 }
00285 }
00286
00287 if(svec.empty())
00288 {
00289 std::cerr << "Anp::RunFinder::Compress() - no files match any keys" << std::endl;
00290 return;
00291 }
00292 else if(svec.size() == 1)
00293 {
00294 return;
00295 }
00296
00297 std::sort(svec.begin(), svec.end());
00298
00299 const unsigned int max = svec.back();
00300 const unsigned int min = svec.front();
00301
00302 std::sort(svec.begin(), svec.end());
00303
00304 std::cout << "Anp::RunFinder::Compress()" << std::endl
00305 << " Minimum number of matches per key: " << min << std::endl
00306 << " Maximum number of matches per key: " << max << std::endl
00307 << " Will forces number of matches pre key to be " << max << std::endl;
00308
00309 unsigned int nremoved = 0;
00310 for(std::vector<std::string>::const_iterator kit = fKeys.begin(); kit != fKeys.end(); ++kit)
00311 {
00312 unsigned int count = 0;
00313 for(std::vector<Anp::Finder::File>::const_iterator fit = fFiles.begin(); fit != fFiles.end(); ++fit)
00314 {
00315 if((fit -> name).find(*kit, 0) != std::string::npos)
00316 {
00317 ++count;
00318 }
00319 }
00320
00321 if(count == max)
00322 {
00323 continue;
00324 }
00325
00326 ++nremoved;
00327
00328 std::vector<Anp::Finder::File>::iterator fit = fFiles.begin();
00329 while(fit != fFiles.end())
00330 {
00331 if((fit -> name).find(*kit, 0) != std::string::npos)
00332 {
00333 std::cout << "Removed " << fit -> name << std::endl;
00334 fit = fFiles.erase(fit);
00335 }
00336 else
00337 {
00338 ++fit;
00339 }
00340 }
00341 }
00342
00343 std::sort(fFiles.begin(), fFiles.end());
00344
00345 std::cout << "Anp::RunFinder::Compress() - removed " << nremoved << " keys" << std::endl;
00346 }
00347
00348
00349 bool Anp::RunFinder::FindMatch(const std::string &filename) const
00350 {
00351 for(std::vector<std::string>::const_iterator it = fKeys.begin(); it != fKeys.end(); ++it)
00352 {
00353 if(filename.find(*it, 0) == std::string::npos)
00354 {
00355 return false;
00356 }
00357 }
00358
00359 return true;
00360 }
00361
00362
00363 const Anp::Finder::File Anp::RunFinder::GetFile(std::string path, const std::string &entry) const
00364 {
00365 if(path.empty() || entry.empty())
00366 {
00367 std::cerr << "RunFinder::GetFile() - empty path or filename" << std::endl;
00368 return Anp::Finder::File(path, entry);
00369 }
00370
00371 if(path.find_last_of("/") == path.size() - 1)
00372 {
00373 path.erase(path.size() - 1, std::string::npos);
00374 }
00375
00376 const std::string::size_type lpos = entry.find_last_of("/", std::string::npos);
00377 const std::string::size_type fpos = entry.find_first_of("/", 0);
00378
00379
00380 if(lpos == std::string::npos)
00381 {
00382 return Anp::Finder::File(path, entry);
00383 }
00384
00385
00386 if(fpos == 0 || lpos == entry.size() - 1)
00387 {
00388 std::cerr << "RunFinder::GetFile() - file format error!" << std::endl;
00389 return Anp::Finder::File(path, entry);
00390 }
00391
00392
00393 if(fpos > lpos)
00394 {
00395 std::cerr << "RunFinder::GetFile() - logic error!" << std::endl;
00396 return Anp::Finder::File(path, entry);
00397 }
00398
00399 path = path + "/" + entry.substr(0, lpos);
00400
00401 return Anp::Finder::File(path, entry.substr(lpos + 1, std::string::npos));
00402 }
00403
00404
00405 const std::string Anp::RunFinder::GetFile(const int index) const
00406 {
00407 if(index >= int(fFiles.size()) || index < 0)
00408 {
00409 std::cerr << "RunFinder::GetFile() - index is out of range" << std::endl;
00410 return std::string();
00411 }
00412
00413 return fFiles[index].name;
00414 }
00415
00416
00417 const std::string Anp::RunFinder::GetPath(const int index) const
00418 {
00419 if(index >= int(fFiles.size()) || index < 0)
00420 {
00421 std::cerr << "RunFinder::GetFile() - index is out of range" << std::endl;
00422 return std::string();
00423 }
00424
00425 return fFiles[index].fullpath();
00426 }
00427
00428
00429 double Anp::RunFinder::GetFileSize(const int index) const
00430 {
00431 if(index < 0)
00432 {
00433 double size = 0.0;
00434 for(std::vector<Anp::Finder::File>::const_iterator it = fFiles.begin(); it != fFiles.end(); ++it)
00435 {
00436 size += Anp::Finder::file_size(it -> fullpath());
00437 }
00438
00439 return size;
00440 }
00441
00442 return Anp::Finder::file_size(GetPath(index));
00443 }