Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

LikeModule.cxx

Go to the documentation of this file.
00001 
00002 // C++
00003 #include <cmath>
00004 #include <iomanip>
00005 #include <iostream>
00006 #include <stdio.h>
00007 #include <stdlib.h>
00008 #include <sstream>
00009 
00010 // ROOT
00011 #include "TFile.h"
00012 #include "TTree.h"
00013 
00014 #include "LikeModule.h"
00015  
00016 using namespace std;
00017 
00018 //-------------------------------------------------------------------------------------------
00019 Lit::LikeModule::LikeModule()
00020    :fDimn(0),
00021     fTree(0)
00022 {
00023 }
00024 
00025 //-------------------------------------------------------------------------------------------
00026 Lit::LikeModule::~LikeModule()
00027 {
00028    if(fTree)
00029    {
00030       delete fTree;
00031    }
00032 }
00033 
00034 //-------------------------------------------------------------------------------------------
00035 bool Lit::LikeModule::Write(const string &filepath, const string &treename)
00036 { 
00037    TFile *file = new TFile(filepath.c_str(), "UPDATE");
00038    if(!file || !file -> IsOpen())
00039    {
00040       cout << "LikeModule::Write() - failed to recreate ROOT file:\n" << filepath << endl;      
00041       return false;  
00042    }
00043 
00044    // write out all events in TTree
00045    Write(file, treename);
00046 
00047    file -> Write();
00048    file -> Close();
00049 
00050    return true;
00051 }
00052 
00053 //-------------------------------------------------------------------------------------------
00054 void Lit::LikeModule::Write(TDirectory *dir, const string &treename)
00055 { 
00056    if(!dir)
00057    {
00058       cerr << "LikeModule::Write() - null TDirectory pointer" << endl;
00059       return;
00060    }
00061 
00062    if(dir -> Get(treename.c_str()))
00063    {
00064       const string treename_tmp = treename + ";*";
00065       cout << "LikeModule::Write - delete object with name " << treename_tmp << endl;
00066       dir -> Delete(treename_tmp.c_str());
00067    }
00068 
00069    Event *event = new Event();
00070    TTree *tree = new TTree(treename.c_str(), "event tree");
00071    tree -> SetDirectory(dir);
00072    tree -> Branch("event", "Lit::Event", &event);
00073 
00074    double size = 0.0;
00075    for(EventVec::const_iterator it = fEvent.begin(); it != fEvent.end(); ++it)
00076    {
00077       (*event) = (*it);
00078       size += tree -> Fill();
00079    }
00080 
00081    cout << "LikeModule::Write() - filled tree with " << size/1048576.0 << "MB and " 
00082         << fEvent.size () << " events" << endl;
00083 
00084    delete event; 
00085 }
00086 
00087 //-------------------------------------------------------------------------------------------
00088 bool Lit::LikeModule::Read(const string &filepath, const int nevent, const string &treename)
00089 { 
00090    TFile *file = new TFile(filepath.c_str(), "READ");
00091    if(!file || !file -> IsOpen())
00092    {
00093       cout << "LikeModule::Read() - failed to read ROOT file:\n" << filepath << endl;      
00094       return false;  
00095    }
00096 
00097    const bool result = Read(file, nevent, treename);
00098 
00099    file -> Close();
00100 
00101    return result;
00102 }
00103 
00104 //-------------------------------------------------------------------------------------------
00105 bool Lit::LikeModule::Read(TDirectory *dir, int nevent, const string &treename)
00106 { 
00107    TTree *tree = dynamic_cast<TTree *>(dir -> Get(treename.c_str()));
00108    if(!tree)
00109    {
00110       cout << "LikeModule::Read() - failed to find " << treename << " tree" << endl;
00111       return false;
00112    }
00113 
00114    Event *event = new Event();
00115    tree -> SetBranchAddress("event", &event);
00116 
00117    if(nevent < 1 || nevent > tree -> GetEntries())
00118    {
00119       nevent = tree -> GetEntries();
00120    }
00121 
00122    double size = 0.0;
00123    for(int i = 0; i < nevent; ++i)
00124    {
00125       size += tree -> GetEntry(i);
00126       Add(*event);
00127    }
00128 
00129    cout << "LikeModule::Read() - read " << size/1048576.0 << "MB and " 
00130         << fEvent.size () << " events" << endl;
00131 
00132    delete event;
00133 
00134    return true;
00135 }
00136 
00137 //-------------------------------------------------------------------------------------------
00138 void Lit::LikeModule::Add(const Event &event)
00139 {   
00140    if(fTree)
00141    {
00142       cerr << "LikeModule::Add() - can not add event: tree is already built" << endl;
00143       return;      
00144    }
00145 
00146    if(fDimn < 1)
00147    {
00148       fDimn = event.GetNVar();
00149    }
00150    else if(fDimn != event.GetNVar())
00151    {
00152       cerr << "LikeModule::Add() - number of dimension does not match previous events" << endl;
00153       return;
00154    }
00155 
00156    fEvent.push_back(event);
00157 
00158    for(unsigned int ivar = 0; ivar < fDimn; ++ivar)
00159    {
00160       fVar[ivar].push_back(event.GetVar(ivar));
00161    }
00162 
00163    std::map<short, unsigned int>::iterator cit = fCount.find(event.GetType());
00164    if(cit == fCount.end())
00165    {
00166       fCount[event.GetType()] = 1;
00167    }
00168    else
00169    {
00170       ++(cit -> second);
00171    }
00172 }
00173 
00174 //-------------------------------------------------------------------------------------------
00175 bool Lit::LikeModule::Fill(const unsigned short optimize_depth, const std::string &option)
00176 {
00177    if(fTree)
00178    {
00179       cerr << "LikeModule::Fill - tree already have been created" << endl;
00180       return false;
00181    }
00182 
00183    // If trim option is set then find class with lowest number of events
00184    // and set that as maximum number of events for all other classes.
00185    unsigned int min = 0;
00186    if(option.find("trim") != string::npos)
00187    {  
00188       for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00189       {
00190          if(min == 0 || min > it -> second)
00191          {
00192             min = it -> second;
00193          }
00194       }
00195       
00196       cout << "LikeModule::Fill - trimming all classes to " << min << " events" << endl;
00197       for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00198       {
00199          cout << "class " << it -> first << " has " 
00200               << setfill(' ') << setw(8) << it -> second << " events" << endl;
00201       }
00202 
00203       fCount.clear();
00204       fVar.clear();
00205 
00206       EventVec evec;
00207 
00208       for(EventVec::const_iterator event = fEvent.begin(); event != fEvent.end(); ++event)
00209       {
00210          std::map<short, unsigned int>::iterator cit = fCount.find(event -> GetType());
00211          if(cit == fCount.end())
00212          {
00213             fCount[event -> GetType()] = 1;
00214          }
00215          else if(cit -> second < min)
00216          {
00217             ++(cit -> second);
00218          }
00219          else
00220          {
00221             continue;
00222          }
00223          
00224          for(unsigned int d = 0; d < fDimn; ++d)
00225          {
00226             fVar[d].push_back(event -> GetVar(d));
00227          }       
00228 
00229          evec.push_back(*event);
00230       }
00231 
00232       cout << "LikeModule::Fill - erased " << fEvent.size() - evec.size() << " events" << endl;
00233 
00234       fEvent = evec;
00235    }
00236 
00237    //clear event count
00238    fCount.clear();
00239 
00240    // sort each variable for all events - needs this before Balance()
00241    for(VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it)
00242    {
00243       std::sort((it -> second).begin(), (it -> second).end());
00244    }
00245 
00246    if(option.find("metric") != string::npos)
00247    {
00248       ComputeMetric();
00249 
00250       // sort again each variable for all events - needs this before Balance()
00251       // rescaling has changed variable values
00252       for(VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it)
00253       {
00254          std::sort((it -> second).begin(), (it -> second).end());
00255       }
00256    }
00257 
00258    // If optimize_depth > 0 then fill first optimize_depth levels
00259    // with empty nodes that split separating variable in half for
00260    // all child nodes. If optimize_depth = 0 then split variable 0
00261    // at the median (in half) and return it as root node
00262    fTree = Balance(optimize_depth);
00263 
00264    if(!fTree)
00265    {
00266       cout << "LikeModule::Fill - failed to create tree" << endl;
00267       return false;      
00268    }      
00269    
00270    for(EventVec::const_iterator event = fEvent.begin(); event != fEvent.end(); ++event)
00271    {
00272       fTree -> Add(*event, 0);
00273 
00274       std::map<short, unsigned int>::iterator cit = fCount.find(event -> GetType());
00275       if(cit == fCount.end())
00276       {
00277          fCount[event -> GetType()] = 1;
00278       }
00279       else
00280       {
00281          ++(cit -> second);
00282       }
00283    }
00284    
00285    for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00286    {
00287       cout << "class " << it -> first << " has " 
00288            << setfill(' ') << setw(8) << it -> second << " events" << endl;
00289    }
00290    
00291    return true;
00292 }
00293 
00294 //-------------------------------------------------------------------------------------------
00295 bool Lit::LikeModule::Find(Event event, const unsigned int nfind) const
00296 {  
00297    // if tree has been filled then search for nfind closest events 
00298    // if metic (fVarScale map) is computed then rescale event variables
00299    // using previsouly computed width of variable distribution
00300 
00301    if(!fTree)
00302    {
00303       cerr << "LikeModule::Find() - tree has not been filled" << endl;
00304       return false;
00305    }
00306    if(fDimn != event.GetNVar())
00307    {
00308       cerr << "LikeModule::Find() - number of dimension does not match training events" << endl;
00309       return false;
00310    }
00311    if(nfind < 1)
00312    {
00313       cerr << "LikeModule::Find() - requested 0 nearest neighbors" << endl;
00314       return false;
00315    }
00316 
00317    // if variable widths are computed then rescale variable in this event
00318    // to same widths as events in stored kd-tree
00319    if(!fVarScale.empty())
00320    {
00321       event = Scale(event);
00322    }
00323 
00324    fResult.Clear();
00325 
00326    fResult.fEvent = event;
00327 
00328    // recursive kd-tree search for nfind-nearest neighbors
00329    Lit::Find(fResult.fList, fTree, event, nfind);
00330 
00331    for(List::const_iterator lit = fResult.GetList().begin(); lit != fResult.GetList().end(); ++lit)
00332    {
00333       const Node *node = lit -> first;
00334 
00335       map<short, double>::iterator pit = fResult.fProbNN.find(node -> GetType());
00336       if(pit == fResult.fProbNN.end())
00337       {
00338          fResult.fProbNN[node -> GetType()] = node -> GetWeight()/double(nfind);
00339       }
00340       else
00341       {
00342          pit -> second += node -> GetWeight()/double(nfind);
00343       }
00344    }
00345 
00346    return true;
00347 }
00348 
00349 //-------------------------------------------------------------------------------------------
00350 bool Lit::LikeModule::Find(const unsigned int nfind, const string &option) const
00351 {
00352    if(fCount.empty() || !fTree)
00353    {
00354       return false;
00355    }
00356 
00357    static std::map<short, unsigned int>::const_iterator cit = fCount.end();
00358 
00359    if(cit == fCount.end())
00360    {
00361       cit = fCount.begin();
00362    }
00363 
00364    const short etype = (cit++) -> first;
00365 
00366    if(option == "flat")
00367    {
00368       VarVec dvec;
00369       for(unsigned int d = 0; d < fDimn; ++d)
00370       {
00371          VarMap::const_iterator vit = fVar.find(d);
00372          if(vit == fVar.end())
00373          {
00374             return false;
00375          }
00376 
00377          const std::vector<double> &vvec = vit -> second;
00378          
00379          if(vvec.empty())
00380          {
00381             return false;
00382          }
00383 
00384          // assume that vector elements of fVar are sorted
00385          const VarType min = vvec.front();
00386          const VarType max = vvec.back();
00387          const VarType width = max - min;
00388 
00389          if(width < 0.0 || width > 0.0)
00390          {
00391             dvec.push_back(min + width*double(std::rand())/double(RAND_MAX));
00392          }
00393          else
00394          {
00395             return false;
00396          }
00397       }
00398 
00399       const Event event(dvec, 1.0, etype);
00400       
00401       Find(event, nfind);  
00402 
00403       return true;
00404    }
00405 
00406    return false;
00407 }
00408 
00409 //-------------------------------------------------------------------------------------------
00410 Lit::Node* Lit::LikeModule::Balance(const unsigned int bdepth)
00411 {
00412    // Balance() balances binary tree for first bdepth levels
00413    // for each depth we split sorted depth % dimension variables
00414    // into 2^bdepth parts
00415 
00416    if(fVar.empty() || fDimn != fVar.size())
00417    {
00418       cout << "LikeModule::Balance - can not build a tree" << endl;
00419       return 0;
00420    }
00421 
00422    const unsigned int size = (fVar.begin() -> second).size();
00423    if(size < 1)
00424    {
00425       cout << "LikeModule::Balance - can not build a tree without events" << endl;
00426       return 0;
00427    }
00428 
00429    for(VarMap::const_iterator it = fVar.begin(); it != fVar.end(); ++it)
00430    {
00431       if((it -> second).size() != size)
00432       {
00433          cout << "LikeModule::Balance - # of variables doesn't match between dimensions" << endl;
00434          return 0;
00435       }
00436    }
00437 
00438    if(double(fDimn*size) < pow(2.0, double(bdepth)))
00439    {
00440       cout << "LikeModule::Balance - balance depth exceeds number of events" << endl;
00441       return 0;      
00442    }   
00443 
00444    cout << "Balancing tree for " << fDimn << " variables with " << size << " values" << endl;
00445 
00446    vector<Node *> pvec, cvec;
00447 
00448    VarMap::const_iterator it = fVar.find(0);
00449    if(it == fVar.end() || (it -> second).size() < 2)
00450    {
00451       cout << "Missing 0 variable" << endl;
00452       return 0;
00453    }
00454 
00455    const Event pevent(VarVec(fDimn, (it -> second)[size/2]), -1.0, -1);
00456    
00457    Node *tree = new Node(0, pevent, 0);
00458    
00459    pvec.push_back(tree);
00460 
00461    for(unsigned int depth = 1; depth < bdepth; ++depth)
00462    {            
00463       const unsigned int mod = depth % fDimn;
00464    
00465       VarMap::const_iterator vit = fVar.find(mod);
00466       if(vit == fVar.end())
00467       {
00468          cerr << "Missing " << mod << " variable" << endl;
00469          return 0;
00470       }
00471       const vector<double> &dvec = vit -> second;
00472 
00473       if(dvec.size() < 2)
00474       {
00475          cerr << "Missing " << mod << " variable" << endl;
00476          return 0;       
00477       }
00478 
00479       //cout << "Mod = " << mod << endl;
00480 
00481       unsigned int ichild = 1;
00482       for(vector<Node *>::iterator pit = pvec.begin(); pit != pvec.end(); ++pit)
00483       {
00484          Node *parent = *pit;
00485 
00486          const VarType lmedian = dvec[size*ichild/(2*pvec.size() + 1)];
00487          //cout << "ChildL " << setw(2) << ichild << " lmedian " << lmedian << endl;
00488          ++ichild;
00489 
00490          const VarType rmedian = dvec[size*ichild/(2*pvec.size() + 1)];
00491          //cout << "ChildR " << setw(2) << ichild << " rmedian " << rmedian << endl;
00492          ++ichild;
00493       
00494          const Event levent(VarVec(fDimn, lmedian), -1.0, -1);
00495          const Event revent(VarVec(fDimn, rmedian), -1.0, -1);
00496 
00497          Node *lchild = new Node(parent, levent, mod);
00498          Node *rchild = new Node(parent, revent, mod);
00499 
00500          parent -> SetNodeL(lchild);
00501          parent -> SetNodeR(rchild);
00502 
00503          cvec.push_back(lchild);
00504          cvec.push_back(rchild);
00505       }
00506 
00507       pvec = cvec;
00508       cvec.clear();
00509    }
00510 
00511    return tree;
00512 }
00513 
00514 //-------------------------------------------------------------------------------------------
00515 void Lit::LikeModule::ComputeMetric(const unsigned int ifrac)
00516 {
00517    // compute scale factor for each variable (dimension) so that 
00518    // distance is computed uniformely along each dimension
00519    // compute width of interval that includes (100 - 2*ifrac)% of events
00520    // below, assume that in fVar each vector of values is sorted
00521 
00522    if(ifrac > 49)
00523    {
00524       cerr << "LikeModule::ComputeMetric() - fraction can not exceed 50%" << endl;
00525       return;
00526    }
00527    
00528    if(!fVarScale.empty())
00529    {
00530       cerr << "LikeModule::ComputeMetric() - metric is already computed" << endl;
00531       return;
00532    }
00533 
00534    cout << "Computing scale for 1d axes: ifrac = " << ifrac << endl;
00535 
00536    fVarScale.clear();
00537    
00538    for(VarMap::const_iterator vit = fVar.begin(); vit != fVar.end(); ++vit)
00539    {
00540       const vector<double> &dvec = vit -> second;
00541       
00542       vector<double>::const_iterator beg_it = dvec.end();
00543       vector<double>::const_iterator end_it = dvec.end();
00544       for(vector<double>::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00545       {
00546          const int dist = distance(dvec.begin(), dit);
00547 
00548          if((100*dist)/dvec.size() == ifrac && beg_it == dvec.end())
00549          {
00550             beg_it = dit;
00551          }
00552 
00553          if((100*dist)/dvec.size() == 100 - ifrac && end_it == dvec.end())
00554          {
00555             end_it = dit;
00556          }
00557       }
00558 
00559       if(beg_it == dvec.end() || end_it == dvec.end())
00560       {
00561          cerr << "LikeModule::ComputeMetric() - failed to determine scale " << vit -> first << endl;
00562          continue;
00563       }
00564 
00565       const double lpos = *beg_it;
00566       const double rpos = *end_it;
00567 
00568       if(!(lpos < rpos))
00569       {
00570          cerr << "LikeModule::ComputeMetric() - min value is greater than max value" << endl;
00571          continue;       
00572       }
00573       
00574       cout << "Variable " << vit -> first 
00575            << " included " << distance(beg_it, end_it) + 1
00576            << " events: width = " << setfill(' ') << setw(5) << setprecision(3) << rpos - lpos
00577            << ", (min, max) = (" << setfill(' ') << setw(5) << setprecision(3) << lpos 
00578            << ", " << setfill(' ') << setw(5) << setprecision(3) << rpos << ")" << endl;
00579 
00580       fVarScale[vit -> first] = rpos - lpos;
00581    }
00582 
00583    fVar.clear();
00584 
00585    for(unsigned int ievent = 0; ievent < fEvent.size(); ++ievent)
00586    {      
00587       fEvent[ievent] = Scale(fEvent[ievent]);
00588 
00589       for(unsigned int ivar = 0; ivar < fDimn; ++ivar)
00590       {
00591          fVar[ivar].push_back(fEvent[ievent].GetVar(ivar));
00592       }  
00593    }
00594 }
00595 
00596 //-------------------------------------------------------------------------------------------
00597 const Lit::Event Lit::LikeModule::Scale(const Event &event) const
00598 {
00599    // scale each event variable so that rms of variables is approximately 1.0
00600    // this allows comparisons of variables with distinct scales and units
00601    
00602    if(fVarScale.empty())
00603    {
00604       return event;
00605    }
00606 
00607    if(event.GetNVar() != fVarScale.size())
00608    {
00609       cerr << "LikeModule::Scale() - mismatched metric and event size" << endl;
00610       return event;
00611    }
00612 
00613    VarVec vvec(event.GetNVar(), 0.0);
00614 
00615    for(unsigned int ivar = 0; ivar < event.GetNVar(); ++ivar)
00616    {
00617       map<int, double>::const_iterator fit = fVarScale.find(ivar);
00618       if(fit == fVarScale.end())
00619       {
00620          cerr << "LikeModule::Scale() - failed to find scale for " << ivar << endl;
00621          continue;
00622       }
00623       
00624       if(fit -> second > 0.0)
00625       {
00626          vvec[ivar] = event.GetVar(ivar)/fit -> second;
00627       }
00628       else
00629       {
00630          cerr << "Variable " << ivar << " has zero width" << endl;
00631       }
00632    }
00633 
00634    return Event(vvec, event.GetWeight(), event.GetType());
00635 }
00636 
00637 //-------------------------------------------------------------------------------------------
00638 void Lit::LikeModule::Print() const
00639 {
00640    Print(std::cout);
00641 }
00642 
00643 //-------------------------------------------------------------------------------------------
00644 void Lit::LikeModule::Print(std::ostream &os) const
00645 {
00646    os << "---------------------------------------------------------------------------------"<< endl;
00647    os << "Dumping stringstream content of LikeModule" << endl;
00648    os << "---------------------------------------------------------------------------------"<< endl;
00649 }

Generated on Thu Nov 1 11:51:13 2007 for loon by  doxygen 1.3.9.1