liblast
Last Class Reference

#include <last.h>

List of all members.

Public Member Functions

Inits

Initializer functions.

 Last ()
 Constructor for standard settings: 95% significance level, minimum frequency 2, type trees, dynamic upper bound, BBRC.
 ~Last ()
void Reset ()
 Use this to clear the database before feeding new compounds and activities.
void Defaults ()
 Use this to set default parameters as in default constructor.
Getters

Getter functions.

int GetMinfreq ()
 Get minimum frequency.
int GetType ()
 Get type.
bool GetBackbone ()
 Get whether BBRC representatives should be mined.
bool GetDynamicUpperBound ()
 Get whether dynamic upper bound pruning is used.
bool GetPruning ()
 Get whether statistical metric pruning should be used.
bool GetConsoleOut ()
 Get whether output should be directed to the console.
bool GetAromatic ()
 Get whether aromatic rings should be perceived instead of Kekule notation.
bool GetRefineSingles ()
 Get whether fragments with frequency 1 should be refined.
bool GetDoOutput ()
 Get whether output is enabled.
bool GetBbrcSep ()
 Get whether BBRCs should be separated in the output.
bool GetChisqActive ()
 Get whether chi-square filter is active.
float GetChisqSig ()
 Get significance threshold.
bool GetRegression ()
 Dummy method for regression (only used for bbrcs).
int GetMaxHops ()
 Get maximum number of hops.
Setters

Setter functions.

void SetMinfreq (int val)
 Set minimum frequency (>=1 here). Same as '-f'.
bool SetType (int val)
 Set type 1 (paths) or 2 (trees) here. Same as '-l'.
bool SetBackbone (bool val)
 Pass 'false' here to switch off mining for BBRC representatives. Same as '-b'.
bool SetDynamicUpperBound (bool val)
 Pass 'false' here to disable dynamic upper bound pruning (e.g. for performance measures). Same as '-d'.
bool SetPruning (bool val)
 Pass 'false' here to disable statistical metrical pruning completely. Same as '-u'.
bool SetConsoleOut (bool val)
 Pass 'true' here to disable usage of result vector and directly print each fragment to the console (saves memory).
void SetAromatic (bool val)
 Pass 'false' here to disable aromatic ring perception and use Kekule notation instead. IMPORTANT! SET THIS BEFORE CALLING AddCompound()! Same as '-a'.
bool SetRefineSingles (bool val)
 Pass 'true' here to enable refinement of fragments with frequency 1. Same as '-s'.
void SetDoOutput (bool val)
 Pass 'false' here to disable output. Same as '-o'.
bool SetBbrcSep (bool val)
 Set this to 'true' to enable BBRC separators in output.
bool SetChisqActive (bool val)
 Set this to 'true' to enable chi-square filter.
bool SetChisqSig (float _chisq_val)
 Set significance threshold here (between 0 and 1). Same as '-p'.
bool SetRegression (bool val)
 Dummy method for regression (only used for bbrcs). Same as '-g'.
bool SetMaxHops (int val)
 Set maximum number of hops. Same as '-m'.
Others

Other functions.

vector< string > * MineRoot (unsigned int j)
 Mine fragments rooted at the j-th root node (element type).
void ReadGsp (FILE *gsp)
 Read in a gSpan file.
bool AddCompound (string smiles, unsigned int comp_id)
 Add a compound to the database.
bool AddActivity (float act, unsigned int comp_id)
 Add an activity to the database.
int GetNoRootNodes ()
 Get number of root nodes (different element types).
int GetNoCompounds ()
 Get number of compounds in the database.
float ChisqTest (vector< float > all, vector< float > feat)
float KSTest (vector< float > all, vector< float > feat)
 Calculate a KS p-value on the fly- just use it. all (feat): all (feature) database activities. Returns (negative) positive sign, if (de)activating.

Constructor & Destructor Documentation

Last ( )

Constructor for standard settings: 95% significance level, minimum frequency 2, type trees, dynamic upper bound, BBRC.

References Defaults(), fm::last_gsp_out, and Reset().

           : init_mining_done(false) {
  // Only one instance may exist per process: a second construction aborts.
  if (fm::last_instance_present) {
    cerr << "Error! Cannot create more than 1 instance." << endl; 
    exit(1);
  }

  // Null the shared pointers before Reset() allocates fresh objects.
  fm::last_database = NULL;
  fm::last_statistics = NULL;
  fm::last_result = NULL;

  Reset();
  Defaults();
  fm::last_instance_present = true;
  // Overrides the value Defaults() has just assigned to this flag.
  fm::last_gsp_out = false;

  // With FMINER_SILENT set in the environment, stderr goes to a debug file.
  if (getenv("FMINER_SILENT")) {
    FILE* fp = freopen ("fminer_debug.txt","w",stderr);
  }
}
~Last ( )

References fm::last_chisq, and fm::last_ks.

            {
    // Nothing to release unless an instance is registered.
    if (!fm::last_instance_present) {
        return;
    }

    // Free the shared objects allocated by Reset().
    delete fm::last_database;
    delete fm::last_statistics;
    delete fm::last_chisq;
    delete fm::last_ks;
    delete fm::last_graphstate;
    delete fm::last_closelegoccurrences;
    delete fm::last_legoccurrences;

    // Empty the shared candidate containers.
    fm::last_Lastcandidatelegsoccurrences.clear();
    fm::last_candidatecloselegsoccs.clear();
    fm::last_candidateLastcloselegsoccsused.clear();

    // Allow a new instance to be constructed afterwards.
    fm::last_instance_present = false;
}

Member Function Documentation

bool AddActivity ( float  act,
unsigned int  comp_id 
)

Add an activity to the database.

References fm::last_db_built.

                                                      {
  // Activities are accepted only while the database has not been built yet.
  if (!fm::last_db_built) {
    activity_map.insert(make_pair(comp_id, act));
    return true;
  }

  cerr << "LastDatabase has been already processed! Please reset() and insert a new dataset." << endl;
  return false;
}
bool AddCompound ( string  smiles,
unsigned int  comp_id 
)

Add a compound to the database.

References fm::last_db_built.

                                                          {
  // Compounds are accepted only while the database has not been built yet.
  if (fm::last_db_built) {
    cerr << "LastDatabase has been already processed! Please reset() and insert a new dataset." << endl;
    return false;
  }

  // Convert the SMILES string to InChI via Open Babel.
  stringstream smiles_in(smiles);
  OBConversion converter(&smiles_in, &cout);
  if (!converter.SetInAndOutFormats("SMI","INCHI")) {
    cerr << "Formats not available" << endl;
    return false;
  }
  OBMol molecule;
  if (!converter.Read(&molecule)) {
    cerr << "Could not convert '" << smiles << "' (leaving out)." << endl;
    return false;
  }
  converter.SetOptions("w",OBConversion::OUTOPTIONS);
  string inchi = converter.WriteString(&molecule);

  // Strip trailing newline characters (string is left untouched if it
  // consists of newlines only).
  const string::size_type last_kept = inchi.find_last_not_of("\n");
  if (last_kept != string::npos) {
    inchi.erase(last_kept + 1);
  }

  // Record the structure by InChI; a duplicate is only reported.
  const pair<unsigned int, string> original = make_pair(comp_id, smiles);
  const bool is_new_structure = inchi_compound_map.insert(make_pair(inchi, original)).second;
  if (!is_new_structure) {
    cerr << "Note: structure of '" << smiles << "' has been already inserted, inserting anyway..." << endl;
  }

  // Insert into the actual map under "<inchi>-<running number>", so that
  // duplicates of the same structure still get distinct keys.
  ++comp_runner;
  stringstream numbered_key;
  numbered_key << inchi << "-" << comp_runner;
  inchi_compound_mmap.insert(make_pair(numbered_key.str(), original));
  return true;
}
float ChisqTest ( vector< float >  all,
vector< float >  feat 
)

References fm::last_chisq.

                                                           {
  // Histograms: activity value -> number of occurrences.
  map<float, unsigned int> all_counts;
  map<float, unsigned int> feat_counts;

  // operator[] value-initialises a missing counter to 0 before the increment,
  // so the first occurrence of a value yields a count of 1.
  each(all) {
    ++all_counts[all[i]];
  }
  each(feat) {
    ++feat_counts[feat[i]];
  }

  // Feature histogram first, overall histogram second.
  return fm::last_chisq->ChiSqTest(feat_counts, all_counts);
}
void Defaults ( )

Use this to set default parameters as in default constructor.

References fm::last_aromatic, fm::last_bbrc_sep, fm::last_do_pruning, fm::last_gsp_out, fm::last_max_hops, and fm::last_regression.

Referenced by Last().

                    {
    // Mining parameters.
    fm::last_minfreq        = 2;
    fm::last_type           = 2;
    fm::last_do_pruning     = true;
    fm::last_aromatic       = true;
    fm::last_refine_singles = false;
    fm::last_regression     = false;

    // Output behaviour. Note: the constructor sets last_gsp_out to false
    // right after calling Defaults().
    fm::last_console_out = true;
    fm::last_do_output   = true;
    fm::last_bbrc_sep    = false;
    fm::last_gsp_out     = true;

    fm::last_updated = true;

    // LAST
    fm::last_do_last  = true;
    fm::last_hops     = 0;
    fm::last_die      = 0;
    fm::last_max_hops = 25;
}
bool GetAromatic ( )

Get whether aromatic rings should be perceived instead of Kekule notation.

References fm::last_aromatic.

Referenced by MineRoot().

bool GetBackbone ( )

Get whether BBRC representatives should be mined.

{
    // Constant result: this getter always reports false.
    return false;
}
bool GetBbrcSep ( )

Get whether BBRCs should be separated in the output.

References fm::last_bbrc_sep.

bool GetChisqActive ( )

Get whether chi-square filter is active.

References fm::last_chisq.

Referenced by MineRoot().

{
    // Reads the 'active' flag of the shared chi-square constraint object.
    return fm::last_chisq->active;
}
float GetChisqSig ( )

Get significance threshold.

References fm::last_chisq, fm::last_ks, and fm::last_regression.

Referenced by MineRoot().

{
    // In regression mode the threshold comes from the KS constraint,
    // otherwise from the chi-square constraint.
    if (fm::last_regression) {
        return fm::last_ks->sig;
    }
    return fm::last_chisq->sig;
}
bool GetConsoleOut ( )

Get whether output should be directed to the console.

{
    // Plain accessor for the shared console-output flag.
    return fm::last_console_out;
}
bool GetDoOutput ( )

Get whether output is enabled.

Referenced by MineRoot().

{
    // Plain accessor for the shared output-enabled flag.
    return fm::last_do_output;
}
bool GetDynamicUpperBound ( )

Get whether dynamic upper bound pruning is used.

{
    // Constant result: this getter always reports false.
    return false;
}
int GetMaxHops ( )

Get maximum number of hops.

References fm::last_max_hops.

Referenced by MineRoot().

int GetMinfreq ( )

Get minimum frequency.

Referenced by MineRoot().

{
    // Plain accessor for the shared minimum-frequency setting.
    return fm::last_minfreq;
}
int GetNoCompounds ( ) [inline]

Get number of compounds in the database.

References fm::last_db_built.

int GetNoRootNodes ( ) [inline]

Get number of root nodes (different element types).

References fm::last_db_built.

Referenced by MineRoot().

bool GetPruning ( )

Get whether statistical metric pruning should be used.

References fm::last_do_pruning.

Referenced by MineRoot().

bool GetRefineSingles ( )

Get whether fragments with frequency 1 should be refined.

Referenced by SetMinfreq().

{
    // Plain accessor for the shared refine-singles flag.
    return fm::last_refine_singles;
}
bool GetRegression ( )

Dummy method for regression (only used for bbrcs).

References fm::last_regression.

Referenced by MineRoot().

int GetType ( )

Get type.

Referenced by MineRoot().

{
    // Plain accessor for the shared fragment-type setting.
    return fm::last_type;
}
float KSTest ( vector< float >  all,
vector< float >  feat 
) [inline]

Calculate a KS p-value on the fly- just use it. all (feat): all (feature) database activities. Returns (negative) positive sign, if (de)activating.

References fm::last_ks.

vector< string > * MineRoot ( unsigned int  j)

Mine fragments rooted at the j-th root node (element type).

References GetAromatic(), GetChisqActive(), GetChisqSig(), GetDoOutput(), GetMaxHops(), GetMinfreq(), GetNoRootNodes(), GetPruning(), GetRegression(), GetType(), fm::last_bbrc_sep, fm::last_chisq, fm::last_db_built, and fm::last_regression.

                                             {
    // Every call starts from an empty result buffer; the returned pointer is
    // fm::last_result itself.
    fm::last_result->clear();

    // One-time setup, performed on the first call after construction/Reset().
    if (!init_mining_done) {
        if (!fm::last_db_built) {
          AddDataCanonical();
        }
        // Adjust chisq bound
        if (!fm::last_regression) {
          // Classification needs between 2 and 5 activity classes. The two
          // failure cases are reported separately (previously both printed
          // "Too many classes", also for 0 or 1 classes).
          const size_t nr_classes = fm::last_chisq->nr_acts.size();
          if (nr_classes < 2) {
            cerr << "Error! Too few classes: '" << nr_classes << "' (Min. 2)." << endl;
            exit(1);
          }
          if (nr_classes > 5) {
            cerr << "Error! Too many classes: '" << nr_classes << "' (Max. 5)." << endl;
            exit(1);
          }
          if (fm::last_chisq->sig == -1.0) { // do not override user-supplied threshold
            fm::last_chisq->sig=fm::last_chisq->df_thresholds[nr_classes-1];
          }
        }
        fm::last_database->edgecount ();
        fm::last_database->reorder ();
        LastinitLastLegStatics ();
        fm::last_graphstate->init ();
        if (fm::last_bbrc_sep && fm::last_do_output && !fm::last_console_out) (*fm::last_result) << fm::last_graphstate->sep();
        init_mining_done=true;

        // Dump the effective settings to stderr. Only the label of the
        // statistics line differs between classification and regression.
        const char* stat_label = fm::last_regression
                 ? "KS active (p-value):                  "
                 : "Chi-square active (chi-square-value): ";
        cerr << "Settings:" << endl
             << "---" << endl
             << "Type:                                 " << GetType() << endl
             << "Minimum frequency:                    " << GetMinfreq() << endl
             << "Aromatic:                             " << GetAromatic() << endl
             << "Regression:                           " << GetRegression() << endl
             << stat_label << GetChisqActive() << " (" << GetChisqSig()<< ")" << endl
             << "Statistical metric pruning:           " << GetPruning() << endl
             << "Do output:                            " << GetDoOutput() << endl
             << "Max Hops:                             " << GetMaxHops() << endl
             << "---" << endl;

        // Emit the GraphML preamble once, to the result buffer or the console.
        if (fm::last_do_output) {

          string xml_header = 
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n\
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\
xsi:noNamespaceSchemaLocation=\"graphml.xsd\">\n\
\n\
<!-- LAtent STructure Mining (LAST) descriptors-->\n\
\n\
<key id=\"act\" for=\"graph\" attr.name=\"activating\" attr.type=\"boolean\" />\n\
<key id=\"hops\" for=\"graph\" attr.name=\"hops\" attr.type=\"int\" />\n\
<key id=\"lab_n\" for=\"node\" attr.name=\"node_labels\" attr.type=\"string\" />\n\
<key id=\"lab_e\" for=\"edge\" attr.name=\"edge_labels\" attr.type=\"string\" />\n\
<key id=\"weight\" for=\"edge\" attr.name=\"edge_weight\" attr.type=\"int\" />\n\
<key id=\"del\" for=\"edge\" attr.name=\"edge_deleted\" attr.type=\"boolean\" />\n\n";

         if (!fm::last_console_out) (*fm::last_result) << xml_header;
         else cout << xml_header;
      }
    }

    // Reject root indices beyond the known node labels.
    if (j >= fm::last_database->nodelabels.size()) { cerr << "Error! Root node " << j << " does not exist." << endl;  exit(1); }

    // Mine only roots that are frequent enough and have frequent edge labels.
    if ( fm::last_database->nodelabels[j].frequency >= fm::last_minfreq && fm::last_database->nodelabels[j].frequentedgelabels.size () ) {
        LastPath path(j);
        path.expand(); // mining step
    }

    // After the last root node, close the GraphML document.
    if (j==GetNoRootNodes()-1 && fm::last_do_output) {
      if (!fm::last_console_out) (*fm::last_result) << "</graphml>\n";
      else cout << "</graphml>" << endl;
    }

    // NOTE(review): this closes stderr on every call while FMINER_SILENT is
    // set, so a later call writes to (and closes again) an already closed
    // stream. Kept as-is; confirm intent before changing.
    if (getenv("FMINER_SILENT")) {
      fclose (stderr);
    }
    return fm::last_result;
}
void ReadGsp ( FILE *  gsp)

Read in a gSpan file.

                           {
    // Forwards the open stream to the shared database's gSpan reader;
    // nothing is checked or closed here.
    fm::last_database->readGsp(gsp);
}
void Reset ( )

Use this to clear the database before feeding new compounds and activities.

References fm::last_chisq, fm::last_db_built, and fm::last_ks.

Referenced by Last().

                 { 
    // Release the shared objects of a previous run. Skipped during
    // construction, when no instance is registered yet.
    if (fm::last_instance_present) {
        delete fm::last_database;
        delete fm::last_statistics;
        delete fm::last_chisq;
        delete fm::last_ks;
        delete fm::last_graphstate;
        delete fm::last_closelegoccurrences;
        delete fm::last_legoccurrences;
    }

    // Allocate fresh shared state.
    fm::last_database = new LastDatabase();
    fm::last_db_built = false;
    fm::last_statistics = new LastStatistics();
    // -1.0 marks "no user-supplied threshold"; MineRoot() compares against it.
    fm::last_chisq = new ChisqLastConstraint(-1.0);
    fm::last_ks = new KSLastConstraint(0.95);
    fm::last_graphstate = new LastGraphState();
    fm::last_closelegoccurrences = new CloseLastLegOccurrences();
    fm::last_legoccurrences = new LastLegOccurrences();

    // Clear all three candidate containers, the same set the destructor
    // clears (previously one container was cleared twice and
    // last_Lastcandidatelegsoccurrences not at all).
    fm::last_Lastcandidatelegsoccurrences.clear();
    fm::last_candidatecloselegsoccs.clear();
    fm::last_candidateLastcloselegsoccsused.clear();

    fm::last_chisq->active=true; 
    // Results are collected in the member vector 'r'.
    fm::last_result = &r;
    fm::last_gsw_counter=0;

    // clearing privates
    init_mining_done = false;
    comp_runner=0; 
    comp_no=0; 
    r.clear();
    inchi_compound_map.clear();
    inchi_compound_mmap.clear();
    activity_map.clear();
    
    // With FMINER_SILENT set, redirect stderr to a debug file. freopen()
    // closes the stream itself, so there is no separate fclose(stderr) first:
    // using a stream after fclose() is undefined behaviour.
    if (getenv("FMINER_SILENT")) {
        FILE* fp = freopen ("fminer_debug.txt","w",stderr);
     }
}
void SetAromatic ( bool  val)

Pass 'false' here to disable aromatic ring perception and use Kekule notation instead. IMPORTANT! SET THIS BEFORE CALLING AddCompound()! Same as '-a'.

References fm::last_aromatic.

                               {
    // Stores the flag in the shared setting; no validation, no return value.
    fm::last_aromatic = val;
}
bool SetBackbone ( bool  val)

Pass 'false' here to switch off mining for BBRC representatives. Same as '-b'.

                               {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetBbrcSep ( bool  val)

Set this to 'true' to enable BBRC separators in output.

                              {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetChisqActive ( bool  val)

Set this to 'true' to enable chi-square filter.

                                  {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetChisqSig ( float  _chisq_val)

Set significance threshold here (between 0 and 1). Same as '-p'.

                                       {
    // No-op stub: '_chisq_val' is ignored, nothing is changed, false is
    // reported.
    return false;
}
bool SetConsoleOut ( bool  val)

Pass 'true' here to disable usage of result vector and directly print each fragment to the console (saves memory).

                                 {
    // console out not switched by fminer
    // Stores the flag unconditionally and reports success.
    fm::last_console_out = val;
    return true;
}
void SetDoOutput ( bool  val)

Pass 'false' here to disable output. Same as '-o'.

                               {
    // Stores the flag in the shared setting; no validation, no return value.
    fm::last_do_output = val;
}
bool SetDynamicUpperBound ( bool  val)

Pass 'false' here to disable dynamic upper bound pruning (e.g. for performance measures). Same as '-d'.

                                        {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetMaxHops ( int  val)

Set maximum number of hops. Same as '-m'.

References fm::last_max_hops.

                             {
    // Stores the value without range checking and reports success.
    fm::last_max_hops = val;
    return true;
}
void SetMinfreq ( int  val)

Set minimum frequency (>=1 here). Same as '-f'.

References GetRefineSingles().

                             {
    // Values below 1 are fatal.
    if (val < 1) {
        cerr << "Error! Invalid value '" << val << "' for parameter minfreq." << endl;
        exit(1);
    }

    // NOTE(review): the warning says the value "could not be set", yet it is
    // stored below regardless. Confirm which of the two is intended.
    if (val > 1) {
        if (GetRefineSingles()) {
            cerr << "Warning! Minimum frequency of '" << val << "' could not be set due to activated single refinement." << endl;
        }
    }

    fm::last_minfreq = val;
}
bool SetPruning ( bool  val)

Pass 'false' here to disable statistical metrical pruning completely. Same as '-u'.

                              {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetRefineSingles ( bool  val)

Pass 'true' here to enable refinement of fragments with frequency 1. Same as '-s'.

                                    {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}
bool SetRegression ( bool  val)

Dummy method for regression (only used for bbrcs). Same as '-g'.

References fm::last_regression.

                                 {
    // return 0;
    // TODO: enable regression
    // Stores the flag unconditionally and reports success.
    fm::last_regression = val;
    return true;
}
bool SetType ( int  val)

Set type 1 (paths) or 2 (trees) here. Same as '-l'.

                          {
    // No-op stub: 'val' is ignored, nothing is changed, false is reported.
    return false;
}

The documentation for this class was generated from the following files: