|
liblast
|
#include <last.h>
Public Member Functions | |
Inits | |
Initializer functions. | |
| Last () | |
| Constructor for standard settings: 95% significance level, minimum frequency 2, type trees, dynamic upper bound, BBRC. | |
| ~Last () | |
| void | Reset () |
| Use this to clear the database before feeding new compounds and activities. | |
| void | Defaults () |
| Use this to set default parameters as in default constructor. | |
Getters | |
Getter functions. | |
| int | GetMinfreq () |
| Get minimum frequency. | |
| int | GetType () |
| Get type. | |
| bool | GetBackbone () |
| Get whether BBRC representatives should be mined. | |
| bool | GetDynamicUpperBound () |
| Get whether dynamic upper bound pruning is used. | |
| bool | GetPruning () |
| Get whether statistical metric pruning should be used. | |
| bool | GetConsoleOut () |
| Get whether output should be directed to the console. | |
| bool | GetAromatic () |
| Get whether aromatic rings should be perceived instead of Kekule notation. | |
| bool | GetRefineSingles () |
| Get whether fragments with frequency 1 should be refined. | |
| bool | GetDoOutput () |
| Get whether output is enabled. | |
| bool | GetBbrcSep () |
| Get whether BBRCs should be separated in the output. | |
| bool | GetChisqActive () |
| Get whether chi-square filter is active. | |
| float | GetChisqSig () |
| Get significance threshold. | |
| bool | GetRegression () |
| Dummy method for regression (only used for bbrcs). | |
| int | GetMaxHops () |
| Get maximum number of hops. | |
Setters | |
Setter functions. | |
| void | SetMinfreq (int val) |
| Set minimum frequency (>=1 here). Same as '-f'. | |
| bool | SetType (int val) |
| Set type 1 (paths) or 2 (trees) here. Same as '-l'. | |
| bool | SetBackbone (bool val) |
| Pass 'false' here to switch off mining for BBRC representatives. Same as '-b'. | |
| bool | SetDynamicUpperBound (bool val) |
| Pass 'false' here to disable dynamic upper bound pruning (e.g. for performance measures). Same as '-d'. | |
| bool | SetPruning (bool val) |
| Pass 'false' here to disable statistical metric pruning completely. Same as '-u'. | |
| bool | SetConsoleOut (bool val) |
| Pass 'true' here to disable usage of result vector and directly print each fragment to the console (saves memory). | |
| void | SetAromatic (bool val) |
| Pass 'true' here to enable perception of aromatic rings instead of using Kekule notation. IMPORTANT! SET THIS BEFORE CALLING AddCompound()! Same as '-a'. | |
| bool | SetRefineSingles (bool val) |
| Pass 'true' here to enable refinement of fragments with frequency 1. Same as '-s'. | |
| void | SetDoOutput (bool val) |
| Pass 'false' here to disable output. Same as '-o'. | |
| bool | SetBbrcSep (bool val) |
| Set this to 'true' to enable BBRC separators in output. | |
| bool | SetChisqActive (bool val) |
| Set this to 'true' to enable chi-square filter. | |
| bool | SetChisqSig (float _chisq_val) |
| Set significance threshold here (between 0 and 1). Same as '-p'. | |
| bool | SetRegression (bool val) |
| Dummy method for regression (only used for bbrcs). Same as '-g'. | |
| bool | SetMaxHops (int val) |
| Set maximum number of hops. Same as '-m'. | |
Others | |
Other functions. | |
| vector< string > * | MineRoot (unsigned int j) |
| Mine fragments rooted at the j-th root node (element type). | |
| void | ReadGsp (FILE *gsp) |
| Read in a gSpan file. | |
| bool | AddCompound (string smiles, unsigned int comp_id) |
| Add a compound to the database. | |
| bool | AddActivity (float act, unsigned int comp_id) |
| Add an activity to the database. | |
| int | GetNoRootNodes () |
| Get number of root nodes (different element types). | |
| int | GetNoCompounds () |
| Get number of compounds in the database. | |
| float | ChisqTest (vector< float > all, vector< float > feat) |
| float | KSTest (vector< float > all, vector< float > feat) |
| Calculate a KS p-value on the fly. 'all' holds the activities of the whole database, 'feat' the activities of the feature. The sign of the result is positive if the feature is activating and negative if it is deactivating. | |
| Last | ( | ) |
Constructor for standard settings: 95% significance level, minimum frequency 2, type trees, dynamic upper bound, BBRC.
References Defaults(), fm::last_gsp_out, and Reset().
: init_mining_done(false)
{
    // All mining state lives in fm:: globals, so a second instance is refused.
    if (fm::last_instance_present) {
        cerr << "Error! Cannot create more than 1 instance." << endl;
        exit(1);
    }

    // Start from null pointers; Reset() then allocates the fresh objects.
    fm::last_database = NULL;
    fm::last_statistics = NULL;
    fm::last_result = NULL;
    Reset();
    Defaults();
    fm::last_instance_present = true;

    // Applied after Defaults(), which switches this flag on.
    fm::last_gsp_out = false;

    if (getenv("FMINER_SILENT")) {
        // Redirect stderr into a debug file; the returned handle is not used any further.
        FILE* redirected = freopen ("fminer_debug.txt","w",stderr);
    }
}
| ~Last | ( | ) |
References fm::last_chisq, and fm::last_ks.
{
    // Nothing to tear down unless an instance is registered.
    if (!fm::last_instance_present) return;

    // Release the global mining objects that Reset() allocated with new.
    delete fm::last_database;
    delete fm::last_statistics;
    delete fm::last_chisq;
    delete fm::last_ks;
    delete fm::last_graphstate;
    delete fm::last_closelegoccurrences;
    delete fm::last_legoccurrences;

    // Empty the candidate occurrence containers.
    fm::last_Lastcandidatelegsoccurrences.clear();
    fm::last_candidatecloselegsoccs.clear();
    fm::last_candidateLastcloselegsoccsused.clear();

    // Unregister the instance.
    fm::last_instance_present = false;
}
| bool AddActivity | ( | float | act, |
| unsigned int | comp_id | ||
| ) |
Add an activity to the database.
References fm::last_db_built.
{
    // Activities are only accepted while the database has not been built yet.
    if (!fm::last_db_built) {
        // map::insert keeps an already existing entry for comp_id untouched.
        activity_map.insert(make_pair(comp_id, act));
        return true;
    }

    cerr << "LastDatabase has been already processed! Please reset() and insert a new dataset." << endl;
    return false;
}
| bool AddCompound | ( | string | smiles, |
| unsigned int | comp_id | ||
| ) |
Add a compound to the database.
References fm::last_db_built.
{
    // Compounds are only accepted while the database has not been built yet.
    if (fm::last_db_built) {
        cerr << "LastDatabase has been already processed! Please reset() and insert a new dataset." << endl;
        return false;
    }

    // Set up a SMILES -> InChI conversion that reads from the given string.
    stringstream smiles_stream(smiles);
    OBConversion conv(&smiles_stream, &cout);
    if (!conv.SetInAndOutFormats("SMI","INCHI")) {
        cerr << "Formats not available" << endl;
        return false;
    }

    OBMol mol;
    if (!conv.Read(&mol)) {
        cerr << "Could not convert '" << smiles << "' (leaving out)." << endl;
        return false;
    }

    conv.SetOptions("w",OBConversion::OUTOPTIONS);
    string inchi = conv.WriteString(&mol);

    // Cut off trailing newlines; a string made of newlines only is left as it is.
    const string::size_type last_kept = inchi.find_last_not_of("\n");
    if (last_kept != string::npos) {
        inchi.erase(last_kept + 1);
    }

    // Register the compound under its InChI; an already known structure only triggers a note.
    const pair<unsigned int, string> ori(comp_id, smiles);
    if (!inchi_compound_map.insert(make_pair(inchi, ori)).second) {
        cerr << "Note: structure of '" << smiles << "' has been already inserted, inserting anyway..." << endl;
    }

    // Register it a second time under "<inchi>-<running number>", which keeps
    // structurally identical compounds apart.
    comp_runner++;
    stringstream number;
    number << comp_runner;
    const string inchi_no = inchi + "-" + number.str();
    inchi_compound_mmap.insert(make_pair(inchi_no, ori));

    return true;
}
| float ChisqTest | ( | vector< float > | all, |
| vector< float > | feat | ||
| ) |
References fm::last_chisq.
{
    // Histograms: activity value -> number of occurrences,
    // once for the whole database and once for the feature.
    map<float, unsigned int> all_counts;
    map<float, unsigned int> feat_counts;

    each(all) {
        // operator[] creates a missing counter as 0, so the first hit yields 1.
        ++all_counts[all[i]];
    }
    each(feat) {
        ++feat_counts[feat[i]];
    }

    return fm::last_chisq->ChiSqTest(feat_counts, all_counts);
}
| void Defaults | ( | ) |
Use this to set default parameters as in default constructor.
References fm::last_aromatic, fm::last_bbrc_sep, fm::last_do_pruning, fm::last_gsp_out, fm::last_max_hops, and fm::last_regression.
Referenced by Last().
{
    // Mining parameters
    fm::last_minfreq = 2;
    fm::last_type = 2;
    fm::last_do_pruning = true;
    fm::last_aromatic = true;
    fm::last_refine_singles = false;
    fm::last_regression = false;

    // Output handling
    fm::last_console_out = true;
    fm::last_do_output = true;
    fm::last_bbrc_sep = false;
    fm::last_gsp_out = true;

    fm::last_updated = true;

    // LAST
    fm::last_do_last = true;
    fm::last_hops = 0;
    fm::last_die = 0;
    fm::last_max_hops = 25;
}
| bool GetAromatic | ( | ) |
Get whether aromatic rings should be perceived instead of Kekule notation.
References fm::last_aromatic.
Referenced by MineRoot().
{
    // Report the global aromatic flag.
    return fm::last_aromatic;
}
| bool GetBackbone | ( | ) |
Get whether BBRC representatives should be mined.
{
    // Fixed answer: this implementation always reports false.
    return false;
}
| bool GetBbrcSep | ( | ) |
Get whether BBRCs should be separated in the output.
References fm::last_bbrc_sep.
{
    // Report the global BBRC separator flag.
    return fm::last_bbrc_sep;
}
| bool GetChisqActive | ( | ) |
Get whether chi-square filter is active.
References fm::last_chisq.
Referenced by MineRoot().
{
    // The activation state is stored on the chi-square constraint object.
    return fm::last_chisq->active;
}
| float GetChisqSig | ( | ) |
Get significance threshold.
References fm::last_chisq, fm::last_ks, and fm::last_regression.
Referenced by MineRoot().
{
    // In regression mode the threshold comes from the KS constraint,
    // otherwise from the chi-square constraint.
    if (fm::last_regression) {
        return fm::last_ks->sig;
    }
    return fm::last_chisq->sig;
}
| bool GetConsoleOut | ( | ) |
Get whether output should be directed to the console.
{
    // Report the global console output flag.
    return fm::last_console_out;
}
| bool GetDoOutput | ( | ) |
| bool GetDynamicUpperBound | ( | ) |
Get whether dynamic upper bound pruning is used.
{
    // Fixed answer: this implementation always reports false.
    return false;
}
| int GetMaxHops | ( | ) |
Get maximum number of hops.
References fm::last_max_hops.
Referenced by MineRoot().
{
    // Report the global hop limit.
    return fm::last_max_hops;
}
| int GetMinfreq | ( | ) |
| int GetNoCompounds | ( | ) | [inline] |
Get number of compounds in the database.
References fm::last_db_built.
| int GetNoRootNodes | ( | ) | [inline] |
Get number of root nodes (different element types).
References fm::last_db_built.
Referenced by MineRoot().
| bool GetPruning | ( | ) |
Get whether statistical metric pruning should be used.
References fm::last_do_pruning.
Referenced by MineRoot().
{
    // Report the global pruning flag.
    return fm::last_do_pruning;
}
| bool GetRefineSingles | ( | ) |
Get whether fragments with frequency 1 should be refined.
Referenced by SetMinfreq().
{
    // Report the global single-refinement flag.
    return fm::last_refine_singles;
}
| bool GetRegression | ( | ) |
Dummy method for regression (only used for bbrcs).
References fm::last_regression.
Referenced by MineRoot().
{
    // Report the global regression flag.
    return fm::last_regression;
}
| int GetType | ( | ) |
| float KSTest | ( | vector< float > | all, |
| vector< float > | feat | ||
| ) | [inline] |
Calculate a KS p-value on the fly- just use it. all (feat): all (feature) database activities. Returns (negative) positive sign, if (de)activating.
References fm::last_ks.
| vector< string > * MineRoot | ( | unsigned int | j | ) |
Mine fragments rooted at the j-th root node (element type).
References GetAromatic(), GetChisqActive(), GetChisqSig(), GetDoOutput(), GetMaxHops(), GetMinfreq(), GetNoRootNodes(), GetPruning(), GetRegression(), GetType(), fm::last_bbrc_sep, fm::last_chisq, fm::last_db_built, and fm::last_regression.
{
    // Mines all fragments rooted at root node j and returns the shared result vector.
    // The first call after Reset() additionally performs the one-time mining setup
    // and emits the settings summary and (if output is enabled) the GraphML header.
    fm::last_result->clear();

    if (!init_mining_done) {
        if (!fm::last_db_built) {
            AddDataCanonical();
        }

        // Adjust chisq bound (classification only)
        if (!fm::last_regression) {
            const size_t nr_classes = fm::last_chisq->nr_acts.size();
            // Only 2 to 5 activity classes are supported; report which side was violated.
            if (nr_classes < 2) {
                cerr << "Error! Too few classes: '" << nr_classes << "' (Min. 2)." << endl;
                exit(1);
            }
            if (nr_classes > 5) {
                cerr << "Error! Too many classes: '" << nr_classes << "' (Max. 5)." << endl;
                exit(1);
            }
            if (fm::last_chisq->sig == -1.0) { // do not override user-supplied threshold
                fm::last_chisq->sig = fm::last_chisq->df_thresholds[nr_classes-1];
            }
        }

        fm::last_database->edgecount ();
        fm::last_database->reorder ();
        LastinitLastLegStatics ();
        fm::last_graphstate->init ();
        if (fm::last_bbrc_sep && fm::last_do_output && !fm::last_console_out) (*fm::last_result) << fm::last_graphstate->sep();
        init_mining_done=true;

        // Both modes print the same summary; only the label of the significance line differs.
        const char* sig_label = fm::last_regression
            ? "KS active (p-value): "
            : "Chi-square active (chi-square-value): ";
        cerr << "Settings:" << endl
             << "---" << endl
             << "Type: " << GetType() << endl
             << "Minimum frequency: " << GetMinfreq() << endl
             << "Aromatic: " << GetAromatic() << endl
             << "Regression: " << GetRegression() << endl
             << sig_label << GetChisqActive() << " (" << GetChisqSig()<< ")" << endl
             << "Statistical metric pruning: " << GetPruning() << endl
             << "Do output: " << GetDoOutput() << endl
             << "Max Hops: " << GetMaxHops() << endl
             << "---" << endl;

        if (fm::last_do_output) {
            // The continuation lines must stay unindented: any leading whitespace
            // would become part of the string literal.
            string xml_header =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n\
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\
xsi:noNamespaceSchemaLocation=\"graphml.xsd\">\n\
\n\
<!-- LAtent STructure Mining (LAST) descriptors-->\n\
\n\
<key id=\"act\" for=\"graph\" attr.name=\"activating\" attr.type=\"boolean\" />\n\
<key id=\"hops\" for=\"graph\" attr.name=\"hops\" attr.type=\"int\" />\n\
<key id=\"lab_n\" for=\"node\" attr.name=\"node_labels\" attr.type=\"string\" />\n\
<key id=\"lab_e\" for=\"edge\" attr.name=\"edge_labels\" attr.type=\"string\" />\n\
<key id=\"weight\" for=\"edge\" attr.name=\"edge_weight\" attr.type=\"int\" />\n\
<key id=\"del\" for=\"edge\" attr.name=\"edge_deleted\" attr.type=\"boolean\" />\n\n";
            if (!fm::last_console_out) (*fm::last_result) << xml_header;
            else cout << xml_header;
        }
    }

    if (j >= fm::last_database->nodelabels.size()) { cerr << "Error! Root node " << j << " does not exist." << endl; exit(1); }

    // Only roots that are frequent enough and have frequent edges are expanded.
    if ( fm::last_database->nodelabels[j].frequency >= fm::last_minfreq && fm::last_database->nodelabels[j].frequentedgelabels.size () ) {
        LastPath path(j);
        path.expand(); // mining step
    }

    // The last root node closes the GraphML document.
    if (j==GetNoRootNodes()-1 && fm::last_do_output) {
        if (!fm::last_console_out) (*fm::last_result) << "</graphml>\n";
        else cout << "</graphml>" << endl;
    }

    if (getenv("FMINER_SILENT")) {
        // Flush instead of closing: later calls still write to cerr/stderr,
        // which must not be used once the stream has been closed.
        fflush (stderr);
    }
    return fm::last_result;
}
| void ReadGsp | ( | FILE * | gsp | ) |
Read in a gSpan file.
{
    // Parsing is delegated entirely to the global database object.
    fm::last_database->readGsp(gsp);
}
| void Reset | ( | ) |
Use this to clear the database before feeding new compounds and activities.
References fm::last_chisq, fm::last_db_built, and fm::last_ks.
Referenced by Last().
{
    // Free the objects of a previous run. When no instance is registered yet
    // (the constructor calls Reset() before setting the flag) nothing is deleted.
    if (fm::last_instance_present) {
        delete fm::last_database;
        delete fm::last_statistics;
        delete fm::last_chisq;
        delete fm::last_ks;
        delete fm::last_graphstate;
        delete fm::last_closelegoccurrences;
        delete fm::last_legoccurrences;
    }

    // Allocate fresh global mining objects.
    fm::last_database = new LastDatabase();
    fm::last_db_built = false;
    fm::last_statistics = new LastStatistics();
    fm::last_chisq = new ChisqLastConstraint(-1.0);
    fm::last_ks = new KSLastConstraint(0.95);
    fm::last_graphstate = new LastGraphState();
    fm::last_closelegoccurrences = new CloseLastLegOccurrences();
    fm::last_legoccurrences = new LastLegOccurrences();

    // Empty all three candidate containers, i.e. the same set the destructor clears.
    fm::last_Lastcandidatelegsoccurrences.clear();
    fm::last_candidatecloselegsoccs.clear();
    fm::last_candidateLastcloselegsoccsused.clear();

    fm::last_chisq->active=true;
    fm::last_result = &r;
    fm::last_gsw_counter=0;

    // clearing privates
    init_mining_done = false;
    comp_runner=0;
    comp_no=0;
    r.clear();
    inchi_compound_map.clear();
    inchi_compound_mmap.clear();
    activity_map.clear();

    if (getenv("FMINER_SILENT")) {
        // freopen() closes the stream it is given by itself; closing stderr
        // beforehand would hand it an already closed stream.
        FILE* fp = freopen ("fminer_debug.txt","w",stderr);
        (void) fp;
    }
}
| void SetAromatic | ( | bool | val | ) |
Pass 'true' here to enable perception of aromatic rings instead of using Kekule notation. IMPORTANT! SET THIS BEFORE CALLING AddCompound()! Same as '-a'.
References fm::last_aromatic.
{
    // Store the flag globally.
    fm::last_aromatic = val;
}
| bool SetBackbone | ( | bool | val | ) |
Pass 'false' here to switch off mining for BBRC representatives. Same as '-b'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetBbrcSep | ( | bool | val | ) |
Set this to 'true' to enable BBRC separators in output.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetChisqActive | ( | bool | val | ) |
Set this to 'true' to enable chi-square filter.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetChisqSig | ( | float | _chisq_val | ) |
Set significance threshold here (between 0 and 1). Same as '-p'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetConsoleOut | ( | bool | val | ) |
Pass 'true' here to disable usage of result vector and directly print each fragment to the console (saves memory).
{
    // console out not switched by fminer
    fm::last_console_out = val;
    return true;
}
| void SetDoOutput | ( | bool | val | ) |
Pass 'false' here to disable output. Same as '-o'.
{
    // Store the flag globally.
    fm::last_do_output = val;
}
| bool SetDynamicUpperBound | ( | bool | val | ) |
Pass 'false' here to disable dynamic upper bound pruning (e.g. for performance measures). Same as '-d'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetMaxHops | ( | int | val | ) |
Set maximum number of hops. Same as '-m'.
References fm::last_max_hops.
{
    // Stored as given; no range check is applied.
    fm::last_max_hops = val;
    return true;
}
| void SetMinfreq | ( | int | val | ) |
Set minimum frequency (>=1 here). Same as '-f'.
References GetRefineSingles().
{
    // Frequencies below 1 are rejected and terminate the program.
    if (val < 1) {
        cerr << "Error! Invalid value '" << val << "' for parameter minfreq." << endl;
        exit(1);
    }

    // NOTE(review): the warning states that the value could not be set, yet it
    // is stored below regardless -- confirm which of the two is intended.
    if (val > 1 && GetRefineSingles()) {
        cerr << "Warning! Minimum frequency of '" << val << "' could not be set due to activated single refinement." << endl;
    }

    fm::last_minfreq = val;
}
| bool SetPruning | ( | bool | val | ) |
Pass 'false' here to disable statistical metric pruning completely. Same as '-u'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetRefineSingles | ( | bool | val | ) |
Pass 'true' here to enable refinement of fragments with frequency 1. Same as '-s'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}
| bool SetRegression | ( | bool | val | ) |
Dummy method for regression (only used for bbrcs). Same as '-g'.
References fm::last_regression.
{
    // TODO: enable regression
    // (a plain "return 0;" was disabled here in favour of storing the flag)
    fm::last_regression = val;
    return true;
}
| bool SetType | ( | int | val | ) |
Set type 1 (paths) or 2 (trees) here. Same as '-l'.
{
    // The argument is ignored; false is returned unconditionally.
    return false;
}