#include <Multilign_object.h>
Public Member Functions | |
Multilign_object () | |
Default constructor. | |
Multilign_object (const vector< vector< string > > &inputlist, const bool isrna=true, TProgressDialog *progress=NULL) | |
Multilign_object (const bool Multifind, const string &outputmultifind, const vector< string > &ctfiles, TProgressDialog *progress=NULL, const bool isrna=true) | |
~Multilign_object () | |
int | CountBP (const int i=0, const int j=0, const double percent=0.8) const |
count the number of basepairs with the lowest free energies below the percent of the minimal free energy. Dsv file is used for the counting. By default, the first dsv file in the progressive dynalign calculations is used, i.e. i = 0 and j = 0. | |
int | ProgressiveMultilign (const short int numProcessors=1, const bool Dsv=1, const bool Ali=1, const short int maxtrace=750, const short int bpwin=2, const short int awin=1, const short int percent=20, const short int imaxseparation=-99, const float gap=0.4, const bool singleinsert=true, const short int singlefold_subopt_percent=30, const bool local=false) |
int | MultiTempMultilign () |
int | WriteAlignment (const string allali="all.ali") const |
calculate and output multiple alignment | |
int | GetErrorCode () const |
Return an error code, where a return of zero is no error. | |
string | GetErrorMessage (const int error) const |
void | ResetError () |
Reset the underlying RNA objects internal error code, after an error is handled. | |
int | SetMaxPairs (const int maxpairs=-1) |
int | GetMaxPairs () const |
get the value of MaxPairs | |
int | AverageLength () const |
get the average length of the input sequences | |
int | SetIterations (const int it=2) |
set the value of iterations | |
int | GetIterations () const |
get the value of iterations | |
int | SetMaxDsv (const float maxdsvchange=1) |
set the value of MaxDsv/maxdsvchange | |
float | GetMaxDsv () const |
get the value of MaxDsv/maxdsvchange | |
int | GetSequenceNumber () const |
get the sequence number | |
int | SetIndexSeq (size_t indexSeq=1) |
set the Index Sequence for Multilign calculation. | |
int | SetIndexSeq (const string seqname) |
an overloaded function accepting a string as its parameter. | |
string | GetIndexSeq () const |
return the filename of the index seq. | |
void | Randomize () |
randomize the order of inputList. | |
int | AddOneInput (const string seq, const string ct, const string constraint="", const string shape="") |
add one entry into inputList. | |
int | RemoveOneInput (const string seq) |
remove one entry from inputList. | |
void | SetSHAPESlope (const double slope=1.8) |
set the slope parameter for SHAPE | |
double | GetSHAPESlope () const |
get the SHAPESlope | |
void | SetSHAPEIntercept (const double intercept=-0.6) |
set the intercept parameter for SHAPE. | |
double | GetSHAPEIntercept () const |
get the SHAPEIntercept. | |
void | SetTemperature (const double temp=310.15) |
set the temperature to fold the sequences. | |
double | GetTemperature () const |
get the temperature to fold the sequences | |
void | SetNucType (const bool isrna=true) |
set the flag isRNA to be true or false. By default it is true. When it is true, RNA nearest neighbor parameters are used. | |
bool | GetNucType () const |
get the type of nucleic acid | |
int | CleanupIntermediateFiles () const |
delete intermediate pairwise dsv and aout files | |
void | SetProgress (TProgressDialog *Progress=NULL) |
Provide a TProgressDialog for following calculation progress. | |
void | StopProgress () |
Provide a means to stop using a TProgressDialog by assigning NULL to progress pointer. | |
TProgressDialog * | GetProgress () const |
get the progress | |
void | GetInputFilenames () |
For diagnostic purpose only. Output the input sequence, ct, constraints, and SHAPE filenames to stdout. | |
void | GetPairs () |
For diagnostic purpose only. Output the paired sequence filenames to stdout. | |
vector< float > | get_energies () |
vector< float > | get_dGIndex () |
vector< vector< string > > | get_pair_alignments () |
Protected Attributes | |
int | ErrorCode |
vector< string > | input_alignment |
vector< string > | input_sequences |
vector< string > | ct_files |
string | output_multifind |
vector< float > | energies |
vector< float > | dGIndex |
vector< vector< string > > | pair_alignments |
Private Types | |
typedef vector< string >::size_type | vs_index |
typedef vector< vector< string > >::iterator | vvs_it |
typedef vector< vector< string > >::const_iterator | vvs_cit |
Private Member Functions | |
int | PairSeq1 () |
int | PairMultifindSeq1 () |
int | PrepInput () |
This function checks the legality of the input filenames and prepares the parameters for the multilign calculations. It should be called before multilign calculations at least once and whenever something related to seq/ct changes, e.g. SetIndexSeq, AddOneInput, RemoveOneInput, Randomize, etc. | |
int | PrepMultifindInput () |
void | ToHead (vvs_it first, vvs_it middle) |
move the element pointed by middle before the first-pointed element. | |
int | NameDsvFiles () |
name all the dsv files | |
int | NameMultifindDsvFiles () |
int | NameAliFiles () |
name all the ali files | |
int | NameMultifindAliFiles () |
Private Attributes | |
TProgressDialog * | progress |
vector< vector< string > > | inputList |
vector< pair< vs_index, vs_index > > | seqPair |
string ** | dsvFiles |
string ** | aliFiles |
double | temperature |
The following are the parameters for Multilign calculations. | |
int | maxPairs |
float | maxDsv |
int | iterations |
bool | isRNA |
double | SHAPESlope |
double | SHAPEIntercept |
Dynalign_object * | instance |
The Multilign_object class provides an entry point for the Multilign algorithm.
typedef vector<string>::size_type Multilign_object::vs_index [private] |
typedef vector<vector<string> >::const_iterator Multilign_object::vvs_cit [private] |
typedef vector<vector<string> >::iterator Multilign_object::vvs_it [private] |
Multilign_object::Multilign_object | ( | ) |
Default constructor.
Multilign_object::Multilign_object | ( | const vector< vector< string > > & | inputlist, | |
const bool | isrna = true , |
|||
TProgressDialog * | progress = NULL | |||
) |
Constructor:
inputlist | is a vector of vectors of strings storing the filenames. Currently, inputList is a matrix of 4 columns: col 1 is the input seq filename; col 2 is the output ct filename; col 3 is the input constraint filename; col 4 is the input SHAPE filename. An empty string is not allowed for col 1 and col 2; if no SHAPE or folding constraints are given, col 3 and col 4 for the corresponding sequence are empty strings. | |
isrna | is a bool indicating the sequences are RNA or DNA. The default of true indicates RNA. | |
progress | is a TProgressDialog for reporting progress of the calculation to the user. The default value of NULL means that no communication is provided. |
Multilign_object::Multilign_object | ( | const bool | Multifind, | |
const string & | outputmultifind, | |||
const vector< string > & | ctfiles, | |||
TProgressDialog * | progress = NULL , |
|||
const bool | isrna = true | |||
) |
Multilign_object::~Multilign_object | ( | ) |
int Multilign_object::AddOneInput | ( | const string | seq, | |
const string | ct, | |||
const string | constraint = "" , |
|||
const string | shape = "" | |||
) |
add one entry into inputList.
seq | is a string value of sequence filename to be appended | |
ct | is a string value of corresponding ct filename | |
constraint | is a string value of corresponding constraint filename. By default, it is empty, meaning no folding constraint exists | |
shape | is string value of corresponding SHAPE filename. By default, it is empty, meaning no SHAPE exists. |
int Multilign_object::AverageLength | ( | ) | const |
get the average length of the input sequences
int Multilign_object::CleanupIntermediateFiles | ( | ) | const |
delete intermediate pairwise dsv and aout files
int Multilign_object::CountBP | ( | const int | i = 0 , |
|
const int | j = 0 , |
|||
const double | percent = 0.8 | |||
) | const |
count the number of basepairs with the lowest free energies below the percent of the minimal free energy. Dsv file is used for the counting. By default, the first dsv file in the progressive dynalign calculations is used, i.e. i = 0 and j = 0.
i | is an int value indicating which one in the iteration. | |
j | is an int value indicating which iteration. | |
percent | is threshold of double value in percentage. |
vector<float> Multilign_object::get_dGIndex | ( | ) | [inline] |
vector<float> Multilign_object::get_energies | ( | ) | [inline] |
vector<vector<string> > Multilign_object::get_pair_alignments | ( | ) | [inline] |
int Multilign_object::GetErrorCode | ( | ) | const [inline] |
Return an error code, where a return of zero is no error.
This function returns an error flag that is generated during construction by RNA(const char &filename, const int type, const bool IsRNA=true) or from CalculateFreeEnergy(). An error of zero is always no error. Other codes are errors and a c-string can be fetched for the error with GetErrorMessage().
string Multilign_object::GetErrorMessage | ( | const int | error | ) | const |
Return error messages based on code from GetErrorCode and other error codes. 0 = no error 1000 = Error associated with sequence 1 or with a procedure, function will get message from sequence 1 (the inherited RNA class). 2000 = Error associated with sequence 2, function will get message from sequence 2 (the RNA2 class). 3000 = Errors with each sequence, function will get messages from each.
error | is the integer error code provided by GetErrorCode(). |
string Multilign_object::GetIndexSeq | ( | ) | const |
return the filename of the index seq.
void Multilign_object::GetInputFilenames | ( | ) |
For diagnostic purpose only. Output the input sequence, ct, constraints, and SHAPE filenames to stdout.
The following functions are used for diagnostic purposes only. They are generally not needed, but are useful for debugging input.
int Multilign_object::GetIterations | ( | ) | const |
get the value of iterations
float Multilign_object::GetMaxDsv | ( | ) | const |
get the value of MaxDsv/maxdsvchange
int Multilign_object::GetMaxPairs | ( | ) | const |
get the value of MaxPairs
bool Multilign_object::GetNucType | ( | ) | const |
void Multilign_object::GetPairs | ( | ) |
For diagnostic purpose only. Output the paired sequence filenames to stdout.
TProgressDialog * Multilign_object::GetProgress | ( | ) | const |
get the progress
int Multilign_object::GetSequenceNumber | ( | ) | const |
get the sequence number
double Multilign_object::GetSHAPEIntercept | ( | ) | const |
get the SHAPEIntercept.
double Multilign_object::GetSHAPESlope | ( | ) | const |
get the SHAPESlope
double Multilign_object::GetTemperature | ( | ) | const |
get the temperature to fold the sequences
int Multilign_object::MultiTempMultilign | ( | ) |
int Multilign_object::NameAliFiles | ( | ) | [private] |
name all the ali files
int Multilign_object::NameDsvFiles | ( | ) | [private] |
name all the dsv files
int Multilign_object::NameMultifindAliFiles | ( | ) | [private] |
int Multilign_object::NameMultifindDsvFiles | ( | ) | [private] |
int Multilign_object::PairMultifindSeq1 | ( | ) | [private] |
int Multilign_object::PairSeq1 | ( | ) | [private] |
Private functions. Pair sequences for dynalign calculation.
int Multilign_object::PrepInput | ( | ) | [private] |
This function checks the legality of the input filenames and prepares the parameters for the multilign calculations. It should be called before multilign calculations at least once and whenever something related to seq/ct changes, e.g. SetIndexSeq, AddOneInput, RemoveOneInput, Randomize, etc.
int Multilign_object::PrepMultifindInput | ( | ) | [private] |
int Multilign_object::ProgressiveMultilign | ( | const short int | numProcessors = 1 , |
|
const bool | Dsv = 1 , |
|||
const bool | Ali = 1 , |
|||
const short int | maxtrace = 750 , |
|||
const short int | bpwin = 2 , |
|||
const short int | awin = 1 , |
|||
const short int | percent = 20 , |
|||
const short int | imaxseparation = -99 , |
|||
const float | gap = 0.4 , |
|||
const bool | singleinsert = true , |
|||
const short int | singlefold_subopt_percent = 30 , |
|||
const bool | local = false | |||
) |
The core function doing dynalign calculation and templating. In case of error, the function returns a non-zero that can be parsed by GetErrorMessage() or GetErrorMessageString().
Dsv | is a boolean value indicating to output pairwise dsv files or not. It has to be set to true currently. | |
Ali | is a boolean value indicating to output pairwise ali files or not. It has to be set to true currently. | |
maxtrace | is the maximum number of common structures to be determined. | |
bpwin | is the base pair window parameter, where 0 allows the structures to have similar pairs and larger windows make the structures more diverse. | |
awin | is the alignment window parameter, where 0 allows the alignments to be similar and larger values make the alignments more diverse. | |
percent | is the maximum percent difference in total folding free energy change above the lowest for suboptimal common structures. | |
imaxseparation | is the maximum separation between aligned nucleotides. Values >= 0 are the traditional parameter; those below zero trigger the HMM alignment method, which is now preferred. | |
gap | is the cost of adding gap nucleotides in the alignment in kcal/mol. | |
singleinsert | is whether single basepair inserts are allowed in one sequence vs the other. | |
singlefold_subopt_percent | is the maximum % difference of folding energy above the lowest free energy structure for pairs in single sequence folding that will be allowed in the dynalign calculation. | |
local | is whether Dynalign is being run in local (true) or global mode (false). | |
numProcessors | is the number of processors to use for the calculation. This requires a compilation for SMP. |
void Multilign_object::Randomize | ( | ) |
randomize the order of inputList.
int Multilign_object::RemoveOneInput | ( | const string | seq | ) |
remove one entry from inputList.
seq | is a string value of sequence filename of which the entry in inputList will be removed |
void Multilign_object::ResetError | ( | ) |
Reset the underlying RNA objects internal error code, after an error is handled.
int Multilign_object::SetIndexSeq | ( | const string | seqname | ) |
an overloaded function accepting a string as its parameter.
seqname | is the seq filename that will be set as the index sequence. |
int Multilign_object::SetIndexSeq | ( | size_t | indexSeq = 1 |
) |
set the Index Sequence for Multilign calculation.
indexSeq | is a size_t value indicating which sequence is the index sequence; by default it is the 1st one. |
int Multilign_object::SetIterations | ( | const int | it = 2 |
) |
set the value of iterations
it | is an int value assigned to iterations. By default it is set to 2. |
int Multilign_object::SetMaxDsv | ( | const float | maxdsvchange = 1 |
) |
set the value of MaxDsv/maxdsvchange
maxdsvchange | is a value of float assigned to MaxDsv. By default it is set to 1. |
int Multilign_object::SetMaxPairs | ( | const int | maxpairs = -1 |
) |
maxpairs | is int value defining how the MaxPairs will be set. By default it is set to be -1, meaning the average length of all the sequences. |
void Multilign_object::SetNucType | ( | const bool | isrna = true |
) |
set the flag isRNA to be true or false. By default it is true. When it is true, RNA nearest neighbor parameters are used.
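isrna | is a bool value assigned to isRNA; when true, RNA nearest neighbor parameters are used. By default it is true. |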
void Multilign_object::SetProgress | ( | TProgressDialog * | Progress = NULL |
) |
Provide a TProgressDialog for following calculation progress.
Progress | is a pointer to TProgressDialog |
void Multilign_object::SetSHAPEIntercept | ( | const double | intercept = -0.6 |
) |
set the intercept parameter for SHAPE.
intercept | is a double value assigned to SHAPEIntercept. By default, it is set to -0.6. |
void Multilign_object::SetSHAPESlope | ( | const double | slope = 1.8 |
) |
set the slope parameter for SHAPE
slope | is a double value assigned to SHAPESlope. By default, it is set to 1.8. |
void Multilign_object::SetTemperature | ( | const double | temp = 310.15 |
) |
set the temperature to fold the sequences.
temp | is a double value of temperature; by default it is set to 310.15K |
void Multilign_object::StopProgress | ( | ) |
Provide a means to stop using a TProgressDialog by assigning NULL to progress pointer.
move the element pointed by middle before the first-pointed element.
first | is a vector<vector<string> >::iterator | |
middle | is the same type. |
int Multilign_object::WriteAlignment | ( | const string | allali = "all.ali" |
) | const |
calculate and output multiple alignment
allali | is the output filename of multiple alignment |
string** Multilign_object::aliFiles [private] |
vector<string> Multilign_object::ct_files [protected] |
vector<float> Multilign_object::dGIndex [protected] |
string** Multilign_object::dsvFiles [private] |
vector<float> Multilign_object::energies [protected] |
int Multilign_object::ErrorCode [protected] |
vector<string> Multilign_object::input_alignment [protected] |
vector<string> Multilign_object::input_sequences [protected] |
vector<vector<string> > Multilign_object::inputList [private] |
Dynalign_object* Multilign_object::instance [private] |
bool Multilign_object::isRNA [private] |
int Multilign_object::iterations [private] |
float Multilign_object::maxDsv [private] |
int Multilign_object::maxPairs [private] |
string Multilign_object::output_multifind [protected] |
vector<vector<string> > Multilign_object::pair_alignments [mutable, protected] |
TProgressDialog* Multilign_object::progress [private] |
vector<pair<vs_index, vs_index> > Multilign_object::seqPair [private] |
double Multilign_object::SHAPEIntercept [private] |
double Multilign_object::SHAPESlope [private] |
double Multilign_object::temperature [private] |
The following are the parameters for Multilign calculations.