RNAstructure Classes  Version 6.0
Public Member Functions | Protected Attributes | Private Types | Private Member Functions | Private Attributes | List of all members
Multilign_object Class Reference

Multilign_object Class. More...

#include <Multilign_object.h>

Inheritance diagram for Multilign_object:
Multifind_object

Public Member Functions

 Multilign_object ()
 Default constructor: More...
 
 Multilign_object (const vector< vector< string > > &inputlist, const bool isrna=true, ProgressHandler *progress=NULL)
 
 Multilign_object (const bool Multifind, const string &outputmultifind, const vector< string > &ctfiles, ProgressHandler *progress=NULL, const bool isrna=true)
 
 ~Multilign_object ()
 
int CountBP (const int i=0, const int j=0, const double percent=0.8) const
 count the number of basepairs with the lowest free energies below the percent of the minimal free energy. Dsv file is used for the counting. By default, the first dsv file in the progressive dynalign calculations is used, i.e. i = 0 and j = 0. More...
 
int ProgressiveMultilign (const short int numProcessors=1, const bool Dsv=1, const bool Ali=1, const short int maxtrace=750, const short int bpwin=2, const short int awin=1, const short int percent=20, const short int imaxseparation=-99, const float gap=0.4, const bool singleinsert=true, const short int singlefold_subopt_percent=30, const bool local=false)
 
int MultiTempMultilign ()
 
int WriteAlignment (const string allali="all.ali") const
 calculate and output multiple alignment More...
 
int GetErrorCode () const
 Return an error code, where a return of zero is no error. More...
 
string GetErrorMessage (const int error) const
 
const string GetErrorDetails ()
 
void ResetError ()
 Reset the underlying RNA objects internal error code, after an error is handled. More...
 
int SetMaxPairs (const int maxpairs=-1)
 
int GetMaxPairs () const
 get the value of MaxPairs More...
 
int AverageLength () const
 get the average length of the input sequences More...
 
int SetIterations (const int it=2)
 set the value of iterations More...
 
int GetIterations () const
 get the value of iterations More...
 
int SetMaxDsv (const float maxdsvchange=1)
 set the value of MaxDsv/maxdsvchange More...
 
float GetMaxDsv () const
 get the value of MaxDsv/maxdsvchange More...
 
int GetSequenceNumber () const
 get the sequence number More...
 
int SetIndexSeq (size_t indexSeq=1)
 set the Index Sequence for Multilign calculation. More...
 
int SetIndexSeq (const string seqname)
 an overloaded function accepting a string as its parameter. More...
 
string GetIndexSeq () const
 return the filename of the index seq. More...
 
void Randomize ()
 randomize the order of inputList. More...
 
int AddOneInput (const string seq, const string ct, const string constraint="", const string shape="")
 add one entry into inputList. More...
 
int RemoveOneInput (const string seq)
 remove one entry from inputList. More...
 
void SetSHAPESlope (const double slope=1.8)
 set the slope parameter for SHAPE More...
 
double GetSHAPESlope () const
 get the SHAPESlope More...
 
void SetSHAPEIntercept (const double intercept=-0.6)
 set the intercept parameter for SHAPE. More...
 
double GetSHAPEIntercept () const
 get the SHAPEIntercept. More...
 
void SetTemperature (const double temp=310.15)
 set the temperature to fold the sequences. More...
 
double GetTemperature () const
 get the temperature to fold the sequences More...
 
void SetNucType (const bool isrna=true)
 set the flag isRNA to be true or false. By default it is true. When it is true, RNA nearest neighbor parameters are used. More...
 
bool GetNucType () const
 get the type of nucleic acid More...
 
int CleanupIntermediateFiles () const
 delete intermediate pairwise dsv and aout files More...
 
void SetProgress (ProgressHandler *Progress=NULL)
 Provide a TProgressDialog for following calculation progress. More...
 
void StopProgress ()
 Provide a means to stop using a TProgressDialog by assigning NULL to progress pointer. More...
 
ProgressHandler * GetProgress () const
 get the progress More...
 
void GetInputFilenames ()
 For diagnostic purpose only. Output the input sequence, ct, constraints, and SHAPE filenames to stdout. More...
 
void GetPairs ()
 For diagnostic purpose only. Output the paired sequence filenames to stdout. More...
 
vector< float > get_energies ()
 
vector< float > get_dGIndex ()
 
vector< vector< string > > get_pair_alignments ()
 

Protected Attributes

int ErrorCode
 
vector< string > input_alignment
 
vector< string > input_sequences
 
vector< string > ct_files
 
string output_multifind
 
vector< float > energies
 
vector< float > dGIndex
 
vector< vector< string > > pair_alignments
 

Private Types

typedef vector< string >::size_type vs_index
 
typedef vector< vector< string > >::iterator vvs_it
 
typedef vector< vector< string > >::const_iterator vvs_cit
 

Private Member Functions

int PairSeq1 ()
 
int PairMultifindSeq1 ()
 
int PrepInput ()
 This function checks the legality of the input filenames and prepares the parameters for the multilign calculations. It should be called before multilign calculations at least once and whenever something related to seq/ct changes, e.g. SetIndexSeq, AddOneInput, RemoveOneInput, Randomize, etc. More...
 
int PrepMultifindInput ()
 
void ToHead (vvs_it first, vvs_it middle)
 move the element pointed by middle before the first-pointed element. More...
 
int NameDsvFiles ()
 name all the dsv files More...
 
int NameMultifindDsvFiles ()
 
int NameAliFiles ()
 name all the ali files More...
 
int NameMultifindAliFiles ()
 

Private Attributes

ProgressHandler * progress
 
vector< vector< string > > inputList
 
vector< pair< vs_index, vs_index > > seqPair
 
string ** dsvFiles
 
string ** aliFiles
 
int maxPairs
 The following are the parameters for Multilign calculations. More...
 
float maxDsv
 
int iterations
 
double SHAPESlope
 
double SHAPEIntercept
 
Dynalign_object * instance
 
Thermodynamics thermo
 

Detailed Description

Multilign_object Class.

The Multilign_object class provides an entry point for the Multilign algorithm.

Member Typedef Documentation

typedef vector<string>::size_type Multilign_object::vs_index
private
typedef vector<vector<string> >::const_iterator Multilign_object::vvs_cit
private
typedef vector<vector<string> >::iterator Multilign_object::vvs_it
private

Constructor & Destructor Documentation

Multilign_object::Multilign_object ( )

Default constructor:

Multilign_object::Multilign_object ( const vector< vector< string > > &  inputlist,
const bool  isrna = true,
ProgressHandler *  progress = NULL 
)

Constructor:

Parameters
inputlist is a vector of vectors of strings storing the names of the files. Currently, inputList is a matrix of 4 columns: col 1 is the input seq filename; col 2 is the output ct filename; col 3 is the input constraint filename; col 4 is the input SHAPE filename. Empty strings are not allowed for Col 1 and Col 2; if no SHAPE or folding constraints are given, Col 3 and Col 4 for the corresponding sequence are empty strings.
isrna is a bool indicating whether the sequences are RNA or DNA. The default of true indicates RNA.
progress is a TProgressDialog for reporting progress of the calculation to the user. The default value of NULL means that no communication is provided.
Multilign_object::Multilign_object ( const bool  Multifind,
const string &  outputmultifind,
const vector< string > &  ctfiles,
ProgressHandler *  progress = NULL,
const bool  isrna = true 
)
Multilign_object::~Multilign_object ( )

Member Function Documentation

int Multilign_object::AddOneInput ( const string  seq,
const string  ct,
const string  constraint = "",
const string  shape = "" 
)

add one entry into inputList.

Parameters
seq is a string value of the sequence filename to be appended
ct is a string value of the corresponding ct filename
constraint is a string value of the corresponding constraint filename. By default, it is empty, meaning no folding constraint exists
shape is a string value of the corresponding SHAPE filename. By default, it is empty, meaning no SHAPE exists.
Returns
an int value of ErrorCode
int Multilign_object::AverageLength ( ) const

get the average length of the input sequences

Returns
the average length of the input sequences.
int Multilign_object::CleanupIntermediateFiles ( ) const

delete intermediate pairwise dsv and aout files

Returns
an int value of error code.
int Multilign_object::CountBP ( const int  i = 0,
const int  j = 0,
const double  percent = 0.8 
) const

count the number of basepairs with the lowest free energies below the percent of the minimal free energy. Dsv file is used for the counting. By default, the first dsv file in the progressive dynalign calculations is used, i.e. i = 0 and j = 0.

Parameters
i is an int value indicating which one in the iteration.
j is an int value indicating which iteration.
percent is a threshold of double value in percentage.
Returns
an int of the number of basepairs counted.
vector<float> Multilign_object::get_dGIndex ( )
inline
vector<float> Multilign_object::get_energies ( )
inline
vector<vector<string> > Multilign_object::get_pair_alignments ( )
inline
int Multilign_object::GetErrorCode ( ) const
inline

Return an error code, where a return of zero is no error.

This function returns an error flag that is generated during construction by RNA(const char &filename, const int type, const bool IsRNA=true) or from CalculateFreeEnergy(). An error of zero is always no error. Other codes are errors and a c-string can be fetched for the error with GetErrorMessage().

Returns
An integer that provides the error code.
const string Multilign_object::GetErrorDetails ( )
inline
string Multilign_object::GetErrorMessage ( const int  error) const

Return error messages based on code from GetErrorCode and other error codes: 0 = no error; 1000 = Error associated with sequence 1 or with a procedure, function will get message from sequence 1 (the inherited RNA class); 2000 = Error associated with sequence 2, function will get message from sequence 2 (the RNA2 class); 3000 = Errors with each sequence, function will get messages from each.

Parameters
error is the integer error code provided by GetErrorCode().
Returns
A string that provides an error message or from other functions that return integer error codes.
string Multilign_object::GetIndexSeq ( ) const

return the filename of the index seq.

Returns
a string of index seq filename.
void Multilign_object::GetInputFilenames ( )

For diagnostic purpose only. Output the input sequence, ct, constraints, and SHAPE filenames to stdout.

The following functions are used for diagnostic purposes only. They are generally not needed, but are useful for debugging input.

int Multilign_object::GetIterations ( ) const

get the value of iterations

Returns
the value of iterations.
float Multilign_object::GetMaxDsv ( ) const

get the value of MaxDsv/maxdsvchange

Returns
the value of MaxDsv/maxdsvchange.
int Multilign_object::GetMaxPairs ( ) const

get the value of MaxPairs

Returns
the value of MaxPairs
bool Multilign_object::GetNucType ( ) const

get the type of nucleic acid

Returns
return true when it is of RNA prediction; otherwise, false.
void Multilign_object::GetPairs ( )

For diagnostic purpose only. Output the paired sequence filenames to stdout.

ProgressHandler * Multilign_object::GetProgress ( ) const

get the progress

Returns
the pointer to TProgressDialog
int Multilign_object::GetSequenceNumber ( ) const

get the sequence number

Returns
the number of input sequences
double Multilign_object::GetSHAPEIntercept ( ) const

get the SHAPEIntercept.

Returns
SHAPEIntercept of double value.
double Multilign_object::GetSHAPESlope ( ) const

get the SHAPESlope

Returns
a SHAPESlope of double value.
double Multilign_object::GetTemperature ( ) const

get the temperature to fold the sequences

Returns
a double value of the set temperature.
int Multilign_object::MultiTempMultilign ( )
int Multilign_object::NameAliFiles ( )
private

name all the ali files

Returns
an int value of errorcode
int Multilign_object::NameDsvFiles ( )
private

name all the dsv files

Returns
an int value of errorcode
int Multilign_object::NameMultifindAliFiles ( )
private
int Multilign_object::NameMultifindDsvFiles ( )
private
int Multilign_object::PairMultifindSeq1 ( )
private
int Multilign_object::PairSeq1 ( )
private

Private functions. Pair sequences for dynalign calculation.

Returns
the value of ErrorCode
int Multilign_object::PrepInput ( )
private

This function checks the legality of the input filenames and prepares the parameters for the multilign calculations. It should be called before multilign calculations at least once and whenever something related to seq/ct changes, e.g. SetIndexSeq, AddOneInput, RemoveOneInput, Randomize, etc.

Returns
the int value of ErrorCode.
int Multilign_object::PrepMultifindInput ( )
private
int Multilign_object::ProgressiveMultilign ( const short int  numProcessors = 1,
const bool  Dsv = 1,
const bool  Ali = 1,
const short int  maxtrace = 750,
const short int  bpwin = 2,
const short int  awin = 1,
const short int  percent = 20,
const short int  imaxseparation = -99,
const float  gap = 0.4,
const bool  singleinsert = true,
const short int  singlefold_subopt_percent = 30,
const bool  local = false 
)

The core function doing dynalign calculation and templating. In case of error, the function returns a non-zero value that can be parsed by GetErrorMessage() or GetErrorMessageString().

Parameters
Dsv is a boolean value indicating whether to output pairwise dsv files or not. It has to be set to true currently.
Ali is a boolean value indicating whether to output pairwise ali files or not. It has to be set to true currently.
maxtrace is the maximum number of common structures to be determined.
bpwin is the base pair window parameter, where 0 allows the structures to have similar pairs and larger windows make the structures more diverse.
awin is the alignment window parameter, where 0 allows the alignments to be similar and larger values make the alignments more diverse.
percent is the maximum percent difference in total folding free energy change above the lowest for suboptimal common structures.
imaxseparation is the maximum separation between aligned nucleotides. Values >= 0 are the traditional parameter, those below zero trigger the HMM alignment method, which is now preferred.
gap is the cost of adding gap nucleotides in the alignment in kcal/mol.
singleinsert is whether single basepair inserts are allowed in one sequence vs the other.
singlefold_subopt_percent is the maximum % difference of folding energy above the lowest free energy structure for pairs in single sequence folding that will be allowed in the dynalign calculation.
local is whether Dynalign is being run in local (true) or global mode (false).
numProcessors is the number of processors to use for the calculation. This requires a compilation for SMP.
Returns
an int that indicates an error code (0 = no error, non-zero = error occurred).
void Multilign_object::Randomize ( )

randomize the order of inputList.

int Multilign_object::RemoveOneInput ( const string  seq)

remove one entry from inputList.

Parameters
seq is a string value of the sequence filename whose entry in inputList will be removed
Returns
an int value of ErrorCode
void Multilign_object::ResetError ( )

Reset the underlying RNA objects internal error code, after an error is handled.

int Multilign_object::SetIndexSeq ( size_t  indexSeq = 1)

set the Index Sequence for Multilign calculation.

Parameters
indexSeq is a size_t value indicating which sequence is the index sequence; by default it is the 1st one.
Returns
an int value of ErrorCode
int Multilign_object::SetIndexSeq ( const string  seqname)

an overloaded function accepting a string as its parameter.

Parameters
seqname is the seq filename that will be set as the index sequence.
Returns
an int value of ErrorCode
int Multilign_object::SetIterations ( const int  it = 2)

set the value of iterations

Parameters
it is an int value assigned to iterations. By default it is set to 2.
Returns
an errorcode
int Multilign_object::SetMaxDsv ( const float  maxdsvchange = 1)

set the value of MaxDsv/maxdsvchange

Parameters
maxdsvchange is a float value assigned to MaxDsv. By default it is set to 1.
Returns
an errorcode
int Multilign_object::SetMaxPairs ( const int  maxpairs = -1)
Parameters
maxpairs is an int value defining how the MaxPairs will be set. By default it is set to be -1, meaning the average length of all the sequences.
Returns
an errorcode.
void Multilign_object::SetNucType ( const bool  isrna = true)

set the flag isRNA to be true or false. By default it is true. When it is true, RNA nearest neighbor parameters are used.

Parameters
isrna
void Multilign_object::SetProgress ( ProgressHandler *  Progress = NULL)

Provide a TProgressDialog for following calculation progress.

Parameters
Progress is a pointer to TProgressDialog
void Multilign_object::SetSHAPEIntercept ( const double  intercept = -0.6)

set the intercept parameter for SHAPE.

Parameters
intercept is a double value assigned to SHAPEIntercept. By default, it is set to -0.6.
void Multilign_object::SetSHAPESlope ( const double  slope = 1.8)

set the slope parameter for SHAPE

Parameters
slope is a double value assigned to SHAPESlope. By default, it is set to 1.8.
void Multilign_object::SetTemperature ( const double  temp = 310.15)

set the temperature to fold the sequences.

Parameters
temp is a double value of temperature; by default it is set to 310.15K
void Multilign_object::StopProgress ( )

Provide a means to stop using a TProgressDialog by assigning NULL to progress pointer.

void Multilign_object::ToHead ( vvs_it  first,
vvs_it  middle 
)
private

move the element pointed by middle before the first-pointed element.

Parameters
first is a vector<vector<string> >::iterator
middle is the same type.
int Multilign_object::WriteAlignment ( const string  allali = "all.ali") const

calculate and output multiple alignment

Parameters
allali is the output filename of the multiple alignment
Returns
an int of error code.

Member Data Documentation

string** Multilign_object::aliFiles
private
vector<string> Multilign_object::ct_files
protected
vector<float> Multilign_object::dGIndex
protected
string** Multilign_object::dsvFiles
private
vector<float> Multilign_object::energies
protected
int Multilign_object::ErrorCode
protected
vector<string> Multilign_object::input_alignment
protected
vector<string> Multilign_object::input_sequences
protected
vector<vector<string> > Multilign_object::inputList
private
Dynalign_object* Multilign_object::instance
private
int Multilign_object::iterations
private
float Multilign_object::maxDsv
private
int Multilign_object::maxPairs
private

The following are the parameters for Multilign calculations.

string Multilign_object::output_multifind
protected
vector<vector<string> > Multilign_object::pair_alignments
mutable protected
ProgressHandler* Multilign_object::progress
private
vector<pair<vs_index, vs_index> > Multilign_object::seqPair
private
double Multilign_object::SHAPEIntercept
private
double Multilign_object::SHAPESlope
private
Thermodynamics Multilign_object::thermo
private

The documentation for this class was generated from the following files: