#include<iostream>
#include<cstring>
#include<vector>
#include<algorithm>
using namespace std;
// Forward declaration: complement() is defined later in this file and is
// needed here to build the reverse-complement strand.
int complement(std::string &seq);

// Splits a DNA sequence into consecutive three-base codons for one of six
// reading frames and appends them to ivec.
//
//   seq       DNA sequence. Taken by value, so the caller's string is never
//             modified even when the reverse complement is built.
//   ivec      Output vector. Codons are appended; existing elements are kept.
//   frame_no  1..3 : forward frame starting at base offset 0..2.
//             4..6 : frame on the reverse complement, starting at offset 0..2.
//             Any other value appends nothing.
//
// Returns the number of codons appended. Trailing bases that do not fill a
// complete codon are dropped.
int get_codon(std::string seq, std::vector<std::string> &ivec, int frame_no)
{
    if (frame_no < 1 || frame_no > 6)
    {
        return 0;
    }

    std::string::size_type offset = 0;
    if (frame_no <= 3)
    {
        offset = static_cast<std::string::size_type>(frame_no - 1);
    }
    else
    {
        // Reverse frames are read from the reverse complement of the input.
        complement(seq);
        std::reverse(seq.begin(), seq.end());
        // Frames 4..6 map to offsets 0..2. Starting the scan at index
        // frame_no (offsets 3..5) silently skipped the first codon of every
        // reverse frame.
        offset = static_cast<std::string::size_type>(frame_no - 4);
    }

    int added = 0;
    // "pos + 3 <= size()" cannot wrap around, unlike "size() - 1", which
    // underflows for an empty sequence and leads to out-of-bounds reads.
    for (std::string::size_type pos = offset; pos + 3 <= seq.size(); pos += 3)
    {
        ivec.push_back(seq.substr(pos, 3));
        ++added;
    }
    return added;
}
// Replaces every base of seq with its complement, in place:
// A <-> T and G <-> C. Lowercase input bases are accepted and the complement
// is always written in uppercase. Any other character is left untouched.
//
// Returns the number of characters that were complemented. (The function is
// declared int, so it must return a value on every path.)
int complement(std::string &seq)
{
    int replaced = 0;
    for (std::string::size_type i = 0; i < seq.size(); ++i)
    {
        switch (seq[i])
        {
        case 'A':
        case 'a':
            seq[i] = 'T';
            break;
        case 'T':
        case 't':
            seq[i] = 'A';
            break;
        case 'G':
        case 'g':
            seq[i] = 'C';
            break;
        case 'C':
        case 'c':
            seq[i] = 'G';
            break;
        default:
            // Not a recognised base (e.g. 'N'): keep it and do not count it.
            continue;
        }
        ++replaced;
    }
    return replaced;
}
// Extracts genes from the codons of a single reading frame.
//
//   ivec  Codons of the frame, in reading order (only read, never modified).
//   gvec  Output vector. For every gene found, the gene string is appended
//         followed by a separate "\n" element.
//
// A gene starts at the start codon "ATG" and ends at the first stop codon
// ("TAG", "TAA" or "TGA") that follows it; both codons are part of the gene.
// Codons met before any start codon are ignored, including stop codons.
// A gene that is still open when the input ends is discarded.
void codon_find(std::vector<std::string> &ivec, std::vector<std::string> &gvec)
{
    const std::string start = "ATG";
    const std::string stop1 = "TAG";
    const std::string stop2 = "TAA";
    const std::string stop3 = "TGA";

    bool in_gene = false; // true between a start codon and its stop codon
    std::string gene;

    for (std::vector<std::string>::size_type i = 0; i < ivec.size(); ++i)
    {
        const std::string &codon = ivec[i];

        if (codon == start)
        {
            // A start codon opens a gene. If one is already open, the bases
            // collected so far are dropped and the gene restarts here.
            // NOTE(review): this keeps only the shortest ATG..stop stretch;
            // confirm that restarting on an inner ATG is intended.
            gene = codon;
            in_gene = true;
        }
        else if (in_gene)
        {
            gene += codon;
            // The stop test is only reached inside an open gene, so a stray
            // stop codon can no longer be emitted as a gene on its own.
            if (codon == stop1 || codon == stop2 || codon == stop3)
            {
                gvec.push_back(gene);
                gvec.push_back("\n");
                gene.clear();
                in_gene = false;
            }
        }
    }
}
int main()
{
string seq= "GAAGTGTTTTATCTGACTTACACCCCTGAAGATGTTGAAGGGAATGTTCAGCTGGAAACTGGAGATAAAATAAACTTTGTAATTGATAACAATAAACATACTGGTGCTGTAAGTGCTCGTAATATTATGCTGTTGAAAAAGAAACAAGCTCGCTATCAGGGAGTAGTTTGTGCCATGAAAGAGGCATTTGGCTTTATTGAAAGAGGCGATATTGTAAAGGAGATATTCTTTCACTATAGTGAATTTAAAGGTGACTTAGAATCCTTACAGCCTGGAGATGACGTGGAATTCACAATCAAGGACCGAAATGGTAAAGAAGTTGCAACAGATGTCAGACTATTGCCTCAAGGAACAGTCATTTTTGAAGATATCAGCATTGAACATTTTGAAGGAACTGTAACCAAAGTTATCCCCAAAGTACCCAGTAAAAACCAGAATGACCCATTGCCAGGACGCATCAAAGTTGATTTTGTGATTCCTAAAGAACTTCCCTTTGGAGACAAAGATACAAAATCCAAGGTGACGCTGTTGGAAGGTGACCACGTTAGGTTTAATATTTCAACAGACCGTCGTGACAAATTAGAACGAGCAACCAACATAGAAGTTCTATCAAATACATTTCAGTTCACTAATGAAGCCAGAGAGATGGGTGTAATTGCTGCCATGAGAGATGGTTTTGGTTTCATCAAGTGTGTGGATCGTGATGCTCGTATGTTCTTCCACTTCAGTGAAATTCTGGATGGGAACCAGCTTCATATTGCAGATGAAGTAGAGTTTACTGTGGTTCCTGATATGCTCTCTGCCCAAAGAAATCATGCTATTAGGATTAAAAAACTTCCCAAGGGCACGGTTTCGTTCCACTCCCATTCAGATCATCGTTTTCTGGGCACTGTAGAAAAAGAGGCCACTTTTTCGAATCCTAAAACCACTAGCCCAAATAAAGGCAAAGAAAAGGAGGCTGAGGATGGCATTATTGCTTATGATGATTGTGGGGTGAAACTGACTATTGCTTTTCAAGCCAAGGATGTGGAAGGATCTACTTCTCCTCAAATAGGAGACAAGGTTGAATTTAGTATTAGTGACAAACAGAGGCCTGGACAGCAGATTGCAACTTGTGTGCGGCTCTTAGGTCGTAATTCAAACTCCAAGAGGCTCTTGGGTTATGTGGCAACTTTGAAGGATAATTTTGGATTTATTGAAACAGCCAATCATGATAAGGAAATCTTTTTCCATTACAGTGAGTTCTCTGGTGATGTTGATAGCCTGGAACTGGGGGACATGGTTGAGTACAGCTTGTCCAAAGGAAAAGGCAACAAAGTCAGTGCAGAAAAAGTGAACAAAACACACTCAGTGAATGGCATTACTGAGGAAGCTGATCCCACCATCTACTCTGGTAAAGTCATTCGCCCCTTGAGGAGTGTTGATCCAACACAGAATGAGTACCAAGGAATGATTGAGATCGTGGACGAAGGGGATATGAAAGGTGAGGTCTATCCATTTGGCATAGTTGGGATGGCCAACAAAGGGGATTGCCTACAGAAAGGGGAGAGTGTCAAGTTCCAGTTGTGTGTCCTGGGCCAAAATGCACAGACTATGGCCTACAACATCACACCCCTGCGTAGGGCTACAGTGGAGTGTGTGAAAGATCAGTTTGGCTTCATTAACTATGAAGTAGGAGATAGCAAGAAGCTCTTTTTCCACGTGAAAGAAGTTCAGGATGGCATTGAGCTACAGGCAGGAGATGAGGTGGAATTCTCAGTGATTCTTAATCAGCGCACTGGCAAGTGCAGTGCTTGTAATGTTTGGCGAGTCTGCGAGGGCCCCAAGGCTGTTGCAGCTCCACGACCTGATAGGTTGGTCAATCGCTTGAAGAATATCACCCTGGATGATGCCAGTGCTCCTCGCCTAATGGTTCTTCGTCAGCCAAGGGGACCAGATAACTCAATGGGATTTGGTGCAGAAAGAAAGATCCGTCAAGCTGGTGTCA
TTGACTAACCACATCCACAAAGCACATCATTAATCCACTATGATCAAGTTGGGGGGATTCTGGTGAAGGGTTCTGAATATCTCTCTCTTCATCCCTCCCAAAATCTGGAATACTTATTCTATTGAGCTATTACACCAGTTTTAACACCTTCC";
vector<string>ivec;
vector<string>gvec;
//calling the function for ORF finder :P
for(int j=1; j<=6;j++)
{
cout<<"ORF"<<j<<":-"<<endl;
get_codon(seq,ivec,j);
cout<<endl;
for(int i = 0;i<ivec.size();i++)
{
cout<<ivec[i]<<" ";
}
cout<<endl;
//calling the codon find function :P
codon_find(ivec,gvec);
vector<string>::iterator iter;
cout << "Genes :- "<< endl;
for(iter= gvec.begin();iter<gvec.end();iter++)
{
cout<<*iter;
}
cout<<endl;
ivec.clear();
gvec.clear();
}
return (0);
}
#include<cstring>
#include<vector>
#include<algorithm>
using namespace std;
// NOTE(review): an identical definition of get_codon() appears earlier in this
// file. Two definitions cannot coexist in one translation unit, so one copy
// should be removed.

// Forward declaration: complement() is defined later in this file and is
// needed here to build the reverse-complement strand.
int complement(std::string &seq);

// Splits a DNA sequence into consecutive three-base codons for one of six
// reading frames and appends them to ivec.
//
//   seq       DNA sequence. Taken by value, so the caller's string is never
//             modified even when the reverse complement is built.
//   ivec      Output vector. Codons are appended; existing elements are kept.
//   frame_no  1..3 : forward frame starting at base offset 0..2.
//             4..6 : frame on the reverse complement, starting at offset 0..2.
//             Any other value appends nothing.
//
// Returns the number of codons appended. Trailing bases that do not fill a
// complete codon are dropped.
int get_codon(std::string seq, std::vector<std::string> &ivec, int frame_no)
{
    if (frame_no < 1 || frame_no > 6)
    {
        return 0;
    }

    std::string::size_type offset = 0;
    if (frame_no <= 3)
    {
        offset = static_cast<std::string::size_type>(frame_no - 1);
    }
    else
    {
        // Reverse frames are read from the reverse complement of the input.
        complement(seq);
        std::reverse(seq.begin(), seq.end());
        // Frames 4..6 map to offsets 0..2. Starting the scan at index
        // frame_no (offsets 3..5) silently skipped the first codon of every
        // reverse frame.
        offset = static_cast<std::string::size_type>(frame_no - 4);
    }

    int added = 0;
    // "pos + 3 <= size()" cannot wrap around, unlike "size() - 1", which
    // underflows for an empty sequence and leads to out-of-bounds reads.
    for (std::string::size_type pos = offset; pos + 3 <= seq.size(); pos += 3)
    {
        ivec.push_back(seq.substr(pos, 3));
        ++added;
    }
    return added;
}
// NOTE(review): an identical definition of complement() appears earlier in
// this file. Two definitions cannot coexist in one translation unit, so one
// copy should be removed.
//
// Replaces every base of seq with its complement, in place:
// A <-> T and G <-> C. Lowercase input bases are accepted and the complement
// is always written in uppercase. Any other character is left untouched.
//
// Returns the number of characters that were complemented. (The function is
// declared int, so it must return a value on every path.)
int complement(std::string &seq)
{
    int replaced = 0;
    for (std::string::size_type i = 0; i < seq.size(); ++i)
    {
        switch (seq[i])
        {
        case 'A':
        case 'a':
            seq[i] = 'T';
            break;
        case 'T':
        case 't':
            seq[i] = 'A';
            break;
        case 'G':
        case 'g':
            seq[i] = 'C';
            break;
        case 'C':
        case 'c':
            seq[i] = 'G';
            break;
        default:
            // Not a recognised base (e.g. 'N'): keep it and do not count it.
            continue;
        }
        ++replaced;
    }
    return replaced;
}
// NOTE(review): an identical definition of codon_find() appears earlier in
// this file. Two definitions cannot coexist in one translation unit, so one
// copy should be removed.
//
// Extracts genes from the codons of a single reading frame.
//
//   ivec  Codons of the frame, in reading order (only read, never modified).
//   gvec  Output vector. For every gene found, the gene string is appended
//         followed by a separate "\n" element.
//
// A gene starts at the start codon "ATG" and ends at the first stop codon
// ("TAG", "TAA" or "TGA") that follows it; both codons are part of the gene.
// Codons met before any start codon are ignored, including stop codons.
// A gene that is still open when the input ends is discarded.
void codon_find(std::vector<std::string> &ivec, std::vector<std::string> &gvec)
{
    const std::string start = "ATG";
    const std::string stop1 = "TAG";
    const std::string stop2 = "TAA";
    const std::string stop3 = "TGA";

    bool in_gene = false; // true between a start codon and its stop codon
    std::string gene;

    for (std::vector<std::string>::size_type i = 0; i < ivec.size(); ++i)
    {
        const std::string &codon = ivec[i];

        if (codon == start)
        {
            // A start codon opens a gene. If one is already open, the bases
            // collected so far are dropped and the gene restarts here.
            // NOTE(review): this keeps only the shortest ATG..stop stretch;
            // confirm that restarting on an inner ATG is intended.
            gene = codon;
            in_gene = true;
        }
        else if (in_gene)
        {
            gene += codon;
            // The stop test is only reached inside an open gene, so a stray
            // stop codon can no longer be emitted as a gene on its own.
            if (codon == stop1 || codon == stop2 || codon == stop3)
            {
                gvec.push_back(gene);
                gvec.push_back("\n");
                gene.clear();
                in_gene = false;
            }
        }
    }
}
int main()
{
string seq= "GAAGTGTTTTATCTGACTTACACCCCTGAAGATGTTGAAGGGAATGTTCAGCTGGAAACTGGAGATAAAATAAACTTTGTAATTGATAACAATAAACATACTGGTGCTGTAAGTGCTCGTAATATTATGCTGTTGAAAAAGAAACAAGCTCGCTATCAGGGAGTAGTTTGTGCCATGAAAGAGGCATTTGGCTTTATTGAAAGAGGCGATATTGTAAAGGAGATATTCTTTCACTATAGTGAATTTAAAGGTGACTTAGAATCCTTACAGCCTGGAGATGACGTGGAATTCACAATCAAGGACCGAAATGGTAAAGAAGTTGCAACAGATGTCAGACTATTGCCTCAAGGAACAGTCATTTTTGAAGATATCAGCATTGAACATTTTGAAGGAACTGTAACCAAAGTTATCCCCAAAGTACCCAGTAAAAACCAGAATGACCCATTGCCAGGACGCATCAAAGTTGATTTTGTGATTCCTAAAGAACTTCCCTTTGGAGACAAAGATACAAAATCCAAGGTGACGCTGTTGGAAGGTGACCACGTTAGGTTTAATATTTCAACAGACCGTCGTGACAAATTAGAACGAGCAACCAACATAGAAGTTCTATCAAATACATTTCAGTTCACTAATGAAGCCAGAGAGATGGGTGTAATTGCTGCCATGAGAGATGGTTTTGGTTTCATCAAGTGTGTGGATCGTGATGCTCGTATGTTCTTCCACTTCAGTGAAATTCTGGATGGGAACCAGCTTCATATTGCAGATGAAGTAGAGTTTACTGTGGTTCCTGATATGCTCTCTGCCCAAAGAAATCATGCTATTAGGATTAAAAAACTTCCCAAGGGCACGGTTTCGTTCCACTCCCATTCAGATCATCGTTTTCTGGGCACTGTAGAAAAAGAGGCCACTTTTTCGAATCCTAAAACCACTAGCCCAAATAAAGGCAAAGAAAAGGAGGCTGAGGATGGCATTATTGCTTATGATGATTGTGGGGTGAAACTGACTATTGCTTTTCAAGCCAAGGATGTGGAAGGATCTACTTCTCCTCAAATAGGAGACAAGGTTGAATTTAGTATTAGTGACAAACAGAGGCCTGGACAGCAGATTGCAACTTGTGTGCGGCTCTTAGGTCGTAATTCAAACTCCAAGAGGCTCTTGGGTTATGTGGCAACTTTGAAGGATAATTTTGGATTTATTGAAACAGCCAATCATGATAAGGAAATCTTTTTCCATTACAGTGAGTTCTCTGGTGATGTTGATAGCCTGGAACTGGGGGACATGGTTGAGTACAGCTTGTCCAAAGGAAAAGGCAACAAAGTCAGTGCAGAAAAAGTGAACAAAACACACTCAGTGAATGGCATTACTGAGGAAGCTGATCCCACCATCTACTCTGGTAAAGTCATTCGCCCCTTGAGGAGTGTTGATCCAACACAGAATGAGTACCAAGGAATGATTGAGATCGTGGACGAAGGGGATATGAAAGGTGAGGTCTATCCATTTGGCATAGTTGGGATGGCCAACAAAGGGGATTGCCTACAGAAAGGGGAGAGTGTCAAGTTCCAGTTGTGTGTCCTGGGCCAAAATGCACAGACTATGGCCTACAACATCACACCCCTGCGTAGGGCTACAGTGGAGTGTGTGAAAGATCAGTTTGGCTTCATTAACTATGAAGTAGGAGATAGCAAGAAGCTCTTTTTCCACGTGAAAGAAGTTCAGGATGGCATTGAGCTACAGGCAGGAGATGAGGTGGAATTCTCAGTGATTCTTAATCAGCGCACTGGCAAGTGCAGTGCTTGTAATGTTTGGCGAGTCTGCGAGGGCCCCAAGGCTGTTGCAGCTCCACGACCTGATAGGTTGGTCAATCGCTTGAAGAATATCACCCTGGATGATGCCAGTGCTCCTCGCCTAATGGTTCTTCGTCAGCCAAGGGGACCAGATAACTCAATGGGATTTGGTGCAGAAAGAAAGATCCGTCAAGCTGGTGTCA
TTGACTAACCACATCCACAAAGCACATCATTAATCCACTATGATCAAGTTGGGGGGATTCTGGTGAAGGGTTCTGAATATCTCTCTCTTCATCCCTCCCAAAATCTGGAATACTTATTCTATTGAGCTATTACACCAGTTTTAACACCTTCC";
vector<string>ivec;
vector<string>gvec;
//calling the function for ORF finder :P
for(int j=1; j<=6;j++)
{
cout<<"ORF"<<j<<":-"<<endl;
get_codon(seq,ivec,j);
cout<<endl;
for(int i = 0;i<ivec.size();i++)
{
cout<<ivec[i]<<" ";
}
cout<<endl;
//calling the codon find function :P
codon_find(ivec,gvec);
vector<string>::iterator iter;
cout << "Genes :- "<< endl;
for(iter= gvec.begin();iter<gvec.end();iter++)
{
cout<<*iter;
}
cout<<endl;
ivec.clear();
gvec.clear();
}
return (0);
}
// (Web-page residue from the blog this listing was copied from, not part of
// the program: "No comments: / Post a Comment")