不同的平台测出来的数据采用的质量编码格式一般会有差异。关于FastQ的格式介绍可以阅读《FastQ格式介绍》。
譬如Sanger的编码采用的是质量值+33,而Solexa采用的是质量值+64。有时候由于特殊情况需要转换一下编码格式,用perl写有点麻烦,而且运行速度也不咋地。下面给大家推荐一款我自己用c++写的程序。
在该程序中,负责转码的是 readqual[i]=readqual[i] +31; 这一行(即带有“修改31”注释的那一行)。由于我们这个程序是把phred33编码格式转换成Solexa64的,新的编码格式里面每个质量字符都需要增加31(即64-33)。如果用于其他转码用途,大家自己根据需要改一下这一行就可以了。
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
namespace {

// Offset added to every quality character: Phred+64 minus Phred+33.
// Change this single value to repurpose the tool for another conversion.
const int kQualityShift = 31;

// Writes the command-line usage text to `out`.
void printUsage(std::ostream& out)
{
    out << "Usage:\n"
        << "\tInput format:\n"
        << "\t\tphred33to64 FastaqFile_33 FastaqFile_64 \n" << std::endl;
}

// Copies four-line FASTQ records from `in` to `out`, adding `shift` to every
// character of each quality line. Header, sequence and separator lines are
// written back unchanged.
//
// Returns true when the whole input was consumed as complete records, and
// false when the input ends in the middle of a record.
bool convertRecords(std::istream& in, std::ostream& out, int shift)
{
    std::string name;
    std::string seq;
    std::string linker;
    std::string qual;

    // The read itself is the loop condition, so nothing is emitted once the
    // input is exhausted.
    while (std::getline(in, name)) {
        if (name.empty()) {
            continue; // tolerate blank lines, e.g. a trailing one at end of file
        }
        if (!std::getline(in, seq) || !std::getline(in, linker) ||
            !std::getline(in, qual)) {
            return false; // fewer than four lines left: truncated record
        }

        // Iterate over the quality line's own length so a quality line that is
        // shorter than the sequence line can never be indexed out of bounds.
        for (std::string::size_type i = 0; i < qual.size(); ++i) {
            qual[i] = static_cast<char>(qual[i] + shift);
        }

        // '\n' instead of std::endl avoids flushing the file after every line.
        out << name << '\n' << seq << '\n' << linker << '\n' << qual << '\n';
    }
    return true;
}

} // namespace

// Converts the quality lines of a FASTQ file by adding kQualityShift to each
// quality character (Phred+33 -> Phred+64 with the default shift of 31).
//
// Usage: phred33to64 <input FASTQ> <output FASTQ>
//
// Returns EXIT_SUCCESS when every record was converted and written, and
// EXIT_FAILURE on a wrong argument count, an unopenable input or output file,
// a truncated final record, or a write error. Diagnostics go to stderr.
int main(int argc,char *argv[])
{
    // argv[0] is the program name, so two user arguments give argc == 3.
    if (argc != 3) {
        std::cerr << "ERROR: illegal argument number: " << argc << std::endl;
        printUsage(std::cerr);
        return EXIT_FAILURE;
    }

    std::ifstream Readsin(argv[1]);
    if (!Readsin.good()) {
        std::cerr << "ERROR: illegal input file path: " << argv[1] << std::endl;
        printUsage(std::cerr);
        return EXIT_FAILURE;
    }

    // The output is opened only after the input is known to be readable, so a
    // bad input path does not create or truncate the output file.
    std::ofstream Readsout(argv[2]);
    if (!Readsout.good()) {
        std::cerr << "ERROR: illegal output file path: " << argv[2] << std::endl;
        printUsage(std::cerr);
        return EXIT_FAILURE;
    }

    std::cout << "Filtering Reads ...." << std::endl;

    const bool complete = convertRecords(Readsin, Readsout, kQualityShift);

    // Close explicitly so buffered data is flushed before the stream state is
    // inspected for write errors.
    Readsout.close();
    Readsin.close();

    if (!complete) {
        std::cerr << "ERROR: truncated FASTQ record at end of input: "
                  << argv[1] << std::endl;
        return EXIT_FAILURE;
    }
    if (Readsout.fail()) {
        std::cerr << "ERROR: failed writing output file: " << argv[2] << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}