不同测序平台产生的数据所采用的质量值编码格式一般会有差异。关于FastQ格式的介绍,可以阅读《FastQ格式介绍》。
譬如Sanger的编码采用的是质量值+33,而Solexa(以及早期的Illumina)采用的是质量值+64。有时候由于特殊情况需要转换一下编码格式,用Perl写有点麻烦,而且运行速度也不快。下面给大家推荐一款我自己用C++写的程序。
在该程序的质量值转换循环中(原始排版的第58行)有这样一条语句:readqual[i]=readqual[i] +31; 由于这个程序是把phred33编码格式转换成Solexa64的,新的编码格式里每个质量字符的编码值都需要增加31(即64-33)。如果用于其他转码用途,大家根据需要修改这一行里的31就可以了。
// phred33to64: re-encode the quality line of every FASTQ record by adding a
// fixed shift to each quality character (Phred+33 -> +64 by default).
//
// Usage: phred33to64 FastaqFile_33 FastaqFile_64
//
// The input is consumed as consecutive 4-line records (name, sequence,
// linker, quality). Only the quality line is modified; the other three lines
// are copied through unchanged.
//
// Exit status: EXIT_SUCCESS when every record was converted and written,
// EXIT_FAILURE on a usage error, an unopenable file, a truncated record or a
// write error. Diagnostics go to stderr.
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

namespace {

// Amount added to every quality character: 64 - 33 = 31.
// This is the value the accompanying article refers to as the
// `readqual[i]=readqual[i] +31` line; change it to customize the conversion.
const int kQualityShift = 31;

// Prints the command-line synopsis to `os`.
void print_usage(std::ostream &os)
{
    os << "Usage:\n"
       << "\tInput format:\n"
       << "\t\tphred33to64 FastaqFile_33 FastaqFile_64 \n"
       << std::endl;
}

// Adds kQualityShift to every character of a quality line, in place.
//
// The loop is bounded by the quality line's own length rather than by the
// sequence length, so a quality line shorter than its sequence is never
// indexed out of bounds. A carriage return (left behind by CRLF line endings)
// is not a quality character and is passed through untouched.
//
// NOTE: characters above '_' (95) would shift past '~' (126), the last
// printable ASCII character; such input is not validated here.
void shift_quality(std::string &readqual)
{
    for (std::string::size_type i = 0; i < readqual.size(); ++i) {
        if (readqual[i] == '\r') {
            continue;
        }
        readqual[i] = static_cast<char>(readqual[i] + kQualityShift);
    }
}

} // namespace

int main(int argc, char *argv[])
{
    // Exactly two user arguments are expected; argv[0] is the program name.
    if (argc != 3) {
        std::cerr << "ERROR: illegal argument number: " << argc << std::endl;
        print_usage(std::cerr);
        return EXIT_FAILURE;
    }

    // Validate the input before touching the output path, so a bad input
    // path does not create or truncate the output file.
    std::ifstream Readsin(argv[1]);
    if (!Readsin) {
        std::cerr << "ERROR: illegal input file path: " << argv[1] << std::endl;
        print_usage(std::cerr);
        return EXIT_FAILURE;
    }

    std::ofstream Readsout(argv[2]);
    if (!Readsout) {
        std::cerr << "ERROR: illegal output file path: " << argv[2] << std::endl;
        print_usage(std::cerr);
        return EXIT_FAILURE;
    }

    std::string readname;   // line 1 of a record
    std::string readseq;    // line 2 of a record
    std::string readlinker; // line 3 of a record
    std::string readqual;   // line 4 of a record
    unsigned long record = 0;

    // Drive the loop with the read itself: testing the stream before reading
    // would process one extra, bogus record after the last real one.
    while (std::getline(Readsin, readname)) {
        // Tolerate blank lines between records or at the end of the file.
        if (readname.empty() || readname == "\r") {
            continue;
        }
        ++record;

        if (!std::getline(Readsin, readseq) ||
            !std::getline(Readsin, readlinker) ||
            !std::getline(Readsin, readqual)) {
            std::cerr << "ERROR: truncated FASTQ record " << record
                      << " in: " << argv[1] << std::endl;
            return EXIT_FAILURE;
        }

        shift_quality(readqual);

        // '\n' instead of std::endl: same bytes, without a flush per line.
        Readsout << readname << '\n'
                 << readseq << '\n'
                 << readlinker << '\n'
                 << readqual << '\n';
    }

    // close() flushes the remaining buffer; a failure here means the output
    // is incomplete (e.g. disk full).
    Readsout.close();
    if (Readsout.fail()) {
        std::cerr << "ERROR: failed while writing: " << argv[2] << std::endl;
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}