相关的分析软件:
- methylKit: https://code.google.com/p/methylkit/
- BSMAP:https://code.google.com/p/bsmap/
- MOABS: https://code.google.com/p/moabs/
- Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
- BiSeq: http://www.bioconductor.org/packages/devel/bioc/html/BiSeq.html
- MethPipe: http://smithlabresearch.org/software/methpipe/
首先是对数据进行比对,推荐使用bismark,因为比较经典目前很多关于后面的数据处理所开发的R包都可以读入该软件的输出。
一:运行bismark阶段
bismark内置了两种比对工具,bowtie(默认),以及bowtie2.至于选择那个基本上都可以吧。
当然在运行bismark的时候你不得不看看它软件的说明,以及主页。
准备基因组:($dir代表路径)
bismark_genome_preparation --path_to_bowtie $dir --bowtie2 --verbose $dir/Homo_sapiens_UCSC_hg19/
比对:
bismark --bowtie2 --phred64-quals -N 1 $dir/Homo_sapiens_UCSC_hg19/ -1 PE_1.fq -2 PE_2.fq
结果输出:
bismark_methylation_extractor -p --bedGraph --counts --buffer_size 10G PE_1.fq_bismark_bt2_pe.sam
输出的几个文本文件例如:report\bedgraph等文件还是很有用的
二:进行相关的统计以及注释(推荐使用methylKit)
methylKit可以读入bismark的输出SAM文件,但是需要一定处理就是排序,这里借助SAMtools就可以了。
samtools view -bS PE_1.fq_bismark_bt2_pe.sam >PE_1.bam samtools sort PE_1.bam PE_1.sorted samtools view -h PE_1.sorted.bam >PE_1.sorted.sam
#######################################
#Descriptive statistics on samples library(methylKit) myobj=read.bismark("1h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd()) myobj1=read.bismark("3h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd()) myobj2=read.bismark("6h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd()) myobj3=read.bismark("HL.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd()) ########### myobj=read("1h_CpG.txt","1h",assembly="hg19") getMethylationStats(myobj,plot=F,both.strands=F) png("1h_CpG_methylation.png",width=800,height=600) getMethylationStats(myobj,plot=T,both.strands=F) dev.off() library ("graphics") png("1h_CpG_coverage.png",width=800,height=600) getCoverageStats(myobj,plot=T,both.strands=F) dev.off()
#######################################
#genome coverage
java -jar /home/zhum/programm/picard-tools-1.103/MarkDuplicates.jar INPUT=/home/fyc/F13FTSECKF2813_HUMqlhH/clean_reads/HepG2-3hA/3h.sorted.bam OUTPUT=h3_dedup_reads.bam METRICS_FILE= h3.dedup.metrics MAX_FILE_HANDLES=1000 java -jar /home/zhum/programm/picard-tools-1.103/AddOrReplaceReadGroups.jar -I=h3_dedup_reads.bam -o=3h.add.sorted.bam -RGPL=illumina -RGSM=h3 -RGLB=h3 -RGPU=BARCODE CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT java -Xmx2g -jar /home/zhum/programm/GenomeAnalysisTK-2.7-4-g6f46d11/GenomeAnalysisTK.jar -R /home/zhum/database/ucsc.hg19.fasta -T DepthOfCoverage -o genome_coverage -I 3h.add.sorted.bam --printBaseCounts --omitDepthOutputAtEachBase --omitLocusTable
#################################
#Sample Correlation
library(methylKit) file.list=list( "1h_CpG.txt","3h_CpG.txt","6h_CpG.txt","HL_CpG.txt") myobj=read(file.list,sample.id=list("h1","h3","h6","HL"),assembly="hg19",treatment=c(0,0,0,1)) png("correlation.png",width=800,height=600) meth=unite(myobj, destrand=FALSE) getCorrelation(meth,plot=T) dev.off()
#Descriptive statistics on samples
png("CpG_methylation.png",width=1000,height=600) par(mfrow=c(2,2),cex=0.8) getMethylationStats(myobj[[1]],plot=T,both.strands=F) getMethylationStats(myobj[[2]],plot=T,both.strands=F) getMethylationStats(myobj[[3]],plot=T,both.strands=F) getMethylationStats(myobj[[4]],plot=T,both.strands=F) dev.off()
library ("graphics") png("CpG_coverage.png",width=1000,height=600) par(mfrow=c(2,2),cex=0.8) getCoverageStats(myobj[[1]],plot=T,both.strands=F) getCoverageStats(myobj[[2]],plot=T,both.strands=F) getCoverageStats(myobj[[3]],plot=T,both.strands=F) getCoverageStats(myobj[[4]],plot=T,both.strands=F) dev.off()
###########################
#Finding differentially methylated bases or regions myDiff=calculateDiffMeth(meth,num.cores=6) myDiff25p.hyper=get.methylDiff(myDiff,difference=25,qvalue=0.01,type="hyper") myDiff25p.hypo=get.methylDiff(myDiff,difference=25,qvalue=0.01,type="hypo") myDiff25p=get.methylDiff(myDiff,difference=25,qvalue=0.01)
############################
#Annotating differentially methylated bases or regions gene.obj=read.transcript.features("/home/fyc/methylation/Homo_sapiens_UCSC_hg19/hg19_refseq_all_types.bed") cpg.obj=read.feature.flank("/home/fyc/methylation/Homo_sapiens_UCSC_hg19/cpgi.hg19.bed",feature.flank.name=c("CpGi","shores")) diffAnn=annotate.WithGenicParts(myDiff25p,gene.obj) diffCpGann=annotate.WithFeature.Flank(myDiff25p,cpg.obj$CpGi,cpg.obj$shores,feature.name="CpGi",flank.name="shores") png("Differential_methylation_annotation.png",width=800,height=600) par(mfrow=c(1,2)) plotTargetAnnotation(diffAnn,precedence=TRUE,main="differential methylation annotation") plotTargetAnnotation(diffCpGann,col=c("green","gray","white"),main="differential methylation annotation") dev.off()
原文来自:http://blog.sina.com.cn/s/blog_83f77c940102uxgp.html
1F
傻大白想从0开始学甲基化分析,求指点迷津