甲基化数据分析

相关的分析软件:

  • methylKit: https://code.google.com/p/methylkit/
  • BSMAP:https://code.google.com/p/bsmap/
  • MOABS: https://code.google.com/p/moabs/
  • Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
  • BiSeq: http://www.bioconductor.org/packages/devel/bioc/html/BiSeq.html
  • MethPipe: http://smithlabresearch.org/software/methpipe/

首先是对数据进行比对,推荐使用bismark,因为比较经典目前很多关于后面的数据处理所开发的R包都可以读入该软件的输出。

一:运行bismark阶段

bismark内置了两种比对工具,bowtie(默认),以及bowtie2.至于选择那个基本上都可以吧。

当然在运行bismark的时候你不得不看看它软件的说明,以及主页。

准备基因组:($dir代表路径)

bismark_genome_preparation --path_to_bowtie $dir --bowtie2 --verbose $dir/Homo_sapiens_UCSC_hg19/

比对:

bismark --bowtie2 --phred64-quals -N 1 $dir/Homo_sapiens_UCSC_hg19/ -1 PE_1.fq -2 PE_2.fq

结果输出:

bismark_methylation_extractor -p --bedGraph --counts --buffer_size 10G PE_1.fq_bismark_bt2_pe.sam

输出的几个文本文件例如:report\bedgraph等文件还是很有用的

二:进行相关的统计以及注释(推荐使用methylKit)

methylKit可以读入bismark的输出SAM文件,但是需要一定处理就是排序,这里借助SAMtools就可以了。

samtools view -bS PE_1.fq_bismark_bt2_pe.sam >PE_1.bam
samtools sort PE_1.bam PE_1.sorted
samtools view -h PE_1.sorted.bam >PE_1.sorted.sam

#######################################

#Descriptive statistics on samples
library(methylKit)
myobj=read.bismark("1h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd())
myobj1=read.bismark("3h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd())
myobj2=read.bismark("6h.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd())
myobj3=read.bismark("HL.sorted.sam","1h",assembly="hg19",save.context=c("CpG","CHG","CHH"),read.context="none",mincov=10,minqual=20,save.folder = getwd())
###########
myobj=read("1h_CpG.txt","1h",assembly="hg19")
getMethylationStats(myobj,plot=F,both.strands=F)
png("1h_CpG_methylation.png",width=800,height=600)
getMethylationStats(myobj,plot=T,both.strands=F)
dev.off()
library ("graphics")
png("1h_CpG_coverage.png",width=800,height=600)
getCoverageStats(myobj,plot=T,both.strands=F)
dev.off()

#######################################

#genome coverage

java -jar /home/zhum/programm/picard-tools-1.103/MarkDuplicates.jar INPUT=/home/fyc/F13FTSECKF2813_HUMqlhH/clean_reads/HepG2-3hA/3h.sorted.bam OUTPUT=h3_dedup_reads.bam METRICS_FILE= h3.dedup.metrics MAX_FILE_HANDLES=1000
java -jar /home/zhum/programm/picard-tools-1.103/AddOrReplaceReadGroups.jar -I=h3_dedup_reads.bam -o=3h.add.sorted.bam -RGPL=illumina -RGSM=h3 -RGLB=h3 -RGPU=BARCODE CREATE_INDEX=true VALIDATION_STRINGENCY=LENIENT
java -Xmx2g -jar /home/zhum/programm/GenomeAnalysisTK-2.7-4-g6f46d11/GenomeAnalysisTK.jar -R /home/zhum/database/ucsc.hg19.fasta -T DepthOfCoverage -o genome_coverage -I 3h.add.sorted.bam --printBaseCounts --omitDepthOutputAtEachBase --omitLocusTable

#################################

#Sample Correlation

library(methylKit)
file.list=list( "1h_CpG.txt","3h_CpG.txt","6h_CpG.txt","HL_CpG.txt")
myobj=read(file.list,sample.id=list("h1","h3","h6","HL"),assembly="hg19",treatment=c(0,0,0,1))
png("correlation.png",width=800,height=600)
meth=unite(myobj, destrand=FALSE)
getCorrelation(meth,plot=T)
dev.off()

#Descriptive statistics on samples

png("CpG_methylation.png",width=1000,height=600)
par(mfrow=c(2,2),cex=0.8)
getMethylationStats(myobj[[1]],plot=T,both.strands=F)
getMethylationStats(myobj[[2]],plot=T,both.strands=F)
getMethylationStats(myobj[[3]],plot=T,both.strands=F)
getMethylationStats(myobj[[4]],plot=T,both.strands=F)
dev.off()
library ("graphics")
png("CpG_coverage.png",width=1000,height=600)
par(mfrow=c(2,2),cex=0.8)
getCoverageStats(myobj[[1]],plot=T,both.strands=F)
getCoverageStats(myobj[[2]],plot=T,both.strands=F)
getCoverageStats(myobj[[3]],plot=T,both.strands=F)
getCoverageStats(myobj[[4]],plot=T,both.strands=F)
dev.off()

###########################

#Finding differentially methylated bases or regions
myDiff=calculateDiffMeth(meth,num.cores=6)
myDiff25p.hyper=get.methylDiff(myDiff,difference=25,qvalue=0.01,type="hyper")
myDiff25p.hypo=get.methylDiff(myDiff,difference=25,qvalue=0.01,type="hypo")
myDiff25p=get.methylDiff(myDiff,difference=25,qvalue=0.01)

############################

#Annotating differentially methylated bases or regions
gene.obj=read.transcript.features("/home/fyc/methylation/Homo_sapiens_UCSC_hg19/hg19_refseq_all_types.bed")
cpg.obj=read.feature.flank("/home/fyc/methylation/Homo_sapiens_UCSC_hg19/cpgi.hg19.bed",feature.flank.name=c("CpGi","shores"))
diffAnn=annotate.WithGenicParts(myDiff25p,gene.obj)
diffCpGann=annotate.WithFeature.Flank(myDiff25p,cpg.obj$CpGi,cpg.obj$shores,feature.name="CpGi",flank.name="shores")
png("Differential_methylation_annotation.png",width=800,height=600)
par(mfrow=c(1,2))
plotTargetAnnotation(diffAnn,precedence=TRUE,main="differential methylation annotation")
plotTargetAnnotation(diffCpGann,col=c("green","gray","white"),main="differential methylation annotation")
dev.off()

原文来自:http://blog.sina.com.cn/s/blog_83f77c940102uxgp.html

    • phoenidi 0

      傻大白想从0开始学甲基化分析,求指点迷津

    发表评论

    匿名网友

    拖动滑块以完成验证