两种方法批量绘制散点图

小技巧分享。

模拟数据

就模拟一个基因表达矩阵吧,列是基因,有10个基因,然后行是样本,这10个基因在100个样本的表达量。

# Simulate an expression matrix: 100 samples (rows) x 10 genes (columns),
# filled column by column with standard-normal values.
expr <- matrix(rnorm(1000), nrow = 100, ncol = 10)
colnames(expr) <- LETTERS[1:10]
# Genes to plot: every column of the simulated matrix.
ITH_genes <- colnames(expr)
# One simulated `math` value per sample. The previous rbinom(10) call could
# not run (rbinom() has no defaults for `size` and `prob`) and asked for only
# 10 values, while the metric must have one value for each of the 100 samples.
math <- rnorm(nrow(expr))

有一个变量是math值,也是100个样本的一个指标,然后就可以看10个基因分别和这个math的散点图,看相关性。

使用lapply循环加cowplot拼图

这个时候表达矩阵是宽的

# Method 1: build one scatter plot per gene, then arrange them in a grid.
library(ggpubr)
library(cowplot)

# `math` must hold exactly one value per sample (row of `expr`). data.frame()
# silently recycles a shorter vector whose length divides the row count, which
# would pair values with the wrong samples, so check up front.
stopifnot(length(math) == nrow(expr))

box <- lapply(ITH_genes, function(i) {
  # Expression of gene `i` (x axis) against the per-sample math value (y axis).
  dat <- data.frame(
    gene = as.numeric(expr[, i]),
    math = math
  )
  # Scatter plot with a regression line and the Pearson correlation.
  ggscatter(dat, x = "gene", y = "math",
    color = 'black', shape = 21, size = 0.5, # Points color, shape and size
    add = "reg.line", # Add regression line
    add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
    conf.int = TRUE, # Add confidence interval
    cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
    xlab = i, # Label the x axis with the gene name
    cor.coeff.args = list(method = "pearson", label.sep = "\n")
  )
})
# Lay out the per-gene plots, five per row.
plot_grid(plotlist = box, ncol = 5)

使用facet分面

这个时候需要把宽的表达矩阵折叠起来,变成长的数据形式才能绘图。

# Method 2: reshape the wide matrix to long form and facet by gene.
library(reshape2)
library(ggpubr)

# `math` must hold exactly one value per sample (row of `expr`).
stopifnot(length(math) == nrow(expr))

# melt() on a matrix returns one row per cell with three columns: row index,
# column name, value. Rows are emitted column by column, so the sample index
# cycles through all samples once for every gene.
dat <- melt(expr)
# Replace the sample index with that sample's math value: one full copy of
# `math` per gene column. (rep(math, 100) did not line up with this layout.)
dat[, 1] <- rep(math, times = ncol(expr))
colnames(dat) <- c('math', 'gene', 'value')
p <- ggscatter(dat, y = "math", x = "value", shape = 21, size = 0.5,
  add = "reg.line", # Add regression line
  add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
  conf.int = TRUE, # Add confidence interval
  cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
  cor.coeff.args = list(method = "pearson", label.sep = "\n")
)
# One panel per gene.
facet(p, facet.by = "gene")

全部代码

library(ggpubr)
library(cowplot)
library(reshape2)

# ---- Simulated data ----
# Expression matrix: 100 samples (rows) x 10 genes (columns).
expr <- matrix(rnorm(1000), nrow = 100, ncol = 10)
colnames(expr) <- LETTERS[1:10]
ITH_genes <- colnames(expr)
# One simulated `math` value per sample. The previous rbinom(10) call could
# not run (rbinom() has no defaults for `size` and `prob`) and asked for only
# 10 values, while the metric must have one value for each of the 100 samples.
math <- rnorm(nrow(expr))
stopifnot(length(math) == nrow(expr))

# ---- Method 1: one plot per gene, combined with cowplot ----
box <- lapply(ITH_genes, function(i) {
  # Expression of gene `i` (x axis) against the per-sample math value (y axis).
  dat <- data.frame(
    gene = as.numeric(expr[, i]),
    math = math
  )
  # Scatter plot with a regression line and the Pearson correlation.
  ggscatter(dat, x = "gene", y = "math",
    color = 'black', shape = 21, size = 0.5, # Points color, shape and size
    add = "reg.line", # Add regression line
    add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
    conf.int = TRUE, # Add confidence interval
    cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
    xlab = i, # Label the x axis with the gene name
    cor.coeff.args = list(method = "pearson", label.sep = "\n")
  )
})
# Lay out the per-gene plots, five per row.
plot_grid(plotlist = box, ncol = 5)

# ---- Method 2: long-format data with facets ----
# melt() on a matrix returns one row per cell with three columns: row index,
# column name, value. Rows are emitted column by column, so the sample index
# cycles through all samples once for every gene.
dat <- melt(expr)
# Replace the sample index with that sample's math value: one full copy of
# `math` per gene column. (rep(math, 100) did not line up with this layout.)
dat[, 1] <- rep(math, times = ncol(expr))
colnames(dat) <- c('math', 'gene', 'value')
p <- ggscatter(dat, y = "math", x = "value", shape = 21, size = 0.5,
  add = "reg.line", # Add regression line
  add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
  conf.int = TRUE, # Add confidence interval
  cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
  cor.coeff.args = list(method = "pearson", label.sep = "\n")
)
# One panel per gene.
facet(p, facet.by = "gene")

Comments are closed.