小技巧分享。
模拟数据
就模拟一个基因表达矩阵吧,列是基因,有10个基因,然后行是样本,这10个基因在100个样本的表达量。
# Simulate an expression matrix: 100 samples (rows) x 10 genes (columns),
# filled column by column with standard-normal draws.
n_samples <- 100
n_genes <- 10
expr <- matrix(rnorm(n_samples * n_genes), nrow = n_samples, ncol = n_genes)
colnames(expr) <- LETTERS[seq_len(n_genes)]
# Genes to plot: all simulated genes, referenced by column name.
ITH_genes <- colnames(expr)
# One simulated `math` value per sample. It must have exactly one entry per
# row of `expr`, otherwise it cannot be paired with the expression values.
# rbinom() needs `size` and `prob` in addition to `n`, so a plain normal draw
# of the right length is used for the simulated indicator.
math <- rnorm(n_samples)
有一个变量是math值,也是100个样本的一个指标,然后就可以看10个基因分别和这个math的散点图,看相关性。
使用lapply批量绘图,再用cowplot拼图
这个时候表达矩阵是宽的
# Load plotting packages once, up front, instead of on every iteration.
library(ggpubr)
library(cowplot)

# Build one scatter plot per gene: expression of that gene (x) against the
# per-sample `math` value (y), with a regression line and its confidence band.
# The result is a list of plots, one element per entry of ITH_genes.
box <- lapply(ITH_genes, function(i) {
  # `expr` is wide (one column per gene), so pick the column for gene `i`
  # and pair it with `math`; both must have one value per sample.
  dat <- data.frame(
    gene = as.numeric(expr[, i]),
    math = math
  )
  ## Scatter plot with regression line
  ggscatter(
    dat,
    x = "gene", y = "math",
    color = "black", shape = 21, size = 0.5, # Points color, shape and size
    add = "reg.line", # Add regression line
    add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
    conf.int = TRUE, # Add confidence interval
    cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
    xlab = i, # Label the x axis with the gene name
    cor.coeff.args = list(method = "pearson", label.sep = "\n")
  )
})

# Arrange the per-gene plots in a grid with 5 columns.
plot_grid(plotlist = box, ncol = 5)
使用facet分面
这个时候需要把宽的表达矩阵折叠起来,变成长的数据形式才能绘图。
library(reshape2)
# ggscatter() and facet() come from ggpubr; load it here so this snippet does
# not depend on an earlier snippet having attached it.
library(ggpubr)

# Fold the wide matrix into long format: one row per (sample, gene) pair,
# i.e. nrow(expr) * ncol(expr) rows with columns (row index, gene, value).
dat <- melt(expr)
# melt() on a matrix emits the rows gene by gene (column-major), so the
# per-sample `math` vector has to be repeated once per gene to line up
# with the sample index in column 1.
stopifnot(length(math) == nrow(expr))
dat[, 1] <- rep(math, times = ncol(expr))
colnames(dat) <- c("math", "gene", "value")

# Single scatter plot of expression value (x) against math (y) ...
p <- ggscatter(
  dat,
  y = "math", x = "value", shape = 21, size = 0.5,
  add = "reg.line", # Add regression line
  add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
  conf.int = TRUE, # Add confidence interval
  cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
  cor.coeff.args = list(method = "pearson", label.sep = "\n")
)
# ... split into one panel per gene.
facet(p, facet.by = "gene")
全部代码
# Packages used below, loaded once up front.
library(ggpubr)
library(cowplot)
library(reshape2)

# ---- Simulated data ----
# Expression matrix: 100 samples (rows) x 10 genes (columns), filled column
# by column with standard-normal draws.
n_samples <- 100
n_genes <- 10
expr <- matrix(rnorm(n_samples * n_genes), nrow = n_samples, ncol = n_genes)
colnames(expr) <- LETTERS[seq_len(n_genes)]
# Genes to plot: all simulated genes, referenced by column name.
ITH_genes <- colnames(expr)
# One simulated `math` value per sample. rbinom() needs `size` and `prob` in
# addition to `n`, so a plain normal draw of the right length is used.
math <- rnorm(n_samples)

# ---- One plot per gene, combined with cowplot ----
# Build a list of scatter plots: expression of each gene (x) against the
# per-sample `math` value (y), with regression line and confidence band.
box <- lapply(ITH_genes, function(i) {
  # `expr` is wide (one column per gene), so pick the column for gene `i`.
  dat <- data.frame(
    gene = as.numeric(expr[, i]),
    math = math
  )
  ## Scatter plot with regression line
  ggscatter(
    dat,
    x = "gene", y = "math",
    color = "black", shape = 21, size = 0.5, # Points color, shape and size
    add = "reg.line", # Add regression line
    add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
    conf.int = TRUE, # Add confidence interval
    cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
    xlab = i, # Label the x axis with the gene name
    cor.coeff.args = list(method = "pearson", label.sep = "\n")
  )
})
# Arrange the per-gene plots in a grid with 5 columns.
plot_grid(plotlist = box, ncol = 5)

# ---- Same plots via faceting on long-format data ----
# Fold the wide matrix into long format: one row per (sample, gene) pair,
# i.e. nrow(expr) * ncol(expr) rows with columns (row index, gene, value).
dat <- melt(expr)
# melt() on a matrix emits the rows gene by gene (column-major), so the
# per-sample `math` vector has to be repeated once per gene to line up
# with the sample index in column 1.
stopifnot(length(math) == nrow(expr))
dat[, 1] <- rep(math, times = ncol(expr))
colnames(dat) <- c("math", "gene", "value")
p <- ggscatter(
  dat,
  y = "math", x = "value", shape = 21, size = 0.5,
  add = "reg.line", # Add regression line
  add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
  conf.int = TRUE, # Add confidence interval
  cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
  cor.coeff.args = list(method = "pearson", label.sep = "\n")
)
# Split the single scatter plot into one panel per gene.
facet(p, facet.by = "gene")