韦恩图
- 韦恩图是用来反映不同集合之间的交集和并集情况的展示图。一般用于展示2-5个集合之间的交并关系。集合数目更多时,将会比较难分辨,更多集合的展示方式一般使用upSetView。
绘制韦恩图
from matplotlib import pyplot as plt
import numpy as np
from matplotlib_venn import venn3, venn3_circles
set1 = set(['A', 'B', 'C', 'D','H'])
set2 = set(['B', 'C', 'D', 'E','I'])
set3 = set(['C', 'D',' E', 'F', 'G'])
venn3([set1, set2, set3], ('Set1', 'Set2', 'Set3'))
plt.show()
image.png
from matplotlib import pyplot as plt
import numpy as np
from matplotlib_venn import venn3, venn3_circles
from matplotlib_venn import venn2, venn2_circles
# 画2*2四幅图
figure, axes = plt.subplots(2, 2)
# 10=Ab, 01 = aB, 11 = AB
venn2(subsets={'10': 1, '01': 2, '11': 3}, set_labels = ('A', 'B'), ax=axes[0][0])
venn2_circles((1,2,3), ax=axes[0][1])
venn3(subsets=(1, 1, 1, 1, 1, 1, 1), set_labels = ('A', 'B', 'C'), ax=axes[1][0])
venn3_circles({'001': 1, '100': 3, '010': 5, '110': 7, '011': 9}, ax=axes[1][1])
plt.show()
image.png
from matplotlib import pyplot as plt
import numpy as np
from matplotlib_venn import venn3, venn3_circles
# 图窗口大小
plt.figure(figsize=(6,6))
v = venn3(subsets=(1, 1, 1, 1, 1, 1, 1), set_labels = ('A', 'B', 'C'))
# id为'100'的图,不透明,白色,内容为'Unknown',名字叫'Set "A"'
v.get_patch_by_id('100').set_alpha(1.0)
v.get_patch_by_id('100').set_color('white')
v.get_label_by_id('100').set_text('Unknown')
v.get_label_by_id('A').set_text('Set "A"')
# 圆环是虚线
c = venn3_circles(subsets=(1, 1, 1, 1, 1, 1, 1), linestyle='dashed')
# c[0]为点,点的大小为"1"
c[0].set_lw(1.0)
c[0].set_ls('dotted')
plt.title("Sample Venn diagram")
#
plt.annotate('Unknown set', xy=v.get_label_by_id('100').get_position() - np.array([0, 0.05]), xytext=(-70,-70),
ha='center', textcoords='offset points', bbox=dict(boxstyle='round,pad=0.5', fc='gray', alpha=0.1),
arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.5',color='gray'))
plt.show()
image.png
这里吐槽一下python 不能画 3个以上的圆饼,以下两幅用R画出来的。
image.png image.pngR 韦恩图实例
-
假如我有四个不同类型癌症已找到的差异基因
image.png -
每个文件的基因排序方式如下
image.png
options(stringsAsFactors = F) # 以字符串的方式读入,而不是因子
setwd("../Cancergene") # 工作路径
####读取数据
bladderCancer = read.table("bladderCancer.txt",header=F, sep="\t")
bladderGene = bladderCancer$V1 # 把第一列赋值给bladderGene
breastCancer = read.table("breastCancer.txt", header=F, sep="\t")
breastGene = breastCancer$V1
lungCancer = read.table("lungCancer.txt", header=F, sep="\t")
lungGene = lungCancer$V1
liverCancer = read.table("liverCancer.txt", header=F, sep="\t")
liverGene = liverCancer$V1
####交集
intersect(intersect(intersect(bladderGene, breastGene), lungGene), liverGene)
####频数统计
all.genes = c(bladderGene,breastGene,lungGene,liverGene) # 把四个向量合并成一个向量
tbl = as.data.frame(table(all.genes)) # table 函数看每个基因出现的频次,然后把这些转成dataframe
tbl[tbl$Freq == 4,] # 查看频次为4的基因
####可视化,韦恩图
library(VennDiagram)
venn.diagram(list("Bladder Cancer" = bladderGene,
"Breast Cancer" = breastGene,
"Lung Cancer" = lungGene,
"Liver Cancer" = liverGene) ,
height=5000,
width=5200,
resolution=500,
imagetype="tiff",
filename="VennPlot.tiff",
col="transparent",
fill = c("cornflowerblue", "green", "yellow", "darkorchid1"),alpha = 0.50,
label.col = c("orange", "white", "darkorchid4", "white", "white", "white", "white", "white", "darkblue", "white", "white", "white", "white", "darkgreen", "white"),
cat.col = c("darkblue", "darkgreen", "orange", "darkorchid4"),
cex=1.5,
cat.cex=1.4
)
image.png
参考文献
- (matplotlib-venn 0.11.5 官网)
- (R画图)