image.png

In [104]:
options(stringsAsFactors=FALSE)
library(GenomicFeatures)
library(ChIPseeker)
library(RColorBrewer)
library(ggsci)
library(ggplot2)
library(ggpubr)
library(networkD3)
library(grid)
library(HiveR)
library(scatterpie)
library(ggExtra)
library(treemap)
library(pheatmap)
library(reshape)
library(ggalluvial)
library(ggmosaic)
library(ggtern)
library(data.table)
library(pals)

Fig.3a

In [ ]:
chipseq <- read.delim('ChIPseq.experiment.table.txt')
atacseq <- read.delim('DHS.experiment.table.txt')
chipexp <- read.delim('metrics/experiment.metrics.table.out', comment.char = '#')
chipsam <- read.delim('metrics/sample.metrics.table.out', comment.char = '#')
atacexp <- read.delim('metrics/DHS.experiment.metrics.table.out', comment.char = '#')
atacsam <- read.delim('metrics/DHS.sample.metrics.table.out', comment.char = '#')
chippik <- read.delim('metrics/peak.metrics.table.out', comment.char = '#')
atacpik <- read.delim('metrics/DHS.peak.metrics.table.out', comment.char = '#')
chippik <- chippik[which(chippik$IDR>50),]
atacpik <- atacpik[which(atacpik$IDR>1000),]
allpeak <- rbind(chippik, atacpik)
allpeak$ID <- sprintf("%s/%s/%s",allpeak$genome,allpeak$project,allpeak$experiment)
allpeak <- allpeak[!duplicated(allpeak$ID),]
rownames(allpeak) <- allpeak$ID 
qc1 <- unique(rbind(chipseq,atacseq)[, c("result_dir","project","main_group","analysis_name",'SPOT')]) %>%
  dplyr::filter(main_group %in% c("DNA-binding protein","open chromatin","histone related")) %>%
  dplyr::filter(!is.na(SPOT)) %>% dplyr::select(result_dir, main_group, SPOT)

chipsam2 <- chipsam[!duplicated(chipsam$sample) & !is.na(chipsam$sample),]
rownames(chipsam2) <- chipsam2$sample
qc2 <- chipseq[grep("_control",chipseq$replicate),c('result_dir','sample_id')] %>%
  dplyr::mutate(SPOT=chipsam2[sample_id,'SPOT'], main_group="control") %>%
  dplyr::filter(!is.na(SPOT)) %>% 
  dplyr::select(result_dir, main_group, SPOT)

pltd <- rbind(qc1, qc2) %>% dplyr::mutate(main_group=factor(main_group))
filtered <- expall[expall$main_group %in% c("DNA-binding protein", "histone related", "open chromatin") & 
                     (!expall$factor %in% c("FLAG", "CenH3", "dCas9", "Heterochromatin", "H1", "H2A", "H2AW", "H2AX", "H2AZ", "H2B", "H3", "H4", "H3.1", "H3.3", "IgG", "NRPE1", "NRPD1")) & 
                     !grepl("PolII",expall$group_list, ignore.case=T) & !grepl("RNA polymerase",expall$group_list, ignore.case=T) & 
                     !grepl("Unknown",expall$group_list, ignore.case=T) & !grepl("H2Bub1",expall$group_list, ignore.case=T) & 
                     !grepl("AGO",expall$group_list, ignore.case=T) & !grepl("DCL1",expall$group_list, ignore.case=T) & 
                     !grepl("lysinebutyrylation",expall$group_list, ignore.case=T) & !grepl("crotonylation",expall$group_list, ignore.case=T) & 
                     !grepl("phosphorylation",expall$group_list, ignore.case=T) & !grepl("ubiquitination",expall$group_list, ignore.case=T), ] ## , "H3K4me1", "H3K36me3"
filtered <- filtered[ifelse(filtered$result_dir=="arabidopsis_thaliana", ifelse(grepl("histone related",filtered$group_list), grepl("H3K4me1|H3K4me2|H3K4me3|H3K9me1|H3K9me2|H3K9me3|H3K9ac|H3K27ac|H3K27me1|H3K27me2|H3K27me3|H3K36me1|H3K36me2|H3K36me3|H3K79me2|H3K79me3|H3K56ac|H4K16ac|H3K23ac|H4K12ac|H3K4ac",filtered$group_list) , TRUE), TRUE), ]

filtered$idr <- ifelse(filtered$main_group == "open chromatin", sprintf("/public/workspace/chend/chiphub/ATACseq/analysis/%s/hammock/%s.peak.idr.hammock.gz",filtered$result_dir,filtered$analysis_name),
                       sprintf("/public/workspace/chend/chiphub/ChIPip/analysis/%s/%s/hammock/%s.peak.idr.hammock.gz",filtered$result_dir,filtered$project,filtered$analysis_name))

filtered$peak <- ifelse(filtered$main_group == "open chromatin", sprintf("/public/workspace/chend/chiphub/ATACseq/analysis/%s/peak/macs2/%s_pooled.narrowPeak.bed",filtered$result_dir,filtered$analysis_name),
                        ifelse(grepl("H3K36me3|H3K20me1|H3K4me1|H3K79me2|H3K79me3|H3K27me3|H3K9me3|H3K9me1",filtered$group_list), 
                               sprintf("/public/workspace/chend/chiphub/ChIPip/analysis/%s/%s/broad/macs2/naive/%s.broadPeak.bed",filtered$result_dir,filtered$project,filtered$analysis_name), 
                               sprintf("/public/workspace/chend/chiphub/ChIPip/analysis/%s/%s/peak/macs2/optimal/%s.narrowPeak.bed",filtered$result_dir,filtered$project,filtered$analysis_name)))

filtered <- filtered[file.exists(filtered$idr), ]

filtered$subgroup <- unlist(lapply(as.character(filtered$group_list), function(x){tail(unlist(strsplit(x," // ")),n=1)}))

athchip$peak <- peak[rownames(athchip),'IDR']
athchipOK <- athchip[athchip$OK & athchip$peak>50,]
athchipOK <- athchipOK[with(athchipOK, order(factor)), ]
pkfiles <- athchipOK$idr
athTFs <- unique(athchip$factor)
coTFs <- matrix(0, nrow = length(athTFs), ncol = length(athTFs))
colnames(coTFs) <- rownames(coTFs) <- athTFs
for(i in athTFs){
  for(j in athTFs){
    coTFs[i,j] <- max(jaccardmat[which(athchip$factor==i),which(athchip$factor==j)])
  }
}

PPI <- read.delim('ath.PPI.txt')
rownames(PPI) <- sprintf("%s-%s", PPI[,1], PPI[,2])
intct <- which(coTFs>median(coTFs), arr.ind=T)
cobTFs <- data.frame(from=athTFs[intct[,1]], to=athTFs[intct[,2]]) %>%
  mutate(fg=factors[from,'V1'], tg=factors[to,'V1'])
In [47]:
phtm <- pheatmap(log1p(coTFs), silent = T)
htmd <- log1p(coTFs)[phtm$tree_row$labels[phtm$tree_row$order], 
                     phtm$tree_row$labels[phtm$tree_row$order]]
diag(htmd) <- htmd[upper.tri(htmd)] <- -0.001
annotation_row2 <- data.frame(subgroup=factors[rownames(htmd),'V3']) %>% 
  mutate(subgroup=ifelse(subgroup %in% tx, subgroup, 'Others'))
rownames(annotation_row2) <- rownames(htmd)
labels_row <- rep("", nrow(htmd))
# fcols <- colorRampPalette(brewer.pal(n=8, name="Set1"))(length(unique(annotation_row$fam)))
# names(fcols) <- unique(annotation_row$fam)
# fcols['Others'] <- '#999999'
# ann_colors <- list(fam=fcols)
ann_colors2 <- ann_colors
ann_colors2$subgroup <- c(ann_colors2$subgroup, "Unknown"="#DEDEDE")
## ComplexHeatmap::
pheatmap(htmd, color=c(NA,brewer.ylorrd(255)), cluster_rows = F, cluster_cols = F, 
         cellwidth=3, cellheight=3, fontsize=3, border_color = NA, 
         annotation_colors=ann_colors2, 
         annotation_row=annotation_row2, labels_row = labels_row)

Fig.3b

Fig.3c

In [92]:
options(repr.plot.width = 16, repr.plot.height = 16, repr.plot.res = 100)
library(igraph)
set.seed(123456)
V(network)$size <- V(network)$degree <- log1p(degree(network)+1)*2
V(network)$size[V(network)$size > 5] <- 5
layout.fr <- layout.fruchterman.reingold(network)
plot(network, layout=layout.fr, vertex.label.font=3, 
     vertex.frame.color=NA, edge.curved=0)

Fig.3d

In [58]:
sankeyData <- list() 
fams <- unique(c(paste0("x|",regfam[miRTFSub$from]), miRfam, paste0("z|",regfam[miRTarSub$V2])))
sankeyData$nodes <- data.frame(name=fams)
ids <- as.integer(rownames(sankeyData$nodes)) - 1
names(ids) <- sankeyData$nodes$name
mir2tf <- data.frame(source=miRfam[miRTarSub$V1], target=regfam[miRTarSub$V2])
mir2tf <- aggregate(mir2tf$source, mir2tf, length) 
tf2mir <- data.frame(source=regfam[miRTFSub$from], target=miRfam[miRTFSub$to])
tf2mir <- aggregate(tf2mir$source, tf2mir, length) 

connects <- rbind(data.frame(source=ids[paste0("x|",tf2mir$source)], target=ids[tf2mir$target], value=tf2mir$x), data.frame(source=ids[mir2tf$source], target=ids[paste0("z|",mir2tf$target)], value=mir2tf$x))
sankeyData$links <- connects

rownames(miRTFSub) <- paste(miRTFSub[,1], miRTFSub[,2])
rownames(miRTarSub) <- paste(miRTarSub[,1], miRTarSub[,2])
rownames(TFTarSub) <- paste(TFTarSub[,1], TFTarSub[,2])
xData <- expand.grid(master=masters, miRNA=miRs, target=TFs, stringsAsFactors=FALSE) 
xData <- xData[which(paste(xData$master, xData$miRNA) %in% rownames(miRTFSub) & paste(xData$miRNA, xData$target) %in% rownames(miRTarSub) & paste(xData$master, xData$target) %in% rownames(TFTarSub)), ]
## check 
# apply(xData,1,function(x){
# x[1] %in% miRTFSub$from & x[2] %in% miRTFSub$to & x[2] %in% miRTarSub$V1 & x[3] %in% miRTarSub$V2 & x[1] %in% TFTarSub$from & x[3] %in% TFTarSub$to
# })

alluvialData <- data.frame(master=regfam[xData$master], miRNA=miRfam[xData$miRNA], target=paste0("z|",regfam[xData$target]))
alluvialData <- aggregate(alluvialData$master, alluvialData, length)
colnames(alluvialData)[4] <- "freq"

## pltd <- alluvialData[alluvialData$freq > 1,]
pltd <- alluvialData

length(unique(xData$miRNA))
length(unique(xData$master))
length(</