genes_p50_path <- file.path(PROJHOME, "reference",
"genes.protein_coding.1kb_flank.p50_overlap.bed")
genes_p50 <-
readr::read_tsv(genes_p50_path, col_names = c("chrom", "start", "end", "name")) %>%
tidyr::separate(name, c("gene_id", "gene"), sep = "_")
is_defined <- col_data$coo_ns %in% c("ABC", "GCB")
texpr_p50 <- texpr[ , is_defined]
gois <- rownames(texpr)[gene_ids_filt %in% genes_p50$gene_id]
nfkbiz_mutants <- muts %>% dplyr::filter(gene == "NFKBIZ") %$% patient %>% unique()
data <-
texpr_p50 %>%
as.data.frame() %>%
tibble::rownames_to_column("gene") %>%
tidyr::gather(patient, expr, -gene) %>%
dplyr::left_join(col_data, by = "patient") %>%
dplyr::mutate(nfkbiz = ifelse(patient %in% nfkbiz_mutants, "mutated", "not_mutated"))
p50_abc_vs_gcb <-
data %>%
dplyr::select(-patient) %>%
dplyr::group_by(coo_ns, gene, nfkbiz) %>%
tidyr::nest() %>%
tidyr::spread(nfkbiz, data) %>%
dplyr::mutate(
wilcox = purrr::map2(mutated, not_mutated, ~wilcox.test(.x$expr, .y$expr)),
pval = purrr::map_dbl(wilcox, "p.value")) %>%
dplyr::group_by(coo_ns) %>%
dplyr::mutate(qval = p.adjust(pval, "BH")) %>%
dplyr::select(coo_ns, gene, pval, qval) %>%
dplyr::ungroup() %>%
dplyr::arrange(pval)
knitr::kable(head(p50_abc_vs_gcb, 30))
coo_ns | gene | pval | qval |
---|---|---|---|
ABC | LTB_ENSG00000227507 | 0.0000150 | 0.2320012 |
ABC | HIST1H2BH | 0.0000537 | 0.4154006 |
ABC | SLC22A5 | 0.0002830 | 0.9541278 |
ABC | ACADM | 0.0003267 | 0.9541278 |
GCB | CCL24_ENSG00000106178 | 0.0003817 | 1.0000000 |
ABC | POU4F1 | 0.0004777 | 0.9541278 |
ABC | NOL4 | 0.0005008 | 0.9541278 |
ABC | HSF5 | 0.0006447 | 0.9541278 |
GCB | DLGAP1 | 0.0006483 | 1.0000000 |
ABC | PLAC8 | 0.0006559 | 0.9541278 |
ABC | TSHZ2 | 0.0006925 | 0.9541278 |
GCB | ADA | 0.0007679 | 1.0000000 |
GCB | AC015660.1 | 0.0008767 | 1.0000000 |
ABC | GMPPA | 0.0011191 | 0.9541278 |
GCB | OXCT2 | 0.0011294 | 1.0000000 |
ABC | CCL28 | 0.0011486 | 0.9541278 |
ABC | KIAA1919 | 0.0012103 | 0.9541278 |
ABC | KPNA5 | 0.0012422 | 0.9541278 |
GCB | MAP7D2 | 0.0012511 | 1.0000000 |
ABC | PLK3 | 0.0013083 | 0.9541278 |
ABC | USP53 | 0.0014504 | 0.9541278 |
ABC | NKX3-1 | 0.0015994 | 0.9541278 |
ABC | SMOC1 | 0.0016281 | 0.9541278 |
ABC | DNAJA2 | 0.0016480 | 0.9541278 |
ABC | ALG1L | 0.0017505 | 0.9541278 |
ABC | SH3YL1 | 0.0017782 | 0.9541278 |
ABC | SLC36A4 | 0.0018702 | 0.9541278 |
ABC | GADD45A | 0.0019178 | 0.9541278 |
ABC | IBTK | 0.0019665 | 0.9541278 |
ABC | TNF_ENSG00000232810 | 0.0019665 | 0.9541278 |
p50_abc_vs_gcb %>%
dplyr::mutate(is_goi = gene %in% gois) %>%
ggplot(aes(x = pval, fill = is_goi)) +
geom_histogram(bins = 25) +
scale_fill_manual(values = c(`FALSE` = "grey", `TRUE` = "red")) +
facet_grid(~coo_ns)