library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Here I want to explore the best way to visualize the transcript presence data…. eventually this will be integrated with DNA methylation data. Will start out looking out males only.
cd ../data
curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_controls_males.csv
curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_exposed_males.csv
## % Total % Received % Xferd Average Speed Time Time Time Current
## Dload Upload Total Spent Left Speed
##
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 1376k 100 1376k 0 0 24.4M 0 --:--:-- --:--:-- --:--:-- 24.4M
## % Total % Received % Xferd Average Speed Time Time Time Current
## Dload Upload Total Spent Left Speed
##
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 1774k 100 1774k 0 0 26.2M 0 --:--:-- --:--:-- --:--:-- 26.2M
m_con <- read.csv("../data/transcript_counts_per_gene_per_sample_controls_males.csv")
head(m_con)
## gene_name transcript_counts.S13M transcript_counts.S64M
## 1 ATP6 1 1
## 2 COX1 1 1
## 3 COX2 1 1
## 4 COX3 1 1
## 5 CYTB 1 1
## 6 LOC111099029 1 1
## transcript_counts.S6M transcript_counts.S7M
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## control_males_sum_transcript_counts control_males_median_transcript_counts
## 1 4 1
## 2 4 1
## 3 4 1
## 4 4 1
## 5 4 1
## 6 4 1
## control_males_mean_transcript_counts control_males_max_transcript_counts
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## control_males_min_transcript_counts control_males_std_dev_transcript_counts
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
m_exp <- read.csv("../data/transcript_counts_per_gene_per_sample_exposed_males.csv")
head(m_exp)
## gene_name transcript_counts.S12M transcript_counts.S23M
## 1 ATP6 1 1
## 2 COX1 1 1
## 3 COX2 1 1
## 4 COX3 1 1
## 5 CYTB 1 1
## 6 LOC111099029 1 1
## transcript_counts.S31M transcript_counts.S48M transcript_counts.S59M
## 1 1 1 1
## 2 1 1 1
## 3 1 1 1
## 4 1 1 1
## 5 1 1 1
## 6 1 1 1
## transcript_counts.S9M exposed_males_sum_transcript_counts
## 1 1 6
## 2 1 6
## 3 1 6
## 4 1 6
## 5 1 6
## 6 1 6
## exposed_males_median_transcript_counts exposed_males_mean_transcript_counts
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## exposed_males_max_transcript_counts exposed_males_min_transcript_counts
## 1 1 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 1 1
## exposed_males_std_dev_transcript_counts
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
males <- inner_join(m_con, m_exp, by = "gene_name")
ggplot(m_con, aes(x = control_males_max_transcript_counts)) +
geom_histogram(bins = 80) +
xlim(0,50) +
scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 53 rows containing missing values (geom_bar).
ggplot(m_exp, aes(x = exposed_males_max_transcript_counts)) +
geom_histogram(bins = 80) +
xlim(0,50) +
scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 52 rows containing missing values (geom_bar).
#max <- males %>%
#pivot_longer(c(`control_males_max_transcript_counts`, `exposed_males_max_transcript_counts`), names_to = "max", values_to = "counts")
ggplot() +
geom_histogram(data = m_con, aes(x = control_males_max_transcript_counts), fill = "blue", alpha = 0.2) +
geom_histogram(data = m_exp, aes(x = exposed_males_max_transcript_counts), fill = "green", alpha = 0.2) +
scale_y_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 3 rows containing missing values (geom_bar).
## Removed 3 rows containing missing values (geom_bar).
m2 <- males %>% mutate(diffmean = abs(control_males_mean_transcript_counts - exposed_males_mean_transcript_counts)) %>%
mutate(diffmed = abs(control_males_median_transcript_counts - exposed_males_median_transcript_counts)) %>%
mutate(covar_exp = exposed_males_std_dev_transcript_counts / exposed_males_mean_transcript_counts) %>%
mutate(covar_con = control_males_std_dev_transcript_counts / control_males_mean_transcript_counts)
library("RColorBrewer")
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 4))
ggplot(m2, aes(x = control_males_mean_transcript_counts, y = exposed_males_mean_transcript_counts, color = diffmean)) +
geom_point(alpha = 0.6, size = 3) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
sc
## `geom_smooth()` using formula 'y ~ x'
# ylim(0,20) +
#xlim(0,20)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 3))
ggplot(m2, aes(x = control_males_median_transcript_counts, y = exposed_males_median_transcript_counts, color = diffmean)) +
#geom_point(alpha = 0.1, size = 1) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
geom_jitter(alpha = 0.3, size =0.5) +
sc
## `geom_smooth()` using formula 'y ~ x'
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 4))
ggplot(m2, aes(x = covar_con, y = covar_exp, color = diffmean)) +
geom_point(alpha = 0.2, size = 2) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
sc
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 725 rows containing non-finite values (stat_smooth).
## Warning: Removed 725 rows containing missing values (geom_point).