library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Here I want to explore the best way to visualize the transcript presence data…. eventually this will be integrated with DNA methylation data.
A comparison of males and females irrespective of treatment
cd ../data
curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_males.csv
curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_females.csv
## % Total % Received % Xferd Average Speed Time Time Time Current
## Dload Upload Total Spent Left Speed
##
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 2005k 100 2005k 0 0 32.6M 0 --:--:-- --:--:-- --:--:-- 32.6M
## % Total % Received % Xferd Average Speed Time Time Time Current
## Dload Upload Total Spent Left Speed
##
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 2582k 100 2582k 0 0 35.0M 0 --:--:-- --:--:-- --:--:-- 35.5M
male.all <- read.csv("../data/transcript_counts_per_gene_per_sample_males.csv")
female.all <- read.csv("../data/transcript_counts_per_gene_per_sample_females.csv")
all <- inner_join(male.all, female.all, by = "gene_name")
ggplot() +
geom_histogram(data = male.all, aes(x = male_max_transcript_counts), fill = "blue", alpha = 0.2) +
geom_histogram(data = female.all, aes(x = female_max_transcript_counts), fill = "green", alpha = 0.2) +
scale_y_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 2 rows containing missing values (geom_bar).
all.wstats <- all %>% mutate(diffmean = abs(male_mean_transcript_counts - female_mean_transcript_counts)) %>%
mutate(m.covar = male_std_dev_transcript_counts / male_mean_transcript_counts) %>%
mutate(f.covar = female_std_dev_transcript_counts / female_mean_transcript_counts)
library("RColorBrewer")
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))
ggplot(all.wstats, aes(x = male_mean_transcript_counts, y = female_mean_transcript_counts, color = diffmean)) +
geom_point(alpha = 0.6, size = 3) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
sc
## `geom_smooth()` using formula 'y ~ x'
#ylim(0,20) +
#xlim(0,20)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))
ggplot(all.wstats, aes(x = male_median_transcript_counts, y = female_median_transcript_counts, color = diffmean)) +
geom_point(alpha = 0.6, size = 3) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
sc
## `geom_smooth()` using formula 'y ~ x'
#ylim(0,20) +
#xlim(0,20)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))
ggplot(all.wstats, aes(x = m.covar, y = f.covar, color = diffmean)) +
geom_point(alpha = 0.1, size = 2) +
geom_smooth(method = "lm") +
geom_abline(size=1.0,colour="red") +
sc
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 551 rows containing non-finite values (stat_smooth).
## Warning: Removed 551 rows containing missing values (geom_point).