library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Here I want to explore the best way to visualize the transcript presence data…. eventually this will be integrated with DNA methylation data.

A comparison of males and females irrespective of treatment

cd ../data

curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_males.csv

curl -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_females.csv
##   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
##                                  Dload  Upload   Total   Spent    Left  Speed
## 
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 2005k  100 2005k    0     0  32.6M      0 --:--:-- --:--:-- --:--:-- 32.6M
##   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
##                                  Dload  Upload   Total   Spent    Left  Speed
## 
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 2582k  100 2582k    0     0  35.0M      0 --:--:-- --:--:-- --:--:-- 35.5M
male.all <- read.csv("../data/transcript_counts_per_gene_per_sample_males.csv")
female.all <- read.csv("../data/transcript_counts_per_gene_per_sample_females.csv")
all <- inner_join(male.all, female.all, by = "gene_name")
ggplot() +
  geom_histogram(data = male.all, aes(x = male_max_transcript_counts), fill = "blue", alpha = 0.2) +
  geom_histogram(data = female.all, aes(x = female_max_transcript_counts), fill = "green", alpha = 0.2) +
  scale_y_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 2 rows containing missing values (geom_bar).

all.wstats <- all %>% mutate(diffmean = abs(male_mean_transcript_counts - female_mean_transcript_counts)) %>% 
  mutate(m.covar = male_std_dev_transcript_counts / male_mean_transcript_counts) %>%
  mutate(f.covar = female_std_dev_transcript_counts / female_mean_transcript_counts)
library("RColorBrewer")
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))

ggplot(all.wstats, aes(x = male_mean_transcript_counts, y = female_mean_transcript_counts, color = diffmean)) +
  geom_point(alpha = 0.6, size = 3) +
  geom_smooth(method = "lm") +
  geom_abline(size=1.0,colour="red") +
  sc 
## `geom_smooth()` using formula 'y ~ x'

  #ylim(0,20) +
  #xlim(0,20)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))

ggplot(all.wstats, aes(x = male_median_transcript_counts, y = female_median_transcript_counts, color = diffmean)) +
  geom_point(alpha = 0.6, size = 3) +
  geom_smooth(method = "lm") +
  geom_abline(size=1.0,colour="red") +
  sc 
## `geom_smooth()` using formula 'y ~ x'

  #ylim(0,20) +
  #xlim(0,20)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(0, 16))

ggplot(all.wstats, aes(x = m.covar, y = f.covar, color = diffmean)) +
  geom_point(alpha = 0.1, size = 2) +
  geom_smooth(method = "lm") +
  geom_abline(size=1.0,colour="red") +
  sc 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 551 rows containing non-finite values (stat_smooth).
## Warning: Removed 551 rows containing missing values (geom_point).