---
title: "31-transcript-viz"
output: html_document
---

```{r}
library(tidyverse)
```


Here I want to explore the best way to visualize the transcript presence data. Eventually this will be integrated with DNA methylation data.
I will start out by looking at males only.


```{bash}
# Download the male transcript-count tables into ../data.
# Stop at the first failing command so a missing directory or a failed
# download does not go unnoticed.
set -e
cd ../data

# -f makes curl exit non-zero on an HTTP error (e.g. 404) instead of saving
# the server's error response under the .csv file name.
curl -f -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_controls_males.csv

curl -f -O https://raw.githubusercontent.com/epigeneticstoocean/2018_L18-adult-methylation/main/analyses/transcript_counts_per_gene_per_sample_exposed_males.csv
```

```{r}
# Transcript-count table for control males (fetched by the bash chunk above).
m_con <- read.csv(
  file = "../data/transcript_counts_per_gene_per_sample_controls_males.csv"
)
```

```{r}
# Preview the first rows of the control table.
m_con %>% head()
```


```{r}
# Transcript-count table for exposed males (fetched by the bash chunk above).
m_exp <- read.csv(
  file = "../data/transcript_counts_per_gene_per_sample_exposed_males.csv"
)

```

```{r}
# Preview the first rows of the exposed table.
m_exp %>% head()
```


```{r}
# Combine control and exposed tables, keeping only rows whose gene_name
# appears in both.
males <- m_con %>%
  inner_join(m_exp, by = "gene_name")

```


```{r}
# Histogram of max transcript counts for control males:
# 80 bins, x axis limited to 0-50, log10-scaled counts.
m_con %>%
  ggplot(aes(x = control_males_max_transcript_counts)) +
  geom_histogram(bins = 80) +
  scale_y_log10() +
  xlim(0, 50)

# Same histogram for exposed males.
m_exp %>%
  ggplot(aes(x = exposed_males_max_transcript_counts)) +
  geom_histogram(bins = 80) +
  scale_y_log10() +
  xlim(0, 50)
    
```
```{r}
#max <- males %>% 
  #pivot_longer(c(`control_males_max_transcript_counts`, `exposed_males_max_transcript_counts`), names_to = "max", values_to = "counts")
```


```{r}
# Overlay of the max transcript count distributions for control and exposed
# males on a log10 count axis.
# The fill is mapped to a group label so that a legend identifies the two
# histograms, and the x axis gets a neutral label because both columns share
# it. bins = 30 is ggplot2's default bin count, stated explicitly so the
# binning is unchanged and stat_bin() does not print its "using bins" message.
ggplot() +
  geom_histogram(
    data = m_con,
    aes(x = control_males_max_transcript_counts, fill = "control"),
    bins = 30,
    alpha = 0.2
  ) +
  geom_histogram(
    data = m_exp,
    aes(x = exposed_males_max_transcript_counts, fill = "exposed"),
    bins = 30,
    alpha = 0.2
  ) +
  scale_fill_manual(
    name = "males",
    values = c(control = "blue", exposed = "green")
  ) +
  scale_y_log10() +
  labs(x = "max transcript counts")
```
```{r}
# Derived comparison columns between control and exposed males:
#   diffmean  - absolute difference of the mean transcript counts
#   diffmed   - absolute difference of the median transcript counts
#   covar_exp - std dev / mean for exposed males (coefficient of variation)
#   covar_con - std dev / mean for control males (coefficient of variation)
m2 <- mutate(
  males,
  diffmean = abs(
    control_males_mean_transcript_counts - exposed_males_mean_transcript_counts
  ),
  diffmed = abs(
    control_males_median_transcript_counts - exposed_males_median_transcript_counts
  ),
  covar_exp = exposed_males_std_dev_transcript_counts /
    exposed_males_mean_transcript_counts,
  covar_con = control_males_std_dev_transcript_counts /
    control_males_mean_transcript_counts
)
```


```{r}
library("RColorBrewer")
```


```{r}

# Control vs exposed mean transcript counts, one point per row of m2,
# coloured by the absolute difference of the means (diffmean).

# Reversed "Spectral" palette interpolated to 100 colours. diffmean values
# outside the limits [1, 4] are out of bounds for the scale and, with
# ggplot2's defaults, are drawn in the scale's NA colour.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits = c(1, 4))

ggplot(
  m2,
  aes(
    x = control_males_mean_transcript_counts,
    y = exposed_males_mean_transcript_counts
  )
) +
  # Colour is mapped on the points only: a continuous colour inherited by
  # geom_smooth() cannot be used by the fit and is dropped (with a warning in
  # recent ggplot2 versions).
  geom_point(aes(color = diffmean), alpha = 0.6, size = 3) +
  # Explicit formula: same linear fit, without the "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # 1:1 reference line (geom_abline defaults: slope 1, intercept 0).
  geom_abline(size = 1.0, colour = "red") +
  sc
 # ylim(0,20) +
  #xlim(0,20)

```
```{r}

# Control vs exposed median transcript counts, jittered, coloured by the
# absolute difference of the medians (diffmed).
# NOTE(review): the original coloured this median plot by diffmean, leaving
# diffmed computed but unused; switched to diffmed so the colour matches the
# plotted statistic. Confirm this was the intent.

# Reversed "Spectral" palette interpolated to 100 colours. Values outside the
# limits [0, 3] are out of bounds for the scale and, with ggplot2's defaults,
# are drawn in the scale's NA colour.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits = c(0, 3))

ggplot(
  m2,
  aes(
    x = control_males_median_transcript_counts,
    y = exposed_males_median_transcript_counts
  )
) +
  #geom_point(alpha = 0.1, size = 1) +
  # Explicit formula: same linear fit, without the "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # 1:1 reference line (geom_abline defaults: slope 1, intercept 0).
  geom_abline(size = 1.0, colour = "red") +
  # Colour is mapped on the points only: a continuous colour inherited by
  # geom_smooth() cannot be used by the fit and is dropped (with a warning in
  # recent ggplot2 versions).
  geom_jitter(aes(color = diffmed), alpha = 0.3, size = 0.5) +
  sc
```


```{r}

# Coefficient of variation (std dev / mean) in control vs exposed males,
# coloured by the absolute difference of the mean counts (diffmean).

# Reversed "Spectral" palette interpolated to 100 colours. diffmean values
# outside the limits [0, 4] are out of bounds for the scale and, with
# ggplot2's defaults, are drawn in the scale's NA colour.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits = c(0, 4))

ggplot(m2, aes(x = covar_con, y = covar_exp)) +
  # Colour is mapped on the points only: a continuous colour inherited by
  # geom_smooth() cannot be used by the fit and is dropped (with a warning in
  # recent ggplot2 versions).
  geom_point(aes(color = diffmean), alpha = 0.2, size = 2) +
  # Explicit formula: same linear fit, without the "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # 1:1 reference line (geom_abline defaults: slope 1, intercept 0).
  geom_abline(size = 1.0, colour = "red") +
  sc

```


```{r}
# Linear regression of the exposed-male coefficient of variation on the
# control-male coefficient of variation, followed by its summary table.
model2 <- lm(
  formula = covar_exp ~ covar_con,
  data = m2
)
summary(model2)
```


```{r}
# Mean methylation table read from the RAnalysis data directory.
mean_meth <- read.csv(
  file = "../RAnalysis/data/meanmethgene_10x_allsampssex.csv"
)
```


```{r}
# Number of distinct values in the gene column of the methylation table.
mean_meth$gene %>%
  as.factor() %>%
  unique() %>%
  length()
```


```{r}

# Control vs exposed mean transcript counts, one point per row of m2,
# coloured by the absolute difference of the means (diffmean).
# This repeats the mean-count scatter plotted earlier in the notebook.

# Reversed "Spectral" palette interpolated to 100 colours. diffmean values
# outside the limits [1, 4] are out of bounds for the scale and, with
# ggplot2's defaults, are drawn in the scale's NA colour.
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits = c(1, 4))

ggplot(
  m2,
  aes(
    x = control_males_mean_transcript_counts,
    y = exposed_males_mean_transcript_counts
  )
) +
  # Colour is mapped on the points only: a continuous colour inherited by
  # geom_smooth() cannot be used by the fit and is dropped (with a warning in
  # recent ggplot2 versions).
  geom_point(aes(color = diffmean), alpha = 0.6, size = 3) +
  # Explicit formula: same linear fit, without the "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x) +
  # 1:1 reference line (geom_abline defaults: slope 1, intercept 0).
  geom_abline(size = 1.0, colour = "red") +
  sc
 # ylim(0,20) +
  #xlim(0,20)

```