DMLs between the Olympia oyster populations, Hood Canal and South Sound, were identified using MethylKit. File is: analyses/dml25.bed
MACAU was used to identify loci at which methylation is associated with a phenotype, in our case shell length, while controlling for relatedness.
pwd
mkdir ../analyses/BEDtools/
DML = "../analyses/dml25.bed"
!head {DML}
!wc -l {DML}
macau75 = "../analyses/macau/macau.sign.perc.meth.10x75.bed"
!head {macau75}
!wc -l {macau75}
Olurida_v081-20190709.gene.gff - genes
Olurida_v081-20190709.CDS.gff - Coding regions of genes
Olurida_v081-20190709.exon.gff - Exons
Olurida_v081-20190709.mRNA.gff - mRNA
Olurida_v081_TE-Cg.gff - Transposable elements
20190709-Olurida_v081.stringtie.gtf - alternative splice variants
Note: may also have an intron track, Steven was working on that. Could also try to get new 3' and 5' UTR tracks.
gene = "../genome-features/Olurida_v081-20190709.gene.gff"
CDS = "../genome-features/Olurida_v081-20190709.CDS.gff"
exon = "../genome-features/Olurida_v081-20190709.exon.gff"
mRNA = "../genome-features/Olurida_v081-20190709.mRNA.gff"
TE = "../genome-features/Olurida_v081_TE-Cg.gff"
ASV = "../genome-features/20190709-Olurida_v081.stringtie.gtf"
AllLoci = "../analyses/macau/macau-all-loci.bed"
AllLoci10x75 = "../analyses/macau/macau-all-loci.10x75.bed"
! bedtools intersect \
Bedtool options to use:
-u
- Write the original A entry once if any overlaps found in B, i.e. just report the fact >=1 hit was found
-a
- File A
-b
- File B
! echo "Total methylated loci:"
! cat {AllLoci} | wc -l
! echo "Loci differentially methylated between SS and HC populations:"
! cat {DML} | wc -l
!echo "Loci that overlap with genes:"
! bedtools intersect \
-u \
-a {DML} \
-b {gene} | wc -l
!echo "Loci that overlap with exons:"
! bedtools intersect \
-u \
-a {DML} \
-b {exon} | wc -l
!echo "Loci that overlap with coding sequences:"
! bedtools intersect \
-u \
-a {DML} \
-b {CDS} | wc -l
!echo "Loci that overlap with mRNA:"
! bedtools intersect \
-u \
-a {DML} \
-b {mRNA} | wc -l
!echo "Loci that overlap with transposable elements:"
! bedtools intersect \
-u \
-a {DML} \
-b {TE} | wc -l
!echo "Loci that overlap with alternative splice variants:"
! bedtools intersect \
-u \
-a {DML} \
-b {ASV} | wc -l
!echo "Loci that do not overlap with known features:"
! bedtools intersect \
-v \
-a {DML} \
-b {gene} {exon} {CDS} {mRNA} {TE} {ASV} | wc -l
! bedtools intersect -wb -a {DML} -b {gene} > ../analyses/BEDtools/DML-gene.txt
! bedtools intersect -wb -a {DML} -b {exon} > ../analyses/BEDtools/DML-exon.txt
! bedtools intersect -wb -a {DML} -b {CDS} > ../analyses/BEDtools/DML-CDS.txt
! bedtools intersect -wb -a {DML} -b {mRNA} > ../analyses/BEDtools/DML-mRNA.txt
! bedtools intersect -wb -a {DML} -b {TE} > ../analyses/BEDtools/DML-TE.txt
! bedtools intersect -wb -a {DML} -b {ASV} > ../analyses/BEDtools/DML-ASV.txt
! bedtools intersect -v -a {DML} -b {gene} {exon} {CDS} {mRNA} {TE} {ASV} > ../analyses/BEDtools/DML-intragenic.txt
! echo "genes"
! bedtools intersect -u -a {AllLoci} -b {gene} | wc -l
! echo "exon"
! bedtools intersect -u -a {AllLoci} -b {exon} | wc -l
! echo "CDS"
! bedtools intersect -u -a {AllLoci} -b {CDS} | wc -l
! echo "mRNA"
! bedtools intersect -u -a {AllLoci} -b {mRNA} | wc -l
! echo "TE"
! bedtools intersect -u -a {AllLoci} -b {TE} | wc -l
! echo "ASV"
! bedtools intersect -u -a {AllLoci} -b {ASV} | wc -l
! echo "intragenic"
! bedtools intersect -v -a {AllLoci} -b {gene} {exon} {CDS} {mRNA} {TE} {ASV} | wc -l
! bedtools intersect -wb -a {AllLoci} -b {gene} > ../analyses/BEDtools/AllLoci-gene.txt
! bedtools intersect -wb -a {AllLoci} -b {exon} > ../analyses/BEDtools/AllLoci-exon.txt
! bedtools intersect -wb -a {AllLoci} -b {CDS} > ../analyses/BEDtools/AllLoci-CDS.txt
! bedtools intersect -wb -a {AllLoci} -b {mRNA} > ../analyses/BEDtools/AllLoci-mRNA.txt
! bedtools intersect -wb -a {AllLoci} -b {TE} > ../analyses/BEDtools/AllLoci-TE.txt
! bedtools intersect -wb -a {AllLoci} -b {ASV} > ../analyses/BEDtools/AllLoci-ASV.txt
! bedtools intersect -v -a {AllLoci} -b {gene} {exon} {CDS} {mRNA} {TE} {ASV} > ../analyses/BEDtools/AllLoci-intragenic.txt
! echo "Total methylated loci (10x across 75% of samples):"
! cat ../analyses/macau/macau-all-loci.10x75.bed | wc -l
! echo "Loci associated with shell length (MACAU):"
! cat {macau75} | wc -l
!echo "Loci that overlap with genes:"
! bedtools intersect \
-u \
-a {macau75} \
-b {gene} | wc -l
!echo "Loci that overlap with exons:"
! bedtools intersect \
-u \
-a {macau75} \
-b {exon} | wc -l
!echo "Loci that overlap with coding sequences:"
! bedtools intersect \
-u \
-a {macau75} \
-b {CDS} | wc -l
!echo "Loci that overlap with mRNA:"
! bedtools intersect \
-u \
-a {macau75} \
-b {mRNA} | wc -l
!echo "Loci that overlap with transposable elements:"
! bedtools intersect \
-u \
-a {macau75} \
-b {TE} | wc -l
!echo "Loci that overlap with alternative splice variants:"
! bedtools intersect \
-u \
-a {macau75} \
-b {ASV} | wc -l
!echo "Loci that do not overlap with known features:"
! bedtools intersect \
-v \
-a {macau75} \
-b {gene} {exon} {CDS} {mRNA} {TE} {ASV} | wc -l
! bedtools intersect -wb -a {macau75} -b {gene} > ../analyses/BEDtools/macau75-gene.txt
! bedtools intersect -wb -a {macau75} -b {exon} > ../analyses/BEDtools/macau75-exon.txt
! bedtools intersect -wb -a {macau75} -b {CDS} > ../analyses/BEDtools/macau75-CDS.txt
! bedtools intersect -wb -a {macau75} -b {mRNA} > ../analyses/BEDtools/macau75-mRNA.txt
! bedtools intersect -wb -a {macau75} -b {TE} > ../analyses/BEDtools/macau75-TE.txt
! bedtools intersect -wb -a {macau75} -b {ASV} > ../analyses/BEDtools/macau75-ASV.txt
! bedtools intersect -v -a {macau75} -b {gene} {exon} {CDS} {mRNA} {TE} {ASV} > ../analyses/BEDtools/macau75-intragenic.txt
The actual merging will occur in a later RStudio notebook
! curl https://raw.githubusercontent.com/sr320/paper-oly-mbdbs-gen/master/analyses/Olgene_blastx_uniprot.05.tab \
> ../data/Olgene_blastx_uniprot.05.tab
#convert pipes to tab
!tr '|' '\t' < ../data/Olgene_blastx_uniprot.05.tab \
> ../data/Olgene_blastx_uniprot.05-20191122.tab
#Reduce the number of columns using awk. Sort, and save as a new file.
!awk -v OFS='\t' '{print $1, $3, $13}' \
< ../data/Olgene_blastx_uniprot.05-20191122.tab | sort \
> ../data/Olgene_blastx_uniprot.05-20191122-sort.tab
! head ../data/Olgene_blastx_uniprot.05-20191122-sort.tab
#Uniprot codes have ".1" appended, so those need to be removed.
#Isolate the contig column name with cut
#Flip order of characters with rev
#Delete last three characters with cut -c
#Flip order of characters with rev
#Add information as a new column to annotated table with paste
!cut -f1 temporary/olurida-blast-sort.tab \
| rev \
| cut -c 3- \
| rev \
> temporary/olurida-blast-sort2.tab
!curl http://owl.fish.washington.edu/halfshell/bu-alanine-wd/17-07-20/uniprot-SP-GO.sorted \
> ../data/uniprot-SP-GO-sorted.tab
! head ../data/uniprot-SP-GO-sorted.tab
Join the first column in the first file with the first column in the second file
The files are tab delimited, and the output should also be tab delimited (-t $'\t')
! join -1 2 -2 1 -t $'\t' \
../data/Olgene_blastx_uniprot.05-20191122-sort.tab \
../data/uniprot-SP-GO-sorted.tab \
> ../data/Oly_blastx_uniprot.tab
! head ../data/Oly_blastx_uniprot.tab