Evaluate whether GC content calculated on the RNA sequences using the Ensembl Perl API matches the one calculated using R and the genomic DNA sequence. ```{r} library(ensembldb) edb <- EnsDb("/Users/jo/Projects/EnsDbs/98/EnsDb.Hsapiens.v98.sqlite") chrs <- c(1:22, "X", "Y") txs <- transcripts(edb, filter = ~ seq_name %in% chrs) any(is.na(mcols(txs)$gc_content)) ``` Get the sequences using R functionality. ```{r} library(GenomicFeatures) dna <- getGenomeTwoBitFile(edb) tx_exons <- exonsBy(edb, "tx", filter = ~ seq_name %in% chrs) tx_seqs <- extractTranscriptSeqs(dna, tx_exons) library(Biostrings) gc_prop <- letterFrequency(tx_seqs, letters = "GC", as.prob = TRUE)[, 1] names(gc_prop) <- names(tx_seqs) ``` Compare GC contents... ```{r} gc_prop_perl <- mcols(txs)$gc_content names(gc_prop_perl) <- names(txs) stopifnot(all(names(gc_prop) %in% names(gc_prop_perl))) gc_prop_perl <- gc_prop_perl[names(gc_prop)] head(gc_prop_perl) head(gc_prop) library(testthat) expect_equal(gc_prop_perl, 100 * gc_prop) ```