# Directory that is searched for the input tables.
path <- "../results/MEGAN"

# Collect every file whose name ends in ".tab", with full paths.
# `pattern` is a regular expression (not a shell glob), so the dot is escaped
# and the match is anchored to the end of the file name.
files <- list.files(path, pattern = "\\.tab$", full.names = TRUE)

# Fail early with a clear message instead of an obscure error further down.
if (length(files) == 0) {
  stop("No .tab files found in ", path, call. = FALSE)
}
# Fraction of reads Arthropoda
# Read every file in `files` and stack the results into one long table with
# the columns `phylum`, `count` and `sample`.
# - col_names = FALSE: the first row of each file is read as data, and the two
#   columns are then named explicitly.
# - `sample` is the file name with the fixed suffix removed. fixed() makes the
#   suffix match literally; without it each "." would be a regex wildcard.
df <- files %>%
  map_df(~ {
    read_tsv(.x, col_names = FALSE) %>%
      set_names(c("phylum", "count")) %>%
      mutate(
        sample = str_remove(
          basename(.x),
          fixed(".MEGAN.taxonName_to_count_summarized-phylum.tab")
        )
      )
  })
# Interactive table: one row per sample with the Arthropoda fraction of all
# counted reads and the raw counts for five selected phyla.
datatable(
  df %>%
    # One column per phylum; phyla absent from a sample get a count of 0.
    pivot_wider(names_from = phylum, values_from = count, values_fill = 0) %>%
    # Sum over every phylum column (everything except `sample`) rather than a
    # hard-coded column range, so the total stays correct whatever the number
    # of phyla present in the input.
    mutate(total = rowSums(select(., -sample), na.rm = TRUE)) %>%
    mutate(fraction = Arthropoda / total) %>%
    select(
      "sample", "fraction", "Arthropoda", "Chordata", "Microsporidia",
      "Proteobacteria", "Bacteroidetes"
    )
)
# Histogram of the per-sample Arthropoda fraction (Arthropoda / all phyla).
df %>%
  # One column per phylum; phyla absent from a sample get a count of 0.
  pivot_wider(names_from = phylum, values_from = count, values_fill = 0) %>%
  # Sum over every phylum column (everything except `sample`) rather than a
  # hard-coded column range, so the total stays correct whatever the number
  # of phyla present in the input.
  mutate(total = rowSums(select(., -sample), na.rm = TRUE)) %>%
  mutate(fraction = Arthropoda / total) %>%
  select(
    "sample", "fraction", "Arthropoda", "Chordata", "Microsporidia",
    "Proteobacteria", "Bacteroidetes"
  ) %>%
  ggplot(aes(x = fraction)) +
  geom_histogram(bins = 50, fill = "blue", color = "black") +
  # The plotted value is a ratio, not a percentage, so the axis says so.
  labs(
    x = "Fraction",
    y = "Frequency",
    title = "Histogram of Arthropoda Fraction of reads"
  ) +
  theme_minimal()
# Bar plot of the mean count per phylum and sample for five selected phyla.
df %>%
  # %in% tests set membership row by row. `==` against a length-5 vector would
  # recycle that vector along the column and silently drop most matching rows.
  filter(
    phylum %in% c(
      "Arthropoda", "Chordata", "Microsporidia", "Proteobacteria",
      "Bacteroidetes"
    )
  ) %>%
  group_by(phylum, sample) %>%
  summarise(total = mean(count, na.rm = TRUE)) %>%
  # Drop the grouping that summarise() leaves behind.
  ungroup() %>%
  ggplot(aes(x = phylum, y = total)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Category", y = "Total Count")
# Wide table: one row per sample, one column per phylum (missing phyla filled
# with 0), then restricted to `sample` and five selected phylum columns.
df_wider <- select(
  pivot_wider(
    df,
    names_from = phylum,
    values_from = count,
    values_fill = 0
  ),
  "sample",
  "Arthropoda",
  "Chordata",
  "Microsporidia",
  "Proteobacteria",
  "Bacteroidetes"
)
LS0tCnRpdGxlOiAiTm9uLXRhcmdldCB0YXhhIgphdXRob3I6IFN0ZXZlbiBSb2JlcnRzCmRhdGU6ICJgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVkICVCLCAlWScpYCIgCm91dHB1dDogCiAgIyBnaXRodWJfZG9jdW1lbnQ6CiAgIyAgIHRvYzogdHJ1ZQogICMgICB0b2NfZGVwdGg6IDMKICAjICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgIyAgIGh0bWxfcHJldmlldzogdHJ1ZQogIGh0bWxfZG9jdW1lbnQ6CiAgICB0aGVtZTogcmVhZGFibGUKICAgIGhpZ2hsaWdodDogemVuYnVybgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgY29kZV9mb2xkaW5nOiBzaG93CiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGthYmxlRXh0cmEpCmxpYnJhcnkoREVTZXEyKQpsaWJyYXJ5KHBoZWF0bWFwKQpsaWJyYXJ5KFJDb2xvckJyZXdlcikKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KERUKQpsaWJyYXJ5KGZvcm1hdHRhYmxlKQpsaWJyYXJ5KEJpb3N0cmluZ3MpCmxpYnJhcnkoc3BhYSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KAogIGVjaG8gPSBUUlVFLCAgICAgICAgICMgRGlzcGxheSBjb2RlIGNodW5rcwogIGV2YWwgPSBGQUxTRSwgICAgICAgICAjIEV2YWx1YXRlIGNvZGUgY2h1bmtzCiAgd2FybmluZyA9IEZBTFNFLCAgICAgIyBIaWRlIHdhcm5pbmdzCiAgbWVzc2FnZSA9IEZBTFNFLCAgICAgIyBIaWRlIG1lc3NhZ2VzCiAgZmlnLndpZHRoID0gNiwgICAgICAgIyBTZXQgcGxvdCB3aWR0aCBpbiBpbmNoZXMKICBmaWcuaGVpZ2h0ID0gNCwgICAgICAjIFNldCBwbG90IGhlaWdodCBpbiBpbmNoZXMKICBmaWcuYWxpZ24gPSAiY2VudGVyIiAjIEFsaWduIHBsb3RzIHRvIHRoZSBjZW50ZXIKKQpgYGAKCgpgYGB7ciwgZXZhbD1UUlVFfQoKIyBzcGVjaWZ5IHRoZSBwYXRoIHRvIHlvdXIgZmlsZXMKcGF0aCA8LSAiLi4vcmVzdWx0cy9NRUdBTiIKCiMgZ2V0IGFsbCBmaWxlcyBpbiB0aGUgZGlyZWN0b3J5CmZpbGVzIDwtIGxpc3QuZmlsZXMocGF0aCwgcGF0dGVybiA9ICIqLnRhYiIsIGZ1bGwubmFtZXMgPSBUUlVFKSAKCgpgYGAKCgojIEZyYWN0aW9uIG9mIHJlYWRzIEFydGhyb3BvZGEKCgpgYGB7ciwgZXZhbD1UUlVFfQpkZiA8LSBmaWxlcyAlPiUKICBtYXBfZGYofnsKICAgIHJlYWRfdHN2KC54LCBjb2xfbmFtZXMgPSBGQUxTRSkgJT4lCiAgICBzZXRfbmFtZXMoYygicGh5bHVtIiwgImNvdW50IikpICU+JQogICAgbXV0YXRlKHNhbXBsZSA9IHN0cl9yZW1vdmUoYmFzZW5hbWUoLngpLCAiLk1FR0FOLnRheG9uTmFtZV90b19jb3VudF9zdW1tYXJpemVkLXBoeWx1bS50YWIiKSkgICMgcmVtb3ZlIHRoZSBleHRlbnNpb24gZnJvbSB0aGUgZmlsZW5hbWUKICB9KQoKCmBgYApgYGB7ciwgZXZhbD1UUlVFfQpkYXRhdGFibGUoZGYpCgpgYGAKCmBg
YHtyLCBldmFsPVRSVUV9CmRhdGF0YWJsZSgKICBkZiAlPiUKIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSBwaHlsdW0sIHZhbHVlc19mcm9tID0gY291bnQsIHZhbHVlc19maWxsID0gMCkgJT4lCiBtdXRhdGUodG90YWwgPSByb3dTdW1zKHNlbGVjdCguLCAyOjM1KSwgbmEucm0gPSBUUlVFKSkgJT4lCiBtdXRhdGUoZnJhY3Rpb24gPSBBcnRocm9wb2RhL3RvdGFsKSAlPiUKIHNlbGVjdCgic2FtcGxlIiwgImZyYWN0aW9uIiwgIkFydGhyb3BvZGEiLCAiQ2hvcmRhdGEiLCAiTWljcm9zcG9yaWRpYSIsICJQcm90ZW9iYWN0ZXJpYSIsICJCYWN0ZXJvaWRldGVzIikKKQpgYGAKYGBge3IsIGV2YWw9VFJVRX0KZGYgJT4lCiBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gcGh5bHVtLCB2YWx1ZXNfZnJvbSA9IGNvdW50LCB2YWx1ZXNfZmlsbCA9IDApICU+JQogbXV0YXRlKHRvdGFsID0gcm93U3VtcyhzZWxlY3QoLiwgMjozNSksIG5hLnJtID0gVFJVRSkpICU+JQogbXV0YXRlKGZyYWN0aW9uID0gQXJ0aHJvcG9kYS90b3RhbCkgJT4lCiBzZWxlY3QoInNhbXBsZSIsICJmcmFjdGlvbiIsICJBcnRocm9wb2RhIiwgIkNob3JkYXRhIiwgIk1pY3Jvc3BvcmlkaWEiLCAiUHJvdGVvYmFjdGVyaWEiLCAiQmFjdGVyb2lkZXRlcyIpICU+JQogIGdncGxvdChhZXMoeD1mcmFjdGlvbikpICsKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gNTAsIGZpbGw9ImJsdWUiLCBjb2xvcj0iYmxhY2siKSArCiAgbGFicyh4PSJQZXJjZW50IiwgeT0iRnJlcXVlbmN5IiwgdGl0bGU9Ikhpc3RvZ3JhbSBvZiBBcnRocjBwb2RhIEZyYWN0aW9uIG9mIHJlYWRzIikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgoKCmBgYHtyLCBldmFsPVRSVUV9CmRmICU+JQogIGZpbHRlcihwaHlsdW0gPT0gYygiQXJ0aHJvcG9kYSIsICJDaG9yZGF0YSIsICJNaWNyb3Nwb3JpZGlhIiwgIlByb3Rlb2JhY3RlcmlhIiwgIkJhY3Rlcm9pZGV0ZXMiKSkgJT4lCiAgZ3JvdXBfYnkocGh5bHVtLCBzYW1wbGUpICU+JQogIHN1bW1hcmlzZSh0b3RhbCA9IG1lYW4oY291bnQsIG5hLnJtID0gVFJVRSkpICU+JQogIGdncGxvdChhZXMoeCA9IHBoeWx1bSwgeSA9IHRvdGFsKSkgKwogIGdlb21fY29sKHBvc2l0aW9uID0gImRvZGdlIikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIGhqdXN0ID0gMSkpICsKICBsYWJzKHggPSAiQ2F0ZWdvcnkiLCB5ID0gIlRvdGFsIENvdW50IikKCmBgYAoKYGBge3IsIGV2YWw9VFJVRX0KZGZfd2lkZXIgPC0gZGYgJT4lCiBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gcGh5bHVtLCB2YWx1ZXNfZnJvbSA9IGNvdW50LCB2YWx1ZXNfZmlsbCA9IDApICU+JQogc2VsZWN0KCJzYW1wbGUiLCAiQXJ0aHJvcG9kYSIsICJDaG9yZGF0YSIsICJNaWNyb3Nwb3JpZGlhIiwgIlByb3Rlb2JhY3RlcmlhIiwgIkJhY3Rlcm9pZGV0ZXMiKQoKYGBgCgpgYGB7ciwgZXZhbD1UUlVFfQpkYXRhdGFibGUoZGZfd2lkZXIpCmBgYAoKCgoK