#!/usr/bin/env Rscript

# format_counts.R
#
# Reformat a tab-delimited counts table for downstream use:
#   a) keep only rows whose MIRNA column equals "Y";
#   b) drop columns 1 and 3 (by position);
#   c) use the remaining first column as the row identifier;
#   d) truncate every remaining column name to its first 3 characters;
#   e) rename those columns to "<ColonyID>_<Timepoint>" wherever the truncated
#      name matches an AzentaSampleName in the metadata CSV;
# then write the result as a TSV whose first column is called "Name".
#
# Usage:
#   Rscript format_counts.R <counts_file> <metadata_file> <output_file>

suppressPackageStartupMessages({
  library(dplyr)
  library(readr)
  library(stringr)
  library(tibble)
})

# Read arguments ----
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
  stop(
    paste(
      "Usage: Rscript format_counts.R",
      "<counts_file> <metadata_file> <output_file>"
    ),
    call. = FALSE
  )
}
counts_file <- args[1]
metadata_file <- args[2]
output_file <- args[3]

# Read files ----
# check.names = FALSE keeps the column headers exactly as written in the files.
counts_df <- read.delim(counts_file, check.names = FALSE) # tab-delimited .txt
metadata_df <- read.csv(metadata_file, check.names = FALSE) # .csv
# NOTE(review): read.csv may parse an all-digit AzentaSampleName column as
# numeric, which drops leading zeros ("001" -> 1) and would stop those samples
# from matching the 3-character prefixes below — confirm sample names are not
# zero-padded.

# Validate inputs early so failures name the actual problem ----
if (ncol(counts_df) < 3 || !"MIRNA" %in% colnames(counts_df)) {
  stop(
    "Counts file must have at least 3 columns, including a 'MIRNA' column: ",
    counts_file,
    call. = FALSE
  )
}
required_meta <- c("AzentaSampleName", "ColonyID", "Timepoint")
missing_meta <- setdiff(required_meta, colnames(metadata_df))
if (length(missing_meta) > 0) {
  stop(
    "Metadata file is missing required column(s): ",
    paste(missing_meta, collapse = ", "),
    call. = FALSE
  )
}

# a) Only keep rows with "Y" in the MIRNA column
# b) Remove columns 1 and 3 (Coords and MIRNA) by position
counts_df <- counts_df %>%
  filter(MIRNA == "Y") %>%
  select(-1, -3)

# c) Make the original column 2 (now the first column) the row names
counts_df <- counts_df %>%
  column_to_rownames(var = colnames(counts_df)[1])

# d) Only keep the first 3 characters of all remaining column names.
#    substr() is vectorized, so no per-element apply is needed.
sample_ids <- substr(colnames(counts_df), 1, 3)

# e) Rename columns by matching to metadata
#    (AzentaSampleName -> ColonyID_Timepoint)
# Named character vector: names are AzentaSampleName, values are the new names.
name_map <- metadata_df %>%
  mutate(new_name = paste(ColonyID, Timepoint, sep = "_")) %>%
  select(AzentaSampleName, new_name) %>%
  deframe()

# Apply the mapping where a match exists; unmatched columns keep their
# truncated name. match() returns the first hit, as name-based indexing would.
map_idx <- match(sample_ids, names(name_map))
is_mapped <- !is.na(map_idx)
new_colnames <- sample_ids
new_colnames[is_mapped] <- name_map[map_idx[is_mapped]]

if (any(!is_mapped)) {
  message(
    "No metadata match for column(s), names left unchanged: ",
    paste(sample_ids[!is_mapped], collapse = ", ")
  )
}
if (anyDuplicated(new_colnames) > 0) {
  # Truncation or the metadata mapping collapsed distinct samples onto one name.
  warning(
    "Duplicate output column name(s): ",
    paste(unique(new_colnames[duplicated(new_colnames)]), collapse = ", "),
    call. = FALSE
  )
}
colnames(counts_df) <- new_colnames

# Save output ----
# Row names are restored as a leading "Name" column before writing.
write_tsv(rownames_to_column(counts_df, var = "Name"), output_file)