#!/usr/bin/env Rscript

suppressPackageStartupMessages({
  library(dplyr)
  library(readr)
  library(stringr)
  library(tibble)
})

# Command-line interface: exactly three positional arguments are required,
# in the order counts file, metadata file, output file.
args <- commandArgs(trailingOnly = TRUE)
has_expected_args <- length(args) == 3
if (!has_expected_args) {
  stop("Usage: Rscript format_counts.R <counts_file> <metadata_file> <output_file>")
}

# Bind each positional argument to a descriptive name.
counts_file <- args[[1]]
metadata_file <- args[[2]]
output_file <- args[[3]]

# Read files
# The counts table is tab-delimited (.txt); check.names = FALSE keeps the
# column headers exactly as they appear in the file.
counts_df <- read.delim(counts_file, check.names = FALSE)

# The metadata table is comma-separated (.csv). AzentaSampleName is read as
# text because it is later compared with 3-character column-name prefixes; left
# to automatic type conversion, an all-digit ID such as "007" would be parsed
# as the number 7 and could never match.
metadata_df <- read.csv(
  metadata_file,
  check.names = FALSE,
  colClasses = c(AzentaSampleName = "character")
)

# a) Keep only the rows flagged "Y" in the MIRNA column
mirna_rows <- filter(counts_df, MIRNA == "Y")

# b) Drop columns 1 and 3 by position (Coords and MIRNA)
mirna_rows <- select(mirna_rows, -c(1, 3))

# c) The former column 2 is now the first column; move it into the rownames
id_column <- names(mirna_rows)[1]
counts_df <- column_to_rownames(mirna_rows, var = id_column)

# d) Keep only the first 3 characters of every remaining column name.
#    substr() is vectorised, so no per-element apply is needed and the result
#    is always a plain character vector, even when there are no columns.
short_names <- substr(colnames(counts_df), 1, 3)

# Truncation can map different columns onto the same 3-character name; report
# that instead of letting it pass silently.
clashing <- unique(short_names[duplicated(short_names)])
if (length(clashing) > 0) {
  warning(
    "Column names are no longer unique after truncation to 3 characters: ",
    paste(clashing, collapse = ", "),
    call. = FALSE
  )
}
colnames(counts_df) <- short_names

# Top-level expression: auto-printed when run via Rscript, showing the
# truncated names for inspection.
names(counts_df)

# e) Rename columns by matching to metadata (AzentaSampleName → ColonyID_Timepoint)
# Top-level expression: auto-printed when run via Rscript, listing the
# metadata columns for inspection.
colnames(metadata_df)

# Build the lookup as a named vector: names are AzentaSampleName values,
# elements are the replacement labels "<ColonyID>_<Timepoint>".
sample_labels <- select(metadata_df, AzentaSampleName, ColonyID, Timepoint)
sample_labels <- mutate(
  sample_labels,
  new_name = paste(ColonyID, Timepoint, sep = "_")
)
name_map <- deframe(select(sample_labels, AzentaSampleName, new_name))

# Replace only the column names that have an entry in the lookup; every other
# column keeps its current name.
new_colnames <- colnames(counts_df)
map_pos <- match(new_colnames, names(name_map))
is_mapped <- !is.na(map_pos)
new_colnames[is_mapped] <- name_map[map_pos[is_mapped]]
colnames(counts_df) <- new_colnames

# Write the result as TSV, with the rownames restored as a leading "Name" column
output_tbl <- rownames_to_column(counts_df, var = "Name")
write_tsv(output_tbl, output_file)