---
title: "2_6_merging_pot_and_crab_data.Rmd"
author: "Aidan Coyle"
date: "8/4/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Introduction

In the previous script, we finished by merging our Tidbit data with our pot data, giving us a dataframe of pot data with temperature information.

In this script, we'll merge that dataframe with our crab ID information. This means that for each crab, we'll have the temperature in the pot at the time the crab was caught.

#### Load libraries (and install if necessary), and load packages

```{r libraries, message=FALSE, warning=FALSE}
# Add all required libraries here
list.of.packages <- c("tidyverse", "lubridate")

# Get names of all required packages that aren't installed
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]

# Install all new packages
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Load all required libraries
# library() stops with an error when a package cannot be attached, whereas
# require() only returns FALSE and lets the script carry on without it.
# invisible() keeps the value returned by lapply() out of the knitted output.
invisible(lapply(list.of.packages, library, character.only = TRUE))
```

# Load crab data and pot data with temperature

```{r}
# We're loading the data from earlier, which contains all crabs in which BCS
# condition was examined
crab_dat <- read.csv(file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/crab_data/BCS_cleaned.csv")

# Load in the pot data to which we've added temperature
pot_dat <- read.csv(file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/pot_data_with_temperature.csv")

# Issue: crab_dat doesn't have a single unique identifying column in common
# with pot_dat (such as time of haul)
# Solution: create one from five columns!
# Both dataframes have year, project, trip number (AKA leg), location, and pot
# number as columns

# Now modify some of the values to be easier for R to read (and make more sense
# to us when in an ID column)
# We'll organize these so we're doing them in each dataframe in a row (make one
# change in both, not making all changes in one, then moving to the other)

# Change name of projects
# Index the column directly with %in% rather than df[rows, ]$col <- value:
# the row-subset form raises an error when no row matches or when Project
# contains NA, while this form simply leaves such rows untouched
pot_dat$Project[pot_dat$Project %in% "Red King Crab Survey"] <- "RKC"
pot_dat$Project[pot_dat$Project %in% "Tanner Crab Survey"] <- "Tanner"
crab_dat$Project[crab_dat$Project %in% "Red King Crab Survey"] <- "RKC"
crab_dat$Project[crab_dat$Project %in% "Tanner Crab Survey"] <- "Tanner"

# Remove spaces in locations
# gsub() replaces every space; sub() would stop after the first one and leave
# a space behind in any location name containing two or more
pot_dat$Location <- gsub(" ", "_", pot_dat$Location, fixed = TRUE)
crab_dat$Location <- gsub(" ", "_", crab_dat$Location, fixed = TRUE)

# Filter out all crab data from before 2005, which is when temperature data
# began to be collected
# Also filter out data from after 2019, which is when our temperature data ends
crab_dat <- crab_dat %>%
  filter(Year >= 2005 & Year <= 2019)

# Examine all locations in each dataframe, see if they match
# Each setdiff() prints the values found in the first dataframe but not in the
# second, so two empty results mean the names match. (Comparing the name
# vectors with == would pair them up by position and recycle the shorter one.)
setdiff(unique(crab_dat$Location), unique(pot_dat$Location))
setdiff(unique(pot_dat$Location), unique(crab_dat$Location))

# Examine survey names in each dataframe, see if they match
setdiff(unique(crab_dat$Project), unique(pot_dat$Project))
setdiff(unique(pot_dat$Project), unique(crab_dat$Project))

# Create pot ID column
pot_dat$pot_ID <- paste(
  pot_dat$Year, pot_dat$Project, pot_dat$Trip.No,
  pot_dat$Location, pot_dat$Pot.No,
  sep = "_"
)
crab_dat$pot_ID <- paste(
  crab_dat$Year, crab_dat$Project, crab_dat$Trip.No,
  crab_dat$Location, crab_dat$Pot.No,
  sep = "_"
)

# The join below returns one output row per matching pot_dat row, so a pot_ID
# that appears more than once in pot_dat would duplicate every crab from that
# pot. Warn rather than stop, so the script still runs to completion.
if (anyDuplicated(pot_dat$pot_ID) > 0) {
  warning(
    sum(duplicated(pot_dat$pot_ID)),
    " row(s) of pot_dat repeat an earlier pot_ID; ",
    "crabs from those pots will appear more than once after the join",
    call. = FALSE
  )
}

# Join pot and crab dataframes
# Columns present in both keep their crab_dat name; the pot_dat copy gets ".y"
full_crab_dat <- left_join(
  x = crab_dat,
  y = pot_dat,
  by = "pot_ID",
  suffix = c("", ".y")
)

# Drop all crabs without values for temperature
full_crab_dat <- full_crab_dat[!is.na(full_crab_dat$temp), ]

# Drop irrelevant columns
full_crab_dat <- full_crab_dat %>%
  dplyr::select(-c(
    pot_ID, Year.y, Project.y, Trip.No.y, Location.y, Pot.No.y,
    Buoy.No, tidbit_id
  ))

# Write out data
write.csv(
  full_crab_dat,
  file = "../output/ADFG_SE_AK_pot_surveys/cleaned_data/crab_data/BCS_examined_crab_with_temperature.csv",
  row.names = FALSE
)
```