#In this script, I'll visualize my preliminary data to see what differences there are between eelgrass and bare patches across sites. #Step 1: Import data averageAreaAdjustedMerged <- read.csv(file = "/Users/yaamini/Documents/project-oyster-oa/analyses/DNR_Preliminary_Analyses_20170321/Oyster-AverageAdjustedMergedArea.csv", header = TRUE, na.strings = "NA") averageAreaAdjustedMerged <- within(averageAreaAdjustedMerged, rm("X")) #Removing the extra column "X" averageAreaAdjustedMerged[is.na(averageAreaAdjustedMerged)] <- 0 #Replace NAs with 0s head(averageAreaAdjustedMerged) #Confirm changes #Step 2: Nonmetric multidimensional scaling plot (NMDS) #Load the source file for the biostats package source("biostats.R") install.packages("vegan") #Install vegan package library(vegan) #Make sure first column of protein names is recognized as row names instead of values area.protID2 <- averageAreaAdjustedMerged[-1] rownames(area.protID2) <- averageAreaAdjustedMerged[,1] #Transpose the file so that rows and columns are switched and normalized by log(x+1) area.t <- t(area.protID2) area.tra <- (area.t + 1) area.tra <- data.trans(area.tra, method = 'log', plot = FALSE) #Make MDS dissimilarity matrix proc.nmds <- metaMDS(area.tra, distance = 'bray', k = 2, trymax = 100, autotransform = FALSE) #Make preliminary NMDS plot fig.nmds <- ordiplot(proc.nmds, choices=c(1,2), type='none', display='sites', xlab='Axis 1', ylab='Axis 2', cex=0.5) points(fig.nmds, 'sites', col=c(1:5), pch=c(15, 15, 15, 15, 15, 20, 20, 20, 20, 20)) legend(x= , y=, pch=c(15, 15, 15, 15, 15, 20, 20, 20, 20, 20), legend=c('bareCaseInlet', 'bareFidalgoBay', 'bareWillapaBay', 'bareSkokomishRiver', 'barePortGamble', 'eelgrassCaseInlet', 'eelgrassFidalgoBay', 'eelgrassWillapaBay', 'eelgrassSkokomishRiver', 'eelgrassPortGamble'), col=c(1:5)) #Export preliminary NMDS plot png(filename = "preliminaryNMDS.png") fig.nmds <- ordiplot(proc.nmds, choices=c(1,2), type='none', display='sites', xlab='Axis 1', ylab='Axis 2', cex=0.5) points(fig.nmds, 'sites', col=c(1:5), pch=c(15, 15, 15, 15, 15, 20, 20, 20, 20, 20)) legend(x= , y=, pch=c(15, 15, 15, 15, 15, 20, 20, 20, 20, 20), legend=c('bareCaseInlet', 'bareFidalgoBay', 'bareWillapaBay', 'bareSkokomishRiver', 'barePortGamble', 'eelgrassCaseInlet', 'eelgrassFidalgoBay', 'eelgrassWillapaBay', 'eelgrassSkokomishRiver', 'eelgrassPortGamble'), col=c(1:5)) dev.off() #Modify NMDS plot #I showed my preliminary NMDS plot to Emma and she made a change to my code, removing the score column from my data. #Transpose the file so that rows and columns are switched and normalized by log(x+1) area2.t <- t(area.protID2[,1:10]) area2.tra <- (area2.t+1) area2.tra <- data.trans(area2.tra, method = 'log', plot = FALSE) #Make MDS dissimilarity matrix proc.nmds <- metaMDS(area.tra, distance = 'bray', k = 2, trymax = 100, autotransform = FALSE) #Make figure fig.nmds <- ordiplot(proc.nmds, choices=c(1,2), type='none', display='sites', xlab='Axis 1', ylab='Axis 2', cex=0.5) #bare=circle #eelgrass=triangle #case=red #fidalgo=blue #willapa=black #skokomish=green #gamble=magenta points(fig.nmds, 'sites', col=c('red', 'blue', 'black', 'green', 'magenta','red', 'blue', 'black', 'green', 'magenta'), pch=c(rep(16,5), rep(17,5))) legend(-0.01,0.02, pch=c(rep(16,5), 1, 2), legend=c('Case Inlet', "Fidalgo Bay", "Willapa Bay", "Skokomish", "Port Gamble", "Bare", "Eelgrass"), col=c('red', 'blue', 'black', 'green', 'magenta', 'black', 'black')) ################################################### #Step 3: Heatmap #Install package install.packages("pheatmap") library(pheatmap) #Data should be log(x) or log(x+1) transformed for this analysis, so I'll use my area.tra dataset. #Create heatmap pheatmap(area.tra, cluster_rows = T, cluster_cols = T, clustering_distance_rows = 'euclidean', clustering_distance_cols = 'euclidean', clustering_method = 'average', show_rownames = T, show_colnames = F) #Export preliminary heatmap as a .png png(filename = "preliminaryHeatmap.png") pheatmap(area.tra, cluster_rows = T, cluster_cols = T, clustering_distance_rows = 'euclidean', clustering_distance_cols = 'euclidean', clustering_method = 'average', show_rownames = T, show_colnames = F) dev.off() #Modify heatmap parameters #Step 4: Bubble Plots #ggplot bubble plots library(ggplot2) library(ggthemes) #I had to use geom_jitter because my points were too close together, you may want to use geom_point #width and height are specific to jitter #alpha makes the bubbles transparent #geom_hline puts horizontal lines between y-axis values ggplot(data) + geom_jitter(aes(x=column name with x values, y=column name with y values, colour=column to dictate point color, size=column to dictate bubble sizes), alpha=0.5, width=0.2, height=0.2) + labs(y='y-axis label') + geom_hline(yintercept=c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5), color='grey80') + theme_tufte()