---
title: "Story_Assignment_6"
output:
  pdf_document: default
  html_document: default
date: "2024-11-20"
---

#Noah Krebs

```{r}
library(tidyr)

# Read the original data and reshape every column into (Condition, Score)
# pairs, then save the long-format table next to the input file.
data <- read.csv("6 ReducingPain.csv")
long_data <- pivot_longer(
  data,
  cols = everything(),     # Select all columns to reshape
  names_to = "Condition",  # Name for the new variable column
  values_to = "Score"      # Name for the value column
)
write.csv(long_data, "6_ReducingPain_long.csv", row.names = FALSE)
```

```{r}
# NOTE(review): the chunk above writes "6_ReducingPain_long.csv" with columns
# Condition/Score, while this chunk reads a differently named file and the
# code below uses columns Treatment/PainLevel -- confirm that
# "6 ReducingPainLong.csv" is the intended input.
data_1 <- read.csv("6 ReducingPainLong.csv")

# Treat the grouping column as a factor so every model function downstream
# sees an explicit categorical variable.
data_1$Treatment <- factor(data_1$Treatment)
print(data_1)
```

```{r}
# Question 1b
# One-way ANOVA computed by hand: split the total sum of squares into a
# between-group part and an error (within-group) part, then form the F ratio.
overall_mean <- mean(data_1$PainLevel)
group_means <- tapply(data_1$PainLevel, data_1$Treatment, mean)
n <- table(data_1$Treatment) # group sizes, same level order as group_means

group_SS <- sum(n * (group_means - overall_mean)^2)
total_SS <- sum((data_1$PainLevel - overall_mean)^2)
error_SS <- total_SS - group_SS

df_total <- length(data_1$PainLevel) - 1
df_groups <- length(group_means) - 1
df_error <- df_total - df_groups

MS_group <- group_SS / df_groups
MS_error <- error_SS / df_error

F_value <- MS_group / MS_error
# Upper-tail probability of the F distribution gives the ANOVA p-value.
p_value <- pf(F_value, df_groups, df_error, lower.tail = FALSE)
print(F_value)
print(p_value)
```

```{r}
# Question 1 c-d
# Same ANOVA via aov(). The formula names the columns and `data` points at
# the long-format table that actually contains them.
anova <- aov(PainLevel ~ Treatment, data = data_1)
summary(anova)
# Conclusion: Since the p-value is less than .05, we can conclude that at
# least one of the three treatments possesses significantly different pain
# values from one of the other two treatments.
```

```{r}
```

```{r}
# Question 1 e-g
# Tukey HSD computed by hand from the long-format table `data_1`, followed by
# the built-in TukeyHSD() for comparison.

# TukeyHSD() works on factor terms, so make the grouping column a factor.
data_1$Treatment <- factor(data_1$Treatment)

group_means <- tapply(data_1$PainLevel, data_1$Treatment, mean)
group_means <- sort(group_means) # Order means from smallest to largest
print(group_means)

# Calculate pairwise differences. `[[` drops the element names so the
# resulting data frame gets plain row names.
pairwise_differences <- combn(names(group_means), 2, function(x) {
  gap <- abs(group_means[[x[1]]] - group_means[[x[2]]])
  data.frame(pair = paste(x[1], "-", x[2]), difference = gap)
}, simplify = FALSE)
# Combine into a data frame
pairwise_differences <- do.call(rbind, pairwise_differences)
print(pairwise_differences)

# Calculate group means for individual observations
group_means_map <- ave(data_1$PainLevel, data_1$Treatment, FUN = mean)

# Calculate Error SS (within-group sum of squares)
error_SS <- sum((data_1$PainLevel - group_means_map)^2)

# Calculate Error DF: number of observations minus number of groups
df_error <- nrow(data_1) - length(group_means)

# Calculate Error MS
MS_error <- error_SS / df_error
cat("Error Mean Square (MS_error):", MS_error, "\n")

# Calculate Standard Error (SE) of the difference for a specific pair
# (Audiobook vs Music). The same SE is applied to every pair below, which is
# only appropriate when all groups have the same size.
n <- table(data_1$Treatment)
SE <- sqrt(MS_error * (1 / n[["Audiobook"]] + 1 / n[["Music"]]))
cat("Standard Error (SE):", SE, "\n")

# ptukey() expects the studentized range statistic
#   q = |difference| / sqrt(MS_error / 2 * (1 / n_i + 1 / n_j)),
# i.e. the difference divided by SE / sqrt(2), not by SE itself.
pairwise_differences$q_stat <- pairwise_differences$difference / (SE / sqrt(2))
print(pairwise_differences)

k <- length(group_means)
critical_q <- qtukey(0.95, k, df_error)
cat("Critical q (alpha = 0.05):", critical_q, "\n")
pairwise_differences$p_value <- ptukey(
  pairwise_differences$q_stat, k, df_error,
  lower.tail = FALSE
)
print(pairwise_differences)

# Built-in Tukey HSD on the same data, for comparison with the manual table.
anova_result <- aov(PainLevel ~ Treatment, data = data_1)
tukey_result <- TukeyHSD(anova_result)
print(tukey_result)
# After looking at the results I observed a significant difference in the
# music and earphones comparison and the audiobook and earphones comparison,
# but not in the music and audiobook comparison. Thus we can conclude that
# exposure to music and audiobooks lessens pain and both are significantly
# better than the earphones treatment.
```

```{r}
# Question 1
# Tukey short
# Self-contained version: the 30 pain scores are typed in directly, 10 per
# treatment, in the order Audiobook, Music, Earphones.
data <- data.frame(
  # Explicit factor: TukeyHSD() works on factor terms of the fitted model.
  Treatment = factor(rep(c("Audiobook", "Music", "Earphones"), each = 10)),
  PainLevel = c(
    5, 6, 7, 2, 6, 3, 4, 8, 5, 4,
    5, 4, 4, 7, 6, 4, 6, 4, 3, 5,
    4, 8, 7, 6, 10, 6, 10, 8, 5, 6
  )
)

aov_res <- aov(formula = PainLevel ~ Treatment, data = data)
summary(aov_res)

tukey_res <- TukeyHSD(x = aov_res, ordered = TRUE, conf.level = 0.95)
print(tukey_res)
```

```{r}
# Question 2
# Part A
# Find the smallest per-group sample size n for which a one-way ANOVA F test
# reaches the desired power, using the noncentral F distribution.
effect_difference <- 0.5 # Smallest difference to detect
error_ms <- 6.5          # Error Mean Square
alpha <- 0.05            # Significance level
power <- 0.9             # Desired power
num_groups <- 4          # Number of streams

# Standardized difference; computed for reference, not used by the loop below.
f <- effect_difference / sqrt(error_ms)

# Assumed alternative: group means equally spaced, `effect_difference` apart
# (0, 0.5, 1.0, 1.5 for four groups).
group_means <- effect_difference * (seq_len(num_groups) - 1)
overall_mean <- mean(group_means)

df1 <- num_groups - 1 # numerator df does not depend on n
n <- 1                # incremented before use, so the search starts at n = 2
achieved_power <- 0
while (achieved_power < power) {
  n <- n + 1
  total_sample_size <- n * num_groups
  df2 <- total_sample_size - num_groups
  # Noncentrality parameter: n * sum of squared mean deviations / error MS
  ncp <- n * sum((group_means - overall_mean)^2) / error_ms
  f_crit <- qf(p = alpha, df1 = df1, df2 = df2, lower.tail = FALSE)
  # Power = P(noncentral F exceeds the critical value)
  achieved_power <- pf(
    q = f_crit, df1 = df1, df2 = df2, ncp = ncp,
    lower.tail = FALSE
  )
}
print(n)
```

```{r}
# Question 2
# Part B
# To reach a power of at least .9, one has to take 4 groups of 75 samples.
# 300 total
# Each sample of tag costs $25
# 300 x 25 = $7500, which is less than $100,000. You will be able to complete
# the experiment with the desired power!