---
title: "Story_Assignment_6"
output:
  pdf_document: default
  html_document: default
date: "2024-11-20"
---
# Noah Krebs
```{r}
library(tidyr)
# Read the original table from disk.
data <- read.csv(file = "6 ReducingPain.csv")

# Stack every column: the column names go into `Condition`,
# the cell values go into `Score`.
long_data <- pivot_longer(
  data,
  cols = everything(),
  names_to = "Condition",
  values_to = "Score"
)

# Save the reshaped table, omitting row names.
write.csv(long_data, file = "6_ReducingPain_long.csv", row.names = FALSE)
```

```{r}
# Load the long-format table used by the analyses below and display it.
data_1 <- read.csv(file = "6 ReducingPainLong.csv")
print(x = data_1)
```
```{r}
# Question 1b
# One-way ANOVA computed by hand from `data_1`.

# Grand mean, per-treatment means, and per-treatment counts.
overall_mean <- with(data_1, mean(PainLevel))
group_means <- with(data_1, tapply(PainLevel, Treatment, mean))
n <- table(data_1$Treatment)

# Sums of squares: between groups, total, and (by subtraction) error.
group_SS <- sum(n * (group_means - overall_mean)^2)
total_SS <- sum(with(data_1, (PainLevel - overall_mean)^2))
error_SS <- total_SS - group_SS

# Degrees of freedom: N - 1 total, k - 1 for groups, remainder for error.
df_total <- nrow(data_1) - 1
df_groups <- length(group_means) - 1
df_error <- df_total - df_groups

# Mean squares and the F ratio.
MS_group <- group_SS / df_groups
MS_error <- error_SS / df_error
F_value <- MS_group / MS_error

# Upper-tail probability of the F ratio.
p_value <- pf(F_value, df1 = df_groups, df2 = df_error, lower.tail = FALSE)

print(F_value)
print(p_value)
```
```{r}
# Question 1 c-d
# One-way ANOVA of pain level by treatment. The model is fitted on `data_1`,
# the data frame that holds PainLevel and Treatment, so the formula can use
# plain column names and the summary row is labelled `Treatment`.
anova <- aov(PainLevel ~ Treatment, data = data_1)
summary(anova)
# Conclusion: Since the p-value is less than .05, we can conclude that at
# least one of the three treatments has a significantly different mean pain
# level from at least one of the other two treatments.
```

```{r}

```
```{r}
# Question 1 e-g
# Manual Tukey-Kramer pairwise comparisons followed by the built-in TukeyHSD.
# Every quantity is computed from `data_1`, the data frame that holds the
# PainLevel and Treatment columns.
group_means <- tapply(data_1$PainLevel, data_1$Treatment, mean)
group_means <- sort(group_means) # Order means from smallest to largest

print(group_means)

# Absolute difference between every pair of group means
pair_names <- combn(names(group_means), 2)
pairwise_differences <- data.frame(
  pair = paste(pair_names[1, ], "-", pair_names[2, ]),
  difference = abs(as.numeric(
    group_means[pair_names[1, ]] - group_means[pair_names[2, ]]
  ))
)
print(pairwise_differences)

# Mean of each observation's own treatment group, one value per row
group_means_map <- ave(data_1$PainLevel, data_1$Treatment, FUN = mean)

# Error (within-group) sum of squares
error_SS <- sum((data_1$PainLevel - group_means_map)^2)

# Error degrees of freedom: number of observations minus number of groups
df_error <- nrow(data_1) - length(group_means)

# Error mean square
MS_error <- error_SS / df_error

cat("Error Mean Square (MS_error):", MS_error, "\n")

# Standard error for one specific pair (Audiobook vs Music). It is reused for
# every pair below, which is only correct when all groups have the same size.
n <- table(data_1$Treatment)
SE <- sqrt(MS_error * as.numeric(1 / n["Audiobook"] + 1 / n["Music"]))

cat("Standard Error (SE):", SE, "\n")

pairwise_differences$q_stat <- pairwise_differences$difference / SE

print(pairwise_differences)

k <- length(group_means)
critical_q <- qtukey(0.95, k, df_error)

# ptukey()/qtukey() work on the studentized range scale, i.e.
# difference / sqrt(MS_error / 2 * (1 / n_i + 1 / n_j)). q_stat was divided by
# sqrt(MS_error * (1 / n_i + 1 / n_j)), so it must be multiplied by sqrt(2)
# before being passed to ptukey() (equivalently, compare q_stat * sqrt(2)
# with critical_q). This makes the p-values agree with TukeyHSD().
pairwise_differences$p_value <- ptukey(
  pairwise_differences$q_stat * sqrt(2), k, df_error,
  lower.tail = FALSE
)
print(pairwise_differences)

# Cross-check with the built-in Tukey HSD procedure.
anova_result <- aov(PainLevel ~ Treatment, data = data_1)
tukey_result <- TukeyHSD(anova_result)
print(tukey_result)

# After looking at the results, I observed a significant difference in the
# music vs. earphones comparison and in the audiobook vs. earphones
# comparison, but not in the music vs. audiobook comparison. Thus we can
# conclude that exposure to music and audiobooks lessens pain and that both
# are significantly better than the earphones treatment.
```
```{r}
# Question 1
# Tukey short: the same comparison on a data frame typed in by hand.
data <- data.frame(
  Treatment = rep(c("Audiobook", "Music", "Earphones"), each = 10),
  PainLevel = c(
    5, 6, 7, 2, 6, 3, 4, 8, 5, 4,   # Audiobook
    5, 4, 4, 7, 6, 4, 6, 4, 3, 5,   # Music
    4, 8, 7, 6, 10, 6, 10, 8, 5, 6  # Earphones
  )
)

# Fit the one-way ANOVA and show its table.
aov_res <- aov(formula = PainLevel ~ Treatment, data = data)
summary(aov_res)

# Tukey HSD with groups ordered by mean, at the 95% confidence level.
tukey_res <- TukeyHSD(aov_res, ordered = TRUE, conf.level = 0.95)
print(tukey_res)


```
```{r}
# Question 2
# Part A
# Smallest per-group sample size whose one-way ANOVA F test reaches the
# desired power for the assumed group means.

# Design inputs
num_groups <- 4           # Number of streams
alpha <- 0.05             # Significance level
power <- 0.9              # Desired power
error_ms <- 6.5           # Error Mean Square
effect_difference <- 0.5  # Smallest difference to detect

f <- effect_difference / sqrt(error_ms)

# Assumed group means and their average
group_means <- c(0, 0.5, 1.0, 1.5)
overall_mean <- mean(group_means)

# Numerator degrees of freedom do not depend on n
df1 <- num_groups - 1

# Try n = 2, 3, ... and stop at the first n that reaches the target power
n <- 1
repeat {
  n <- n + 1
  total_sample_size <- n * num_groups
  df2 <- total_sample_size - num_groups

  # Non-centrality parameter for the current n
  ncp <- n * sum((group_means - overall_mean)^2) / error_ms

  # Critical F under the null, then power under the alternative
  f_crit <- qf(alpha, df1, df2, lower.tail = FALSE)
  achieved_power <- pf(f_crit, df1, df2, ncp = ncp, lower.tail = FALSE)

  if (achieved_power >= power) {
    break
  }
}
print(n)
```


```{r}
#Question 2
# Part B
# To reach a power of at least 0.9, each of the 4 groups needs 75 samples,
# for 300 samples in total.
# Each tag costs $25 per sample.
# 300 x $25 = $7,500, which is less than $100,000, so the experiment can be completed with the desired power.