# Basic setup code that you will need for this problem
library(data.table)
library(ggplot2)
rm(list=ls())
set.seed(0)
# 1
# Assume X ~ Normal(mu_x, sig_x)
# Then Null and alternative H's are:
# H0: mu_x = 58
# H1: mu_x != 58
# type I error (alpha) = 0.05
type_I_error <- 0.05
# estimate mu_x with mu_x_hat = x_bar
x <- c(63.97,
60.29,
85.60,
72.57,
54.65,
53.74,
69.13,
70.07,
67.19,
53.95
)
n <- length(x)
x_bar_obs <- mean(x)
# define the sampling distribution of x_bar:
#
# X ~ Normal(mu_x, sig_x)
# X_bar ~ Normal(mu_x_bar, sig_x_bar)
#
# mu_x_bar = mu_x
# sig_x_bar = sig_x / sqrt(n)
mu_x = 58
sig_x = sqrt(10) # given in the problem
mu_x_bar <- mu_x # from central limit theorem
sig_x_bar <- sig_x / sqrt(n) # from central limit theorem
# prepare for two-tailed calculations
x_bar_obs_lower <- mu_x_bar - abs(x_bar_obs - mu_x_bar)
x_bar_obs_upper <- mu_x_bar + abs(x_bar_obs - mu_x_bar)
# critical value
critical_value_lower <- qnorm(type_I_error/2, mu_x_bar, sig_x_bar, lower.tail=T)
critical_value_upper <- qnorm(type_I_error/2, mu_x_bar, sig_x_bar, lower.tail=F)
# CI
w <- critical_value_upper - critical_value_lower
CI_lower <- x_bar_obs - w/2
CI_upper <- x_bar_obs + w/2
# p-value
p_value_lower <- pnorm(x_bar_obs_lower, mu_x_bar, sig_x_bar, lower.tail=T)
p_value_upper <- pnorm(x_bar_obs_upper, mu_x_bar, sig_x_bar, lower.tail=F)
# Populate marking variables
ans_1_test_stat_obs <- x_bar_obs
ans_1_critical_value_lower <- critical_value_lower
ans_1_critical_value_upper <- critical_value_upper
ans_1_CI_lower <- CI_lower
ans_1_CI_upper <- CI_upper
ans_1_p_value <- p_value_lower + p_value_upper
# 2
ans_2_critical_value_lower <- 0.025
ans_2_critical_value_upper <- 1 - 0.025
w <- ans_2_critical_value_upper - ans_2_critical_value_lower
ans_2_CI_lower <- 0.75 - w/2
ans_2_CI_upper <- 0.75 + w/2
ans_2_p_value <- 0.5
# 3
ans_3a <- "confidence"
ans_3b <- "beta"
ans_3c <- "alpha"
ans_3d <- "power"
ans_3e <- "alpha"
ans_3f <- "power"
# Basic setup code that you will need for this problem
library(data.table)
library(ggplot2)
library(stringr)
rm(list=ls())
# Please include the following line of code as well
set.seed(0)
d <-fread('https://crossley.github.io/book_stats/data/eeg/epochs.txt')
## The column names that come from this file have spaces
## This line removes those spaces (depends on the `stringr` package)
names(d) <- str_replace_all(names(d), c(" " = "." , "," = "" ))
## Problem 1
## Step 1: State the null and alternative hypotheses
## H0: muX = muY
## H1: muX != muY
## H0: muX - muY = 0
## H1: muX - muY != 0
## Step 2: Choose a type I error rate
alph <- 0.05
## Step 3: Fully specify a statistic that estimates the parameter in step 1
## Let W = X - Y
## mu_w_hat = w_bar ~ N(mu_w_bar, sig_w_bar)
## However, since we must estimate the population variance:
## t = ( w_bar - 0 ) / sig_w_bar ~ t(df)
## We know because of the structure of our MEG data that our
## sample size for X and Y are the same (equal sample size)
## and we are told to assume equal variance.
## sig_w_bar = sqrt( (var_x + var_y) / n)
## Step 4: Perform an experiment / obtain a sample from the statistic specified in step 3
x <- d[time>0, mean(MEG.133), .(epoch)][, V1]
y <- d[time>0, mean(MEG.135), .(epoch)][, V1]
n <- length(x)
xbar <- mean(x)
ybar <- mean(y)
varx <- var(x)
vary <- var(y)
sp <- sqrt((varx + vary)/n)
tobs = (xbar - ybar) / sp
df = 2*n - 2
t_crit_lower <- qt(0.025, df, lower.tail=TRUE)
t_crit_upper <- qt(0.025, df, lower.tail=FALSE)
w <- (t_crit_upper - t_crit_lower) * sp
ci_lower <- (xbar - ybar) - w/2
ci_upper <- (xbar - ybar) + w/2
## Step 5: Make a decision
p_val_upper <- pt(abs(tobs), df, lower.tail=FALSE)
p_val_lower <- pt(-abs(tobs), df, lower.tail=TRUE)
p_val <- p_val_upper + p_val_lower
if(p_val < 0.05) {
decision <- 'reject H0'
} else {
decision <- 'fail to reject H0'
}
## Step 6: Check your work with `t.test()`
r_result <- t.test(x,
y,
alternative='two.sided',
mu=0,
paired=FALSE,
var.equal=TRUE,
conf.level=1 - alph)
# define variables for marking
ans_1_t_test_stat_obs <- tobs
ans_1_critical_value_lower <- t_crit_lower * sp
ans_1_critical_value_upper <- t_crit_upper * sp
ans_1_CI_lower <- ci_lower
ans_1_CI_upper <- ci_upper
ans_1_p_value <- p_val_lower + p_val_upper
# Problem 2
# 2. Consider two random variables X∼(μX,σX) and Y∼(μY,σY)
# . Let X generate data for MEG channel 039 during the first
# 30 epochs and Y generate data for MEG channel 039 during
# the remaining epochs. Test the hypothesis that the mean
# MEG signal for t>0 in these two signals are significantly
# different. Assume X and Y are independent but do not
# assume that σX=σY .
## Step 1: State the null and alternative hypotheses
## H0: muX = muY
## H1: muX != muY
## H0: muX - muY = 0
## H1: muX - muY != 0
## Step 2: Choose a type I error rate
alph <- 0.05
## Step 3: Fully specify a statistic that estimates the parameter in step 1
## Let W = X - Y
## mu_w_hat = w_bar ~ N(mu_w_bar, sig_w_bar)
## However, since we must estimate the population variance:
## t = ( w_bar - 0 ) / sig_w_bar ~ t(df)
## We know because of the structure of our MEG data that our sample size for X
## and Y are the same (equal sample size) and we are told NOT to assume equal
## variance.
## sig_w_bar = sqrt( varx/nx + vary/ny )
## df ... complicaated ...
## Step 4: Perform an experiment / obtain a sample from the statistic specified in step 3
x <- d[time>0 & epoch <= min(epoch) + 29][, mean(MEG.039), .(epoch)][, V1]
y <- d[time>0 & epoch > min(epoch) + 29][, mean(MEG.039), .(epoch)][, V1]
nx <- length(x)
ny <- length(y)
xbar <- mean(x)
ybar <- mean(y)
varx <- var(x)
vary <- var(y)
sp <- sqrt((varx/nx + vary/ny))
tobs <- (xbar - ybar) / sp
df <- ( (varx/nx + vary/ny)^2 ) / ( ((varx/nx)^2)/(nx-1) + ((vary/ny)^2)/(ny-1) )
t_crit_lower <- qt(0.025, df, lower.tail=TRUE)
t_crit_upper <- qt(0.025, df, lower.tail=FALSE)
w <- (t_crit_upper - t_crit_lower) * sp
ci_lower <- (xbar - ybar) - w/2
ci_upper <- (xbar - ybar) + w/2
## Step 5: Make a decision
p_val_upper <- pt(abs(tobs), df, lower.tail=FALSE)
p_val_lower <- pt(-abs(tobs), df, lower.tail=TRUE)
p_val <- p_val_upper + p_val_lower
if(p_val < 0.05) {
decision <- 'reject H0'
} else {
decision <- 'fail to reject H0'
}
## Step 6: Check your work with `t.test()`
r_result <- t.test(x,
y,
alternative='two.sided',
mu=0,
paired=FALSE,
var.equal=FALSE,
conf.level=1 - alph)
# define variables for marking
ans_2_t_test_stat_obs <- tobs
ans_2_critical_value_lower <- t_crit_lower * sp
ans_2_critical_value_upper <- t_crit_upper * sp
ans_2_CI_lower <- ci_lower
ans_2_CI_upper <- ci_upper
ans_2_p_value <- p_val_lower + p_val_upper
# Problem 3
# Do you think assuming that two different MEG channels on
# the same persons head are likely to be independent?
# Explain your reasoning.
# Probably not! In the extreme case, two sensors might be
# placed right next to each other, and this would lead them
# to read out almost the same signal, so measuring one
# signal might nearly completely determine the other. That
# is the opposite of independence.
# Problem 4
## Step 1: State the null and alternative hypotheses
## H0: muX = muY
## H1: muX != muY
## H0: muX - muY = 0
## H1: muX - muY != 0
## Step 2: Choose a type I error rate
alph <- 0.05
## Step 3: Fully specify a statistic that estimates the parameter in step 1
## Let W = X - Y
## mu_w_hat = w_bar ~ N(mu_w_bar, sig_w_bar)
## However, since we must estimate the population variance:
## t = ( w_bar - 0 ) / sig_w_bar ~ t(df)
## We know because of the structure of our MEG data that our sample size for X
## and Y are the same (equal sample size) and we are told NOT to assume independence,
## so we do a repeated measures t-test.
## Step 4: Perform an experiment / obtain a sample from the statistic specified in step 3
x <- d[time>0][, mean(MEG.039), .(epoch)][, V1]
y <- d[time>0][, mean(MEG.135), .(epoch)][, V1]
d <- x - y
n <- length(d)
dbar <- mean(d)
vard <- var(d)
sp <- sqrt((vard/n))
tobs = dbar / sp
df = n - 1
t_crit_lower <- qt(0.025, df, lower.tail=TRUE)
t_crit_upper <- qt(0.025, df, lower.tail=FALSE)
w <- (t_crit_upper - t_crit_lower) * sp
ci_lower <- dbar - w/2
ci_upper <- dbar + w/2
## Step 5: Make a decision
p_val_upper <- pt(abs(tobs), df, lower.tail=FALSE)
p_val_lower <- pt(-abs(tobs), df, lower.tail=TRUE)
p_val <- p_val_upper + p_val_lower
if(p_val < 0.05) {
decision <- 'reject H0'
} else {
decision <- 'fail to reject H0'
}
## Step 6: Check your work with `t.test()`
r_result <- t.test(x,
y,
alternative='two.sided',
mu=0,
paired=TRUE,
var.equal=TRUE,
conf.level=1 - alph)
r_result_2 <- t.test(d,
alternative='two.sided',
mu=0,
paired=FALSE,
var.equal=TRUE,
conf.level=1 - alph)
# define variables for marking
ans_4_t_test_stat_obs <- tobs
ans_4_critical_value_lower <- t_crit_lower * sp
ans_4_critical_value_upper <- t_crit_upper * sp
ans_4_CI_lower <- ci_lower
ans_4_CI_upper <- ci_upper
ans_4_p_value <- p_val_lower + p_val_upper