R has functions for quantile and probabilities for t. You must specify the correct degrees of freedom. Quantiles will return t values (often critical values), and p will return probabilities for obtaining a random value from minus infinity up to the point you specify.
# qt(): quantiles of the t distribution -- returns the t value (often a
# critical value) that cuts off a given cumulative probability at the stated df.
qt(c(.025, .975), df=7) # 7 degrees of freedom, p=.05, 2-tails
## [1] -2.364624 2.364624
qt(c(.025, .975), df=1000) # large sample, result near 1.96
## [1] -1.962339 1.962339
#
# pt(): cumulative probability P(T <= q) at the stated df.
pt(2, df=7) # notice this is one-tailed cumulative
## [1] 0.9571903
# Upper-tail probability: lower.tail=FALSE is equivalent to 1 - pt(...)
# but avoids floating-point cancellation far out in the tail.
pt(2, df=7, lower.tail=FALSE)
## [1] 0.04280966
pt(2, df=1000)
## [1] 0.9771148
pt(2, df=1000, lower.tail=FALSE)
## [1] 0.02288517
Here you see the critical values for t (alpha = .05) when there are 7 degrees of freedom. You also see the probability of a more extreme value when the obtained result is 2 and there are either 7 or 1000 degrees of freedom. It is easier to get a significant result with larger df. Notice that the obtained value of 2 would NOT be significant with 7 df under a two-tailed test, but it is significant with a 1-tailed test in the right direction.
Single sample t
Given that H0: mu is 75, H1: mu is not 75, SDy = 14, N=49, construct a rejection region and draw a picture to illustrate.
# Givens: H0 mu = 75, SDy = 14, N = 49, df = 48.
# Names chosen so base functions mean() and sd() are not masked,
# and <- used for assignment per R convention.
mu <- 75; sd_y <- 14; samsize <- 49; tdf <- 48 # givens
SEM <- sd_y/sqrt(samsize) # compute standard error of the mean
#qt(.025,df=tdf) # critical values of t for 95 percent CI
#qt(.975,df=tdf)
lo <- mu+SEM*qt(.025, df=tdf) # lower edge of the rejection region
hi <- mu+SEM*qt(.975, df=tdf) # upper edge of the rejection region
ends <- c(lo, hi) # collect the edges
ends
## [1] 70.97873 79.02127
# Illustration (graph) of the above example
# NOTE(review): the seq() range is asymmetric (-5 to 4) in the original;
# kept as-is to preserve the published figure. length.out spelled in full
# (length= relied on partial argument matching).
x <- seq(-5, 4, length.out=100)*SEM + mu # spots on the axis to be graphed
hx <- dt(((x-mu)/SEM), tdf) # density height at each spot via the 'd' function
plot(x, hx, xlab="Mean", ylab="Relative Frequency", # x, hx is a scatterplot; type 'l' is a line
main="t-distribution", type='l', axes=TRUE) # run the plot
abline(v=mu, lwd=2) # fat line for the population mean
abline(v=lo, lwd=1) # thin line for lower bound
abline(v=hi, lwd=1) # thin line for upper bound
text((lo-2),.01, 'RR-') # label for rejection region - placed by trial and error using axes
text((hi+2),.01, 'RR+') # label for rejection region
Note that this graph is very similar to one introduced in the hypothesis testing module.
Two independent groups - illustration of computations
xbar1 <-10
SD1 <- 2
N1 <- 100
#
xbar2 <- 12
SD2 <- 3
N2 <- 100
#
meandiff <- xbar2-xbar1 # find the observed difference in means
# BUG FIX: the second term was (N2-SD2^2); the pooled variance weights each
# sample variance by its degrees of freedom, so it must be (N2-1)*SD2^2
# (matching the correct formula used later in this document).
Vpool <- ((N1-1)*SD1^2+(N2-1)*SD2^2)/(N1+N2-2) # find the pooled variance
Spool <- sqrt(Vpool) # find the pooled standard deviation
Sam.adj <- (N1+N2)/(N1*N2) # sample-size multiplier, equals 1/N1 + 1/N2
St.err.M <- sqrt(Vpool*Sam.adj) # find the standard error of the difference in means
tdf <- N1+N2-2 # find the degrees of freedom for the independent samples t
#qt(.025,df=tdf) # the critical values of t
#qt(.975,df=tdf)
lo <- meandiff+St.err.M*qt(.025, df=tdf) # find the lower bound of the confidence interval
lo
## [1] 1.288979
hi <- meandiff+St.err.M*qt(.975, df=tdf) # find the upper bound of the confidence interval
hi
## [1] 2.711021
t.obs <- meandiff/St.err.M # find the observed (obtained) value of t
t.obs
## [1] 5.547002
p <- 1-pt(t.obs,tdf) # one-tailed p value (double it for a two-tailed test)
p
## [1] 4.7e-08
# (outputs above recomputed after the pooled-variance fix; rerun to confirm
# the final digits)
# Computation of Cohen's d (effect size) for the same data
d <- meandiff/Spool
d
## [1] 0.7844645
# Toy data so the t.test() calls below will run:
# y holds the scores; x codes group membership (1 or 2).
y <- c(1, 2, 3, 4, 5, 6, 7, 8) # sample values so code below will run
x <- c(1, 1, 1, 1, 2, 2, 2, 2)
# Copies used for the two-vector and paired forms of t.test().
y1 <- y
y2 <- x
# one sample t-test
t.test(y,mu=3) # Ho: mu=3
##
## One Sample t-test
##
## data: y
## t = 1.7321, df = 7, p-value = 0.1269
## alternative hypothesis: true mean is not equal to 3
## 95 percent confidence interval:
## 2.452175 6.547825
## sample estimates:
## mean of x
## 4.5
# assumes UNequal variances (now considered best practice)
# to use equal variances assumption, use the var.equal = TRUE option
# independent 2-group t-test
# (here x is numeric with two values; t.test() accepts that in a formula too,
# as the Welch output below shows)
t.test(y~x) # where y is numeric and x is a binary factor
##
## Welch Two Sample t-test
##
## data: y by x
## t = -4.3818, df = 6, p-value = 0.004659
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.233715 -1.766285
## sample estimates:
## mean in group 1 mean in group 2
## 2.5 6.5
# independent 2-group t-test
# (note y1/y2 here are y and x, so the groups differ from the formula version)
t.test(y1,y2) # where y1 and y2 are numeric
##
## Welch Two Sample t-test
##
## data: y1 and y2
## t = 3.3845, df = 7.6652, p-value = 0.0102
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.9402986 5.0597014
## sample estimates:
## mean of x mean of y
## 4.5 1.5
# paired t-test
# (pairs the vectors element by element; df = n - 1 = 7)
t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric
##
## Paired t-test
##
## data: y1 and y2
## t = 4.2426, df = 7, p-value = 0.003828
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.327958 4.672042
## sample estimates:
## mean of the differences
## 3
# Build two small samples: x2 is x1 shifted up by 1, so the two samples
# have identical SDs and means one point apart.
x1 <- c(4, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 10)
M1 <-mean(x1) # sample mean
SD1 <- sd(x1) # sample standard deviation
N1 <- length(x1) # sample size
dat1 <- c(M1, SD1, N1) # collect mean, SD, N for display
dat1
## [1] 7.000000 1.632993 16.000000
#
x2 <- x1+1 # same spread, mean one point higher
M2 <- mean(x2)
SD2 <- sd(x2)
N2 <- length(x2)
dat2 <- c(M2, SD2, N2)
dat2
## [1] 8.000000 1.632993 16.000000
#
#Single sample t-tests
# Each tests the sample mean against a hypothesized mu 3 units below it;
# the two tests give identical t values because x2 is just x1 shifted.
t.test(x1, mu=4)
##
## One Sample t-test
##
## data: x1
## t = 7.3485, df = 15, p-value = 2.412e-06
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
## 6.129839 7.870161
## sample estimates:
## mean of x
## 7
t.test(x2, mu=5)
##
## One Sample t-test
##
## data: x2
## t = 7.3485, df = 15, p-value = 2.412e-06
## alternative hypothesis: true mean is not equal to 5
## 95 percent confidence interval:
## 7.129839 8.870161
## sample estimates:
## mean of x
## 8
# independent samples t-test
# var.equal = TRUE requests the classic pooled-variance (Student) test
# rather than the default Welch test.
t.test(x1,x2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: x1 and x2
## t = -1.7321, df = 30, p-value = 0.09354
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.1791066 0.1791066
## sample estimates:
## mean of x mean of y
## 7 8
# Note that df=30 (N1 + N2 - 2 = 16 + 16 - 2)
# Same data; t-test calculated from Ms, SDs, and Ns
# (summary statistics only -- no raw scores needed; results match the
# pooled-variance t.test() output above)
meandiff <- M1-M2 # observed difference in means
Vpool <- ((N1-1)*SD1^2+(N2-1)*SD2^2)/(N1+N2-2) # pooled variance (df-weighted)
Spool <- sqrt(Vpool) # pooled standard deviation
d <- meandiff/Spool # Cohen's d effect size
Sam.adj <- (N1+N2)/(N1*N2) # equals 1/N1 + 1/N2
St.err.M <- sqrt(Vpool*Sam.adj) # standard error of the mean difference
tdf <- N1+N2-2 # degrees of freedom
lo <- meandiff+St.err.M*qt(.025, df=tdf) # lower 95 percent confidence bound
hi <- meandiff+St.err.M*qt(.975, df=tdf) # upper 95 percent confidence bound
t.obs <- meandiff/St.err.M # observed t
p <- 2*(1-pt(abs(t.obs),tdf)) # two-tailed p value
# Echo each quantity; compare with the t.test() output above.
meandiff
## [1] -1
St.err.M
## [1] 0.5773503
tdf
## [1] 30
lo
## [1] -2.179107
hi
## [1] 0.1791066
t.obs
## [1] -1.732051
p
## [1] 0.09353647
d
## [1] -0.6123724