-
Notifications
You must be signed in to change notification settings - Fork 192
/
baci-fish.r
230 lines (173 loc) · 6.85 KB
/
baci-fish.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# BACI-fish
# 2014-11-28 CJS sf.autoplot.lmer
# 2014-11-26 CJS ggplot, split=TRUE, lmer, ##*** problam
# This is the example that appears in Smith (2002, Table 6).
# Samples of fish were taken for a period
# of 12 months before and 13 months after a nuclear
# power plant began operations.
# The power plant is cooled by water that is drawn from a river.
# When the water exits the plant, its temperature is
# elevated.
# The concern is that the warmed water will adversely affect the abundance
# and composition of fish below the plant.
# [It wasn't clear from Smith (2002) what the response measure is,
# so I'll arbitrarily assume that it is counts.]
options(useFancyQuotes=FALSE) # renders summary output corrects
library(ggplot2)
library(lmerTest)
library(lsmeans)
library(plyr)
#source("../../schwarz.functions.r")
source('http://www.stat.sfu.ca/~cschwarz/Stat-650/Notes/MyPrograms/schwarz.functions.r')
# Read in the actual data
sink("baci-fish-R-001.txt",split=TRUE)
##***part001b;
cat(" BACI design measuring fish counts with multiple (paired) yearly measurements before/after \n\n")
fish <- read.csv("baci-fish.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)
fish$trt <- interaction(fish$SiteClass, fish$Site, fish$Period)
fish$Period <- factor(fish$Period)
fish$SiteClass <- factor(fish$SiteClass)
fish$Site <- factor(fish$Site)
fish$SamplingTimeF <- factor(fish$SamplingTime)
fish$trt <- factor(fish$trt)
cat("Listing of part of the raw data \n")
fish[1:10,]
##***part001e;
sink()
str(fish)
# Preliminary plot
# Get plot of series over time
##***part010b;
prelimplot <- ggplot(data=fish, aes(x=SamplingTime, y=Count,
group=Site, color=SiteClass, shape=Site))+
ggtitle("Fish counts over time")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(fish$SamplingTime[as.character(fish$Period) == "After"]))
prelimplot
##***part010e;
ggsave(plot=prelimplot, file="baci-fish-R-010.png",
h=4, w=6, units="in", dpi=300)
# There are several ways in which this BACI design can be analyzed.
###############################################################################
# Do a t-test on the differeces of the averages at each sampling time for each site
# There is only one observtion at each sampling time, so we don't have compute
# the averages here
sink('baci-fish-R-101.txt',split=TRUE)
##***part101b;
# Reshape the file to get the Control and Impact measurements on the same line
fish.month <- reshape(fish, v.names=c("Count"), timevar=c("SiteClass"),
idvar=c("SamplingTime","Period"), drop=c("Site","trt"), direction="wide" )
fish.month$diff <- fish.month$Count.Impact - fish.month$Count.Control
fish.month[1:10,]
##***part101e;
sink()
# Plot the difference over time
##***part102b;
plotdiff <- ggplot(data=fish.month, aes(x=SamplingTime, y=diff))+
ggtitle("Plot of differences over time")+
ylab("Difference (Impact-Control)")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(fish$SamplingTime[as.character(fish$Period) == "After"]))
plotdiff
##***part102e;
ggsave(plot=plotdiff, file="baci-fish-R-102.png", h=4, w=6, units="in", dpi=300)
# do the two sample t-test not assuming equal variances
sink('baci-fish-R-104.txt',split=TRUE)
##***part104b;
result <- try(t.test(diff ~ Period, data=fish.month),silent=TRUE)
if(class(result)=="try-error")
{cat("Unable to do unequal variance t-test because of small sample size\n")} else
{
result$diff.in.means <- sum(result$estimate*c(1,-1))
names(result$diff.in.means)<- "diff.in.means"
result$se.diff <- result$statistic / abs(result$diff.in.means)
names(result$se.diff) <- 'SE.diff'
print(result)
print(result$diff.in.means)
print(result$se.diff)
}
##***part104e;
sink()
# do the two sample t-test assuming equal variances
sink('baci-fish-R-105.txt',split=TRUE)
##***part105b;
result <- t.test(diff ~ Period, data=fish.month, var.equal=TRUE)
result$diff.in.means <- sum(result$estimate*c(1,-1))
names(result$diff.in.means)<- "diff.in.means"
result$se.diff <- result$statistic / abs(result$diff.in.means)
names(result$se.diff) <- 'SE.diff'
result
result$diff.in.means
result$se.diff
##***part105e;
sink()
# do the two sample Wilcoxon test
sink('baci-fish-R-107.txt',split=TRUE)
##***part107b;
result <- wilcox.test(diff ~ Period, data=fish.month, conf.int=TRUE)
result
##***part107e;
sink()
###############################################################################
# Do a Mixed effect linear model on the individual values
sink('baci-fish-R-300-type3.txt',split=TRUE)
##***part300b;
# Because there is ONLY one measurement per year, the SamplingTime*Site and
# residual variance are total confounded and cannot be separated. This is
# the residual term.
result.lmer <- lmer(Count ~ SiteClass+Period+SiteClass:Period + (1|SamplingTimeF),
data=fish)
anova(result.lmer, ddf="Kenward-Roger")
##***part300e;
sink()
summary(result.lmer)
sink('baci-fish-R-300-vc.txt',split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc
##***part300vce;
sink()
# LSmeans after a lm() fit
sink('baci-fish-R-s300LSM-SiteClass.txt',split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.lsmo.S <- lsmeans::lsmeans(result.lmer, ~SiteClass)
cat("\n\n Estimated marginal means for SiteClass \n\n")
summary(result.lmer.lsmo.S)
##***parts300LSM-SiteClasse;
sink()
sink('baci-fish-R-300LSM-Period.txt',split=TRUE)
##***part300LSM-Periodb;
result.lmer.lsmo.P <- lsmeans::lsmeans(result.lmer, ~Period)
cat("\n\n Estimated marginal means \n\n")
summary(result.lmer.lsmo.P)
##***part300LSM-Periode;
sink()
sink('baci-fish-R-300LSM-int.txt',split=TRUE)
##***part300LSM-intb;
result.lmer.lsmo.SP <- lsmeans::lsmeans(result.lmer, ~SiteClass:Period)
cat("\n\n Estimated marginal means \n\n")
summary(result.lmer.lsmo.SP)
##***part300LSM-inte;
sink()
# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an lsmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
sink("baci-fish-R-300baci.txt",split=TRUE)
##***part300bacib;
# Estimate the BACI contrast along with a se
contrast(result.lmer.lsmo.SP, list(baci=c(1,-1,-1,1)))
confint(contrast(result.lmer.lsmo.SP, list(baci=c(1,-1,-1,1))))
##***part300bacie;
sink()
# Check the residuals etc
##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)
diagplot
##***part300diagnostice;
ggsave(plot=diagplot, file='baci-fish-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)