forked from burakbayramli/books
-
Notifications
You must be signed in to change notification settings - Fork 0
/
time-series.R
149 lines (130 loc) · 5.02 KB
/
time-series.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Examples for the lecture on time series
# Real-world data set
library(datasets)
data(lynx)
# Synthetic data set
# Logistic map corrupted with observational noise
# Exercise fo the student: why is this "logistic"?
logistic.map <- function(x,r=4) { r*x*(1-x) }
logistic.iteration <- function(n,x.init,r=4){
x <- vector(length=n)
x[1] <- x.init
for (i in 1:(n-1)) {
x[i+1] <- logistic.map(x[i],r=r)
}
return(x)
}
x <- logistic.iteration(1000,x.init=runif(1))
y <- x+rnorm(1000,mean=0,sd=0.05)
# plot all of the lynx series
plot(lynx)
# Plot the first part of the synthetic time series
plot(y[1:100],xlab="t",ylab=expression(y[t]),type="l")
# autocorrelation functions
acf(lynx)
acf(y)
# Convert a time series into a data frame of lagged values
# Input: a time series, maximum lag to use, whether older values go on the right
# or the left
# Output: a data frame with (order+1) columns, named lag0, lag1, ... , and
# length(ts)-order rows
design.matrix.from.ts <- function(ts,order,right.older=TRUE) {
n <- length(ts)
x <- ts[(order+1):n]
for (lag in 1:order) {
if (right.older) {
x <- cbind(x,ts[(order+1-lag):(n-lag)])
} else {
x <- cbind(ts[(order+1-lag):(n-lag)],x)
}
}
lag.names <- c("lag0",paste("lag",1:order,sep=""))
if (right.older) {
colnames(x) <- lag.names
} else {
colnames(x) <- rev(lag.names)
}
return(as.data.frame(x))
}
# Plot successive values of lynx against each other
plot(lag0 ~ lag1,data=design.matrix.from.ts(lynx,1),xlab=expression(lynx[t]),
ylab=expression(lynx[t+1]),pch=16)
# Plot successive values of y against each other
plot(lag0 ~ lag1,data=design.matrix.from.ts(y,1),xlab=expression(y[t]),
ylab=expression(y[t+1]),pch=16)
# Exercise for student: write one function to do either of those plots
# Fit an additive autoregressive model
# additive model fitting is outsourced to mgcv::gam, with splines
# Inputs: time series (x), order of autoregression (order)
# Output: fitted GAM object
aar <- function(ts,order) {
stopifnot(require(mgcv))
# Automatically generate a suitable data frame from the time series
# and a formula to go along with it
fit <- gam(as.formula(auto.formula(order)),
data=design.matrix.from.ts(ts,order))
return(fit)
}
# Generate formula for an autoregressive GAM of a specified order
# Input: order (integer)
# Output: a formula which looks like
# "lag0 ~ s(lag1) + s(lag2) + ... + s(lagorder)"
auto.formula <- function(order) {
inputs <- paste("s(lag",1:order,")",sep="",collapse="+")
form <- paste("lag0 ~ ",inputs)
return(form)
}
# Plot successive values of y against each other
plot(lag0 ~ lag1,data=design.matrix.from.ts(y,1),xlab=expression(y[t]),
ylab=expression(y[t+1]),pch=16)
# Add the linear regression (which would be the AR(1) model)
abline(lm(lag0~lag1,data=design.matrix.from.ts(y,1)),col="red")
# Fit an AR(8) and add its fitted values
yar8 <- arma(y,order=c(8,0))
points(y,fitted(yar8),col="red")
# Fit a first-order nonparametric autoregression, add fitted values
yaar1 <- aar(y,order=1)
points(y[-length(y)],fitted(yaar1),col="blue")
# simple block bootstrap
# inputs: time series (ts), block length, length of output
# presumes: ts is a univariate time series
# output: one resampled time series
rblockboot <- function(ts,block.length,len.out=length(ts)) {
# chop up ts into blocks
the.blocks <- as.matrix(design.matrix.from.ts(ts,block.length-1,
right.older=FALSE))
# look carefully at design.matrix.from.ts to see why we need the -1
# How many blocks is that?
blocks.in.ts <- nrow(the.blocks)
# Sanity-check
stopifnot(blocks.in.ts == length(ts) - block.length+1)
# How many blocks will we need (round up)?
blocks.needed <- ceiling(len.out/block.length)
# Sample blocks with replacement
picked.blocks <- sample(1:blocks.in.ts,size=blocks.needed,replace=TRUE)
# put the blocks in the randomly-selected order
x <- the.blocks[picked.blocks,]
# convert from a matrix to a vector and return
# need transpose because R goes from vectors to matrices and back column by
# column, not row by row
x.vec <- as.vector(t(x))
# Discard uneeded extra observations at the end silently
return(x[1:len.out])
}
###############################################################################
# Exercises for the reader:
# 1. Modify this to do the circular block bootstrap (hint: extend ts)
# 2. Modify this to do block bootstrapping of multiple time series, using the
# # SAME blocks for all series (to preserve dependencies across series)
##############################################################################
# GDP per capita time series
# Taken from the St. Louis Federal Reserve Bank's FRED data service
# GDP in billions of constant (2005) dollars
# http://research.stlouisfed.org/fred2/series/GDPCA?cid=106
# over population in thousands
# http://research.stlouisfed.org/fred2/series/POP?cid=104
# so the defualt units are millions of dollars per capita, which is needlessly
# hard to interpret
gdppc <- read.csv("gdp-pc.csv")
gdppc$y <- gdppc$y*1e6
plot(gdppc,log="y",type="l")