########################################################################################################### ########################################################################################################### ### ### ### Note: Requires all functions associated with the EM algoritm to be loaded ### ### ### ########################################################################################################### ########################################################################################################### ########################################################################################################### ### ### ### SECTION 1: PLOTTING FUNCTIONS ### ### ### ########################################################################################################### ############################### ### ### PLOT THE DATA MASKS ### ############################### blockPlot = function(X) { ### Get dimension of X n = nrow(X) p = ncol(X) ### Initialize coordinates x.coord.l = x.coord.r = y.coord.t = y.coord.b = cols = vector() ### Build coordinate map for (i in 1:p) { x.coord.l = c(x.coord.l, rep((i - 1) / p, n)) x.coord.r = c(x.coord.r, rep(i / p, n)) y.coord.t = c(y.coord.t, (seq(1:n) - 1) / n) y.coord.b = c(y.coord.b, seq(1:n) / n) cols = c(cols, as.numeric(X[, i])) } ### Initialize plotting screen dummy.y = 0.5 dummy.x = 0.5 plot(dummy.y ~ dummy.x, ylim = c(0, 1), xlim = c(0, 1), col = 0) ### Plot it! rect(x.coord.l, y.coord.b, x.coord.r, y.coord.t, col = cols, border = NA) } ########################################################################################################### ### ### ### SECTION 2: STAT FUNCTIONS ### ### ### ########################################################################################################### ############################### ### ### MAIN STATS FUNCTION ### ############################### getStats = function(orig, est, cal.idx = NA, DoFL = 12) { ### Only use overlapping points na.idx = is.na(orig) | is.na(est) orig[na.idx] = NA est[na.idx] = NA ### Matrix or vectors? if (is.null(dim(orig))) {orig = matrix(orig, ncol = 1)} if (is.null(dim(est))) {est = matrix(est, ncol = 1)} if (!identical(cal.idx, NA) & is.null(dim(cal.idx))) {cal.idx = matrix(cal.idx, ncol = 1)} if (!identical(dim(orig), dim(est)) | (!identical(dim(orig), dim(cal.idx)) & !identical(cal.idx, NA))) {stop("Nonconformable arguments.")} if (is.null(colnames(orig))) {colnames(orig) = c(1:ncol(orig))} ### Dummy for cal stats (in case no calibration period) cal.stats = NA ### # of columns nc = ncol(orig) ### Calibration period? if (!identical(cal.idx, NA)) { ### Ensure only overlapping points used cal.idx[na.idx] = FALSE ### Center baselines on calibration period orig = .em.center(orig, DoFL = DoFL, idx = cal.idx)$X est = .em.center(est, DoFL = DoFL, idx = cal.idx)$X ### Store calibration data orig.cal = orig est.cal = est orig.cal[!cal.idx] = NA est.cal[!cal.idx] = NA ### Set up storage cal.stats = matrix(nrow = 3, ncol = ncol(orig) + 2) colnames(cal.stats) = c(colnames(orig), "Station Mean", "Pointwise Mean") rownames(cal.stats) = c("RMS Error", "Explained Variance", "Cor. Coef.") attr(cal.stats, "Type: ") = "Calibration Period" ### Calculate rms error resids = (orig.cal - est.cal) ^ 2 n.eff = colSums(!is.na(resids)) - 1 rms = sqrt(colSums(resids, na.rm = TRUE) / n.eff) stn.rms = mean(rms, na.rm = TRUE) point.rms = sqrt(sum(resids, na.rm = TRUE) / (sum(!is.na(resids)) - 1)) cal.stats[1, ] = c(rms, stn.rms, point.rms) ### Calculate average explained variance den = orig.cal ^ 2 R2c = 1 - colSums(resids, na.rm = TRUE) / colSums(den, na.rm = TRUE) stn.R2c = mean(R2c, na.rm = TRUE) point.R2c = 1 - sum(resids, na.rm = TRUE) / sum(den, na.rm = TRUE) cal.stats[2, ] = c(R2c, stn.R2c, point.R2c) ### Calculate correlation coefficient r = .em.covFn(orig.cal, est.cal, cor = TRUE) stn.r = mean(r, na.rm = TRUE) o = matrix(as.vector(orig.cal), ncol = 1) e = matrix(as.vector(est.cal), ncol = 1) point.r = .em.covFn(o, e, cor = TRUE) cal.stats[3, ] = c(r, stn.r, point.r) ### Set up for verification period orig[cal.idx] = NA est[cal.idx] = NA } ### Verification period if (identical(cal.idx, NA)) { ### Center orig = .em.center(orig, DoFL = DoFL)$X est = .em.center(est, DoFL = DoFL)$X } ### Set up storage ver.stats = matrix(nrow = 4, ncol = ncol(orig) + 2) colnames(ver.stats) = c(colnames(orig), "Station Mean", "Pointwise Mean") rownames(ver.stats) = c("RMS Error", "RE", "CE", "Cor. Coef.") attr(ver.stats, "Type: ") = "Verification Period" ### Calculate rms error resids = matrix((orig - est) ^ 2, ncol = nc) n.eff = colSums(!is.na(resids)) - 1 n.eff[n.eff < 1] = NA rms = sqrt(colSums(resids, na.rm = TRUE) / n.eff) stn.rms = mean(rms, na.rm = TRUE) point.rms = sqrt(sum(resids, na.rm = TRUE) / (sum(!is.na(resids)) - 1)) ver.stats[1, ] = c(rms, stn.rms, point.rms) ### Calculate RE if (!identical(cal.idx, NA)) { den = orig ^ 2 RE = 1 - colSums(resids, na.rm = TRUE) / colSums(den, na.rm = TRUE) stn.RE = mean(RE, na.rm = TRUE) point.RE = 1 - sum(resids, na.rm = TRUE) / sum(den, na.rm = TRUE) ver.stats[2, ] = c(RE, stn.RE, point.RE) } else { ver.stats[2, ] = NA } ### Calculate CE den = .em.mScale(orig, -apply(orig, 2, mean, na.rm = TRUE), type = 1, add = TRUE, scale = FALSE, center = FALSE) ^ 2 den = colSums(den, na.rm = TRUE) den[den == 0] = NA CE = 1 - colSums(resids, na.rm = TRUE) / den stn.CE = mean(CE, na.rm = TRUE) point.CE = 1 - sum(resids, na.rm = TRUE) / sum(den, na.rm = TRUE) ver.stats[3, ] = c(CE, stn.CE, point.CE) ### Calculate correlation coefficient r = .em.covFn(orig, est, cor = TRUE) stn.r = mean(r, na.rm = TRUE) o = matrix(as.vector(orig), ncol = 1) e = matrix(as.vector(est), ncol = 1) point.r = .em.covFn(o, e, cor = TRUE) ver.stats[4, ] = c(r, stn.r, point.r) ### Collate summary statistics if (!identical(cal.idx, NA)) { sum.stats.1 = "Summary Statistics\n------------------\n" sum.stats.2 = paste("Calibration RMS Error: ", round(cal.stats[1, ncol(cal.stats)], 4), "\n") sum.stats.3 = paste("Calibration Expl. Var: ", round(cal.stats[2, ncol(cal.stats)], 4), "\n") sum.stats.4 = paste("Calibration Cor. Coef: ", round(cal.stats[3, ncol(cal.stats)], 4), "\n") } else { sum.stats.1 = "Summary Statistics\n------------------\n" sum.stats.2 = "Calibration RMS Error: NA\n" sum.stats.3 = "Calibration Expl. Var: NA\n" sum.stats.4 = "Calibration Cor. Coef: NA\n" } sum.stats.5 = "-------------------------------\n" sum.stats.6 = paste("Verification RMS Error: ", round(ver.stats[1, ncol(ver.stats)], 4), "\n") sum.stats.7 = paste("Verification RE: ", round(ver.stats[2, ncol(ver.stats)], 4), "\n") sum.stats.8 = paste("Verification CE: ", round(ver.stats[3, ncol(ver.stats)], 4), "\n") sum.stats.9 = paste("Verification Cor. Coef: ", round(ver.stats[4, ncol(ver.stats)], 4), "\n") sum.stats = paste(sum.stats.1, sum.stats.2, sum.stats.3, sum.stats.4, sum.stats.5, sum.stats.6, sum.stats.7, sum.stats.8, sum.stats.9, sep = "") message(sum.stats) ### Return all.stats = list(cal = cal.stats, ver = ver.stats, sum = sum.stats) all.stats } ############################### ### ### ANNUAL DATA FROM MONTHLY ### ############################### makeAnnual = function (X, cutoff = 10) { ### Only make annual if correct number of months supplied len = nrow(X) if (len / 12 == trunc(len / 12)) { ### Set up new storage Y = matrix(ncol = ncol(X), nrow = len / 12) ### Get each year's mean; replace with NAs if months < cutoff for (i in 1:(len / 12)) { ### Index of years idx = seq(1:12) + (12 * (i - 1)) ### Greater than cutoff months supplied? complete = colSums(!is.na(X[idx, ])) < cutoff ### Get the means Y[i, ] = colMeans(X[idx, ], na.rm = TRUE) ### Less than cutoff months get NAs Y[i, complete] = NA } } else { ### Oops! Whole years not supplied stop("Input matrix not in whole years. No calculation performed.") } ### Return Y } ########################################################################################################### ### ### ### SECTION 3: SYNTHETIC DATA ### ### ### ########################################################################################################### ############################### ### ### LOAD SOURCE DATA ### ############################### loadData = function() { ### Set up storage raw.data = list() ### Load AVHRR data grid = scan("cloudmaskedavhrr.txt", n = -1) avhrr = ts(t(array(grid, dim = c(5509, 300))), start = 1982, freq = 12) rm(grid) ### Convert to anomalies avhrr = .em.center(avhrr, DoFL = 12)[[1]] ### Load North American GHCN data load("north.america.RData") ### Return raw.data[[1]] = n.america[301:900, ] attr(raw.data[[1]], "source") = "n.america" raw.data[[1]] = .em.center(raw.data[[1]], DoFL = 12)[[1]] raw.data[[2]] = avhrr attr(raw.data[[2]], "source") = "avhrr" raw.data } ############################### ### ### GENERATE SYNTHETIC SETS ### ############################### genSynth = function(dat, len, k.seeds = NA) { ### Get data characteristics n = nrow(dat) p = ncol(dat) reps = ceiling(len / n) ### Setup storage synth = matrix(nrow = n * reps, ncol = p) ### Perform FFT fft.dat = mvfft(dat) ### Setup storage if (identical(k.seeds, NA)) {seeds = matrix(nrow = n, ncol = reps)} else {seeds = matrix(k.seeds, ncol = reps)} ### Randomize Fourier components for (j in 1:reps) { ### Get starting index idx = (j - 1) * n + 1 ### Existing seeds? if (identical(k.seeds, NA)) { ### Generate random sequence k = runif(ceiling(n / 2), 0, 2 * pi) ### Reverse order and combine if (2 * length(k) > n) {k = c(-rev(k), k[-1])} else {k = c(-rev(k), k)} ### Make exponential k = exp(-complex(imaginary = k)) ### Save seeds seeds[, j] = k } else {k = seeds[, j]} ### Apply to Fourier components k.matrix = matrix(rep(k, p), ncol = p) fft.dat = fft.dat * k.matrix ### Invert and save synth[idx:(idx + n - 1), ] = Re(mvfft(fft.dat, TRUE) / n) } ### Truncate to desired length synth = synth[1:len, ] ### Return synth = list(dat = synth, seeds = as.vector(seeds)) synth } ############################### ### ### MASK RANDOMIZATION ### ############################### maskRand = function(all.mask, early.mask, late.mask) { ### Get mask characteristics p = ncol(all.mask) ### Setup temporary storage new.all = new.early = new.late = matrix(nrow = nrow(all.mask), ncol = ncol(all.mask)) ### Randomly reorder sel = order(rnorm(p)) new.all = all.mask[, sel] new.early = early.mask[, sel] new.late = late.mask[, sel] ### Reverse time sequence? reverse = as.logical(round(runif(1, 0, 1))) if (reverse == TRUE) { new.all = matrix(rev(new.all), ncol = p) new.early = matrix(rev(new.early), ncol = p) new.late = matrix(rev(new.late), ncol = p) } sel = c(sel, reverse) ### Return mask new.mask = list(all = new.all, early = new.early, late = new.late, order = sel) new.mask } ########################################################################################################### ### ### ### SECTION 4: TEST SCRIPT ### ### ### ########################################################################################################### ############################### ### ### REGEM TEST ### ############################### regemTest = function (source = 2, rand = TRUE, tol = 0.0005, maxiter = 500, replicates = 100, plot.series = 12, inflate = 1, max.eof = 8, series = 63, source.data = raw.data, duplicate = FALSE, seeds = NA, save.seeds = FALSE, masks = ant.masks) { ################### ### ### Initial setup ### ################### ### Data storage Y = list() ### If duplicating a previous run, get info if (duplicate == TRUE) { ### Number of sets, variables, and time steps source = attr(seeds, "source") series = attr(seeds, "series") times = attr(seeds, "times") replicates = attr(seeds, "replicates") rand = attr(seeds, "rand") m.set = attr(seeds, "mask set") } else { ### Get proper source data source.data = source.data[[source]] m.set = 0 } ### Check if plot.series value reasonable plot.series = min(plot.series, series) ### Check for compatible duplicate/save arguments if (duplicate == TRUE & save.seeds == TRUE) { ### Print warning warning("Duplicating a previously saved set. Turning save flag off.") save.seeds = FALSE } ### If AVHRR data, use O10 reconstruction grid cells (with nearby cells used in event of duplicates to prevent exact collinearity) locs = masks$grids ### If North American data, set locs to NA (forces later randomization) if (source == 1) {locs = NA} ### Setup seeds list seed.storage = list() seed.storage$seeds = list() seed.storage$grids = list() seed.storage$order = list() ### Establish list main = list() ### Establish dimension names ttls.cats = c("RMS Error", "RE", "CE", "Corr. Coef", "Trend Z", "Ideal CV EOFs", "Mann 07 EOFs") ridge.cats = c("RMS Error", "RE", "CE", "Corr. Coef", "Trend Z") ttls.series.names = c(paste("Series #", c(1:series), sep = ""), "Station Mean", "Pointwise Mean", "Error vs. Ridge") ridge.series.names = c(paste("Series #", c(1:series), sep = ""), "Station Mean", "Pointwise Mean") replicate.names = paste("Replicate #", c(1:replicates), sep = "") ttls.eofs = paste(c(1:max.eof), "EOFs") ### Establish storage main$all = array(NA, dim = c(series + 3, 7, max.eof, replicates), dimnames = list(ttls.series.names, ttls.cats, ttls.eofs, replicate.names)) main$cv = array(NA, dim = c(series + 3, 7, max.eof, replicates), dimnames = list(ttls.series.names, ttls.cats, ttls.eofs, replicate.names)) main$ridge = array(NA, dim = c(series + 2, 5, replicates), dimnames = list(ridge.series.names, ridge.cats, replicate.names)) ### Establish text descrip.txt = vector() descrip.txt[1] = "TTLS full set" descrip.txt[2] = "TTLS early/late worst performance" descrip.txt[3] = "ridge regression" ### Set attributes for (i in 1:3) { attr(main[[i]], "description") = descrip.txt[i] } ################### ### ### Experiments ### ################### ### Start loop for (j in 1:replicates) { ### Perform if not re-using previous data if (duplicate == FALSE) { ### Randomize mask? if (rand == TRUE) { use.masks = maskRand(ant.masks[[1 + m.set]], ant.masks[[2 + m.set]], ant.masks[[3 + m.set]]) use.all = use.masks$all use.early = use.masks$early use.late = use.masks$late use.sel = use.masks$sel } else { use.all = ant.masks[[1 + m.set]] use.early = ant.masks[[2 + m.set]] use.late = ant.masks[[3 + m.set]] use.sel = c(1:series, 0) } ### Randomly select locations? if (identical(locs, NA)) { ### Randomly select sel = runif(1000, 0, 1) sel = unique(trunc(109 * sel)) sel[sel == 0] = NA sel = na.omit(sel) sel = sort(sel[1:63]) } ### Not randomly selecting locations if (!identical(locs, NA)) { ### Use existing sel = locs } ### Get data Y$all = source.data[, sel] ### Generate random realization Y.rand = genSynth(Y$all, nrow(use.all), NA) Y$all = Y.rand$dat seeds = Y.rand$seeds ### Saving seeds? if (save.seeds == TRUE) { ### Store the seeds seed.storage$seeds[[j]] = seeds ### Store the locations seed.storage$grids[[j]] = sel ### Store the mask order seed.storage$order[[j]] = use.sel ### Set attributes attr(seed.storage, "source.description") = attr(source.data, "source") attr(seed.storage, "source") = source attr(seed.storage, "series") = series attr(seed.storage, "times") = times attr(seed.storage, "replicates") = replicates attr(seed.storage, "rand") = rand attr(seed.storage, "mask set") = m.set } } ### If re-using previous data, get the data if (duplicate == TRUE) { ### Extract the seeds k = seeds$seeds[[j]] ### Extract the locations locs = seeds$grids[[j]] ### Extract the mask order sel = seeds$order[[j]] reverse = as.logical(sel[length(sel)]) sel = sel[-length(sel)] ### Set up masks use.all = masks[[1 + m.set]][, sel] use.early = masks[[2 + m.set]][, sel] use.late = masks[[3 + m.set]][, sel] ### Reverse order? if (reverse == TRUE) { use.all = matrix(rev(use.all), ncol = ncol(use.all)) use.early = matrix(rev(use.early), ncol = ncol(use.early)) use.late = matrix(rev(use.late), ncol = ncol(use.late)) } ### Get data Y$all = source.data[, sel] ### Use seeds and locations to generate set Y.rand = genSynth(Y$all, nrow(use.all), k) Y$all = Y.rand$dat } ### Get censored data for early/late cross-validation Y$censored = Y$all Y$censored[!use.all] = NA ### Get early withholding set Y$early = Y$all Y$early[!use.early] = NA ### Get late withholding set Y$late = Y$all Y$late[!use.late] = NA ### Calculate Mann '07 EOFs ### Check for sufficient # of points, center, & estimate variance n.orig = .em.dofChk(!use.all, 12, series) center = .em.center(Y$censored, 12, use.all, FALSE) scales = sqrt(colSums(Y$censored ^ 2, na.rm = TRUE) / (n.orig - 12)) ### Update Y$censored = center[[1]] center = center[[2]] ### Construct covariance matrix prep.C = .em.prepC(Y$censored, NA, scales, !use.all, TRUE, nrow(use.all), series, 12) C = prep.C[[2]] ### Scale to correlation D = 1 / sqrt(diag(C)) C = .em.mScale(D * C, D, type = 1, add = FALSE, center = FALSE, max.size = 500) ### Get eigenvalues eig.C = eigen(C, only.values = TRUE, symmetric = TRUE)$values ### Construct uncertainty bands (North et al. 1982) err = sqrt(2 / (nrow(use.all) - 12)) * eig.C err.1 = err[-length(err)] err.2 = err[-1] sum.err = err.1 + err.2 ### Determine whether bands overlap eig.1 = eig.C[-length(eig.C)] eig.2 = eig.C[-1] diff = eig.1 - eig.2 mann.eofs = c(1:length(err.1))[diff < sum.err][1] - 1 ### Store main$all[, 7, 1:max.eof, j] = mann.eofs main$cv[, 7, 1:max.eof, j] = mann.eofs ### Print update message(paste("=======================================\nStarting set #", j, " . . .\n=======================================\n", sep = "")) ### Print update message("=======================================\nRIDGE REGRESSION . . .\n=======================================\n") ### Infill infill.ridge = emFn(Y$censored, Y$all, type = "mridge", maxiter = maxiter, tol = tol, DoFL = 12, gap = 50, plots = TRUE, plot.series = plot.series, inflate = inflate, ccov = FALSE) ### Extract ridge.estimates = infill.ridge[[1]]$X ### Get stats main.stats = getStats(Y$all, ridge.estimates, cal.idx = use.all)$ver ### Save stats main$ridge[, c(1:4), j] = t(main.stats) ### Get trend Z-scores t.orig = t.orig.SE = t.infill = t.infill.SE = vector() for (s in 1:series) { ### Get trends trend.orig = summary(lm(Y$all[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef trend.infill = summary(lm(ridge.estimates[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef ### Store trends & SEs t.orig[s] = trend.orig[2, 1] t.orig.SE[s] = trend.orig[2, 2] t.infill[s] = trend.infill[2, 1] ### Convert to Z-scores trend.Z = abs(t.orig - t.infill) / t.orig.SE } ### Get data set average trend Z-score ave.orig = summary(lm(rowMeans(Y$all) ~ I(c(1:nrow(use.all)) / 120)))$coef ave.infill = summary(lm(rowMeans(ridge.estimates) ~ I(c(1:nrow(use.all)) / 120)))$coef ### Calculate Z-score trend.Z = c(trend.Z, abs(ave.orig[2, 1] - ave.infill[2, 1]) / ave.orig[2, 2], abs(ave.orig[2, 1] - ave.infill[2, 1]) / ave.orig[2, 2]) ### Store main$ridge[, 5, j] = trend.Z message("=======================================\nTTLS, FULL SET . . .\n=======================================\n") ### Loop through each EOF for (eofs in 1:max.eof) { ### EOF message message(paste("=======================================\n", eofs, " Retained EOFs . . .\n=======================================\n", sep = "")) ### Infill infill.ttls = emFn(Y$censored, Y$all, type = "ttls", maxiter = maxiter, tol = tol, DoFL = 12, gap = 50, plots = TRUE, plot.series = plot.series, inflate = inflate, n.eof = eofs, ccov = FALSE) ### Extract main.estimates = infill.ttls[[eofs]]$X ### Get stats main.stats = getStats(Y$all, main.estimates, cal.idx = use.all)$ver ### Save stats main$all[c(1:(series + 2)), c(1:4), eofs, j] = t(main.stats) ### Get trend Z-scores t.orig = t.orig.SE = t.infill = t.infill.SE = vector() for (s in 1:series) { ### Get trends trend.orig = summary(lm(Y$all[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef trend.infill = summary(lm(main.estimates[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef ### Store trends & SEs t.orig[s] = trend.orig[2, 1] t.orig.SE[s] = trend.orig[2, 2] t.infill[s] = trend.infill[2, 1] ### Convert to Z-scores trend.Z = abs(t.orig - t.infill) / t.orig.SE } ### Get data set average trend Z-score ave.orig = summary(lm(rowMeans(Y$all) ~ I(c(1:nrow(use.all)) / 120)))$coef ave.infill = summary(lm(rowMeans(main.estimates) ~ I(c(1:nrow(use.all)) / 120)))$coef ### Calculate Z-score trend.Z = c(trend.Z, abs(ave.orig[2, 1] - ave.infill[2, 1]) / ave.orig[2, 2], abs(ave.orig[2, 1] - ave.infill[2, 1]) / ave.orig[2, 2]) ### Store main$all[c(1:(series + 2)), 5, eofs, j] = trend.Z ### Get error vs. ridge estimates ridge.err = suppressMessages(getStats(ridge.estimates, main.estimates, cal.idx = use.all)$ver) len = ncol(ridge.err) ridge.err = ridge.err[, len] main$all[series + 3, c(1:4), eofs, j] = ridge.err } ### Find ideal EOFs for (i in 1:(series + 3)) { ### Get RMSE RMSE = main$all[i, 1, , j] ### Find minimum ideal = c(1:max.eof)[RMSE == min(RMSE)[1]] ### Store main$all[i, 6, , j] = ideal } ### Loop through each EOF for (eofs in 1:max.eof) { ### Print update message("=======================================\nTTLS, EARLY WITHHOLDING . . .\n=======================================\n") ### EOF message message(paste("=======================================\n", eofs, " Retained EOFs . . .\n=======================================\n", sep = "")) ### Infill infill.early = emFn(Y$early, Y$all, type = "ttls", maxiter = maxiter, tol = tol, DoFL = 12, gap = 50, plots = TRUE, plot.series = plot.series, inflate = inflate, n.eof = eofs, ccov = FALSE) ### Extract early.estimates = infill.early[[eofs]]$X ### Get stats early.stats = suppressMessages(getStats(Y$censored, early.estimates, cal.idx = use.early)$ver) ### Print update message("=======================================\nTTLS, LATE WITHHOLDING . . .\n=======================================\n") ### EOF message message(paste("=======================================\n", eofs, " Retained EOFs . . .\n=======================================\n", sep = "")) ### Infill infill.late = emFn(Y$late, Y$all, type = "ttls", maxiter = maxiter, tol = tol, DoFL = 12, gap = 50, plots = TRUE, plot.series = plot.series, inflate = inflate, n.eof = eofs, ccov = FALSE) ### Extract late.estimates = infill.late[[eofs]]$X ### Get stats late.stats = suppressMessages(getStats(Y$censored, late.estimates, cal.idx = use.late)$ver) ### Find worst performance by RMS error len = ncol(early.stats) - 2 max.err = rep(1, len) test.early = early.stats[1, 1:len] test.late = late.stats[1, 1:len] max.err[test.late > test.early] = 2 ### Save worst estimates cv.estimates = early.estimates cv.mask = use.early cv.estimates[, max.err == 2] = late.estimates[, max.err == 2] cv.mask[, max.err == 2] = use.late[, max.err == 2] ### Get stats for worst estimates cv.stats = getStats(Y$censored, cv.estimates, cal.idx = cv.mask)$ver ### Save stats main$cv[c(1:(series + 2)), c(1:4), eofs, j] = t(cv.stats) ### Get trend Z-scores t.orig = t.orig.SE = t.infill = t.infill.SE = vector() for (s in 1:series) { ### Get trends trend.orig = summary(lm(Y$all[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef trend.infill = summary(lm(cv.estimates[, s] ~ I(c(1:nrow(use.all)) / 120)))$coef ### Store trends & SEs t.orig[s] = trend.orig[2, 1] t.orig.SE[s] = trend.orig[2, 2] t.infill[s] = trend.infill[2, 1] ### Convert to Z-scores trend.Z = abs(t.orig - t.infill) / t.orig.SE } ### Get data set average trend Z-score ave.orig = summary(lm(rowMeans(Y$all) ~ I(c(1:nrow(use.all)) / 120)))$coef ave.infill = summary(lm(rowMeans(cv.estimates) ~ I(c(1:nrow(use.all)) / 120)))$coef ### Calculate Z-score trend.Z = c(trend.Z, abs(ave.orig[2, 1] - ave.infill[2, 1]) / ave.orig[2, 2], NA) ### Store main$cv[c(1:(series + 2)), 5, eofs, j] = trend.Z ### Get error vs. ridge estimates ridge.err = suppressMessages(getStats(ridge.estimates, cv.estimates, cal.idx = cv.mask)$ver) len = ncol(ridge.err) ridge.err = ridge.err[, len] main$cv[series + 3, c(1:4), eofs, j] = ridge.err } ### Find ideal EOFs for (i in 1:(series + 3)) { ### Get RMSE RMSE = main$cv[i, 1, , j] ### Find minimum ideal = c(1:max.eof)[RMSE == min(RMSE)[1]] ### Store main$cv[i, 6, , j] = ideal } } ### Collate & return main$replicates = replicates main$series = series main$source = attr(source.data, "source") main$tol = tol main$seeds = seed.storage main } ########################################################################################################### ### ### ### SECTION 5: MISCELLANEOUS ### ### ### ########################################################################################################### ############################### ### ### LOAD & SETUP ### ############################### load("regem.pared.r") raw.data = loadData() ############################### ### ### GENERATE R OBJECT FOR TEST ### SCRIPTS & DATA ### ############################### save(ant.masks, blockPlot, getStats, makeAnnual, loadData, genSynth, maskRand, regemTest, file = "ttls.ridge.test.R") ############################### ### ### GENERATE R OBJECT FOR ### COMPLETE REGEM ALGORITHM ### ############################### save(.em.ssq, .em.mScale, .em.covFn, .em.cCor, .em.center, .em.dofChk, .em.prepC, .em.unique, .em.subProblems, .em.ridgeLimit, .em.holdout, .em.plotSeries, .em.plotVar, .em.plotVarTsvd, .em.plotSV, .em.plotParams, .em.plotEstErr, .em.plotActErr, .em.gcvFn, .em.ttlsRegFn, .em.ridgeRegFn, .em.tsvdRegFn, emFn, file = "RegEM.pared.R")