# Packages used by the spline sections below.
library(ISLR)     # supplies the Wage data frame used throughout
library(splines)  # supplies bs() and ns()

# caret review ----
# Read the table of caret models (all five columns as character) and print a
# few summaries of it.
models = read.table(
  "http://cross-entropy.net/ML210/Caret_Models.tsv",
  header = TRUE,
  sep = "\t",
  quote = '"',
  colClasses = rep("character", 5)
)
sort(table(models$Type), decreasing = TRUE)
sort(models$Method.Name[grep("Regression", models$Type)])

# Split every comma-separated tuning-parameter string into its individual
# parameter names and tabulate how often each name occurs.
parameters = unlist(strsplit(models$Tuning.Parameters, ", "))
sort(table(parameters))
sort(models$Method.Name[grep("lambda", models$Tuning.Parameters)])
# glm, glmnet, knn, lda, lm, pcr, pls, qda
models$Tuning.Parameters[models$Method.Name == "glmnet"]

# basis splines ----
age.limits = range(Wage$age)
age.grid = seq(from = age.limits[1], to = age.limits[2]) # 18 .. 80

# Truncated cubic power term: (x - knot)^3 where x > knot, 0 elsewhere.
truncated.cubic = function(x, knot) {
  pmax(x - knot, 0)^3
}

# Truncated-power basis for a cubic spline: columns x, x^2, x^3 followed by
# one truncated cubic column per knot (no intercept column; lm() adds it).
cubic.spline.basis = function(x, knots) {
  truncated = lapply(knots, function(knot) truncated.cubic(x, knot))
  do.call(cbind, c(list(x = x, x^2, x^3), truncated))
}

# Fit with splines::bs() ...
x = Wage$age
y = Wage$wage
model1 = lm(y ~ bs(x, knots = c(25, 40, 60)))
x = age.grid
predictions1 = predict(model1, data.frame(x = x))
summary(predictions1)

# ... and with the hand-built truncated-power basis, so the two summaries can
# be compared.
x = Wage$age
y = Wage$wage
X = cubic.spline.basis(x, knots = c(25, 40, 60))
model2 = lm(y ~ X)
x = age.grid
X = cubic.spline.basis(x, knots = c(25, 40, 60))
# The model's only predictor is the matrix `X`, so the new data must carry a
# matrix column with that exact name; I() keeps it as one matrix column
# instead of letting data.frame() split it into separate columns.
predictions2 = predict(model2, data.frame(X = I(X)))
summary(predictions2)

# natural splines ----

# Hand-built natural cubic spline basis. `knots` lists ALL knots in increasing
# order, boundary knots included (at least two). Columns are x followed by
# d_k - d_{K-1} for k = 1 .. K-2, where K = length(knots) and
#   d_k = ((x - knots[k])^3_+ - (x - knots[K])^3_+) / (knots[K] - knots[k]).
# No intercept column is produced; lm() adds it.
natural.spline.basis = function(x, knots) {
  K = length(knots)
  stopifnot(K >= 2)
  upper = knots[K]
  scaled.difference = function(k) {
    (truncated.cubic(x, knots[k]) - truncated.cubic(x, upper)) /
      (upper - knots[k])
  }
  curved = lapply(
    seq_len(K - 2),
    function(k) scaled.difference(k) - scaled.difference(K - 1)
  )
  do.call(cbind, c(list(x = x), curved))
}

# Fit with splines::ns() ...
x = Wage$age
y = Wage$wage
model3 = lm(y ~ ns(x, knots = c(40), Boundary.knots = c(25, 60)))
x = age.grid
predictions3 = predict(model3, data.frame(x = x))
summary(predictions3)

# ... and with the hand-built basis on the same three knots.
x = Wage$age
y = Wage$wage
X = natural.spline.basis(x, knots = c(25, 40, 60))
model4 = lm(y ~ X)
x = age.grid
X = natural.spline.basis(x, knots = c(25, 40, 60))
predictions4 = predict(model4, data.frame(X = I(X)))
summary(predictions4)

# smoothing splines ----

# Build the n x n smoother matrix of smooth.spline() for inputs `x` at the
# requested degrees of freedom `df`.
#
# Column i holds the fitted values obtained when the response is the i-th unit
# vector (1 at position i, 0 elsewhere). The result is averaged with its
# transpose, so the returned matrix is exactly symmetric.
#
# Args:
#   x:  numeric vector of predictor values.
#   df: degrees of freedom passed on to smooth.spline().
# Returns:
#   An n x n numeric matrix, n = length(x).
# Note: this performs one smooth.spline() fit per element of x.
Smoother.Matrix = function(x, df) {
  n = length(x)
  S = matrix(0, n, n)
  for (i in seq_len(n)) {
    y = rep(0, n)
    y[i] = 1
    S[, i] = predict(smooth.spline(x, y, df = df), x)$y
  }
  (S + t(S)) / 2
}

S = Smoother.Matrix(Wage$age, df = 6.8)
model = smooth.spline(Wage$age, Wage$wage, df = 6.8)
# Printed side by side: the df reported by the fit and the trace of S.
model$df
sum(diag(S))
# Printed side by side: S applied to the response and the fitted model's own
# predictions at the training ages.
estimates = S %*% Wage$wage
predictions = predict(model, Wage$age)$y
estimates[1:5]
predictions[1:5]

# prediction using a smooth spline model ----
model = smooth.spline(Wage$age, Wage$wage)
predictions5 = predict(model, age.grid)$y
summary(predictions5)

# simple implementation for prediction using a smooth spline model ----

# Evaluate a cubic B-spline at a single point.
#
# The interval search and the three-pass recurrence below appear to follow
# de Boor's BVALUE routine for order 4 (same dm/dp/aj bookkeeping).
#
# Args:
#   knot: non-decreasing knot vector whose right end is exactly 1 and whose
#         boundary knots are repeated, so the located interval index i
#         satisfies i >= 4 and i + 3 <= length(knot).
#   coef: B-spline coefficients matching `knot`.
#   x:    single non-missing number on the same scale as `knot`, inside
#         [knot[1], 1].
# Returns:
#   The spline value at x (a single number).
# Stops with an error when x is not a single non-missing number, when no knot
# interval contains x, or when the knot vector lacks the boundary padding.
predict.value = function(knot, coef, x) {
  stopifnot(is.numeric(x), length(x) == 1, !is.na(x))

  # Locate i with knot[i] <= x < knot[i + 1]; the right end point x == 1 is
  # assigned to the last interval whose upper knot equals 1.
  last = length(knot) - 1
  i = 1
  while (i <= last &&
         !((knot[i] <= x) && ((knot[i + 1] > x) || (knot[i + 1] == 1)))) {
    i = i + 1
  }
  if (i > last) {
    stop("x = ", x, " is not inside any knot interval", call. = FALSE)
  }
  if (i < 4 || i + 3 > length(knot)) {
    stop("knot vector lacks the boundary padding needed around x = ", x,
         call. = FALSE)
  }

  # Distances from x to the three knots on each side of the interval.
  dm = c(x - knot[i], x - knot[i - 1], x - knot[i - 2])
  dp = c(knot[i + 1] - x, knot[i + 2] - x, knot[i + 3] - x)
  # The four coefficients that are active on this interval.
  aj = c(coef[i - 3], coef[i - 2], coef[i - 1], coef[i])

  # Three passes of convex combinations; each pass shortens the active part of
  # aj by one until aj[1] holds the spline value.
  for (j in 1:3) {
    kmj = 4 - j
    ilo = kmj
    for (jj in 1:kmj) {
      aj[jj] = (aj[jj + 1] * dm[ilo] + aj[jj] * dp[jj]) / (dm[ilo] + dp[jj])
      ilo = ilo - 1
    }
  }
  aj[1]
}

# Rescale the age grid with the fit's own min and range before evaluating,
# since the stored knots live on that rescaled axis.
predictions6 = vapply(
  (age.grid - model$fit$min) / model$fit$range,
  function(scaled.age) {
    predict.value(model$fit$knot, model$fit$coef, scaled.age)
  },
  numeric(1)
)
summary(predictions6)