###############################################################################
# SVM demos with e1071: classification (iris), model comparison vs rpart
# (Glass), hyperparameter tuning with tune(), and regression on 1-D data.
#
# Authors: mike-bowles, PatriciaHoffman
###############################################################################

library(e1071)
library(rpart)

# Classification on iris ----

data(iris)
summary(iris)

# With a factor response, svm() defaults to classification mode.
model <- svm(Species ~ ., data = iris)
print(model)
summary(model)

x <- subset(iris, select = -Species)
y <- iris$Species  # reference the column explicitly; no attach()

# Predict on the training data and check accuracy.
pred <- predict(model, x)
table(pred, y)

# Visualize in 2-D (classical MDS): classes by color, support vectors as "+".
plot(cmdscale(dist(iris[, -5])),
     col = as.integer(iris[, 5]),
     pch = c("o", "+")[seq_len(nrow(iris)) %in% model$index + 1])

plot(model, iris, Petal.Width ~ Petal.Length,
     slice = list(Sepal.Width = 3, Sepal.Length = 4))

# Same decision-boundary plot at a different slice of the fixed predictors.
plot(model, iris, Petal.Width ~ Petal.Length,
     slice = list(Sepal.Width = 2, Sepal.Length = 3))
# Optional customization of symbols and colors:
# svSymbol = "v", dataSymbol = "o", symbolPalette = rainbow(4),
# color.palette = terrain.colors

# Tune svm for classification with the RBF kernel (the default),
# using cross-validation over a grid of gamma and cost.
data(iris)
obj <- tune(svm, Species ~ ., data = iris,
            ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)),
            tunecontrol = tune.control(sampling = "cross"))
summary(obj)
plot(obj)

# Refit with the best gamma and cost found by the tuning run above.
model <- svm(Species ~ ., data = iris, gamma = 0.5, cost = 4)
print(model)
summary(model)

x <- subset(iris, select = -Species)
y <- iris$Species

# Predict on the training data and check accuracy.
pred <- predict(model, x)
table(pred, y)

# Glass data: SVM vs rpart ----
# http://archive.ics.uci.edu/ml/datasets/Glass%20Identification

data(Glass, package = "mlbench")

set.seed(42)  # make the train/test split reproducible
index <- seq_len(nrow(Glass))
testindex <- sample(index, trunc(length(index) / 3))
testset <- Glass[testindex, ]
trainset <- Glass[-testindex, ]

# Misclassification rate, shared by all comparisons below.
error_rate <- function(pred, truth) mean(pred != truth)

# SVM with hand-picked parameters.
svm.model <- svm(Type ~ ., data = trainset, cost = 100, gamma = 1)
svm.pred <- predict(svm.model, subset(testset, select = -Type))
table(pred = svm.pred, true = testset$Type)
length(testset$Type)
error_rate(svm.pred, testset$Type)

# Decision-tree baseline.
rpart.model <- rpart(Type ~ ., data = trainset)
rpart.pred <- predict(rpart.model, subset(testset, select = -Type),
                      type = "class")
table(pred = rpart.pred, true = testset$Type)
error_rate(rpart.pred, testset$Type)

# See if we can improve the SVM with parameter selection
# (cross-validated grid search over gamma and cost).
obj <- tune(svm, Type ~ ., data = trainset,
            ranges = list(gamma = 2^(-3:0), cost = 2^(5:8)),
            tunecontrol = tune.control(sampling = "cross"))
summary(obj)
plot(obj)

# Rerun with the best values of the regularization parameters.
svm.model <- svm(Type ~ ., data = trainset, cost = 64, gamma = 0.25)
svm.pred <- predict(svm.model, subset(testset, select = -Type))
table(pred = svm.pred, true = testset$Type)
length(testset$Type)
error_rate(svm.pred, testset$Type)

# Regression mode on two dimensions ----

# Synthetic data: noisy log curve.
x <- seq(0.1, 5, by = 0.05)
y <- log(x) + rnorm(x, sd = 0.2)

# Estimate the model and predict the input values.
m <- svm(x, y)
new <- predict(m, x)

# Visualize: data points, true curve (red), SVM fit (blue).
plot(x, y)
points(x, log(x), col = 2)
points(x, new, col = 4)