# Supervised classification examples explored interactively with GGobi:
# linear discriminant analysis, variance-covariance ellipses, classification
# trees, random forests and neural networks.
#
# The script is meant to be stepped through; each ggobi() call opens a GGobi
# session for the data built just above it.

library(MASS)
library(rggobi)
library(rpart)
library(randomForest)
library(nnet) # provides nnet() and class.ind() used in the last section

# ---------------------------------------------------------------------------
# Linear discriminant analysis
# ---------------------------------------------------------------------------

# NOTE(review): ggobi() followed by dataset(1)[[1]] presumably expects the
# data to be loaded in the GGobi session by hand before continuing -- confirm
# against the rggobi version in use.
ggobi()
d.olive <- dataset(1)[[1]]

# Column 2 is left out of the fit so that Region is predicted from the
# remaining columns only.
olive.lda <- lda(Region ~ ., d.olive[, -2])
pRegion <- predict(olive.lda, d.olive[, -2])$class
table(d.olive[, 1], pRegion)

# Put true and predicted region side by side in the first two columns.
d.olive <- cbind(d.olive[, 1], pRegion, d.olive[, -1])
names(d.olive)[1] <- "Region"
ggobi(d.olive)
# Jittering region/pregion, Brushing

# ---------------------------------------------------------------------------
# Checking equal elliptical
# Generating variance-covariance ellipses
# ---------------------------------------------------------------------------

# Scale a numeric vector to unit Euclidean length.
f.norm.vec <- function(x) {
  x / sqrt(sum(x^2))
}

# Euclidean length of a numeric vector.
f.norm <- function(x) {
  sqrt(sum(x^2))
}

# Generate n points on the ellipsoid defined by the sample mean and sample
# variance-covariance matrix of x.
#
# x: numeric matrix or data frame, one observation per row.
# n: number of points to generate.
#
# Returns an n x ncol(x) matrix. Points are random (rnorm), so results differ
# between calls unless the seed is fixed by the caller.
f.var.ellipse <- function(x, n = 100) {
  xm <- colMeans(x)
  p <- ncol(x)
  ev <- eigen(var(x))

  # Random directions: normal draws scaled to unit length row by row. The
  # division recycles the length-n norm vector down the columns, so row i is
  # divided by its own norm; unlike t(apply(...)) this keeps an n x p matrix
  # when p == 1.
  sph <- matrix(rnorm(n * p), ncol = p)
  cntr <- sph / sqrt(rowSums(sph^2))

  # Stretch by the square roots of the eigenvalues and rotate by the
  # eigenvectors. nrow = p stops diag() from treating a single value as a
  # dimension; pmax() guards against tiny negative eigenvalues from rounding.
  ev.scale <- diag(sqrt(pmax(ev$values, 0)), nrow = p)
  cntr <- cntr %*% ev.scale %*% t(ev$vectors)

  # Shift every generated point by the column means.
  cntr + matrix(rep(xm, n), nrow = n, byrow = TRUE)
}

ggobi()
d.flea <- dataset(1)[[1]]

# Species labels as they appear in column 7 (trailing blank included).
flea.species <- c("Concinna ", "Heikert. ", "Heptapot. ")
n.ellipse <- 100

# One ellipse per species, built from columns 1 to 6, stacked in label order.
d.flea.varcov <- do.call(
  rbind,
  lapply(flea.species, function(sp) {
    f.var.ellipse(d.flea[d.flea[, 7] == sp, 1:6], n = n.ellipse)
  })
)
d.flea.varcov <- data.frame(
  tars1 = d.flea.varcov[, 1],
  tars2 = d.flea.varcov[, 2],
  head = d.flea.varcov[, 3],
  aede1 = d.flea.varcov[, 4],
  aede2 = d.flea.varcov[, 5],
  aede3 = d.flea.varcov[, 6],
  species = rep(flea.species, each = n.ellipse)
)
ggobi(d.flea.varcov)
# brush species and spin

# ---------------------------------------------------------------------------
# Trees
# ---------------------------------------------------------------------------

# Columns 2 and 3 are dropped so the tree is grown on Region and the
# remaining columns.
olive.rp <- rpart(Region ~ ., data.frame(d.olive[, -(2:3)]), method = "class")

# ---------------------------------------------------------------------------
# Forests
# ---------------------------------------------------------------------------

olive.rf <- randomForest(Region ~ .,
  data = data.frame(d.olive[, c(1, 4:11)]),
  importance = TRUE, proximity = TRUE, mtry = 4
)
# NOTE(review): column 5 of the importance matrix is presumably the overall
# Gini decrease for a three-class forest -- confirm.
order(olive.rf$importance[, 5], decreasing = TRUE)
pred <- as.numeric(olive.rf$predicted)
margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3")
d.olive.rf <- cbind(pred, margin, d.olive)
ggobi(d.olive.rf)
# Explore votes

# Same forest with column 7 left out.
olive.rf <- randomForest(Region ~ .,
  data = data.frame(d.olive[, c(1, 4:6, 8:11)]),
  importance = TRUE, proximity = TRUE, mtry = 4
)
order(olive.rf$importance[, 5], decreasing = TRUE)

# Fixed linear combination of columns 8 and 10 (presumably linoleic and
# arachidic, going by the variable name), inserted as column 4.
linoarach <- 0.969 / 1022 * d.olive[, 8] + 0.245 / 105 * d.olive[, 10]
d.olive <- cbind(d.olive[, 1:3], linoarach, d.olive[, 4:11])

olive.rf <- randomForest(Region ~ .,
  data = data.frame(d.olive[, c(1, 4:7, 10:12)]),
  importance = TRUE, proximity = TRUE, mtry = 4
)
order(olive.rf$importance[, 5], decreasing = TRUE)
pred <- as.numeric(olive.rf$predicted)
margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3")
d.olive.rf <- cbind(pred, margin, d.olive)
ggobi(d.olive.rf)

# Area labels used for the four-class problems below (trailing blanks are
# part of the labels).
sth.areas <- c("North-Apulia ", "South-Apulia ", "Calabria ", "Sicily ")

# Rows whose Region has integer code 1, keeping column 3 and columns 5 to 12.
d.olive.sth <- d.olive[as.numeric(d.olive[, 1]) == 1, c(3, 5:12)]
d.olive.sth[, 1] <- factor(d.olive.sth[, 1], levels = sth.areas)
olive.sth.rf <- randomForest(Area ~ .,
  data = data.frame(d.olive.sth),
  importance = TRUE, proximity = TRUE, mtry = 2, ntree = 1000
)
print(olive.sth.rf)
order(olive.sth.rf$importance[, 6], decreasing = TRUE)
pred <- as.numeric(olive.sth.rf$predicted)
margin <- olive.sth.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3", "Vote4")
d.olive.sth.rf <- cbind(pred, margin, d.olive.sth)
ggobi(d.olive.sth.rf)

# ---------------------------------------------------------------------------
# Neural networks
# ---------------------------------------------------------------------------

# Row numbers held out as the test sample.
indx.tst <- c(
  1, 7, 12, 15, 16, 22, 27, 32, 34, 35, 36, 41, 50, 54, 61, 68, 70, 75,
  76, 80, 95, 101, 102, 105, 106, 110, 116, 118, 119, 122, 134, 137, 140,
  147, 148, 150,
  151, 156, 165, 175, 177, 182, 183, 185, 186, 187, 190, 192, 194, 201, 202,
  211, 213, 217,
  218, 219, 225, 227, 241, 242, 246, 257, 259, 263, 266, 274, 280, 284, 289,
  291, 292, 297,
  305, 310, 313, 314, 323, 330, 333, 338, 341, 342, 347, 351, 352, 356, 358,
  359, 369, 374,
  375, 376, 386, 392, 405, 406, 415, 416, 418, 420, 421, 423, 426, 428, 435,
  440, 451, 458,
  460, 462, 466, 468, 470, 474, 476, 480, 481, 482, 487, 492, 493, 500, 501,
  509, 519, 522,
  530, 532, 541, 543, 545, 546, 551, 559, 567, 570
)
indx.tr <- seq_len(nrow(d.olive))[-indx.tst]

# Cross-tabulate true against predicted class, where each argument is a
# matrix with one column per class and the class of a row is the column
# holding its largest value.
test.cl <- function(true, pred) {
  true <- max.col(true)
  cres <- max.col(pred)
  table(true, cres)
}

# Drop the first two columns, leaving column 1 as the class label.
# NOTE(review): when the script is run top to bottom this keeps linoarach as
# an input variable -- confirm that is intended.
d.olive.train <- d.olive[indx.tr, -c(1, 2)]
d.olive.test <- d.olive[indx.tst, -c(1, 2)]

# Keep only the four areas listed in sth.areas and drop the unused levels.
d.olive.train <- d.olive.train[d.olive.train[, 1] %in% sth.areas, ]
d.olive.train[, 1] <- factor(d.olive.train[, 1], levels = sth.areas)
d.olive.test <- d.olive.test[d.olive.test[, 1] %in% sth.areas, ]
d.olive.test[, 1] <- factor(d.olive.test[, 1], levels = sth.areas)

# Indicator (0/1) target matrices, one column per area.
targetr <- class.ind(d.olive.train[, 1])
targets <- class.ind(d.olive.test[, 1])

olive.nn <- nnet(d.olive.train[, -1], targetr,
  size = 4, linout = TRUE, decay = 5e-4, range = 0.6, maxit = 1000
)

# Short area tags used to label fitted values and residuals.
area.tags <- c("NthAp", "SthAp", "Calab", "Sicily")

fit.tr <- predict(olive.nn, d.olive.train[, -1])
fit.ts <- predict(olive.nn, d.olive.test[, -1])
test.cl(targetr, fit.tr)
test.cl(targets, fit.ts)

res.tr <- targetr - fit.tr
res.ts <- targets - fit.ts

# Columns are named here rather than by position afterwards, so the labels
# stay correct however many input columns d.olive.train carries.
colnames(fit.tr) <- paste0("fit.", area.tags)
colnames(fit.ts) <- paste0("fit.", area.tags)
colnames(res.tr) <- paste0("res.", area.tags)
colnames(res.ts) <- paste0("res.", area.tags)

d.olive.nn.tr <- cbind(d.olive.train, targetr, fit.tr,
  pArea = max.col(fit.tr),
  res.tr,
  Sample = rep("train", nrow(d.olive.train))
)
d.olive.nn.ts <- cbind(d.olive.test, targets, fit.ts,
  pArea = max.col(fit.ts),
  res.ts,
  Sample = rep("test", nrow(d.olive.test))
)

d.olive.nn <- rbind(d.olive.nn.tr, d.olive.nn.ts)
ggobi(d.olive.nn)