diff -Nru adabag-4.1/debian/changelog adabag-4.2/debian/changelog
--- adabag-4.1/debian/changelog 2018-01-22 01:23:14.000000000 +0000
+++ adabag-4.2/debian/changelog 2018-01-22 01:23:14.000000000 +0000
@@ -1,14 +1,28 @@
-adabag (4.1-1cran1ppa0) trusty; urgency=medium
+adabag (4.2-1cran1ppa0trusty0) trusty; urgency=medium
- * Compilation for Ubuntu 14.04.3 LTS
+ * Compilation for Ubuntu 14.04.5 LTS
+
+ -- Michael Rutter  Mon, 22 Jan 2018 01:22:03 +0000
+
+adabag (4.2-1cran1) testing; urgency=low
+
+ * cran2deb svn: 362M with DB version 1.
+
+ -- cran2deb4ubuntu  Sun, 21 Jan 2018 12:47:43 -0500
+
+
+adabag (4.1-1cran2) testing; urgency=low
+
+ * cran2deb svn: 362M with DB version 1.
+
+ -- cran2deb4ubuntu  Sat, 22 Oct 2016 22:39:26 -0400
- -- Michael Rutter  Sat, 17 Oct 2015 15:57:47 +0000
 adabag (4.1-1cran1) testing; urgency=low
 * cran2deb svn: 362M with DB version 1.
- -- cran2deb4ubuntu  Sat, 17 Oct 2015 10:04:39 -0400
+ -- cran2deb4ubuntu  Sat, 17 Oct 2015 10:04:47 -0400
 adabag (4.0-1cran1) testing; urgency=low
diff -Nru adabag-4.1/debian/compat adabag-4.2/debian/compat
--- adabag-4.1/debian/compat 1970-01-01 00:00:00.000000000 +0000
+++ adabag-4.2/debian/compat 2018-01-22 01:23:14.000000000 +0000
@@ -0,0 +1 @@
+7
\ No newline at end of file
diff -Nru adabag-4.1/debian/control adabag-4.2/debian/control
--- adabag-4.1/debian/control 2018-01-22 01:23:14.000000000 +0000
+++ adabag-4.2/debian/control 2018-01-22 01:23:14.000000000 +0000
@@ -2,13 +2,15 @@
 Section: gnu-r
 Priority: optional
 Maintainer: cran2deb4ubuntu
-Build-Depends: r-cran-rpart, r-cran-mlbench, r-cran-caret, r-base-dev,
- xvfb, xauth, xfonts-base, r-base-core, debhelper (>> 4.1.0), cdbs
+Build-Depends: r-cran-rpart, r-cran-caret, r-cran-foreach,
+ r-cran-doparallel, r-base-dev, xvfb, xauth, xfonts-base, r-base-core,
+ debhelper (>> 4.1.0), cdbs
 Standards-Version: 3.9.1
 Package: r-cran-adabag
 Architecture: all
-Depends: r-cran-rpart, r-cran-mlbench, r-cran-caret, r-base-core
+Depends: r-cran-rpart, r-cran-caret, r-cran-foreach, r-cran-doparallel,
+ r-base-core
 Description: GNU R package "Applies Multiclass AdaBoost.M1, SAMME and
 Bagging"
 .
@@ -32,7 +34,8 @@
 pruning (Guo and Boukir, 2013) and a function to auto prune the
 'rpart' tree. Moreover, three new plots are also available
 importanceplot(), plot.errorevol() and plot.margins(). Version 4.1
- allows to predict on unlabeled data.
+ allows to predict on unlabeled data. Version 4.2 includes the parallel
+ computation option for some of the functions.
 .
 Author: Alfaro, Esteban; Gamez, Matias and Garcia, Noelia; with
 contributions from Li Guo
diff -Nru adabag-4.1/debian/copyright adabag-4.2/debian/copyright
--- adabag-4.1/debian/copyright 2018-01-22 01:23:14.000000000 +0000
+++ adabag-4.2/debian/copyright 2018-01-22 01:23:14.000000000 +0000
@@ -2,7 +2,7 @@
 automatically using cran2deb4ubuntu by cran2deb4ubuntu
 .
-The original GNU R package is Copyright (C) 2015 Alfaro, Esteban;
+The original GNU R package is Copyright (C) 2018 Alfaro, Esteban;
 Gamez, Matias and Garcia, Noelia; with contributions from Li Guo
 and possibly others.
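The dependency swap above (mlbench dropped from Depends, foreach and doParallel added) backs the one user-visible change in 4.2: bagging(), bagging.cv() and boosting.cv() gain a par argument, FALSE by default. With par=TRUE, bagging() grows its mfinal bootstrap trees in parallel, while bagging.cv() and boosting.cv() distribute the v cross-validation folds; the cluster is created and stopped inside the functions, so the caller only flips the flag. The snippet below is a usage sketch assembled from the updated examples in this diff (iris, v=2, mfinal=3, cp=0.01); it is not taken verbatim from the shipped documentation.

# Usage sketch of the new par argument in adabag 4.2 (illustrative, not from the docs)
library(adabag)   # attaches rpart, caret, foreach and doParallel via Depends
data(iris)

# Default par=FALSE keeps the serial 4.1 behaviour
iris.baggingcv <- bagging.cv(Species ~ ., data=iris, v=2, mfinal=3,
                             control=rpart.control(cp=0.01))

# New in 4.2: run the cross-validation folds in parallel workers
iris.baggingcv.par <- bagging.cv(Species ~ ., data=iris, v=2, mfinal=3,
                                 control=rpart.control(cp=0.01), par=TRUE)
iris.baggingcv.par$error

Per the R sources further down, bagging.cv() currently pins its cluster to 2 workers (a CRAN-check concession left in the code), whereas bagging() and boosting.cv() size theirs with detectCores() - 1.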
diff -Nru adabag-4.1/DESCRIPTION adabag-4.2/DESCRIPTION --- adabag-4.1/DESCRIPTION 2015-10-14 21:41:31.000000000 +0000 +++ adabag-4.2/DESCRIPTION 2018-01-19 14:52:39.000000000 +0000 @@ -1,11 +1,12 @@ Package: adabag Type: Package Title: Applies Multiclass AdaBoost.M1, SAMME and Bagging -Version: 4.1 -Date: 2015-10-14 +Version: 4.2 +Date: 2018-01-10 Author: Alfaro, Esteban; Gamez, Matias and Garcia, Noelia; with contributions from Li Guo Maintainer: Esteban Alfaro -Depends: rpart, mlbench, caret +Depends: rpart, caret, foreach, doParallel +Suggests: mlbench Description: It implements Freund and Schapire's Adaboost.M1 algorithm and Breiman's Bagging algorithm using classification trees as individual classifiers. Once these classifiers have been trained, they can be used to predict on new data. Also, cross validation estimation of the error can @@ -20,10 +21,10 @@ these trees. Version 4.0 includes the margin-based ordered aggregation for Bagging pruning (Guo and Boukir, 2013) and a function to auto prune the 'rpart' tree. Moreover, three new plots are also available importanceplot(), plot.errorevol() and plot.margins(). Version 4.1 allows to predict on - unlabeled data. + unlabeled data. Version 4.2 includes the parallel computation option for some of the functions. License: GPL (>= 2) LazyLoad: yes NeedsCompilation: no -Packaged: 2015-10-14 20:14:49 UTC; Esteban +Packaged: 2018-01-18 09:32:23 UTC; emilio Repository: CRAN -Date/Publication: 2015-10-14 23:41:31 +Date/Publication: 2018-01-19 14:52:39 UTC diff -Nru adabag-4.1/inst/CITATION adabag-4.2/inst/CITATION --- adabag-4.1/inst/CITATION 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/inst/CITATION 2015-10-20 10:12:32.000000000 +0000 @@ -13,7 +13,7 @@ url = "http://www.jstatsoft.org/v54/i02/", textVersion = - paste("Esteban Alfaro, Matias Gamez, Noelia Garcia (2013).", + paste("Alfaro, E., Gamez, M. Garcia, N.(2013).", "adabag: An R Package for Classification with Boosting and Bagging.", "Journal of Statistical Software, 54(2), 1-35.", "URL http://www.jstatsoft.org/v54/i02/.") diff -Nru adabag-4.1/man/adabag-package.Rd adabag-4.2/man/adabag-package.Rd --- adabag-4.1/man/adabag-package.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/adabag-package.Rd 2018-01-17 19:38:48.000000000 +0000 @@ -127,11 +127,11 @@ ## rpart library should be loaded data(iris) iris.adaboost <- boosting(Species~., data=iris, boos=TRUE, - mfinal=6) + mfinal=3) importanceplot(iris.adaboost) sub <- c(sample(1:50, 35), sample(51:100, 35), sample(101:150, 35)) -iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=10) +iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=3) #Predicting with labeled data iris.predbagging<-predict.bagging(iris.bagging, newdata=iris[-sub,]) iris.predbagging diff -Nru adabag-4.1/man/bagging.cv.Rd adabag-4.2/man/bagging.cv.Rd --- adabag-4.1/man/bagging.cv.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/bagging.cv.Rd 2018-01-18 09:14:32.000000000 +0000 @@ -10,7 +10,7 @@ } \usage{ -bagging.cv(formula, data, v = 10, mfinal = 100, control) +bagging.cv(formula, data, v = 10, mfinal = 100, control, par=FALSE) } %- maybe also 'usage' for other objects documented here. @@ -24,6 +24,9 @@ \item{mfinal}{an integer, the number of iterations for which boosting is run or the number of trees to use. Defaults to \code{mfinal=100} iterations.} \item{control}{options that control details of the rpart algorithm. See rpart.control for more details. 
} + \item{par}{ if \code{TRUE}, the cross validation process is runned in parallel. If \code{FALSE} (by default), + the function runs without parallelization. } + } \value{ @@ -57,7 +60,7 @@ ## rpart library should be loaded library(rpart) data(iris) -iris.baggingcv <- bagging.cv(Species ~ ., v=2, data=iris, mfinal=10, +iris.baggingcv <- bagging.cv(Species ~ ., v=2, data=iris, mfinal=3, control=rpart.control(cp=0.01)) iris.baggingcv[-1] diff -Nru adabag-4.1/man/bagging.Rd adabag-4.2/man/bagging.Rd --- adabag-4.1/man/bagging.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/bagging.Rd 2018-01-18 09:15:40.000000000 +0000 @@ -8,7 +8,7 @@ in 1996 using classification trees as single classifiers. } \usage{ -bagging(formula, data, mfinal = 100, control,...) +bagging(formula, data, mfinal = 100, control, par=FALSE,...) } %- maybe also 'usage' for other objects documented here. \arguments{ @@ -18,6 +18,8 @@ \item{mfinal}{an integer, the number of iterations for which boosting is run or the number of trees to use. Defaults to \code{mfinal=100} iterations.} \item{control}{options that control details of the rpart algorithm. See rpart.control for more details. } + \item{par}{ if \code{TRUE}, the cross validation process is runned in parallel. If \code{FALSE} (by default), + the function runs without parallelization. } \item{...}{ further arguments passed to or from other methods.} } \details{ @@ -67,10 +69,10 @@ data(Vehicle) l <- length(Vehicle[,1]) sub <- sample(1:l,2*l/3) -Vehicle.bagging <- bagging(Class ~.,data=Vehicle[sub, ],mfinal=15, +Vehicle.bagging <- bagging(Class ~.,data=Vehicle[sub, ],mfinal=5, control=rpart.control(maxdepth=5, minsplit=15)) #Using the pruning option -Vehicle.bagging.pred <- predict.bagging(Vehicle.bagging,newdata=Vehicle[-sub, ], newmfinal=10) +Vehicle.bagging.pred <- predict.bagging(Vehicle.bagging,newdata=Vehicle[-sub, ], newmfinal=3) Vehicle.bagging.pred$confusion Vehicle.bagging.pred$error diff -Nru adabag-4.1/man/boosting.cv.Rd adabag-4.2/man/boosting.cv.Rd --- adabag-4.1/man/boosting.cv.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/boosting.cv.Rd 2018-01-17 19:46:10.000000000 +0000 @@ -11,7 +11,7 @@ \usage{ boosting.cv(formula, data, v = 10, boos = TRUE, mfinal = 100, - coeflearn = "Breiman", control) + coeflearn = "Breiman", control, par=FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ @@ -32,6 +32,8 @@ and \code{alpha} is the weight updating coefficient. On the other hand, if coeflearn is 'Zhu' the SAMME algorithm is implemented with \code{alpha=ln((1-err)/err)+} \code{ln(nclasses-1)}.} \item{control}{options that control details of the rpart algorithm. See rpart.control for more details. } + \item{par}{ if \code{TRUE}, the cross validation process is runned in parallel. If \code{FALSE} (by default), + the function runs without parallelization. 
} } @@ -68,7 +70,7 @@ ## rpart library should be loaded data(iris) -iris.boostcv <- boosting.cv(Species ~ ., v=2, data=iris, mfinal=10, +iris.boostcv <- boosting.cv(Species ~ ., v=2, data=iris, mfinal=5, control=rpart.control(cp=0.01)) iris.boostcv[-1] diff -Nru adabag-4.1/man/boosting.Rd adabag-4.2/man/boosting.Rd --- adabag-4.1/man/boosting.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/boosting.Rd 2018-01-18 09:32:17.000000000 +0000 @@ -1,109 +1,110 @@ -\name{boosting} -\alias{boosting} -\alias{adaboost.M1} - -\title{ Applies the AdaBoost.M1 and SAMME algorithms to a data set } - -\description{ Fits the AdaBoost.M1 (Freund and Schapire, 1996) and SAMME (Zhu et al., 2009) algorithms - using classification trees as single classifiers. } - -\usage{ - boosting(formula, data, boos = TRUE, mfinal = 100, coeflearn = 'Breiman', - control,...) -} - -\arguments{ - \item{formula}{ a formula, as in the \code{lm} function. } - \item{data}{a data frame in which to interpret the variables named in \code{formula}. } - \item{boos}{ if \code{TRUE} (by default), a bootstrap sample of the training set is drawn using - the weights for each observation on that iteration. If \code{FALSE}, every observation - is used with its weights. } - \item{mfinal}{an integer, the number of iterations for which boosting is run - or the number of trees to use. Defaults to \code{mfinal=100} iterations. } - \item{coeflearn}{ if 'Breiman'(by default), \code{alpha=1/2ln((1-err)/err)} is used. - If 'Freund' \code{alpha=ln((1-err)/err)} is used. In both cases the AdaBoost.M1 algorithm is used - and \code{alpha} is the weight updating coefficient. On the other hand, if coeflearn is 'Zhu' the SAMME algorithm - is implemented with \code{alpha=ln((1-err)/err)+} \code{ln(nclasses-1)}.} - \item{control}{options that control details of the rpart algorithm. See rpart.control for more details. } - \item{...}{ further arguments passed to or from other methods.} - -} - -\details{ - AdaBoost.M1 and SAMME are simple generalizations of AdaBoost for more than two classes. In AdaBoost-SAMME - the individual trees are required to have an error lower than 1-1/nclasses instead of 1/2 of the AdaBoost.M1 - -} - -\value{ - An object of class \code{boosting}, which is a list with the following components: - \item{formula}{the formula used. } - \item{trees}{the trees grown along the iterations. } - \item{weights}{a vector with the weighting of the trees of all iterations. } - \item{votes}{a matrix describing, for each observation, the number of trees that assigned it to each class, weighting each tree by its \code{alpha} coefficient. } - \item{prob}{a matrix describing, for each observation, the posterior probability or degree of support of each class. - These probabilities are calculated using the proportion of votes in the final ensemble.} - \item{class}{the class predicted by the ensemble classifier. } - \item{importance}{returns the relative importance of each variable in the classification task. - This measure takes into account the gain of the Gini index given by a variable in a tree and the weight of this tree. } -} - -\references{Alfaro, E., Gamez, M. and Garcia, N. (2013): ``adabag: An R Package for Classification with Boosting and Bagging''. Journal of Statistical Software, Vol 54, 2, pp. 1--35. - - Alfaro, E., Garcia, N., Gamez, M. and Elizondo, D. (2008): ``Bankruptcy forecasting: An empirical comparison of AdaBoost and neural networks''. Decision Support Systems, 45, pp. 110--122. - - Breiman, L. (1998): ``Arcing classifiers''. 
The Annals of Statistics, Vol 26, 3, pp. 801--849. - - Freund, Y. and Schapire, R.E. (1996): ``Experiments with a new boosting algorithm''. In Proceedings of the Thirteenth International Conference on Machine Learning, pp. 148--156, Morgan Kaufmann. - - Zhu, J., Zou, H., Rosset, S. and Hastie, T. (2009): ``Multi-class AdaBoost''. Statistics and Its Interface, 2, pp. 349--360. - - } - -\author{Esteban Alfaro-Cortes \email{Esteban.Alfaro@uclm.es}, Matias Gamez-Martinez \email{Matias.Gamez@uclm.es} and Noelia Garcia-Rubio \email{Noelia.Garcia@uclm.es} } - - -\seealso{ - \code{\link{predict.boosting}}, - \code{\link{boosting.cv}} } - -\examples{ - -## rpart library should be loaded -data(iris) -iris.adaboost <- boosting(Species~., data=iris, boos=TRUE, mfinal=5) -iris.adaboost - - -## Data Vehicle (four classes) -data(Vehicle) -l <- length(Vehicle[,1]) -sub <- sample(1:l,2*l/3) -mfinal <- 10 -maxdepth <- 5 - -Vehicle.rpart <- rpart(Class~.,data=Vehicle[sub,],maxdepth=maxdepth) -Vehicle.rpart.pred <- predict(Vehicle.rpart,newdata=Vehicle[-sub, ],type="class") -tb <- table(Vehicle.rpart.pred,Vehicle$Class[-sub]) -error.rpart <- 1-(sum(diag(tb))/sum(tb)) -tb -error.rpart - -Vehicle.adaboost <- boosting(Class ~.,data=Vehicle[sub, ],mfinal=mfinal, coeflearn="Zhu", - control=rpart.control(maxdepth=maxdepth)) -Vehicle.adaboost.pred <- predict.boosting(Vehicle.adaboost,newdata=Vehicle[-sub, ]) -Vehicle.adaboost.pred$confusion -Vehicle.adaboost.pred$error - -#comparing error evolution in training and test set -errorevol(Vehicle.adaboost,newdata=Vehicle[sub, ])->evol.train -errorevol(Vehicle.adaboost,newdata=Vehicle[-sub, ])->evol.test - -plot.errorevol(evol.test,evol.train) - - -} - - -\keyword{tree }% at least one, from doc/KEYWORDS -\keyword{classif} +\name{boosting} +\alias{boosting} +\alias{adaboost.M1} + +\title{ Applies the AdaBoost.M1 and SAMME algorithms to a data set } + +\description{ Fits the AdaBoost.M1 (Freund and Schapire, 1996) and SAMME (Zhu et al., 2009) algorithms + using classification trees as single classifiers. } + +\usage{ + boosting(formula, data, boos = TRUE, mfinal = 100, coeflearn = 'Breiman', + control,...) +} + +\arguments{ + \item{formula}{ a formula, as in the \code{lm} function. } + \item{data}{a data frame in which to interpret the variables named in \code{formula}. } + \item{boos}{ if \code{TRUE} (by default), a bootstrap sample of the training set is drawn using + the weights for each observation on that iteration. If \code{FALSE}, every observation + is used with its weights. } + \item{mfinal}{an integer, the number of iterations for which boosting is run + or the number of trees to use. Defaults to \code{mfinal=100} iterations. } + \item{coeflearn}{ if 'Breiman'(by default), \code{alpha=1/2ln((1-err)/err)} is used. + If 'Freund' \code{alpha=ln((1-err)/err)} is used. In both cases the AdaBoost.M1 algorithm is used + and \code{alpha} is the weight updating coefficient. On the other hand, if coeflearn is 'Zhu' the SAMME algorithm + is implemented with \code{alpha=ln((1-err)/err)+} \code{ln(nclasses-1)}.} + \item{control}{options that control details of the rpart algorithm. See rpart.control for more details. } + \item{...}{ further arguments passed to or from other methods.} + +} + +\details{ + AdaBoost.M1 and SAMME are simple generalizations of AdaBoost for more than two classes. 
In AdaBoost-SAMME + the individual trees are required to have an error lower than 1-1/nclasses instead of 1/2 of the AdaBoost.M1 + +} + +\value{ + An object of class \code{boosting}, which is a list with the following components: + \item{formula}{the formula used. } + \item{trees}{the trees grown along the iterations. } + \item{weights}{a vector with the weighting of the trees of all iterations. } + \item{votes}{a matrix describing, for each observation, the number of trees that assigned it to each class, weighting each tree by its \code{alpha} coefficient. } + \item{prob}{a matrix describing, for each observation, the posterior probability or degree of support of each class. + These probabilities are calculated using the proportion of votes in the final ensemble.} + \item{class}{the class predicted by the ensemble classifier. } + \item{importance}{returns the relative importance of each variable in the classification task. + This measure takes into account the gain of the Gini index given by a variable in a tree and the weight of this tree. } +} + +\references{Alfaro, E., Gamez, M. and Garcia, N. (2013): ``adabag: An R Package for Classification with Boosting and Bagging''. Journal of Statistical Software, Vol 54, 2, pp. 1--35. + + Alfaro, E., Garcia, N., Gamez, M. and Elizondo, D. (2008): ``Bankruptcy forecasting: An empirical comparison of AdaBoost and neural networks''. Decision Support Systems, 45, pp. 110--122. + + Breiman, L. (1998): ``Arcing classifiers''. The Annals of Statistics, Vol 26, 3, pp. 801--849. + + Freund, Y. and Schapire, R.E. (1996): ``Experiments with a new boosting algorithm''. In Proceedings of the Thirteenth International Conference on Machine Learning, pp. 148--156, Morgan Kaufmann. + + Zhu, J., Zou, H., Rosset, S. and Hastie, T. (2009): ``Multi-class AdaBoost''. Statistics and Its Interface, 2, pp. 349--360. 
+ + } + +\author{Esteban Alfaro-Cortes \email{Esteban.Alfaro@uclm.es}, Matias Gamez-Martinez \email{Matias.Gamez@uclm.es} and Noelia Garcia-Rubio \email{Noelia.Garcia@uclm.es} } + + +\seealso{ + \code{\link{predict.boosting}}, + \code{\link{boosting.cv}} } + +\examples{ + +## rpart library should be loaded +data(iris) +iris.adaboost <- boosting(Species~., data=iris, boos=TRUE, mfinal=3) +iris.adaboost + + +## Data Vehicle (four classes) +library(mlbench) +data(Vehicle) +l <- length(Vehicle[,1]) +sub <- sample(1:l,2*l/3) +mfinal <- 3 +maxdepth <- 5 + +Vehicle.rpart <- rpart(Class~.,data=Vehicle[sub,],maxdepth=maxdepth) +Vehicle.rpart.pred <- predict(Vehicle.rpart,newdata=Vehicle[-sub, ],type="class") +tb <- table(Vehicle.rpart.pred,Vehicle$Class[-sub]) +error.rpart <- 1-(sum(diag(tb))/sum(tb)) +tb +error.rpart + +Vehicle.adaboost <- boosting(Class ~.,data=Vehicle[sub, ],mfinal=mfinal, coeflearn="Zhu", + control=rpart.control(maxdepth=maxdepth)) +Vehicle.adaboost.pred <- predict.boosting(Vehicle.adaboost,newdata=Vehicle[-sub, ]) +Vehicle.adaboost.pred$confusion +Vehicle.adaboost.pred$error + +#comparing error evolution in training and test set +errorevol(Vehicle.adaboost,newdata=Vehicle[sub, ])->evol.train +errorevol(Vehicle.adaboost,newdata=Vehicle[-sub, ])->evol.test + +plot.errorevol(evol.test,evol.train) + + +} + + +\keyword{tree }% at least one, from doc/KEYWORDS +\keyword{classif} diff -Nru adabag-4.1/man/errorevol.rd adabag-4.2/man/errorevol.rd --- adabag-4.1/man/errorevol.rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/errorevol.rd 2018-01-18 09:16:50.000000000 +0000 @@ -57,13 +57,13 @@ } \examples{ - +library(mlbench) data(BreastCancer) l <- length(BreastCancer[,1]) sub <- sample(1:l,2*l/3) cntrl <- rpart.control(maxdepth = 3, minsplit = 0, cp = -1) -BC.adaboost <- boosting(Class ~.,data=BreastCancer[sub,-1],mfinal=15, control=cntrl) +BC.adaboost <- boosting(Class ~.,data=BreastCancer[sub,-1],mfinal=5, control=cntrl) BC.adaboost.pred <- predict.boosting(BC.adaboost,newdata=BreastCancer[-sub,-1]) errorevol(BC.adaboost,newdata=BreastCancer[-sub,-1])->evol.test diff -Nru adabag-4.1/man/importanceplot.rd adabag-4.2/man/importanceplot.rd --- adabag-4.1/man/importanceplot.rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/importanceplot.rd 2018-01-18 09:16:18.000000000 +0000 @@ -56,12 +56,12 @@ library(rpart) data(iris) sub <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25)) -iris.adaboost <- boosting(Species ~ ., data=iris[sub,], mfinal=10) +iris.adaboost <- boosting(Species ~ ., data=iris[sub,], mfinal=3) importanceplot(iris.adaboost) #Examples with bagging -iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=10) -importanceplot(iris.bagging, horiz=TRUE, cex.names=.6) +#iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=5) +#importanceplot(iris.bagging, horiz=TRUE, cex.names=.6) } diff -Nru adabag-4.1/man/MarginOrderedPruning.Bagging.rd adabag-4.2/man/MarginOrderedPruning.Bagging.rd --- adabag-4.1/man/MarginOrderedPruning.Bagging.rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/MarginOrderedPruning.Bagging.rd 2018-01-18 09:15:06.000000000 +0000 @@ -60,7 +60,7 @@ ## create bagging with training set #increase mfinal in your own execution of this example to see #the real usefulness of this function -Satellite.bagging<-bagging(classes~.,data=Satellite[ind==1,],mfinal=8) +Satellite.bagging<-bagging(classes~.,data=Satellite[ind==1,],mfinal=3) #Satellite.bagging.pred<-predict(Satellite.bagging,Satellite[ind==3,]) ##pruning bagging diff 
-Nru adabag-4.1/man/margins.Rd adabag-4.2/man/margins.Rd --- adabag-4.1/man/margins.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/margins.Rd 2018-01-18 09:19:14.000000000 +0000 @@ -54,7 +54,7 @@ library(rpart) data(iris) sub <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25)) -iris.adaboost <- boosting(Species ~ ., data=iris[sub,], mfinal=10) +iris.adaboost <- boosting(Species ~ ., data=iris[sub,], mfinal=3) margins(iris.adaboost,iris[sub,])->iris.margins # training set plot.margins(iris.margins) @@ -64,7 +64,7 @@ plot.margins(iris.predmargins,iris.margins) #Examples with bagging -iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=5) +iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=3) margins(iris.bagging,iris[sub,])->iris.bagging.margins # training set iris.predbagging<- predict.bagging(iris.bagging, newdata=iris[-sub,]) diff -Nru adabag-4.1/man/plot.margins.rd adabag-4.2/man/plot.margins.rd --- adabag-4.1/man/plot.margins.rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/plot.margins.rd 2018-01-18 09:17:16.000000000 +0000 @@ -55,12 +55,13 @@ \code{\link{predict.bagging}} } \examples{ +library(mlbench) data(BreastCancer) l <- length(BreastCancer[,1]) sub <- sample(1:l,2*l/3) cntrl <- rpart.control(maxdepth = 3, minsplit = 0, cp = -1) -BC.adaboost <- boosting(Class ~.,data=BreastCancer[sub,-1],mfinal=15, control=cntrl) +BC.adaboost <- boosting(Class ~.,data=BreastCancer[sub,-1],mfinal=5, control=cntrl) BC.adaboost.pred <- predict.boosting(BC.adaboost,newdata=BreastCancer[-sub,-1]) BC.margins<-margins(BC.adaboost,BreastCancer[sub,-1]) # training set diff -Nru adabag-4.1/man/predict.bagging.Rd adabag-4.2/man/predict.bagging.Rd --- adabag-4.1/man/predict.bagging.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/predict.bagging.Rd 2018-01-18 09:14:06.000000000 +0000 @@ -57,12 +57,12 @@ \code{\link{bagging.cv}} } \examples{ -library(rpart) -data(iris) -sub <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25)) -iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=5) -iris.predbagging<- predict.bagging(iris.bagging, newdata=iris[-sub,]) -iris.predbagging +#library(rpart) +#data(iris) +#sub <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25)) +#iris.bagging <- bagging(Species ~ ., data=iris[sub,], mfinal=5) +#iris.predbagging<- predict.bagging(iris.bagging, newdata=iris[-sub,]) +#iris.predbagging ## rpart and mlbench libraries should be loaded library(rpart) @@ -70,7 +70,7 @@ data(BreastCancer) l <- length(BreastCancer[,1]) sub <- sample(1:l,2*l/3) -BC.bagging <- bagging(Class ~.,data=BreastCancer[,-1],mfinal=10, +BC.bagging <- bagging(Class ~.,data=BreastCancer[,-1],mfinal=5, control=rpart.control(maxdepth=3)) BC.bagging.pred <- predict.bagging(BC.bagging,newdata=BreastCancer[-sub,-1]) BC.bagging.pred$prob diff -Nru adabag-4.1/man/predict.boosting.Rd adabag-4.2/man/predict.boosting.Rd --- adabag-4.1/man/predict.boosting.Rd 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/man/predict.boosting.Rd 2018-01-17 19:45:26.000000000 +0000 @@ -92,7 +92,7 @@ tb error.rpart -BC.adaboost <- boosting(Class ~.,data=BreastCancer[,-1],mfinal=20, coeflearn="Freund", +BC.adaboost <- boosting(Class ~.,data=BreastCancer[,-1],mfinal=10, coeflearn="Freund", boos=FALSE , control=rpart.control(maxdepth=3)) #Using the pruning option diff -Nru adabag-4.1/MD5 adabag-4.2/MD5 --- adabag-4.1/MD5 2015-10-14 21:41:31.000000000 +0000 +++ adabag-4.2/MD5 2018-01-19 14:52:39.000000000 +0000 @@ -1,38 +1,38 @@ 
-26040aec98a948e0e4636cfc69e67061 *DESCRIPTION -7531ab04ab8ffb60c989602f842ecba0 *NAMESPACE +c535aaaa059159769d83293309af4b9e *DESCRIPTION +583683141b6d0fe20fcdbd7cc2d3bb66 *NAMESPACE b9acfdb97f35491e6060e3684ea553ed *R/Margin.vote.R -672dfbb0b5aff8127ac4ef5349a2bfaa *R/MarginOrderedPruning.Bagging.R +a602667989450c4c5348deb5e19c0761 *R/MarginOrderedPruning.Bagging.R e952017f771d206318c5d46f3bc4d99e *R/OOBIndex.R cc09677e2a1658e22cbdb6e3fc182bde *R/adabag-internal.R -ebd0cb9b1a5cd01305cfd8e3c1cab1cb *R/adaboost.M1.R +c375c1ce56deb28ffb01828064beea9f *R/adaboost.M1.R ad07a33fd678858aace285e92a688862 *R/autoprune.R -472c414f6b4d2ac0dcff654736ad0a35 *R/bagging.R -cad3e8c17656eae419ec662e3fb6bf17 *R/bagging.cv.R -081286edee6cb91197624066de6f58e5 *R/boosting.cv.R +e0c52a1d4d7b8bc1c7a7c52ede0447e2 *R/bagging.R +fb303ab7385c435d241ceed0097a74b0 *R/bagging.cv.R +40290c56727290a92e23a11eb100a904 *R/boosting.cv.R 68e47e84dbb06e27a852943afd884f17 *R/entropyEachTree.bagging.R 77cf6d93a6a8d0e85a6c9e4db63ce1fe *R/errorevol.R -84e16c56238ee762ef456fd3542ba816 *R/importanceplot.R -dac61527bc71d2c70a2dde5db0867b8f *R/margins.R -0777304b1399625767df7e3aa4c6140c *R/plot.errorevol.R -03ba02f44a0ecb7c900a1d03f5b173ea *R/plot.margins.R +9d9eb24da8459e79b4369513c6e1f8ef *R/importanceplot.R +bfe9275b49355fbb58f49ae3a050eaac *R/margins.R +9739148b3d1c1d14a0fdec76978cc102 *R/plot.errorevol.R +c52decf855e5a4d4a690b8ca2654bc2f *R/plot.margins.R 92dcf46828c2d3bfdb9cd25992324ee4 *R/predict.bagging.R -bf2085dd251efc25abf62275ccdd22d9 *R/predict.boosting.R +4b699166a0382a58e45bb9ae535c402d *R/predict.boosting.R 6f0eb52d7da0b0dd55d25ed50a9ef9e8 *R/predictOrderedAggregation.bagging.R cc36ead65f65451d8b92b36be06e93fc *R/select.R 6d75742d4956a73908e59508889da40d *R/vote.bagging.R -db115a64a57d3ef2525495cbff820128 *inst/CITATION -cc0ba1db8486f2c8136b7b0272b765b3 *man/MarginOrderedPruning.Bagging.rd +da727426115aa61246c06620d3f4f4f3 *inst/CITATION +503bf3a780146c3e7acb195544da6b50 *man/MarginOrderedPruning.Bagging.rd 1e2bb8aede6eb1980c6336a900aa0c66 *man/adabag-internal.Rd -2a0247158a9d2953c90be819476235a2 *man/adabag-package.Rd +216e649ab47ca817de81aabda1f9b029 *man/adabag-package.Rd eeba3a2e1884efa2aa6701a200b15791 *man/autoprune.rd -04948f7168a3dcc01037651983a9203d *man/bagging.Rd -0bc5368fd5faf0c974a7212b00079e37 *man/bagging.cv.Rd -c3cdec77706fd01ebfb776167bd2381b *man/boosting.Rd -ef01cd558265fdc3236a02a843812bf7 *man/boosting.cv.Rd -f3894582af492fb9368ec57010e593a8 *man/errorevol.rd -ba0cfeedaacb6e146ca50f225288acac *man/importanceplot.rd -5ec3e66a493c0a80e04f3cd3965d3df7 *man/margins.Rd +358e31f385706b4fe6459676a8351e44 *man/bagging.Rd +49f61b011f2704e72c5715ef590179cf *man/bagging.cv.Rd +b78af8c72c7daf665fa352ee21bfc8fa *man/boosting.Rd +ad1d429215a470cf51895ada3a4a728b *man/boosting.cv.Rd +2310df992cd6f9c9b79c200fd4538181 *man/errorevol.rd +f5f05802cdc37116a9b0ea49c7d4a2af *man/importanceplot.rd +9e65e93934aeea63d19df2619ecc221f *man/margins.Rd 7c6761d1b06b1430fff5be4d2787e2c2 *man/plot.errorevol.rd -de111b7534680a119d53242e4b4c1bf8 *man/plot.margins.rd -8e71409508bbc0083b7c82dda0af80ff *man/predict.bagging.Rd -d8673afc8aaf36cd7222faf6b80c2e33 *man/predict.boosting.Rd +c0efdb42ee6590ad46345cb5ab4c5150 *man/plot.margins.rd +3a3fac4ebefe7cc988c3d9ecba33ecdc *man/predict.bagging.Rd +dae57cc36acdb48ea255cb05773ca3fa *man/predict.boosting.Rd diff -Nru adabag-4.1/NAMESPACE adabag-4.2/NAMESPACE --- adabag-4.1/NAMESPACE 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/NAMESPACE 2017-12-17 08:50:44.000000000 +0000 @@ -8,8 
+8,11 @@ # Import all packages listed as Imports or Depends import( rpart, - mlbench, - caret +# mlbench, + caret, +foreach, +doParallel +#, parallel ) S3method(predict, boosting) @@ -19,4 +22,6 @@ #2015-10 lo aņado importFrom("graphics", "abline", "barplot", "legend", "lines", "plot") -importFrom("stats", "as.formula", "predict", "model.frame") \ No newline at end of file +importFrom("stats", "as.formula", "predict", "model.frame") +#Prueba +importFrom("parallel", "clusterEvalQ", "clusterExport", "detectCores", "makeCluster", "stopCluster") \ No newline at end of file diff -Nru adabag-4.1/R/adaboost.M1.R adabag-4.2/R/adaboost.M1.R --- adabag-4.1/R/adaboost.M1.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/adaboost.M1.R 2017-10-30 18:57:50.000000000 +0000 @@ -16,7 +16,8 @@ guardarpesos <- array(0, c(n,mfinal)) #para ver los pesos de las observaciones w <- rep(1/n,n) # desaparece el not visible binding for "<<-" que se usa en boos=F -data<-data.frame(pesos, data) #Los pesos en rpart deben ser una columna del dataframe +#data<-data.frame(pesos, data) #Los pesos en rpart deben ser una columna del dataframe +data<-cbind(pesos, data) #Los pesos en rpart deben ser una columna del dataframe arboles <- list() #Creamos una lista para guardar los arboles pond <- rep(0,mfinal) # Un vector donde guardaremos la ponderacion de cada arbol. @@ -183,7 +184,7 @@ #2015-07-25 pruebo a meter las clases de vardep como atributo de la salida attr(ans, "vardep.summary") <- summary(vardep, maxsum=700) -mf <- model.frame(formula=formula, data=data) +mf <- model.frame(formula=formula, data=data[,-1]) terms <- attr(mf, "terms") ans$terms <- terms ans$call <- match.call() diff -Nru adabag-4.1/R/bagging.cv.R adabag-4.2/R/bagging.cv.R --- adabag-4.1/R/bagging.cv.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/bagging.cv.R 2017-05-24 17:57:34.000000000 +0000 @@ -1,28 +1,65 @@ bagging.cv <- -function ( formula, data,v=10, mfinal=100,control) -{ -vardep<-data[,as.character(formula[[2]])] -n <- length(vardep) -#para validacion cruzada 2n) stop(" v should be in [2, n]") -if(v<2) stop(" v should be in [2, n]") - -predclass <- rep("O",n) - - for (i in 1:v) { - test <- v * (0:floor(n/v)) + i - test <- test[test < n + 1] - fit <- bagging(formula, data[-test,],mfinal, control=control) - predclass[test] <- predict.bagging(fit, data[test,])$class + function ( formula, data,v=10, mfinal=100,control, par=FALSE) + { + vardep<-data[,as.character(formula[[2]])] +# n <<- length(vardep) + n <- length(vardep) + #para validacion cruzada 2n) stop(" v should be in [2, n]") + if(v<2) stop(" v should be in [2, n]") + + + if (par==TRUE) { + +# no_cores <- detectCores() - 1 # Calculate the number of cores + no_cores <- 2 # para el check de CRAN + cl <- makeCluster(no_cores) # Initiate cluster + registerDoParallel(cl) #PAra el foreach + # clusterExport(cl, "vardep") + clusterEvalQ(cl, library(adabag)) + + +# for (i in 1:v) { + + kk<-foreach(i = 1:v, .combine = rbind, .packages='adabag') %dopar% + { + + n <- length(vardep) + + test <- v * (0:floor(n/v)) + i + test <- test[test < n + 1] + fit <- bagging(formula, data[-test,],mfinal=mfinal, control=control) +# predclass[test] <- predict.bagging(fit, data[test,])$class + predclass <- predict.bagging(fit, data[test,])$class + + x<-data.frame(test, predclass) + + return(x) + } + stopCluster(cl) + + predclass<-kk$predclass[order(kk$test)] + + } + + if (par==FALSE) { + predclass <- rep("O",n) + for (i in 1:v) { + test <- v * (0:floor(n/v)) + i + test <- test[test < n + 1] + fit <- 
bagging(formula, data[-test,],mfinal, control=control) + predclass[test] <- predict.bagging(fit, data[test,])$class + } + + } + + # para que devuelva la matriz de confusion + tabla <- table(predclass, vardep, dnn=c("Predicted Class", "Observed Class")) + + # Para que devuelva el error en newdata + error<- 1- sum(predclass== vardep)/n + + output<- list(class=predclass, confusion=tabla, error=error) - # para que devuelva la matriz de confusion -tabla <- table(predclass, vardep, dnn=c("Predicted Class", "Observed Class")) - -# Para que devuelva el error en newdata -error<- 1- sum(predclass== vardep)/n - -output<- list(class=predclass, confusion=tabla, error=error) - -} - + } diff -Nru adabag-4.1/R/bagging.R adabag-4.2/R/bagging.R --- adabag-4.1/R/bagging.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/bagging.R 2018-01-17 19:52:24.000000000 +0000 @@ -1,109 +1,156 @@ bagging <- -function(formula, data, mfinal=100, control,...) { - -formula<- as.formula(formula) -vardep <- data[,as.character(formula[[2]])] - n <- length(data[,1]) -nclases <- nlevels(vardep) -pred<- data.frame(rep(0,n)) # Dataframe para guardar las pred, al inicio esta vacio, pero luego se va agnadiendo - - arboles <- list() #Creamos una lista para guardar los arboles -replicas <- array(0, c(n,mfinal)) - #2012-05-16 nueva medida de importancia - #sustituye a acum - arboles[[1]] <- rpart(formula, data = data, control = rpart.control(minsplit=1, cp=-1, maxdepth=30) ) - -#Para sacar el n de variables, este luego lo sustituye en el bucle - -#if( is.numeric(nrow(arboles[[1]]$splits))=="FALSE" ) stop("change rpart.control to avoid empty trees") - nvar<-dim(varImp(arboles[[1]], surrogates = FALSE, competes = FALSE))[1] - imp<- array(0, c(mfinal,nvar)) #Creo una matriz para guardar el "improve" de cada variable conforme evoluciona boosting/bagging - - - -for (m in 1:mfinal) { - -#boostrap<- sample(1:n,replace=TRUE) -#fit <- rpart(formula,data=data[boostrap,], control=control) - - k2 <- 1 #Los autores agradecen su sugerencia a Ignacio Medina - while (k2 == 1){ - - boostrap <- sample(1:n, replace = TRUE) - fit <- rpart(formula, data = data[boostrap, ], control = control) - k2 <- length(fit$frame$var) + function(formula, data, mfinal=100, control, par=FALSE,...) { + + formula<- as.formula(formula) + vardep <- data[,as.character(formula[[2]])] + n <- length(data[,1]) + nclases <- nlevels(vardep) + pred<- data.frame(rep(0,n)) # Dataframe para guardar las pred, al inicio esta vacio, pero luego se va agnadiendo + + arboles <- list() #Creamos una lista para guardar los arboles + replicas <- array(0, c(n,mfinal)) + #2012-05-16 nueva medida de importancia + #sustituye a acum + arboles[[1]] <- rpart(formula, data = data, control = rpart.control(minsplit=1, cp=-1, maxdepth=30) ) + + #Para sacar el n de variables, este luego lo sustituye en el bucle + + #if( is.numeric(nrow(arboles[[1]]$splits))=="FALSE" ) stop("change rpart.control to avoid empty trees") + nvar<-dim(varImp(arboles[[1]], surrogates = FALSE, competes = FALSE))[1] + + data1<-data + + #2017-04-02 Prueba para parallel con TRUE/FALSE + if (par==TRUE) { + + #2017-02-14 Prueba para parallel + no_cores <- detectCores() - 1 # Calculate the number of cores + cl <- makeCluster(no_cores) # Initiate cluster + registerDoParallel(cl) #PAra el foreach + #clusterExport(cl, "n") + clusterEvalQ(cl, library(adabag)) + +#Esta parte no se, si quedo o lo quite al final + comb <- function(x, ...) 
{ lapply(seq_along(x), + function(i) c(x[[i]], lapply(list(...), function(y) y[[i]]),lapply(list(...), function(z) z[[i]]))) #saca las 3 cosas pero duplica los arboles + + } + oper <- foreach(m=1:mfinal, .combine='comb', .multicombine=TRUE, + .init=list(list(), list(), list())) %dopar% { + + #for (m in 1:mfinal) { + #boostrap<- sample(1:n,replace=TRUE) + #fit <- rpart(formula,data=data[boostrap,], control=control) + + n <- length(data[,1]) + k2 <- 1 #Los autores agradecen su sugerencia a Ignacio Medina + while (k2 == 1){ + + + boostrap <- sample(1:n, replace = TRUE) + # fit <- rpart(formula, data = data[boostrap, ], control = control) + fit <- rpart(formula, data = data1[boostrap, ], control = control) + k2 <- length(fit$frame$var) + } + + + + # arboles[[m]] <- fit #Guardamos los arboles + # replicas[,m]<-boostrap + + +# k <- varImp(arboles[[m]], surrogates = FALSE, competes = FALSE) +# imp[m,] <-k[sort(row.names(k)), ] + k <- varImp(fit, surrogates = FALSE, competes = FALSE) + imp <-k[sort(row.names(k)), ] + + #Parallel + list(fit, boostrap, imp) } + stopCluster(cl) + + arboles <- oper[[1]][1:mfinal] #Guardamos los arboles de 1:mfinal porque estan duplicados + replicas<-matrix(unlist(oper[[2]], use.names=FALSE ), ncol = mfinal, byrow = FALSE) + imp<-matrix(unlist(oper[[3]][1:mfinal], use.names=FALSE ), ncol = nvar, byrow = TRUE) -arboles[[m]] <- fit #Guardamos los arboles -replicas[,m]<-boostrap - -#2014-11-19 lo sustituyo por sapply -#if(m==1){pred <- predict(arboles[[m]],data,type="class")} -#else{pred <- data.frame(pred,predict(arboles[[m]],data,type="class"))} - - - k <- varImp(arboles[[m]], surrogates = FALSE, competes = FALSE) - imp[m,] <-k[sort(row.names(k)), ] - -} - - - -pred<-as.data.frame(sapply (arboles, predict, data=data, type="class")) - - - - -classfinal <- array(0, c(n,nlevels(vardep))) -for (i in 1:nlevels(vardep)){ - classfinal[,i] <- matrix(as.numeric(pred==levels(vardep)[i]),nrow=n)%*%rep(1,mfinal) -} - -predclass <- rep("O",n) -#2014-11-12 Se puede hacer esto usando apply para evitar el bucle? 
-#Creo la funcion "select" que en caso de empate devuelva la clase mayoritaria de entre las empatadas -#2015-07-25 modifico la funcion select para poder usar predict con unlabeled data -predclass[]<-apply(classfinal,1,FUN=select, vardep.summary=summary(vardep)) - -#for(i in 1:n){ -#predclass[i] <- as.character(levels(vardep)[(order(classfinal[i,],decreasing=TRUE)[1])]) -#if(length(which(classfinal[i,]==max(classfinal[i,])))>1) -# {predclass[i] <-names(summary(vardep)[which(classfinal[i,]==max(classfinal[i,]))])[ -#order(summary(vardep)[which(classfinal[i,]==max(classfinal[i,]))],decreasing=TRUE)[1]] -#} -#else{predclass[i] <- as.character(levels(vardep)[(order(classfinal[i,],decreasing=TRUE)[1])])} -#} -#normalizar la importancia de las variables, las ponderaciones son todas iguales en bagging - - pond<-rep(1,mfinal) - imppond<-as.vector(as.vector(pond)%*%imp) - imppond<-imppond/sum(imppond)*100 - names(imppond)<-sort(row.names(k)) - - -#Para que devuelva las probabilidades a posteriori -classfinal/apply(classfinal,1,sum)->votosporc - - -ans<- list(formula=formula,trees=arboles,votes=classfinal,prob=votosporc,class=predclass, samples=replicas, importance=imppond) - -#2015-07-25 pruebo a meter las clases de vardep como atributo de la salida -attr(ans, "vardep.summary") <- summary(vardep, maxsum=700) - -mf <- model.frame(formula=formula, data=data) -terms <- attr(mf, "terms") -ans$terms <- terms -ans$call <- match.call() - - - - -class(ans) <- "bagging" -ans - -} - - + } + + #A partir de aqui igual sin parallel + if (par==FALSE) { + + imp<- array(0, c(mfinal,nvar)) #Creo una matriz para guardar el "improve" de cada variable conforme evoluciona boosting/bagging + + + for (m in 1:mfinal) { + + k2 <- 1 #Los autores agradecen su sugerencia a Ignacio Medina + while (k2 == 1){ + + boostrap <- sample(1:n, replace = TRUE) + fit <- rpart(formula, data = data1[boostrap, ], control = control) + k2 <- length(fit$frame$var) + } + + + + arboles[[m]] <- fit #Guardamos los arboles + replicas[,m]<-boostrap + k <- varImp(arboles[[m]], surrogates = FALSE, competes = FALSE) + imp[m,] <-k[sort(row.names(k)), ] + } + } + + + #pred<-as.data.frame(sapply (arboles, predict, data=data, type="class")) + #Lo cambio porque da problemas el data=data + pred<-as.data.frame(sapply (arboles, predict, data1, type="class")) + + + + + classfinal <- array(0, c(n,nlevels(vardep))) + for (i in 1:nlevels(vardep)){ + classfinal[,i] <- matrix(as.numeric(pred==levels(vardep)[i]),nrow=n)%*%rep(1,mfinal) + } + + predclass <- rep("O",n) + #2014-11-12 Se puede hacer esto usando apply para evitar el bucle? 
+ #Creo la funcion "select" que en caso de empate devuelva la clase mayoritaria de entre las empatadas + #2015-07-25 modifico la funcion select para poder usar predict con unlabeled data + predclass[]<-apply(classfinal,1,FUN=select, vardep.summary=summary(vardep)) + + #normalizar la importancia de las variables, las ponderaciones son todas iguales en bagging + + pond<-rep(1,mfinal) + + k <- varImp(arboles[[1]], surrogates = FALSE, competes = FALSE) + + imppond<-as.vector(as.vector(pond)%*%imp) + imppond<-imppond/sum(imppond)*100 + names(imppond)<-sort(row.names(k)) + + + #Para que devuelva las probabilidades a posteriori + classfinal/apply(classfinal,1,sum)->votosporc + + + ans<- list(formula=formula,trees=arboles,votes=classfinal,prob=votosporc,class=predclass, samples=replicas, importance=imppond) + + #2015-07-25 pruebo a meter las clases de vardep como atributo de la salida + attr(ans, "vardep.summary") <- summary(vardep, maxsum=700) + + mf <- model.frame(formula=formula, data=data1) + terms <- attr(mf, "terms") + ans$terms <- terms + ans$call <- match.call() + + + + + class(ans) <- "bagging" + ans + + } diff -Nru adabag-4.1/R/boosting.cv.R adabag-4.2/R/boosting.cv.R --- adabag-4.1/R/boosting.cv.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/boosting.cv.R 2017-05-24 18:25:26.000000000 +0000 @@ -1,5 +1,5 @@ boosting.cv <- -function ( formula, data,v=10,boos=TRUE ,mfinal=100, coeflearn="Breiman", control) +function ( formula, data,v=10,boos=TRUE ,mfinal=100, coeflearn="Breiman", control, par=FALSE) { #Exigimos que coeflearn sea uno de esos tres valores @@ -13,16 +13,61 @@ if(v>n) stop(" v should be in [2, n]") if(v<2) stop(" v should be in [2, n]") -predclass <- rep("O",n) - for (i in 1:v) { + + + +if (par==TRUE) { + + # Calculate the number of cores + no_cores <- detectCores() - 1 + + # Initiate cluster + cl <- makeCluster(no_cores) + + #Para el foreach + registerDoParallel(cl) +# clusterExport(cl, "n") + clusterEvalQ(cl, library(adabag)) + + + +# for (i in 1:v) { + kk<-foreach(i = 1:v, .combine = rbind, .packages='adabag') %dopar% + { + + n <- length(vardep) test <- v * (0:floor(n/v)) + i test <- test[test < n + 1] fit <- boosting(formula, data[-test,],boos ,mfinal,coeflearn,control=control) - predclass[test] <- predict.boosting(fit, data[test,])$class +# predclass[test] <- predict.boosting(fit, data[test,])$class + predclass <- predict.boosting(fit, data[test,])$class + + x<-data.frame(test, predclass) + + return(x) + + +#cat("i: ", c(i, date()), "\n") -cat("i: ", c(i, date()), "\n") } + stopCluster(cl) + predclass<-kk$predclass[order(kk$test)] +} + + +if (par==FALSE) { + predclass <- rep("O",n) + for (i in 1:v) { + test <- v * (0:floor(n/v)) + i + test <- test[test < n + 1] + fit <- boosting(formula, data[-test,],boos ,mfinal,coeflearn,control=control) + predclass[test] <- predict.boosting(fit, data[test,])$class + + cat("i: ", c(i, date()), "\n") + } + +} # para que devuelva la matriz de confusion tabla <- table(predclass, vardep, dnn=c("Predicted Class", "Observed Class")) @@ -30,7 +75,6 @@ # Para que devuelva el error en newdata error<- 1- sum(predclass== vardep)/n -output<- list(class=predclass, confusion=tabla, error=error) - -} +output<- list(class=predclass, confusion=tabla, error=error ) +} \ No newline at end of file diff -Nru adabag-4.1/R/importanceplot.R adabag-4.2/R/importanceplot.R --- adabag-4.1/R/importanceplot.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/importanceplot.R 2017-12-06 19:26:00.000000000 +0000 @@ -3,5 +3,5 @@ 
if(!((class(object)=="bagging")|(class(object)=="boosting"))) stop("object class should be bagging or boosting") - barplot(object$imp[order(object$imp,decreasing=TRUE)], main="Variables relative importance", col="lightblue",las=1,xaxs="r",...) + barplot(object$imp[order(object$imp,decreasing=TRUE)], main="Variable relative importance", col="lightblue",las=1,xaxs="r",...) } diff -Nru adabag-4.1/R/MarginOrderedPruning.Bagging.R adabag-4.2/R/MarginOrderedPruning.Bagging.R --- adabag-4.1/R/MarginOrderedPruning.Bagging.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/MarginOrderedPruning.Bagging.R 2016-11-15 17:32:48.000000000 +0000 @@ -8,6 +8,9 @@ prunedBagging<- list(formula=baggingObject$formula,trees=baggingObject$trees[myBestTreeIndex]) +#2016-11-15 pruebo a meter las clases de vardep como atributo de la salida porque lo uso en predict.bagging +attr(prunedBagging, "vardep.summary") <- attributes(baggingObject)$vardep.summary + class(prunedBagging) <- "bagging" output<-list(prunedBagging=prunedBagging,AccuracyOrderedEnsemblePruningSet=MyentropyEachTree.order.pred$AccuracyOrderedEnsemble) diff -Nru adabag-4.1/R/margins.R adabag-4.2/R/margins.R --- adabag-4.1/R/margins.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/margins.R 2016-12-14 18:34:08.000000000 +0000 @@ -27,7 +27,9 @@ for (i in 1:n) { k<-votosporc[i, as.numeric(vardep[i])]-votosporc[i,] -margen[i]<- min(k[k!=0]) +#margen[i]<- min(k[k!=0]) #Da problemas en caso de empate + +margen[i]<- min(k[-as.numeric(vardep[i])]) } diff -Nru adabag-4.1/R/plot.errorevol.R adabag-4.2/R/plot.errorevol.R --- adabag-4.1/R/plot.errorevol.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/plot.errorevol.R 2017-08-29 08:57:34.000000000 +0000 @@ -4,10 +4,10 @@ if(!((class(y)=="errorevol")|is.null(y))) stop("y class should be errorevol or NULL") - plot(x$error, type="l", ylim=c(0,max(x$error)), main="Ensemble error vs number of trees", xlab="Iterations", ylab="Error", col = "red",...) + plot(x$error, type="l", ylim=c(0,max(x$error)+0.05), main="Ensemble error vs number of trees", xlab="Iterations", ylab="Error", col = "red",...) 
if(!is.null(y)) { - lines(y$error, cex = .5 ,col="blue3") + lines(y$error, cex = .5 ,col="blue3", lty=2) legend("topright", c("test","train"), col = c("red", "blue"), lty=1:2) diff -Nru adabag-4.1/R/plot.margins.R adabag-4.2/R/plot.margins.R --- adabag-4.1/R/plot.margins.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/plot.margins.R 2017-12-11 08:24:56.000000000 +0000 @@ -6,11 +6,12 @@ stop("y class should be margins or NULL") plot(sort(x$margins), (1:length(x$margins))/length(x$margins), - type="l", xlim=c(-1,1),main="Margin cumulative distribution graph", xlab="m", ylab="% observations", col="blue3", lwd=2) + type="l", xlim=c(-1,1),main="Margin cumulative distribution graph", xlab="m", ylab="Cumulative relative frequency", col="darkblue", lwd=3, cex.main=2) - abline(v=0, col="red",lty=2, lwd=2) + abline(v=0, col="red",lty=3, lwd=3 ) if(!is.null(y)) { - lines(sort(y$margins), (1:length(y$margins))/length(y$margins), type="l", cex = .5 ,col="green", lwd=2) - legend("topleft", c("test","train"), col = c("blue", "green"), lty=1, lwd=2) + lines(sort(y$margins), (1:length(y$margins))/length(y$margins), type="l", cex = .5 ,col="green", lwd=3, lty=2) + legend("topleft", c("test","train"), col = c("darkblue", "green"), lty=1:2, lwd=3) } } + diff -Nru adabag-4.1/R/predict.boosting.R adabag-4.2/R/predict.boosting.R --- adabag-4.1/R/predict.boosting.R 2015-10-14 20:14:49.000000000 +0000 +++ adabag-4.2/R/predict.boosting.R 2017-10-30 19:05:12.000000000 +0000 @@ -9,15 +9,16 @@ # mfinal <- length(object$trees) n <- length(newdata[, 1]) -#2015-07-26 lo cambio para pedict con unlabeled data +#2015-07-26 lo cambio para predict con unlabeled data vardep.summary<-attributes(object)$vardep.summary nclases <- length(vardep.summary) # nclases <- nlevels(vardep) pesos <- rep(1/n, n) - newdata <- data.frame(newdata, pesos) - pond <- object$weights[1:newmfinal] # para podar +# newdata <- data.frame(newdata, pesos) #2017-10-29 Ashima Honra suggestion + newdata <- cbind(newdata, pesos) + pond <- object$weights[1:newmfinal] # para podar #pred <- data.frame(rep(0, n)) #for (m in 1:newmfinal) { # if (m == 1) {pred <- predict(object$trees[[m]], newdata, type = "class")}
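Taken together, the new par=TRUE branches in bagging.cv.R and boosting.cv.R reduce to a single foreach/doParallel pattern: fit each fold in a %dopar% worker, return a small data frame of (test index, predicted class), rbind the fold results and reorder by index. The function below is a condensed, standalone restatement of that pattern for bagging, with a minor safety tweak (as.character() around the factor comparison); the name parallel_bagging_cv and the default control are illustrative, not part of the package.

# Condensed sketch of the parallel v-fold cross-validation added in 4.2
library(adabag)       # provides bagging() and predict.bagging(); attaches rpart
library(foreach)
library(doParallel)   # also attaches the 'parallel' package

parallel_bagging_cv <- function(formula, data, v = 10, mfinal = 100,
                                control = rpart.control()) {
  vardep <- data[, as.character(formula[[2]])]
  n <- length(vardep)

  cl <- makeCluster(2)             # bagging.cv() pins this to 2 for the CRAN check;
  registerDoParallel(cl)           # boosting.cv()/bagging() use detectCores() - 1
  clusterEvalQ(cl, library(adabag))

  kk <- foreach(i = 1:v, .combine = rbind, .packages = "adabag") %dopar% {
    test <- v * (0:floor(n / v)) + i        # same fold indexing as the serial loop
    test <- test[test < n + 1]
    fit  <- bagging(formula, data[-test, ], mfinal = mfinal, control = control)
    data.frame(test, predclass = predict.bagging(fit, data[test, ])$class)
  }
  stopCluster(cl)

  predclass <- as.character(kk$predclass)[order(kk$test)]   # back to data order
  list(class     = predclass,
       confusion = table(predclass, vardep,
                         dnn = c("Predicted Class", "Observed Class")),
       error     = 1 - sum(predclass == as.character(vardep)) / n)
}

Called as parallel_bagging_cv(Species ~ ., iris, v = 2, mfinal = 3), it should agree with bagging.cv(..., par = TRUE) up to bootstrap randomness. The parallel branch added to bagging() itself uses the same machinery but with a .multicombine foreach that collects the fitted trees, the bootstrap replicas and the per-tree variable-importance rows in a single pass.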