I'm getting this error:
Something is wrong; all the Accuracy metric values are missing:
Accuracy Kappa
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :5 NA's :5
Error in train.default(x, y, weights = w, ...) : Stopping
In addition: Warning message:
In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures.
The first link suggests that the levels of the response variable cannot be `0` and `1`. This is not the case in my data:
R> str(test$y)
Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
R> levels(test$y)
[1] "No" "Yes"
So, I'm not sure what's going on.
test <- structure(list(y = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("No", "Yes"), class = "factor"), x1 = structure(c(6L,
40L, 26L, 7L, 18L, 9L, 26L, 36L, 23L, 16L, 6L, 20L, 23L, 26L,
41L, 20L, 31L, 7L, 2L, 2L, 18L, 2L, 12L, 9L, 40L, 40L, 14L, 8L,
2L, 20L, 15L, 12L, 8L, 17L, 17L, 21L, 18L, 32L, 2L, 2L), .Label = c("Accommodation and Restaurant Services",
"Admin/Support Services", "Agriculture", "Arts, Entertainment, and Rec.",
"Construction: Heavy and Civil Engineering", "Construction: of Buildings",
"Construction: Specialty Trade Contractors", "EDU Services",
"Finance / Insurance", "Fishing, Hunting, Trapping", "Forestry & Logging",
"Health Care and Social Assistance", "Information", "Management of Companies and Enterprises",
"Manufacturing: Food/Bev/Textile", "Manufacturing: Metals/Machinery/Computers/Appliances",
"Manufacturing: Wood/Paper/Chemical/Mineral", "Merchandise Trade",
"Mining, Quarrying, and Oil and Gas Extraction", "Other Services (Blue Collar)",
"Prof./Sci./Tech: Acct / Tax", "Prof./Sci./Tech: Advertising / Media",
"Prof./Sci./Tech: Architecture / Eng.", "Prof./Sci./Tech: Computer Design",
"Prof./Sci./Tech: Law", "Prof./Sci./Tech: Mgmt Consulting", "Prof./Sci./Tech: Other",
"Prof./Sci./Tech: R&D", "Prof./Sci./Tech: Specialized Design",
"Public Admin.", "Real Estate", "Retail Trade", "Support Agriculture",
"Transportation", "Unknown", "Utilities", "Warehousing", "Waste Management & Remediation Services",
"Wholesale Trade: Brokers", "Wholesale Trade: Durable Goods",
"Wholesale Trade: NonDurable Goods"), class = "factor"), x2 = structure(c(36L,
11L, 35L, 46L, 5L, 10L, 37L, 41L, 11L, 5L, 5L, 10L, 20L, 10L,
5L, 5L, 45L, 20L, 11L, 10L, 18L, 35L, 5L, 6L, 41L, 5L, 44L, 36L,
39L, 10L, 44L, 8L, 34L, 15L, 39L, 10L, 18L, 19L, 35L, 11L), .Label = c("AK",
"AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI",
"IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI",
"MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV",
"NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT",
"VA", "VT", "WA", "WI", "WV", "WY"), class = "factor"), x3 = c(0.004714,
0, 0.015551, 0.360246999999988, 5e-04, 0.035714, 0.357143, 0.00591043019290109,
0.138889, 0.028846, 0.0075, 0.00051, 0.006329, 0.065789, 0.1125,
0.003125, 0.003889, 0.000391, 0.011905, 0.004, 0, 0.00025, 0.005,
0.076923, 0.149254, 0.0220719438793245, 0.360246999999988, 0.057692,
0, 0.015625, 0.000714, 0, 0.001087, 0.006135, 0.003846, 0.066667,
0.009091, 0, 0.360246999999988, 0.012821), x4 = c(3.69626899674553,
0, 4.34824643385123, 4.22834902062364, 2.94001815500766, 3.27207378750001,
4.61543448110941, 4.56919828334781, 4.32498170308737, 3.73719264270474,
3.87511916546257, 1.70757017609794, 3.76499759928488, 3.7635028654676,
4.15094055396548, 3.43949059038968, 3.70423633730879, 3.18864729599972,
2.85186960072977, 2.37291200297011, 0, 2.69983772586725, 3.23829706787539,
3.17695898058691, 4.32314893008404, 0, 4.64518638929519, 3.17405980772503,
0, 2.5092025223311, 2.47856649559384, 0, 2.06818586174616, 4.08439751914115,
3.50906804501716, 3.02160271602824, 2.71349054309394, 0, 4.6020708485543,
2.79657433321043), x5 = c(472, 502, 506, 510, 497, 493, 515,
542, 557, 465, 480, 369.618950156498, 518, 571, 512, 520, 464,
578, 500, 526, 489.830047438596, 345, 664.964755505884, 546,
505, 572, 540, 567, 473, 575, 558, 509.58218597766, 579, 616,
561, 581, 291, 415.846613389669, 476, 442), x6 = c(374, 482,
491, 540, 534, 493, 514, 570, 577, 485, 488, 627, 542, 529, 445,
531, 456, 535, 381, 586, 474.392596434054, 484, 487.854513298151,
518, 524, 582, 530, 571, 582.582737417662, 572, 592, 477, 585,
594, 574, 609, 389, 581.722630168064, 550, 458), x7 = c(5.8e-05,
0, 0.015551, 0.01, 0, 0, 0.0683816249999983, -0.00050051658067362,
0.068194, 0.056615, 0, 0, 0.001097, 0, 0.0683816249999983, 0,
0.002361, 0.000781, 0.021667, 0, 0, 0, 0, 0.001154, 0.001, -0.000657947357427473,
0, 0, 0, 0, 0, 0, 0, 0.001479, 0.001269, 0.005333, 0.000455,
0, 0, 0), x8 = c(14, 13, 53, 24, 8, 13, 13, 20, 17, 35, 19, 11,
42, 15, 33, 1, 20, 6, 24, 3, 14, 3, 3, 17, 42, 8, 4, 0, 5, 4,
10, 5, 8, 41, 31, 6, 2, 18, 7, 7), x9 = c(18, 2, 49, 19, 14,
8, 7, 6, 7, 21, 19, 1, 34, 2, 24, 3, 30, 5, 3, 12, 9, 4, 2, 9,
59, 15, 7, 0, 20, 1, 6, 13, 1, 64, 34, 18, 12, 0, 0, 6), x10 = c(48,
68.8884165199473, 63, 54, 78, 80, 77.3502747403963, 74, 79, 71,
76.7682937433346, 65.0624751538981, 63, 80, 41, 81.4257054732527,
67, 78, 80, 73, 52.5390991618267, 60.8813703575155, 66, 72, 64,
61.266324949851, 43.2207804060158, 80, 61.708917114202, 80, 75,
73.3412226739437, 80, 78, 57, 78, 23, 30.321279640657, 69.1391208799255,
60.9766796474371), x11 = c(4.62, 0.81, 1.98, 1.51, 1.51, 1.2,
0.74, 1.2, 4.04, 2.06, 1.43, 1.51, 4.16, 0.81, 0.81, 1.82, 2.1,
0.89, 0.73, 0.97, 20.49, 1.51, 1.51, 4.09, 1.33, 0.89, 1.59,
1.43, 4.54, 1.51, 1.2, 1.04, 1.59, 2.57, 4.4, 1.28, 0.89, 17.94,
1.29, 1.59), x12 = c(-3, -44.4574826440087, 1, 5, 2, 2, 39.0861520260711,
14, 0, -6, 40.5638314058397, 22.0124501206663, 3, 12, 27, 7.55072978911628,
5, -1, -12, 0, 14.5217398963732, -2.06782290930381, -13, 4, 1,
39.251983622172, 0, 0, 33.2355632837177, 0, 6, 20.3416928763606,
40.7136165846826, -2, 7, 0, 9, 0.622995283657772, -6.64967287401836,
-3.6632790085156)), .Names = c("y", "x1", "x2", "x3", "x4", "x5",
"x6", "x7", "x8", "x9", "x10", "x11", "x12"), row.names = c(59110L,
266133L, 110275L, 271642L, 54361L, 54818L, 59197L, 94902L, 80531L,
291L, 51460L, 228662L, 174960L, 27500L, 105584L, 132839L, 233895L,
194802L, 123435L, 165332L, 318615L, 133731L, 256878L, 99780L,
31551L, 106032L, 280841L, 130066L, 136252L, 29868L, 282962L,
55762L, 312670L, 152593L, 50020L, 220877L, 13104L, 20888L, 319386L,
229603L), class = "data.frame")
Based on comments both here and on github/caret, I have updated the code. The non-parallel forest now works, but the parallel forests do not.
# Reproduction script: tunes random-forest models on the 40-row `test` data
# frame through caret's train(), with and without parallel-related arguments,
# then fits the same models directly for comparison.
# NOTE(review): train(), trainControl(), makeCluster(), detectCores() and
# randomForest() are called below, but only party is loaded in this script.
# The sessionInfo() output lists caret, randomForest, doParallel and parallel
# as attached, so they were presumably loaded earlier in the session.
test$x7 <- NULL # remove low variance "dummy" variable
# based on comments on github (link above).
library(party) # conditional RF
# Resampling scheme: 10-fold cross-validation, repeated once.
t_control <- trainControl(method= "repeatedcv", number= 10,
repeats= 1)
# Default mtry: floor(sqrt(number of columns)). ncol(test) counts the response
# column y as well; after dropping x7 there are 12 columns, so mtry_def is 3.
mtry_def <- floor(sqrt(ncol(test)))
# Tuning grid: half, equal to, and double the default mtry.
# NOTE(review): with mtry_def = 3 the first candidate is 1.5, a non-integer
# mtry -- confirm this is intended.
t_grid <- expand.grid(mtry= c(mtry_def/2, mtry_def, 2 * mtry_def))
## works without parallel (after removing options per @topepo):
rf1 <- train(y ~ ., data= test,
method= "cforest", trControl= t_control,
tuneGrid= t_grid) # remove verbose, importance, proximity
## doesn't work with parallel:
# NOTE(review): `cl` is created here but is never registered as a parallel
# backend nor stopped anywhere in the visible code -- confirm whether that
# happens elsewhere.
cl <- makeCluster(detectCores() - 1)
# NOTE(review): this overwrites the rf1 fitted above. Per the answer in this
# thread, allowParallel is an argument of trainControl(), not of train();
# passed to train() it is forwarded through `...` to cforest(), which has no
# `...` of its own, hence "unused argument (allowParallel = TRUE)".
rf1 <- train(y ~ ., data= test,
method= "cforest", trControl= t_control,
tuneGrid= t_grid, allowParallel= TRUE) # same errors as prior to edit
# Same call shape with method "parRF", plus the randomForest-style options
# verbose, proximity and importance.
rf2 <- train(y ~ ., data= test,
method= "parRF", trControl= t_control, verbose= FALSE,
tuneGrid= t_grid, allowParallel= TRUE, proximity= FALSE,
importance= TRUE) # same errors as prior to edit
# moving from method= "parRF" --> method= "rf" does work:
# Per the answer in this thread, randomForest accepts `...`, so the stray
# allowParallel argument is swallowed without an error here.
rf3 <- train(y ~ ., data= test,
method= "rf", trControl= t_control, verbose= FALSE,
tuneGrid= t_grid, allowParallel= TRUE, proximity= FALSE,
importance= TRUE)
# defaults (ie-- outside caret) work
# Direct fits with mtry fixed at 3, bypassing caret's resampling and tuning.
rf3a <- randomForest(y ~ ., data= test, mtry= 3, importance=TRUE)
rf3b <- cforest(y ~ ., data= test, controls= cforest_control(mtry= 3))
# updated sessionInfo() -- AM running on a different computer
R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] stats4 grid parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] kernlab_0.9-22 party_1.0-23 strucchange_1.5-1 sandwich_2.3-4 zoo_1.7-12 modeltools_0.2-21
[7] mvtnorm_1.0-3 randomForest_4.6-10 caret_6.0-52 ggplot2_1.0.1 lattice_0.20-33 doParallel_1.0.8
[13] iterators_1.0.7 foreach_1.4.2
loaded via a namespace (and not attached):
[1] Rcpp_0.12.1 compiler_3.2.2 nloptr_1.0.4 plyr_1.8.3 class_7.3-13 tools_3.2.2
[7] digest_0.6.8 lme4_1.1-9 nlme_3.1-122 gtable_0.1.2 mgcv_1.8-7 Matrix_1.2-2
[13] brglm_0.5-9 SparseM_1.7 coin_1.1-0 proto_0.3-10 e1071_1.6-7 BradleyTerry2_1.0-6
[19] stringr_1.0.0 gtools_3.5.0 MatrixModels_0.4-1 nnet_7.3-11 survival_2.38-3 multcomp_1.4-1
[25] TH.data_1.0-6 minqa_1.2.4 reshape2_1.4.1 car_2.1-0 magrittr_1.5 scales_0.3.0
[31] codetools_0.2-14 MASS_7.3-43 splines_3.2.2 pbkrtest_0.4-2 colorspace_1.2-6 quantreg_5.19
[37] stringi_0.5-5 munsell_0.4.2
#### original sessionInfo()
R> sessionInfo()
R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] parallel stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] doParallel_1.0.8 iterators_1.0.7 foreach_1.4.2 kernlab_0.9-22 party_1.0-23 strucchange_1.5-1
[7] sandwich_2.3-3 zoo_1.7-12 modeltools_0.2-21 mvtnorm_1.0-3 randomForest_4.6-10 caret_6.0-52
[13] ggplot2_1.0.1 lattice_0.20-33
loaded via a namespace (and not attached):
[1] Rcpp_0.12.1 compiler_3.2.2 nloptr_1.0.4 plyr_1.8.3 class_7.3-13 tools_3.2.2
[7] digest_0.6.8 lme4_1.1-9 gtable_0.1.2 nlme_3.1-121 mgcv_1.8-7 Matrix_1.2-2
[13] SparseM_1.7 brglm_0.5-9 coin_1.1-0 proto_0.3-10 e1071_1.6-7 BradleyTerry2_1.0-6
[19] stringr_1.0.0 MatrixModels_0.4-1 gtools_3.5.0 nnet_7.3-10 survival_2.38-3 multcomp_1.4-1
[25] TH.data_1.0-6 minqa_1.2.4 car_2.1-0 reshape2_1.4.1 magrittr_1.5 scales_0.3.0
[31] codetools_0.2-14 splines_3.2.2 MASS_7.3-43 pbkrtest_0.4-2 colorspace_1.2-6 quantreg_5.19
[37] stringi_0.5-5 munsell_0.4.2
Any help would be greatly appreciated, thanks!!
When I run the first `cforest` model, I can see that "In addition: There were 31 warnings (use warnings() to see them)". These say:
unused arguments (verbose = FALSE, proximity = FALSE, importance = TRUE)
These are arguments to the `randomForest` function and not to `cforest`. Removing them removes the errors.
Update for the update:
This looks like confusion over the `...` argument and where `allowParallel` can be invoked. When running the code for `rf1`, I get these warnings:
unused argument (allowParallel = TRUE)
Looking at `?train` and `?cforest`, neither has that argument; it is in `trainControl`.
Here is the confusing part: running `rf3` with `allowParallel` as an argument to `train` does not generate an error. This is because `cforest` does not have the ellipses (`...`) and `randomForest` does:
> names(formals(cforest))
[1] "formula" "data" "subset" "weights" "controls" "xtrafo"
[7] "ytrafo" "scores"
> names(formals(randomForest:::randomForest.default))
[1] "x" "y" "xtest" "ytest"
[5] "ntree" "mtry" "replace" "classwt"
[9] "cutoff" "strata" "sampsize" "nodesize"
[13] "maxnodes" "importance" "localImp" "nPerm"
[17] "proximity" "oob.prox" "norm.votes" "do.trace"
[21] "keep.forest" "corr.bias" "keep.inbag" "..."
So, for `rf1` there is no "bottomless pit" to send the inappropriate argument (`allowParallel`) to, but for `rf3` there is a sequence of `...` arguments, and none of the functions ever has a terminal test to see whether `allowParallel` is an inappropriate argument.
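Pass `allowParallel` to `trainControl` and not to `train`, e.g. `trainControl(method = "repeatedcv", number = 10, repeats = 1, allowParallel = TRUE)`.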
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With