I'm running the following script in R. If I use a %do% rather than a %dopar% the script works fine. However, if in the outer loop I use a %dopar% the loop runs forever without throwing any error (constant increase in memory usage until it goes out of memory). I'm using 16 cores.
library(parallel)
library(foreach)
library(doSNOW)
library(dplyr)
# Start a 16-worker cluster and register it as the backend used by %dopar%.
NumberOfCluster <- 16
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
# Outer loop: one task per element of UNSPSC_list, dispatched to the workers.
# data.table and dplyr are loaded on each worker via .packages.
# NOTE(review): every global referenced in the body (train, predict_all,
# suppliers, terms_list_by_UNSPSC, ...) has to be available on each worker;
# if these objects are large this may be related to the reported memory
# growth -- confirm.
foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar%
{
# Terms of all rows whose first six UNSPSC characters equal i: flattened,
# spaces removed, de-duplicated, wrapped into a data.table.
terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC,1,6) == i)]))))
# NOTE(review): dplyr joins take `by =`, not `on =`; as written the join key
# is presumably inferred from the shared column(s) -- confirm.
temp <- inner_join(N_of_UNSPSCs_by_Term, terms, on = 'V1')
# Replace V2 by its reciprocal and sort rows by it, largest first.
temp$V2 <- 1/as.numeric(temp$V2)
temp <- temp[order(temp$V2, decreasing = TRUE),]
# Assumes temp has exactly two columns at this point -- TODO confirm.
names(temp) <- c('Term','Imp')
# Distinct values of the first column of UNSPSCs_per_ABN for rows whose first
# four UNSPSC characters match the first four characters of i.
ABNs <- unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC,1,4) == substr(i,1,4)), 1])
# Empty numeric vector; it is overwritten by the result of the loop below.
predictions <- as.numeric(vector())
# Inner loop: runs sequentially (%do%) over the rows of train; the value of
# each iteration is concatenated with c().
# NOTE(review): seq(1 : nrow(train)) equals 1:nrow(train) only when train has
# at least one row.
predictions <- foreach (j = seq(1 : nrow(train)), .combine = 'c', .packages = 'dplyr') %do%
{
# Names of the entries of row j of train that are not NA.
descr <- names(which(!is.na(train[j,]) == TRUE))
# If the first column of predict_all row j is among ABNs, or is not among
# suppliers, the iteration yields the summed Imp of the terms found in descr;
# otherwise it yields 0.
# NOTE(review): `predictions` inside union_all() is presumably still the empty
# vector created above, so union_all() just returns the new value -- confirm.
if(unlist(predict_all[j,1]) %in% unlist(ABNs) || !unlist(predict_all[j,1]) %in% unlist(suppliers)) {union_all(predictions, sum(temp$Imp[which(temp$Term %in% descr)]))} else {union_all(predictions, 0)}
}
# Write the vector to a file named "Predictions_<i>_.rda" (paste() joins the
# three pieces with "_"), relative to the working directory of the process
# executing this iteration.
save(predictions, file = paste("Predictions", i,".rda", sep = "_"))
}
Due to operator precedence, you cannot put braces around the inner foreach loop. This is structured very much like a nested for loop: the outer foreach iterates over the values in bvec, passing them to the inner foreach, which iterates over the values in avec for each value of bvec.
Parallelizing nested loops. If we have nested for loops, it is often enough to simply parallelize the outermost loop: a(); #pragma omp parallel for for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { c(i, j); } } z(); This is all that we need most of the time.
Nested for Loops: We can also use a for loop inside another for loop.
Note: It is possible to use one type of loop inside the body of another loop. For example, we can put a for loop inside the while loop.
The proper way of nesting foreach loops is to use the %:% operator. See the example below. I have tested it on Windows.
library(foreach)
library(doSNOW)
# Start a 4-worker cluster and register it as the %dopar% backend.
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)
# Sample size drawn for every (i, j) combination.
N <- 1e6
# Sequential baseline: %:% merges both loops into a single stream of 100
# tasks; the inner results are combined with c(), the outer ones with rbind().
system.time(foreach(i = 1:10, .combine = rbind) %:%
foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
# Same nested loop, but the tasks are sent to the cluster workers.
system.time(foreach(i = 1:10, .combine = rbind) %:%
foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
# Shut the workers down; otherwise the R worker processes keep running (and
# holding memory) after the example has finished.
stopCluster(cl)
Output:
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+ foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
user system elapsed
7.38 0.23 7.64
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+ foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
user system elapsed
0.09 0.00 2.14
The scheme for using nested loops is as follows:
foreach(i) %:% foreach(j) %dopar% {foo(i, j)}
The %:% operator is used to nest several foreach loops. You cannot do any computation between the nested foreach calls. In your case you have to use two separate loops, for example:
# Step 1: a plain parallel loop over i that precomputes the powers of two;
# the per-iteration values are concatenated into the vector x.
x <- foreach(i = seq_len(10), .combine = c) %dopar% {
  2^i
}
# Step 2: the nested loop over i and j, which reads the values from step 1.
# Inner results are concatenated with c(), outer results are stacked by rbind().
foreach(i = seq_len(10), .combine = rbind) %:%
  foreach(j = seq_len(10), .combine = c) %dopar% {
    x[i] + j
  }
Untested code:
library(data.table)
library(foreach)
library(doSNOW)
# Start a 2-worker cluster and register it as the %dopar% backend.
NumberOfCluster <- 2
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# Step 1: per-UNSPSC lookups.
# Each task returns BOTH the term-importance table and the ABN list. Objects
# created inside a %dopar% body live only on the worker, so anything the
# second step needs must be returned here.
lookups <- foreach(
  i = UNSPSC_list,
  .packages = c("data.table", "dplyr"),
  .verbose = TRUE
) %dopar% {
  # Terms of all rows whose first six UNSPSC characters equal i.
  in_group <- substr(terms_list_by_UNSPSC$UNSPSC, 1, 6) == i
  terms <- as.data.table(
    unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(in_group)])))
  )
  # dplyr joins use `by`, not `on`.
  temp <- inner_join(N_of_UNSPSCs_by_Term, terms, by = "V1")
  # Importance is the reciprocal of V2, largest first.
  temp$V2 <- 1 / as.numeric(temp$V2)
  temp <- temp[order(temp$V2, decreasing = TRUE), ]
  names(temp) <- c("Term", "Imp")
  same_prefix <- substr(UNSPSCs_per_ABN$UNSPSC, 1, 4) == substr(i, 1, 4)
  list(
    temp = temp,
    ABNs = unique(UNSPSCs_per_ABN[which(same_prefix), 1])
  )
}
# Split into two lists that are position-aligned with UNSPSC_list.
ABNs <- lapply(lookups, function(entry) entry$ABNs)
temps <- lapply(lookups, function(entry) entry$temp)

# Step 2: nested loop over (UNSPSC position k, row j of train).
# The outer loop iterates over positions rather than UNSPSC values so that
# ABNs[[k]] and temps[[k]] are valid list indices. The result is a list with
# one numeric vector per UNSPSC (inner values are concatenated with c()).
predictions <- foreach(
  k = seq_along(UNSPSC_list),
  .packages = c("data.table", "dplyr"),
  .verbose = TRUE
) %:%
  foreach(j = seq_len(nrow(train)), .combine = "c") %dopar% {
    # Names of the entries of row j that are not NA.
    descr <- names(which(!is.na(train[j, ])))
    supplier <- unlist(predict_all[j, 1])
    if (supplier %in% unlist(ABNs[[k]]) || !supplier %in% unlist(suppliers)) {
      sum(temps[[k]]$Imp[which(temps[[k]]$Term %in% descr)])
    } else {
      0
    }
  }

# Release the worker processes once the parallel work is done.
stopCluster(cl)

# Step 3: one file per UNSPSC code, named "Predictions_<code>_.rda".
# save() stores objects by name, so the k-th vector is bound to a local
# variable called `predictions`; load() then restores it under that name
# without overwriting the full list in the calling environment.
for (k in seq_along(predictions)) {
  local({
    predictions <- predictions[[k]]
    save(
      predictions,
      file = paste("Predictions", UNSPSC_list[[k]], ".rda", sep = "_")
    )
  })
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With