Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Getting the error "level sets of factors are different" when running a for loop

Tags:

r

I have the following 3 tables:

# AggData: 73-row data frame (dput() output), one row per click path.
#   Path        - comma-separated sequence of "Brand" / "NonBrand" steps
#   click_count - number of clicks recorded for the path
#   conv_count  - number of conversions recorded for the path
#   CR          - matches conv_count / click_count (e.g. 30938 / 1799265 for
#                 row 1); presumably the conversion rate
AggData <- structure(list(Path = c("NonBrand", "Brand", "NonBrand,NonBrand", 
"Brand,Brand", "NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand", 
"Brand,NonBrand", "NonBrand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand", "Brand,Brand,NonBrand", "NonBrand,Brand,Brand", 
"Brand,NonBrand,NonBrand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand", "Brand,NonBrand,Brand", "NonBrand,Brand,NonBrand", 
"NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,Brand", 
"Brand,Brand,Brand,NonBrand", "Brand,Brand,Brand,Brand,Brand,Brand,Brand", 
"Brand,NonBrand,NonBrand,NonBrand", "NonBrand,NonBrand,Brand,Brand", 
"Brand,Brand,NonBrand,NonBrand", "Brand,NonBrand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,NonBrand,Brand", "NonBrand,Brand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand,NonBrand", "Brand,NonBrand,NonBrand,Brand", 
"NonBrand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,NonBrand,Brand,NonBrand", "NonBrand,Brand,Brand,NonBrand", 
"Brand,Brand,Brand,Brand,NonBrand", "Brand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,NonBrand,Brand,Brand,Brand", "NonBrand,Brand,NonBrand,Brand", 
"Brand,Brand,Brand,NonBrand,Brand", "NonBrand,NonBrand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,Brand,Brand", "Brand,Brand,NonBrand,Brand,Brand", 
"Brand,Brand,Brand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,Brand,NonBrand", "Brand,Brand,NonBrand,NonBrand,NonBrand", 
"NonBrand,Brand,Brand,Brand,Brand,Brand", "NonBrand,Brand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand,NonBrand,NonBrand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,Brand,NonBrand", 
"NonBrand,Brand,Brand,NonBrand,NonBrand", "Brand,NonBrand,NonBrand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,Brand,Brand", "NonBrand,NonBrand,Brand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,Brand,NonBrand", "NonBrand,NonBrand,Brand,NonBrand,Brand", 
"Brand,NonBrand,NonBrand,Brand,NonBrand", "NonBrand,NonBrand,NonBrand,Brand,Brand,Brand", 
"NonBrand,Brand,Brand,NonBrand,Brand", "Brand,NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,Brand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,NonBrand,NonBrand,NonBrand"
), click_count = c(1799265, 874478, 198657, 128159, 45728, 30172, 
20520, 17815, 16718, 9479, 6554, 3722, 3561, 3408, 3391, 3366, 
3256, 2526, 1846, 1708, 1682, 1013, 951, 899, 881, 782, 780, 
703, 642, 625, 615, 601, 453, 442, 414, 407, 362, 343, 313, 284, 
281, 281, 271, 269, 268, 229, 223, 218, 215, 212, 204, 162, 161, 
158, 155, 145, 132, 130, 115, 103, 102, 86, 77, 77, 72, 68, 68, 
67, 58, 52, 32, 18, 18), conv_count = c(30938, 19652, 7401, 3803, 
2014, 1072, 1084, 981, 652, 379, 230, 166, 205, 246, 254, 93, 
239, 104, 112, 51, 76, 23, 66, 81, 55, 29, 62, 57, 50, 37, 17, 
33, 38, 17, 8, 41, 33, 30, 24, 16, 26, 18, 16, 17, 7, 21, 10, 
8, 27, 23, 11, 13, 6, 15, 14, 16, 8, 10, 6, 6, 11, 11, 8, 9, 
8, 8, 9, 7, 7, 6, 6, 6, 7), CR = c(0.0171947989873643, 0.0224728352228415, 
0.0372551684561833, 0.0296740767328085, 0.0440430370888733, 0.0355296301206417, 
0.0528265107212476, 0.0550659556553466, 0.0389998803684651, 0.0399831205823399, 
0.0350930729325603, 0.0445996775926921, 0.057568098848638, 0.0721830985915493, 
0.0749041580654674, 0.0276292335115865, 0.0734029484029484, 0.0411718131433096, 
0.0606717226435536, 0.0298594847775176, 0.0451843043995244, 0.0227048371174729, 
0.0694006309148265, 0.0901001112347052, 0.0624290578887628, 0.0370843989769821, 
0.0794871794871795, 0.0810810810810811, 0.0778816199376947, 0.0592, 
0.0276422764227642, 0.0549084858569052, 0.0838852097130243, 0.0384615384615385, 
0.0193236714975845, 0.100737100737101, 0.0911602209944751, 0.0874635568513119, 
0.0766773162939297, 0.0563380281690141, 0.0925266903914591, 0.0640569395017794, 
0.0590405904059041, 0.0631970260223048, 0.0261194029850746, 0.091703056768559, 
0.0448430493273543, 0.036697247706422, 0.125581395348837, 0.108490566037736, 
0.053921568627451, 0.0802469135802469, 0.0372670807453416, 0.0949367088607595, 
0.0903225806451613, 0.110344827586207, 0.0606060606060606, 0.0769230769230769, 
0.0521739130434783, 0.058252427184466, 0.107843137254902, 0.127906976744186, 
0.103896103896104, 0.116883116883117, 0.111111111111111, 0.117647058823529, 
0.132352941176471, 0.104477611940299, 0.120689655172414, 0.115384615384615, 
0.1875, 0.333333333333333, 0.388888888888889)), .Names = c("Path", 
"click_count", "conv_count", "CR"), row.names = c(NA, -73L), class = "data.frame")

another one here:

# breakVector: two-row data frame. The `breakVector` column is a factor whose
# only level is "NonBrand"; `CR` holds the numeric weight for each row.
breakVector <- data.frame(
  breakVector = factor(c("NonBrand", "NonBrand"), levels = "NonBrand"),
  CR = c(0.461541302855402, 0.538458697144598)
)

and:

# FinalTable: two-row result table. `autribution_category` is a factor with
# levels "Brand" and "NonBrand" (row 1 = NonBrand, row 2 = Brand);
# `attributed_result` starts at zero for both rows.
FinalTable <- structure(
  list(
    autribution_category = factor(
      c("NonBrand", "Brand"),
      levels = c("Brand", "NonBrand")
    ),
    attributed_result = c(0, 0)
  ),
  row.names = 1:2,
  class = "data.frame"
)

when I run the following command:

# When the category in row 2 of FinalTable equals the first breakVector entry,
# add breakVector's weight (row 1, column 2) times the third conv_count of
# AggData to the row-2 attributed result.
# NOTE: `==` between two factors requires both to have the same level set;
# the two factors compared here do not, which triggers the error shown below.
# NOTE(review): `break` is only valid inside a loop -- presumably this snippet
# is taken from the body of the for loop mentioned in the title.
if (FinalTable [2,1] == breakVector[1,1]) {
     FinalTable$attributed_result[2] <- FinalTable$attributed_result[2] + 
     breakVector[1,2] * AggData$conv_count[3];
     break}

I get the following error:

Error in Ops.factor(FinalTable[2, 1], breakVector[1, 1]) : 
  level sets of factors are different

This is pretty weird: both values that I'm comparing are factors, so I don't see any reason why R can't compare the two levels.

like image 542
Yehoshaphat Schellekens Avatar asked Jul 06 '14 10:07

Yehoshaphat Schellekens


2 Answers

FinalTable[2,1] and breakVector[1,1] do not have the same levels:

> FinalTable[2,1]
[1] Brand
Levels: Brand NonBrand
> breakVector[1,1]
[1] NonBrand
Levels: NonBrand

This is easily fixed by using

# Re-create the column as a factor with both levels declared explicitly.
breakVector[[1]] <- factor(breakVector[[1]], levels = c("Brand", "NonBrand"))

or, more generally

# Re-create the column as a factor that reuses the level set of FinalTable's
# first column, so the two factors become comparable with `==`.
breakVector[[1]] <- factor(breakVector[[1]], levels = levels(FinalTable[[1]]))
like image 188
nico Avatar answered Nov 16 '22 22:11

nico


Perhaps it would be better to compare both variables as strings:

# Compare the two labels as plain character strings, so that differing factor
# level sets no longer matter, then add the weighted conversion count to the
# row-2 attributed result.
if (as.character(FinalTable[2, 1]) == as.character(breakVector[1, 1])) {
  FinalTable$attributed_result[2] <-
    FinalTable$attributed_result[2] + AggData$conv_count[3] * breakVector[1, 2]
  break
}
like image 41
Joan Christopher Avatar answered Nov 17 '22 00:11

Joan Christopher