I can't figure out what's going on with data.table in this situation:
#' Add zero-filled columns `col1`, ..., `col<totCols>` to a data.table.
#'
#' Columns are assigned with data.table's `:=`, so any existing column with
#' one of these names is overwritten with 0.
#'
#' NOTE: `:=` only updates the caller's table in place while the table still
#' has spare pre-allocated column slots (see `options(datatable.alloccol)`).
#' Once that limit is reached, data.table shallow-copies the table and only
#' the local `tbl` inside this function sees the new columns. To be safe,
#' callers should either assign the result (`tbl <- fooFun(tbl, n)`) or
#' pre-allocate enough column slots before calling.
#'
#' @param tbl A data.table to extend.
#' @param totCols Non-negative count; columns `col1` to `col<totCols>` are set.
#' @return `tbl` (with the new columns), invisibly.
fooFun <- function(tbl, totCols) {
# seq_len() gives an empty sequence for totCols == 0, whereas 1:0 would
# expand to c(1, 0) and wrongly create "col1" and "col0".
new_cols <- paste0("col", seq_len(totCols))
if (length(new_cols) > 0L) {
# Parenthesised LHS makes `:=` use the names stored in new_cols.
tbl[, (new_cols) := 0]
}
# Return the (possibly re-allocated) local table so callers can re-assign it.
invisible(tbl)
}
Start with an empty 1-col data table.
> tbl = data.table(initialCol=double())
Then add 99 columns by reference:
> fooFun(tbl, 99)
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99"
All looks good. Now add the 100th column:
> fooFun(tbl, 100)
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99"
What? Not there... Now add one column outside of the function call:
> tbl[, newCol := 5]
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99" "newCol"
All good. Now add that 100th column:
> fooFun(tbl, 100)
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99" "newCol" "col100"
It's there now. Now add 20 more:
> fooFun(tbl, 120)
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99" "newCol" "col100" "col101" "col102" "col103" "col104" "col105" "col106" "col107" "col108" "col109" "col110"
[113] "col111" "col112" "col113" "col114" "col115" "col116" "col117" "col118" "col119" "col120"
Looks OK. Now add a bunch more:
> fooFun(tbl, 240)
> colnames(tbl)
[1] "initialCol" "col1" "col2" "col3" "col4" "col5" "col6" "col7" "col8" "col9" "col10" "col11" "col12" "col13"
[15] "col14" "col15" "col16" "col17" "col18" "col19" "col20" "col21" "col22" "col23" "col24" "col25" "col26" "col27"
[29] "col28" "col29" "col30" "col31" "col32" "col33" "col34" "col35" "col36" "col37" "col38" "col39" "col40" "col41"
[43] "col42" "col43" "col44" "col45" "col46" "col47" "col48" "col49" "col50" "col51" "col52" "col53" "col54" "col55"
[57] "col56" "col57" "col58" "col59" "col60" "col61" "col62" "col63" "col64" "col65" "col66" "col67" "col68" "col69"
[71] "col70" "col71" "col72" "col73" "col74" "col75" "col76" "col77" "col78" "col79" "col80" "col81" "col82" "col83"
[85] "col84" "col85" "col86" "col87" "col88" "col89" "col90" "col91" "col92" "col93" "col94" "col95" "col96" "col97"
[99] "col98" "col99" "newCol" "col100" "col101" "col102" "col103" "col104" "col105" "col106" "col107" "col108" "col109" "col110"
[113] "col111" "col112" "col113" "col114" "col115" "col116" "col117" "col118" "col119" "col120" "col121" "col122" "col123" "col124"
[127] "col125" "col126" "col127" "col128" "col129" "col130" "col131" "col132" "col133" "col134" "col135" "col136" "col137" "col138"
[141] "col139" "col140" "col141" "col142" "col143" "col144" "col145" "col146" "col147" "col148" "col149" "col150" "col151" "col152"
[155] "col153" "col154" "col155" "col156" "col157" "col158" "col159" "col160" "col161" "col162" "col163" "col164" "col165" "col166"
[169] "col167" "col168" "col169" "col170" "col171" "col172" "col173" "col174" "col175" "col176" "col177" "col178" "col179" "col180"
[183] "col181" "col182" "col183" "col184" "col185" "col186" "col187" "col188" "col189" "col190" "col191" "col192" "col193" "col194"
[197] "col195" "col196" "col197" "col198"
No good: the listing stops at "col198" (200 columns in total), so col199 through col240 were never added.
What's going on?
@Arun pointed out that this issue has already been addressed on the mailing list: #5204. Following the advice in that thread, I increased the default number of column pointers that are alloc'd when a data.table is created:
options(datatable.alloccol = 900)
This way, growing an already-created table past 100 columns no longer hits the default pre-allocation of 100 column pointers. It is a workaround rather than a fix: the underlying issue is that the object is shallow-copied once the pre-allocation limit is reached. With the larger limit, the tests in this SO question produce the expected behavior.
If you like our work, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With