I've got a table 'inputdf' with sample names in a random order in column 'SampleFileName'.
> colnames(inputdf)
[1] "Dye/SamplePeak" "SampleFileName" "Marker" "Allele" "Size" "Height"
[7] "Area" "DataPoint" "flank" "correction" "start" "end"
[13] "control" "iithreshold" "CAG"
I'm using tidyr's spread() to spread the values from the 'Height' column into separate columns, with each new column named by the corresponding value in 'SampleFileName'.
library(tidyr)
# Extract heights into separate columns, one per sample (named by
# SampleFileName); combinations with no height are filled with 0.
height <- spread(
  data = inputdf,
  key = SampleFileName,
  value = Height,
  fill = 0,
  convert = FALSE
)
My samples aren't in alphabetical order in column 'SampleFileName' and I'd like to keep them in that order. However, spread automatically sorts them alphabetically. I'd be grateful for your help!
> colnames(height)
[1] "Dye/SamplePeak" "Marker"
[3] "Allele" "Size"
[5] "Area" "DataPoint"
[7] "flank" "correction"
[9] "start" "end"
[11] "control" "iithreshold"
[13] "CAG" "A01_MF20170522_FA_A01_2017-05-22_1.fsa"
[15] "A01_MF20170623_FA_A01_2017-06-23_1.fsa" "A02_MF20170623_FA_A02_2017-06-23_1.fsa"
[17] "A03_MF20170623_FA_A03_2017-06-23_1.fsa" "A05_MF20170623_FA_A05_2017-06-23_1.fsa"
[19] "A06_MF20170623_FA_A06_2017-06-23_1.fsa" "A07_MF20170623_FA_A07_2017-06-23_1.fsa"
[21] "A08_MF20170623_FA_A08_2017-06-23_1.fsa" "A09_MF20170623_FA_A09_2017-06-23_1.fsa"
[23] "A10_MF20170623_FA_A10_2017-06-23_1.fsa" "A11_MF20170623_FA_A11_2017-06-23_1.fsa"
[25] "A12_MF20170623_FA_A12_2017-06-23_1.fsa" "B01_MF20170623_FA_B01_2017-06-23_1.fsa"
[27] "B02_MF20170623_FA_B02_2017-06-23_1.fsa" "B03_MF20170623_FA_B03_2017-06-23_1.fsa"
[29] "B04_MF20170623_FA_B04_2017-06-23_1.fsa" "B05_MF20170623_FA_B05_2017-06-23_1.fsa"
[31] "B06_MF20170623_FA_B06_2017-06-23_1.fsa" "B07_MF20170623_FA_B07_2017-06-23_1.fsa"
[33] "B08_MF20170522_FA_B08_2017-05-22_1.fsa" "B08_MF20170623_FA_B08_2017-06-23_1.fsa"
[35] "C01_MF20170623_FA_C01_2017-06-23_1.fsa" "C02_MF20170529_FA_C02_2017-05-30_1.fsa"
[37] "C02_MF20170623_FA_C02_2017-06-23_1.fsa" "C05_MF20170623_FA_C05_2017-06-23_1.fsa"
[39] "C07_MF20170623_FA_C07_2017-06-23_1.fsa" "C08_MF20170623_FA_C08_2017-06-23_1.fsa"
[41] "C09_MF20170623_FA_C09_2017-06-23_1.fsa" "C10_MF20170623_FA_C10_2017-06-23_1.fsa"
[43] "C11_MF20170623_FA_C11_2017-06-23_1.fsa" "C12_MF20170623_FA_C12_2017-06-23_1.fsa"
[45] "D02_MF20170623_FA_D02_2017-06-23_1.fsa" "D03_MF20170623_FA_D03_2017-06-23_1.fsa"
[47] "D04_MF20170623_FA_D04_2017-06-23_1.fsa" "D05_MF20170623_FA_D05_2017-06-23_1.fsa"
[49] "D06_MF20170623_FA_D06_2017-06-23_1.fsa" "D08_MF20170623_FA_D08_2017-06-23_1.fsa"
[51] "D10_MF20170623_FA_D10_2017-06-23_1.fsa" "D11_MF20170623_FA_D11_2017-06-23_1.fsa"
[53] "D12_MF20170623_FA_D12_2017-06-23_1.fsa"
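You can use dplyr::select() after tidyr::spread() to restore the original column order. Note that your rows may still be sorted alphabetically.
For the data in the question, the order to restore is the order in which the sample names first appear, i.e. unique(inputdf$SampleFileName); the example below shows the same idea on mtcars.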
library(tidyverse)
# Move the row names (car models) into a regular column called "vehicle".
mtcars <- mtcars %>%
  rownames_to_column(var = "vehicle")
colnames(mtcars)
# [1] "vehicle" "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
Regular spread behavior with alphabetically arranged column order:
# Round-trip through long format: spread() returns the new columns in
# alphabetical order rather than the order they had in mtcars.
spreadMtcarsSorted <- mtcars %>%
  gather(key = measure, value = value, -vehicle) %>%
  spread(key = measure, value = value)
colnames(spreadMtcarsSorted)
# [1] "vehicle" "am" "carb" "cyl" "disp" "drat" "gear" "hp" "mpg" "qsec" "vs" "wt"
Reverting to original column order after the spread operation:
# Same round trip, but finish with select() so the columns come back in the
# order they had in mtcars before the spread.
spreadMtcarsOriginalColOrder <- mtcars %>%
  gather(key = measure, value = value, -vehicle) %>%
  spread(key = measure, value = value) %>%
  select(colnames(mtcars)) # use original column order before returning new data frame
colnames(spreadMtcarsOriginalColOrder)
# [1] "vehicle" "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With