One column of my data.frame looks like the following:
# Sample values of the column described above.
# NOTE(review): presumably this is the character vector the code further
# down refers to as `v1` -- it is never assigned in the text; confirm.
c("BP_1_CSPP", "BP_2_GEGS", "BP_3_AEAG", "BP_4_KPAP", "BP_5_TAKP",
"BP_6_GGDR", "BP_7_MQQP", "BP_8_EEEE", "BP_9_RSDP", "BP_10_APAS",
"BP_11_KRGG", "BP_12_RSQQ", "BP_13_QQLS", "BP_14_EPEV", "BP_15_AAPS",
"BP_16_SDVT", "BP_17_GQQQ", "BP_18_AETP", "BP_19_PPSA", "BP_20_DATP",
"EpQ_1_AYAT", "EpQ_2_HEKL", "EpQ_3_SCSV", "EpQ_4_MAYV", "EpQ_5_LKDP",
"EpQ_6_ERCE", "EpQ_7_DNPA", "EpQ_8_YGIS", "EpQ_9_GMSS", "EpQ_10_AAKK",
"EpQ_11_NIRI", "EpQ_12_ERRR", "EpQ_13_MDRE", "EpQ_14_SRQM", "EpQ_15_DWSI",
"EpQ_16_VLVQ", "EpQ_17_GRTI", "EpQ_18_EKVR", "EpQ_19_PDVA", "EpQ_20_ADVT",
"LbT_1_RPGG", "LbT_2_TQGD", "LbT_3_EVKS", "LbT_4_VIEM", "LbT_5_GSAD",
"LbT_6_VRPI", "LbT_7_CELG", "LbT_8_APQQ", "LbT_9_SAEE", "LbT_10_GEAE",
"LbT_11_EELR", "LbT_12_EWAN", "LbT_13_IKEE", "LbT_14_VSDF", "LbT_15_WEDV",
"LbT_16_SGGA", "LbT_17_KATN", "LbT_18_EREG", "LbT_19_AWAS", "LbT_20_VDRD",
"abc_1_CVTQ", "abc_2_KEAP", "abc_3_TAYI", "abc_4_MITN", "abc_5_MPTV",
"abc_6_TRTG", "abc_7_KSTI", "abc_8_KEAI", "abc_9_HVYS", "abc_10_LGMG",
"abc_11_VAYQ", "abc_12_AGTG", "abc_13_TDSW", "abc_14_HKKS", "abc_15_YGLA",
"abc_16_WEEW", "abc_17_HSTI", "abc_18_EKCI", "abc_19_PAGI", "abc_20_TGTI",
"TcII")
Considering all the numbers < 10 that are located within the strings (e.g. "BP_1_CSPP", "BP_2_GEGS"), I want to add a leading zero to them, so that I would have:
"BP_01_CSPP", "BP_02_GEGS", "BP_03_AEAG", "BP_04_KPAP", "BP_05_TAKP",
"BP_06_GGDR"
and so on.
This question almost did the job, yet it did not work for my data because:
The "0" will not be inserted at the same position all the time: some strings have 3 characters (including the underscore) before the 0 to be inserted (e.g. BP_1_CSPP), while others have 4 (e.g. EpQ_3_SCSV).
I will still have some characters after the inserted zero, i.e. the zero will be inserted in the middle of the string.
We can use `sub` to match the pattern of `_` followed by a single digit captured as a group (`([0-9])`, i.e. the part inside the parentheses) followed by `_`, and replace it with `_` followed by `0`, the backreference to the capture group (`\\1`), and `_`.
# Prefix a zero to the first single-digit field that is enclosed by
# underscores; two-digit fields and strings without such a field do not
# match the pattern and are returned unchanged.
v1 <- sub(pattern = "_([0-9])_", replacement = "_0\\1_", x = v1)
v1
#[1] "BP_01_CSPP" "BP_02_GEGS" "BP_03_AEAG" "BP_04_KPAP" "BP_05_TAKP" "BP_06_GGDR" "BP_07_MQQP" "BP_08_EEEE" "BP_09_RSDP" "BP_10_APAS" "BP_11_KRGG"
#[12] "BP_12_RSQQ" "BP_13_QQLS" "BP_14_EPEV" "BP_15_AAPS" "BP_16_SDVT" "BP_17_GQQQ" "BP_18_AETP" "BP_19_PPSA" "BP_20_DATP" "EpQ_01_AYAT" "EpQ_02_HEKL"
#[23] "EpQ_03_SCSV" "EpQ_04_MAYV" "EpQ_05_LKDP" "EpQ_06_ERCE" "EpQ_07_DNPA" "EpQ_08_YGIS" "EpQ_09_GMSS" "EpQ_10_AAKK" "EpQ_11_NIRI" "EpQ_12_ERRR" "EpQ_13_MDRE"
#[34] "EpQ_14_SRQM" "EpQ_15_DWSI" "EpQ_16_VLVQ" "EpQ_17_GRTI" "EpQ_18_EKVR" "EpQ_19_PDVA" "EpQ_20_ADVT" "LbT_01_RPGG" "LbT_02_TQGD" "LbT_03_EVKS" "LbT_04_VIEM"
#[45] "LbT_05_GSAD" "LbT_06_VRPI" "LbT_07_CELG" "LbT_08_APQQ" "LbT_09_SAEE" "LbT_10_GEAE" "LbT_11_EELR" "LbT_12_EWAN" "LbT_13_IKEE" "LbT_14_VSDF" "LbT_15_WEDV"
#[56] "LbT_16_SGGA" "LbT_17_KATN" "LbT_18_EREG" "LbT_19_AWAS" "LbT_20_VDRD" "abc_01_CVTQ" "abc_02_KEAP" "abc_03_TAYI" "abc_04_MITN" "abc_05_MPTV" "abc_06_TRTG"
#[67] "abc_07_KSTI" "abc_08_KEAI" "abc_09_HVYS" "abc_10_LGMG" "abc_11_VAYQ" "abc_12_AGTG" "abc_13_TDSW" "abc_14_HKKS" "abc_15_YGLA" "abc_16_WEEW" "abc_17_HSTI"
#[78] "abc_18_EKCI" "abc_19_PAGI" "abc_20_TGTI" "TcII"
If we are using `strsplit`, another option is to split by `_`, replace the numbers by formatting them with `sprintf`, and then `paste` the pieces back together:
# Alternative to the regex approach: split each string on "_", zero-pad the
# second field to two digits, and join the pieces back together.
# vapply() with a character(1) template (instead of sapply()) guarantees a
# character vector result, even when v1 is empty.
vapply(strsplit(v1, "_", fixed = TRUE), function(parts) {
  # Pad only when a second field exists and consists solely of digits.
  # Strings without "_" (e.g. "TcII") or with a non-numeric second field are
  # returned unchanged instead of being turned into "NA" with a warning.
  if (length(parts) > 1 && grepl("^[0-9]+$", parts[2])) {
    parts[2] <- sprintf("%02d", as.numeric(parts[2]))
  }
  paste(parts, collapse = "_")
}, character(1))
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With