I have this data:
merged_dat1
# A tibble: 4 x 35
# Groups: Product.Name [4]
Product.Name also apps battery better big camera case cheap day definitely enough even however
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 BLU Studio ~ 498 545 1552 465 306 839 406 161 413 225 156 590 178
2 iphone 4s 71 37 380 43 19 13 21 38 43 25 16 128 52
3 Motorola Mo~ 160 221 253 123 69 428 39 125 101 49 157 133 79
4 Samsung Gal~ 76 70 122 49 25 69 22 17 31 15 53 71 31
# ... with 21 more variables: issues <int>, life <int>, little <int>, long <int>, lot <int>, low <int>,
# many <int>, memory <int>, much <int>, overall <int>, phones <int>, pictures <int>, pretty <int>,
# quality <int>, right <int>, screen <int>, size <int>, still <int>, use <int>, way <int>, well <int>
I would like to divide each value of a row by its row sum.
For example, the row sum of the first row is 15044. Each value of the first row should be divided by this value. This is possibly a duplicate, but I could not find a topic on this. It would be nice if someone has a dplyr solution.
# dput() output of `merged_dat1`: 4 rows (one per product) with a character
# `Product.Name` column followed by 34 integer word-count columns.
# The object carries class "grouped_df" with `Product.Name` as grouping variable.
# NOTE(review): the grouping attributes (vars, indices, labels, ...) look like
# the layout of an older dplyr release -- confirm it loads as a valid grouped
# tibble under the installed dplyr version.
structure(list(Product.Name = c("BLU Studio 5.0", "iphone 4s",
"Motorola Moto E", "Samsung Galaxy II"), also = c(498L, 71L,
160L, 76L), apps = c(545L, 37L, 221L, 70L), battery = c(1552L,
380L, 253L, 122L), better = c(465L, 43L, 123L, 49L), big = c(306L,
19L, 69L, 25L), camera = c(839L, 13L, 428L, 69L), case = c(406L,
21L, 39L, 22L), cheap = c(161L, 38L, 125L, 17L), day = c(413L,
43L, 101L, 31L), definitely = c(225L, 25L, 49L, 15L), enough = c(156L,
16L, 157L, 53L), even = c(590L, 128L, 133L, 71L), however = c(178L,
52L, 79L, 31L), issues = c(334L, 49L, 60L, 23L), life = c(649L,
60L, 136L, 25L), little = c(283L, 45L, 156L, 44L), long = c(197L,
49L, 65L, 25L), lot = c(316L, 35L, 107L, 39L), low = c(203L,
25L, 116L, 24L), many = c(207L, 32L, 77L, 51L), memory = c(200L,
10L, 148L, 48L), much = c(421L, 79L, 165L, 53L), overall = c(206L,
35L, 77L, 8L), phones = c(749L, 84L, 214L, 63L), pictures = c(263L,
12L, 94L, 32L), pretty = c(332L, 25L, 97L, 31L), quality = c(669L,
40L, 186L, 49L), right = c(189L, 49L, 45L, 33L), screen = c(1359L,
71L, 252L, 82L), size = c(244L, 7L, 93L, 55L), still = c(416L,
48L, 107L, 28L), use = c(650L, 126L, 256L, 140L), way = c(218L,
40L, 44L, 12L), well = c(605L, 103L, 205L, 114L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), vars = "Product.Name", drop = TRUE, indices = list(
0L, 1L, 2L, 3L), group_sizes = c(1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Product.Name = c("BLU Studio 5.0", "iphone 4s", "Motorola Moto E",
"Samsung Galaxy II")), class = "data.frame", row.names = c(NA,
-4L), vars = "Product.Name", drop = TRUE))
We can create a row-sum column first and then do the division with mutate_if:
library(tidyverse)

# Convert each numeric column of `merged_dat1` into its share of the row total.
merged_dat1 %>%
  # Work on a plain data.frame rather than the grouped tibble.
  as.data.frame() %>%
  # Row total: add up the numeric columns element-wise; the is.numeric
  # predicate keeps the character Product.Name column out of the sum.
  mutate(newSum = reduce(select_if(., is.numeric), `+`)) %>%
  # Divide every numeric column by the row total. newSum is numeric too, but
  # it is the last column, so the others are divided before it changes.
  mutate_if(is.numeric, ~ . / newSum) %>%
  # The helper column is no longer needed.
  select(-newSum)
# Product.Name also apps battery better big camera case cheap
#1 BLU Studio 5.0 0.03310290 0.03622707 0.10316405 0.03090933 0.020340335 0.055769742 0.02698750 0.01070194
#2 iphone 4s 0.03717277 0.01937173 0.19895288 0.02251309 0.009947644 0.006806283 0.01099476 0.01989529
#3 Motorola Moto E 0.03450507 0.04766013 0.05456114 0.02652577 0.014880311 0.092301057 0.00841061 0.02695708
#4 Samsung Galaxy II 0.04662577 0.04294479 0.07484663 0.03006135 0.015337423 0.042331288 0.01349693 0.01042945
# day definitely enough even however issues life little long lot
#1 0.02745281 0.014956129 0.010369583 0.03921829 0.01183196 0.02220154 0.04314012 0.01881149 0.01309492 0.02100505
#2 0.02251309 0.013089005 0.008376963 0.06701571 0.02722513 0.02565445 0.03141361 0.02356021 0.02565445 0.01832461
#3 0.02178132 0.010567177 0.033858098 0.02868234 0.01703688 0.01293940 0.02932931 0.03364244 0.01401768 0.02307526
#4 0.01901840 0.009202454 0.032515337 0.04355828 0.01901840 0.01411043 0.01533742 0.02699387 0.01533742 0.02392638
# low many memory much overall phones pictures pretty quality right
#1 0.01349375 0.01375964 0.013294337 0.02798458 0.013693167 0.04978729 0.017482053 0.02206860 0.04446956 0.01256315
#2 0.01308901 0.01675393 0.005235602 0.04136126 0.018324607 0.04397906 0.006282723 0.01308901 0.02094241 0.02565445
#3 0.02501617 0.01660556 0.031917188 0.03558335 0.016605564 0.04615053 0.020271727 0.02091870 0.04011214 0.00970455
#4 0.01472393 0.03128834 0.029447853 0.03251534 0.004907975 0.03865031 0.019631902 0.01901840 0.03006135 0.02024540
# screen size still use way well
#1 0.09033502 0.016219091 0.02765222 0.04320659 0.014490827 0.04021537
#2 0.03717277 0.003664921 0.02513089 0.06596859 0.020942408 0.05392670
#3 0.05434548 0.020056071 0.02307526 0.05520811 0.009488894 0.04420962
#4 0.05030675 0.033742331 0.01717791 0.08588957 0.007361963 0.06993865
NOTE: This also does a type check: only the numeric columns are summed and divided.
Also, if we use base R, this can be done compactly as
# Divide every column except the first (Product.Name) by the row totals of
# those same columns, writing the result back into `merged_dat1` in place.
merged_dat1[-1] <- merged_dat1[-1]/rowSums(merged_dat1[-1])
With dplyr, you can also try:
# Divide every column except the first by the row total of those columns.
# Starts from `merged_dat1`, the data posted in the question.
merged_dat1 %>%
  # across() does not select grouping columns, so the grouping has to be
  # removed for `-1` to mean "everything except Product.Name".
  ungroup() %>%
  # across(-1) with no function returns the selected columns as a data frame;
  # dividing it by the row totals rescales each row, and mutate() writes the
  # resulting columns back under their original names.
  mutate(across(-1) / rowSums(across(-1)))
Product.Name also apps battery better big camera case cheap day
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 BLU Studio … 0.0331 0.0362 0.103 0.0309 0.0203 0.0558 0.0270 0.0107 0.0275
2 iphone 4s 0.0372 0.0194 0.199 0.0225 0.00995 0.00681 0.0110 0.0199 0.0225
3 Motorola Mo… 0.0345 0.0477 0.0546 0.0265 0.0149 0.0923 0.00841 0.0270 0.0218
4 Samsung Gal… 0.0466 0.0429 0.0748 0.0301 0.0153 0.0423 0.0135 0.0104 0.0190
Or select the columns to sum by their type:
# Divide every numeric column by the row total of the numeric columns.
# Starts from `merged_dat1`, the data posted in the question.
merged_dat1 %>%
  # Drop the grouping so the result is an ordinary (ungrouped) tibble.
  ungroup() %>%
  # where(is.numeric) picks the columns by type instead of by position, so the
  # character Product.Name column is left untouched wherever it sits.
  mutate(across(where(is.numeric)) / rowSums(across(where(is.numeric))))
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With