I have this data:
merged_dat1
# A tibble: 4 x 35
# Groups: Product.Name [4]
Product.Name also apps battery better big camera case cheap day definitely enough even however
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 BLU Studio ~ 498 545 1552 465 306 839 406 161 413 225 156 590 178
2 iphone 4s 71 37 380 43 19 13 21 38 43 25 16 128 52
3 Motorola Mo~ 160 221 253 123 69 428 39 125 101 49 157 133 79
4 Samsung Gal~ 76 70 122 49 25 69 22 17 31 15 53 71 31
# ... with 21 more variables: issues <int>, life <int>, little <int>, long <int>, lot <int>, low <int>,
# many <int>, memory <int>, much <int>, overall <int>, phones <int>, pictures <int>, pretty <int>,
# quality <int>, right <int>, screen <int>, size <int>, still <int>, use <int>, way <int>, well <int>
I would like to divide each value of a row by its row sum.
For example, the row sum of the first row is 15044. Each value of the first row should be divided by this value. This is possibly a duplicate but I could not find a topic on this. It would be nice if someone has a dplyr solution.
# Reproducible copy of the example data (this looks like dput() output).
# It builds a 4-row table with a character column `Product.Name` followed by
# integer columns, carrying the classes "grouped_df", "tbl_df", "tbl" and
# "data.frame", with `Product.Name` recorded as the grouping variable.
# The expression is not assigned to a name here; assign its result
# (e.g. `merged_dat1 <- structure(...)`) before running code that refers to
# `merged_dat1`.
# NOTE(review): the `vars` / `indices` / `labels` attributes look like an older
# grouped_df layout -- confirm the object loads under the dplyr version in use.
structure(list(Product.Name = c("BLU Studio 5.0", "iphone 4s",
"Motorola Moto E", "Samsung Galaxy II"), also = c(498L, 71L,
160L, 76L), apps = c(545L, 37L, 221L, 70L), battery = c(1552L,
380L, 253L, 122L), better = c(465L, 43L, 123L, 49L), big = c(306L,
19L, 69L, 25L), camera = c(839L, 13L, 428L, 69L), case = c(406L,
21L, 39L, 22L), cheap = c(161L, 38L, 125L, 17L), day = c(413L,
43L, 101L, 31L), definitely = c(225L, 25L, 49L, 15L), enough = c(156L,
16L, 157L, 53L), even = c(590L, 128L, 133L, 71L), however = c(178L,
52L, 79L, 31L), issues = c(334L, 49L, 60L, 23L), life = c(649L,
60L, 136L, 25L), little = c(283L, 45L, 156L, 44L), long = c(197L,
49L, 65L, 25L), lot = c(316L, 35L, 107L, 39L), low = c(203L,
25L, 116L, 24L), many = c(207L, 32L, 77L, 51L), memory = c(200L,
10L, 148L, 48L), much = c(421L, 79L, 165L, 53L), overall = c(206L,
35L, 77L, 8L), phones = c(749L, 84L, 214L, 63L), pictures = c(263L,
12L, 94L, 32L), pretty = c(332L, 25L, 97L, 31L), quality = c(669L,
40L, 186L, 49L), right = c(189L, 49L, 45L, 33L), screen = c(1359L,
71L, 252L, 82L), size = c(244L, 7L, 93L, 55L), still = c(416L,
48L, 107L, 28L), use = c(650L, 126L, 256L, 140L), way = c(218L,
40L, 44L, 12L), well = c(605L, 103L, 205L, 114L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), vars = "Product.Name", drop = TRUE, indices = list(
0L, 1L, 2L, 3L), group_sizes = c(1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Product.Name = c("BLU Studio 5.0", "iphone 4s", "Motorola Moto E",
"Samsung Galaxy II")), class = "data.frame", row.names = c(NA,
-4L), vars = "Product.Name", drop = TRUE))
We can first add a column holding each row's sum and then divide every numeric column by it:
library(tidyverse)
# Turn every numeric column into its share of the row total.
# Steps: drop the grouping by converting to a plain data.frame, add a helper
# column `newSum` with each row's total, divide the other numeric columns by
# it, then remove the helper again.
# across() + where() are the current replacements for the superseded
# select_if() / mutate_if() verbs; rowSums() returns doubles, so large integer
# counts cannot overflow while being added up.
merged_dat1 %>%
  as.data.frame() %>%
  mutate(newSum = rowSums(across(where(is.numeric)))) %>%
  # `& !newSum` keeps the helper column itself out of the division.
  mutate(across(where(is.numeric) & !newSum, ~ .x / newSum)) %>%
  select(-newSum)
# Product.Name also apps battery better big camera case cheap
#1 BLU Studio 5.0 0.03310290 0.03622707 0.10316405 0.03090933 0.020340335 0.055769742 0.02698750 0.01070194
#2 iphone 4s 0.03717277 0.01937173 0.19895288 0.02251309 0.009947644 0.006806283 0.01099476 0.01989529
#3 Motorola Moto E 0.03450507 0.04766013 0.05456114 0.02652577 0.014880311 0.092301057 0.00841061 0.02695708
#4 Samsung Galaxy II 0.04662577 0.04294479 0.07484663 0.03006135 0.015337423 0.042331288 0.01349693 0.01042945
# day definitely enough even however issues life little long lot
#1 0.02745281 0.014956129 0.010369583 0.03921829 0.01183196 0.02220154 0.04314012 0.01881149 0.01309492 0.02100505
#2 0.02251309 0.013089005 0.008376963 0.06701571 0.02722513 0.02565445 0.03141361 0.02356021 0.02565445 0.01832461
#3 0.02178132 0.010567177 0.033858098 0.02868234 0.01703688 0.01293940 0.02932931 0.03364244 0.01401768 0.02307526
#4 0.01901840 0.009202454 0.032515337 0.04355828 0.01901840 0.01411043 0.01533742 0.02699387 0.01533742 0.02392638
# low many memory much overall phones pictures pretty quality right
#1 0.01349375 0.01375964 0.013294337 0.02798458 0.013693167 0.04978729 0.017482053 0.02206860 0.04446956 0.01256315
#2 0.01308901 0.01675393 0.005235602 0.04136126 0.018324607 0.04397906 0.006282723 0.01308901 0.02094241 0.02565445
#3 0.02501617 0.01660556 0.031917188 0.03558335 0.016605564 0.04615053 0.020271727 0.02091870 0.04011214 0.00970455
#4 0.01472393 0.03128834 0.029447853 0.03251534 0.004907975 0.03865031 0.019631902 0.01901840 0.03006135 0.02024540
# screen size still use way well
#1 0.09033502 0.016219091 0.02765222 0.04320659 0.014490827 0.04021537
#2 0.03717277 0.003664921 0.02513089 0.06596859 0.020942408 0.05392670
#3 0.05434548 0.020056071 0.02307526 0.05520811 0.009488894 0.04420962
#4 0.05030675 0.033742331 0.01717791 0.08588957 0.007361963 0.06993865
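NOTE: Selecting the columns with `is.numeric` also acts as a type check, so non-numeric columns such as `Product.Name` are left untouched.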
Also, if we use base R, this can be done compactly as
# Overwrite every column except the first with its share of the row total;
# the row totals are recycled down each column, so each row is scaled by its
# own sum.
merged_dat1[-1L] <- merged_dat1[-1L] / rowSums(merged_dat1[-1L])
With dplyr, you can also try:
# Divide every column except the first (`Product.Name`) by its row total.
# The pipeline starts from `merged_dat1`, the table defined in the question.
# ungroup() comes first so that the positional selection `-1` refers to the
# first column of the whole table; the unnamed data frame returned inside
# mutate() replaces the selected columns in place.
merged_dat1 %>%
  ungroup() %>%
  mutate(across(-1) / rowSums(across(-1)))
Product.Name also apps battery better big camera case cheap day
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 BLU Studio … 0.0331 0.0362 0.103 0.0309 0.0203 0.0558 0.0270 0.0107 0.0275
2 iphone 4s 0.0372 0.0194 0.199 0.0225 0.00995 0.00681 0.0110 0.0199 0.0225
3 Motorola Mo… 0.0345 0.0477 0.0546 0.0265 0.0149 0.0923 0.00841 0.0270 0.0218
4 Samsung Gal… 0.0466 0.0429 0.0748 0.0301 0.0153 0.0423 0.0135 0.0104 0.0190
Or select the columns to sum by their type:
# Divide every numeric column by the row total of the numeric columns.
# The pipeline starts from `merged_dat1`, the table defined in the question.
# Columns are picked by type with where(is.numeric), so character columns
# such as `Product.Name` are neither summed nor modified.
merged_dat1 %>%
  ungroup() %>%
  mutate(across(where(is.numeric)) / rowSums(across(where(is.numeric))))
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With