Divide each row by its sum

Question

I have this data:

 merged_dat1
# A tibble: 4 x 35
# Groups:   Product.Name [4]
  Product.Name  also  apps battery better   big camera  case cheap   day definitely enough  even however
  <chr>        <int> <int>   <int>  <int> <int>  <int> <int> <int> <int>      <int>  <int> <int>   <int>
1 BLU Studio ~   498   545    1552    465   306    839   406   161   413        225    156   590     178
2 iphone 4s       71    37     380     43    19     13    21    38    43         25     16   128      52
3 Motorola Mo~   160   221     253    123    69    428    39   125   101         49    157   133      79
4 Samsung Gal~    76    70     122     49    25     69    22    17    31         15     53    71      31
# ... with 21 more variables: issues <int>, life <int>, little <int>, long <int>, lot <int>, low <int>,
#   many <int>, memory <int>, much <int>, overall <int>, phones <int>, pictures <int>, pretty <int>,
#   quality <int>, right <int>, screen <int>, size <int>, still <int>, use <int>, way <int>, well <int>

I would like to divide each value of a row by its row sum.

For example, the row sum of the first row is 15044. Each value of the first row should be divided by this value. This is possibly a duplicate but I could not find a topic on this. It would be nice if someone has a dplyr solution.

# Reproducible definition of the data printed above: a 4-row data frame of
# integer word counts per product, carrying the classes grouped_df/tbl_df and
# grouped by Product.Name (see the `vars` attribute).
# NOTE(review): presumably dput(merged_dat1) output; the grouping attributes
# (vars, indices, group_sizes, labels) look like an older dplyr layout --
# confirm it still loads as a grouped tibble with the dplyr version in use.
structure(list(Product.Name = c("BLU Studio 5.0", "iphone 4s", 
"Motorola Moto E", "Samsung Galaxy II"), also = c(498L, 71L, 
160L, 76L), apps = c(545L, 37L, 221L, 70L), battery = c(1552L, 
380L, 253L, 122L), better = c(465L, 43L, 123L, 49L), big = c(306L, 
19L, 69L, 25L), camera = c(839L, 13L, 428L, 69L), case = c(406L, 
21L, 39L, 22L), cheap = c(161L, 38L, 125L, 17L), day = c(413L, 
43L, 101L, 31L), definitely = c(225L, 25L, 49L, 15L), enough = c(156L, 
16L, 157L, 53L), even = c(590L, 128L, 133L, 71L), however = c(178L, 
52L, 79L, 31L), issues = c(334L, 49L, 60L, 23L), life = c(649L, 
60L, 136L, 25L), little = c(283L, 45L, 156L, 44L), long = c(197L, 
49L, 65L, 25L), lot = c(316L, 35L, 107L, 39L), low = c(203L, 
25L, 116L, 24L), many = c(207L, 32L, 77L, 51L), memory = c(200L, 
10L, 148L, 48L), much = c(421L, 79L, 165L, 53L), overall = c(206L, 
35L, 77L, 8L), phones = c(749L, 84L, 214L, 63L), pictures = c(263L, 
12L, 94L, 32L), pretty = c(332L, 25L, 97L, 31L), quality = c(669L, 
40L, 186L, 49L), right = c(189L, 49L, 45L, 33L), screen = c(1359L, 
71L, 252L, 82L), size = c(244L, 7L, 93L, 55L), still = c(416L, 
48L, 107L, 28L), use = c(650L, 126L, 256L, 140L), way = c(218L, 
40L, 44L, 12L), well = c(605L, 103L, 205L, 114L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), vars = "Product.Name", drop = TRUE, indices = list(
    0L, 1L, 2L, 3L), group_sizes = c(1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
    Product.Name = c("BLU Studio 5.0", "iphone 4s", "Motorola Moto E", 
    "Samsung Galaxy II")), class = "data.frame", row.names = c(NA, 
-4L), vars = "Product.Name", drop = TRUE))

akrun · Accepted Answer

We can first create a column holding the row sums and then divide every numeric column by it with mutate_if:

library(tidyverse)
# Convert to a plain data.frame, add a helper column `newSum` holding the
# element-wise sum of all numeric columns (i.e. each row's total), divide
# every numeric column by that helper, then drop the helper again.
merged_dat1 %>%
  as.data.frame() %>%
  mutate(newSum = reduce(select_if(., is.numeric), `+`)) %>%
  mutate_if(is.numeric, list(~ . / newSum)) %>%
  select(-newSum)
#  Product.Name       also       apps    battery     better         big      camera       case      cheap
#1    BLU Studio 5.0 0.03310290 0.03622707 0.10316405 0.03090933 0.020340335 0.055769742 0.02698750 0.01070194
#2         iphone 4s 0.03717277 0.01937173 0.19895288 0.02251309 0.009947644 0.006806283 0.01099476 0.01989529
#3   Motorola Moto E 0.03450507 0.04766013 0.05456114 0.02652577 0.014880311 0.092301057 0.00841061 0.02695708
#4 Samsung Galaxy II 0.04662577 0.04294479 0.07484663 0.03006135 0.015337423 0.042331288 0.01349693 0.01042945
#         day  definitely      enough       even    however     issues       life     little       long        lot
#1 0.02745281 0.014956129 0.010369583 0.03921829 0.01183196 0.02220154 0.04314012 0.01881149 0.01309492 0.02100505
#2 0.02251309 0.013089005 0.008376963 0.06701571 0.02722513 0.02565445 0.03141361 0.02356021 0.02565445 0.01832461
#3 0.02178132 0.010567177 0.033858098 0.02868234 0.01703688 0.01293940 0.02932931 0.03364244 0.01401768 0.02307526
#4 0.01901840 0.009202454 0.032515337 0.04355828 0.01901840 0.01411043 0.01533742 0.02699387 0.01533742 0.02392638
#         low       many      memory       much     overall     phones    pictures     pretty    quality      right
#1 0.01349375 0.01375964 0.013294337 0.02798458 0.013693167 0.04978729 0.017482053 0.02206860 0.04446956 0.01256315
#2 0.01308901 0.01675393 0.005235602 0.04136126 0.018324607 0.04397906 0.006282723 0.01308901 0.02094241 0.02565445
#3 0.02501617 0.01660556 0.031917188 0.03558335 0.016605564 0.04615053 0.020271727 0.02091870 0.04011214 0.00970455
#4 0.01472393 0.03128834 0.029447853 0.03251534 0.004907975 0.03865031 0.019631902 0.01901840 0.03006135 0.02024540
#      screen        size      still        use         way       well
#1 0.09033502 0.016219091 0.02765222 0.04320659 0.014490827 0.04021537
#2 0.03717277 0.003664921 0.02513089 0.06596859 0.020942408 0.05392670
#3 0.05434548 0.020056071 0.02307526 0.05520811 0.009488894 0.04420962
#4 0.05030675 0.033742331 0.01717791 0.08588957 0.007361963 0.06993865

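NOTE: Because select_if() and mutate_if() are used with is.numeric, only the numeric columns enter the row sum and the division; the character column Product.Name is left untouched.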

Also, if we use base R, this can be done compactly as:

# Base R: overwrite every column except the first one with its share of the
# row total. rowSums() returns one value per row, and dividing the data frame
# by that vector divides each column element-wise, so every row ends up
# divided by its own sum.
merged_dat1[-1] <- merged_dat1[-1]/rowSums(merged_dat1[-1])

tmfmnk · Answer

With dplyr, you can also try:

# Divide every column except the first by the row total of those columns.
# `merged_dat1` is the grouped tibble from the question; drop the grouping
# first, because across() leaves grouping columns out of its selection and
# `-1` would then no longer refer to Product.Name.
merged_dat1 %>%
  ungroup() %>%
  mutate(across(-1) / rowSums(across(-1)))

  Product.Name   also   apps battery better     big  camera    case  cheap    day
  <chr>         <dbl>  <dbl>   <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>
1 BLU Studio … 0.0331 0.0362  0.103  0.0309 0.0203  0.0558  0.0270  0.0107 0.0275
2 iphone 4s    0.0372 0.0194  0.199  0.0225 0.00995 0.00681 0.0110  0.0199 0.0225
3 Motorola Mo… 0.0345 0.0477  0.0546 0.0265 0.0149  0.0923  0.00841 0.0270 0.0218
4 Samsung Gal… 0.0466 0.0429  0.0748 0.0301 0.0153  0.0423  0.0135  0.0104 0.0190

Or select the columns to sum by type rather than by position:

# Divide every numeric column by the row total of the numeric columns,
# selecting the columns by type so non-numeric columns are left as they are.
# `merged_dat1` is the grouped tibble from the question; the grouping is
# dropped before mutating.
merged_dat1 %>%
  ungroup() %>%
  mutate(across(where(is.numeric)) / rowSums(across(where(is.numeric))))

Divide each row by its sum

Tags:

r

dplyr

Banjo

2 Answers

akrun

tmfmnk

Recent Activity

Donate For Us

Divide each row by its sum

Tags:

r

dplyr

Banjo

2 Answers

akrun

tmfmnk

Related questions

Recent Activity

Donate For Us