Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

String declared inside a printf can't be garbage-collected by GCC

Tags:

c

gcc

I'm trying to reduce the size of the ELF file produced by the compilation.

But the strings declared inside a printf are moved to the .rodata section and are not optimized away by the linker.

#include <stdio.h>
#include <stdlib.h>

/* File-scope strings; with -fdata-sections each one gets its own named section. */
static char str1[] = "String1"; // placed in .data.str1 (non-const array)
const char str2[] = "String2"; // placed in .rodata.str2 (const array)

/* Prints the two file-scope strings and one string literal; everything referenced here is live and must remain in the linked binary. */
int main(){

printf("%s",str1); // the "%s" format literal is placed in the shared .rodata section
printf("%s",str2);
printf("On-the fly string"); // this literal is also placed in the shared .rodata section

return 0;

}

/* Never called from main(); with -ffunction-sections and --gc-sections its .text.unused_function section is dropped by the linker. */
void unused_function(){
    const char str3[] = "String Not Used"; // initializer bytes are embedded in .text.unused_function, so they are removed together with the function
    printf("I would like to optimize this string"); // literal is placed in the shared .rodata section and is NOT removed by the linker
    printf("%s", str3); // prints the local array
}

I compile and link with these arguments :

gcc -fdata-sections -ffunction-sections -Wl,--gc-sections hello.c -o hello.out

These are the section contents after compilation (before linking):

Contents of section .data.str1:
 0000 53747269 6e673100                    String1.        
Contents of section .rodata.str2:
 0000 53747269 6e673200                    String2.        
Contents of section .rodata:
 0000 2573004f 6e2d7468 6520666c 79207374  %s.On-the fly st
 0010 72696e67 00000000 4920776f 756c6420  ring....I would 
 0020 6c696b65 20746f20 6f707469 6d697a65  like to optimize
 0030 20746869 73207374 72696e67 00         this string.   
Contents of section .text.main:
 0000 554889e5 be000000 00bf0000 0000b800  UH..............
 0010 000000e8 00000000 be000000 00bf0000  ................
 0020 0000b800 000000e8 00000000 bf000000  ................
 0030 00b80000 0000e800 000000b8 00000000  ................
 0040 5dc3                                 ].              
Contents of section .text.unused_function:
 0000 554889e5 4883ec20 64488b04 25280000  UH..H.. dH..%(..
 0010 00488945 f831c048 b8537472 696e6720  .H.E.1.H.String 
 0020 4e488945 e048b86f 74205573 65640048  NH.E.H.ot Used.H
 0030 8945e8bf 00000000 b8000000 00e80000  .E..............
 0040 0000488d 45e04889 c6bf0000 0000b800  ..H.E.H.........
 0050 000000e8 00000000 488b45f8 64483304  ........H.E.dH3.
 0060 25280000 007405e8 00000000 c9c3      %(...t........  

And after linking:

Contents of section .text:
 400440 31ed4989 d15e4889 e24883e4 f0505449  1.I..^H..H...PTI
 400450 c7c0e005 400048c7 c1700540 0048c7c7  ....@.H..p.@.H..
 400460 2d054000 e8b7ffff fff4660f 1f440000  -.@.......f..D..
 400470 b83f1060 0055482d 38106000 4883f80e  .?.`.UH-8.`.H...
 400480 4889e577 025dc3b8 00000000 4885c074  H..w.]......H..t
 400490 f45dbf38 106000ff e00f1f80 00000000  .].8.`..........
 4004a0 b8381060 0055482d 38106000 48c1f803  .8.`.UH-8.`.H...
 4004b0 4889e548 89c248c1 ea3f4801 d048d1f8  H..H..H..?H..H..
 4004c0 75025dc3 ba000000 004885d2 74f45d48  u.]......H..t.]H
 4004d0 89c6bf38 106000ff e20f1f80 00000000  ...8.`..........
 4004e0 803d510b 20000075 11554889 e5e87eff  .=Q. ..u.UH...~.
 4004f0 ffff5dc6 053e0b20 0001f3c3 0f1f4000  ..]..>. ......@.
 400500 48833d18 09200000 741eb800 00000048  H.=.. ..t......H
 400510 85c07414 55bf200e 60004889 e5ffd05d  ..t.U. .`.H....]
 400520 e97bffff ff0f1f00 e973ffff ff554889  .{.......s...UH.
 400530 e5be3010 6000bff8 054000b8 00000000  ..0.`....@......
 400540 e8cbfeff ffbef005 4000bff8 054000b8  ........@....@..
 400550 00000000 e8b7feff ffbffb05 4000b800  ............@...
 400560 000000e8 a8feffff b8000000 005dc390  .............]..
 400570 41574189 ff415649 89f64155 4989d541  AWA..AVI..AUI..A
 400580 544c8d25 88082000 55488d2d 88082000  TL.%.. .UH.-.. .
 400590 534c29e5 31db48c1 fd034883 ec08e83d  SL).1.H...H....=
 4005a0 feffff48 85ed741e 0f1f8400 00000000  ...H..t.........
 4005b0 4c89ea4c 89f64489 ff41ff14 dc4883c3  L..L..D..A...H..
 4005c0 014839eb 75ea4883 c4085b5d 415c415d  .H9.u.H...[]A\A]
 4005d0 415e415f c366662e 0f1f8400 00000000  A^A_.ff.........
 4005e0 f3c3                                 ..              
Contents of section .rodata:
 4005f0 53747269 6e673200 2573004f 6e2d7468  String2.%s.On-th
 400600 6520666c 79207374 72696e67 00000000  e fly string....
 400610 4920776f 756c6420 6c696b65 20746f20  I would like to 
 400620 6f707469 6d697a65 20746869 73207374  optimize this st
 400630 72696e67 00                          ring.                 
Contents of section .data:
 601030 53747269 6e673100                    String1.     

So, as you can see, the function "unused_function()" and the string declared as const are removed, but I would also like to remove the last string, "I would like to optimize this string".

UPDATE

In the first example, the string can easily be removed using -fmerge-constants, because the strings inside the printf calls are then put in sections depending on their length; but this does not work if two strings have the same length and one of them is used.

#include <stdio.h>
#include <stdlib.h>

void used_function();

/* File-scope strings printed by main(); the trailing notes are the sections
 * the author expects them to be placed in when built with -fdata-sections. */
static char str1[] = "String1"; //goto .data.str1
const char str2[] = "String2"; //goto .rodata.str2

/*
 * Prints both file-scope strings and one string literal, then calls
 * used_function() so that function's string literal is live in the
 * final binary. Returns 0.
 */
int main(){

    printf("%s",str1); //"%s" in .rodata
    printf("%s",str2);
    printf("On-the fly string"); //goto .rodata

    used_function();

    return 0;

}

/*
 * Never called. Its local array and its string literal are the data the
 * author wants the linker to discard along with the function.
 */
void unused_function(){
    const char str3[] = "String Not Used"; //goto .text.unused_function and is removed by the linker
    printf("I would like to optimize this string 1"); //goto .rodata.str1.8 and isn't removed by the linker
    printf("%s", str3); //
}

/*
 * Called from main(). Its string literal is noted by the author as going to
 * the same .rodata.str1.8 section as the literal in unused_function().
 */
void used_function(){
    printf("A string used here with the same size 2"); //goto .rodata.str1.8
}

After linking, when compiled with -O2:

Contents of section .rodata:
 400640 2573004f 6e2d7468 6520666c 79207374  %s.On-the fly st
 400650 72696e67 00000000 41207374 72696e67  ring....A string
 400660 20757365 64206865 72652077 69746820   used here with 
 400670 74686520 73616d65 2073697a 65203200  the same size 2.
 400680 4920776f 756c6420 6c696b65 20746f20  I would like to 
 400690 6f707469 6d697a65 20746869 73207374  optimize this st
 4006a0 72696e67 20310000 53747269 6e673200  ring 1..String2.

The string is still there!

like image 224
Clement Avatar asked Sep 29 '14 13:09

Clement


1 Answers

Use the compiler option -O2 to optimize your binary. This will eliminate the strings that are referenced only by the uncalled unused_function.

like image 196
user1990 Avatar answered Oct 25 '22 17:10

user1990