Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Global pointers are resolved by the optimizer, but references are not – why?

What is the big difference between a global const pointer and a global reference for the VS2010 optimizer? Why isn't the access through the reference resolved down to a direct access to the referenced variable?

// Short aliases for the unsigned integer types used by the test.
typedef unsigned char byte_t;
typedef unsigned short word_t;

// Two consecutive byte_t fields; used as the byte-wise view of a word_reg_t.
struct byte_reg_t
{
  byte_t low;
  byte_t high;
};

// Overlays a word_t (`value`) with the two byte_t fields of `part`, so the
// same storage can be accessed as a whole word or byte by byte.
union word_reg_t
{
  word_t value;
  byte_reg_t part;
};

// The single global object that every access in main() ultimately targets.
word_reg_t r16;

// Two global aliases for the same byte, r16.part.low:
//  - low_ref: a global reference (in the listings below the compiler stores
//    it as a pointer variable and loads it before each access)
//  - low_ptr: a global const pointer (in the listings below the compiler
//    replaces *low_ptr with a direct access to r16)
byte_t& low_ref = r16.part.low;
byte_t* const low_ptr = &r16.part.low;

// Emits a single `nop` via MSVC inline assembly; used in main() as a visible
// separator between the statements in the generated code.
#define SPLIT() _asm nop;

// Test driver: writes r16.part.low once through the global reference, then
// reads the same byte back in three different ways. Each step is followed by
// SPLIT() so the instructions generated for it can be told apart in the
// disassembly.
int main()
{
  // Store through the global reference.
  low_ref = 4;
  SPLIT()

  // (a) Direct access to the global object.
  byte_t a = r16.part.low;
  SPLIT()

  // (b) Access through the global reference - the case that is NOT resolved
  //     to a direct load in the VS2010 output shown below.
  byte_t b = low_ref;
  SPLIT()

  // (c) Access through the global const pointer - resolved to a direct load.
  byte_t c = *low_ptr;
  SPLIT()

  // All three values feed the result (presumably so that none of the loads
  // can be discarded as unused).
  return a+b+c;
} 

Compiling this in Release mode with assembly output enabled produces these results:

;byte_t a = r16.part.low;
mov cl, BYTE PTR ?r16@@3Tword_reg_t@@A

;byte_t b = low_ref;
mov edx, DWORD PTR ?low_ref@@3AAEA ; low_ref
mov dl, BYTE PTR [edx]

;byte_t c = *low_ptr;
mov al, BYTE PTR ?r16@@3Tword_reg_t@@A

unmodified disassembly

.text:00401000 _main           proc near               ; CODE XREF: __tmainCRTStartup+11D
.text:00401000                 mov     eax, ?low_ref@@3AAEA ; uchar & low_ref
.text:00401005                 mov     byte ptr [eax], 4
.text:00401008                 nop
.text:00401009                 mov     cl, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
.text:0040100F                 nop
.text:00401010                 mov     edx, ?low_ref@@3AAEA ; uchar & low_ref
.text:00401016                 mov     dl, [edx]
.text:00401018                 nop
.text:00401019                 mov     al, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
.text:0040101E                 nop
.text:0040101F                 movzx   eax, al
.text:00401022                 movzx   edx, dl
.text:00401025                 movzx   ecx, cl
.text:00401028                 add     eax, edx
.text:0040102A                 add     eax, ecx
.text:0040102C                 retn
.text:0040102C _main           endp

.data:00403374 ?r16@@3Tword_reg_t@@A db ?              ; DATA XREF: _main+9
.data:00403374                                         ; _main+19
.data:00403375                 align 4

.data:00403018 ; unsigned char & low_ref
.data:00403018 ?low_ref@@3AAEA dd offset ?r16@@3Tword_reg_t@@A ; DATA XREF: _main
.data:00403018                                         ; _main+10
.data:00403018                                         ; word_reg_t r16

I tested several variants (e.g. returning the value from a function) – the access is never resolved to a direct one as long as low_ref is used.

  • is the optimizer stupid?
  • an uncommon case for optimization?
  • some c/c++ standard restrictions?

UPDATE

It seems to be an uncommon case for optimization - thx Michael Burr

It works if the reference is declared in function scope, or inside a class or struct that is instantiated in function scope (but it's still strange that the optimizer resolves a const pointer but not a reference, when the two are semantically identical here).

UPDATE 2

It's even stranger: if you switch from byte_t to int, both accesses are resolved – through the const pointer and through the reference.

  • global ptr const to global byte_t var: resolved
  • global ptr const to global int var: resolved
  • global reference to global byte_t var: NOT RESOLVED
  • global reference to global int var: resolved
  • local reference to global byte_t var: resolved
  • local reference to global int var: resolved

So there is a small difference in how the optimizer treats const pointers and references; it depends on the scope of the reference... and on the referred type... sometimes :)

UPDATE 3

simpler testcode - checked with VS2010 and clang 3.1

// Short aliases for the two unsigned integer types compared by this test.
typedef unsigned char byte_t;
typedef unsigned int dword_t;

// SPLIT() emits two `nop` instructions as a visible separator between the
// statements in the generated code. Enable the variant matching the compiler.
//for msvc
#define SPLIT() _asm nop _asm nop;
//for clang
//#define SPLIT() asm("nop"); asm("nop");

// The two global objects that every access in main() ultimately targets.
byte_t byte;
dword_t dword;

// Global references to the globals above.
byte_t& global_ref_byte = byte;
dword_t& global_ref_dword = dword;

// Global const pointers to the same globals.
byte_t* const global_ptrc_byte = &byte;
dword_t* const global_ptrc_dword = &dword;

// Test driver: stores a value into the globals `byte` and `dword`, then reads
// each of them back through a global reference, a global const pointer and a
// local reference. Each step is followed by SPLIT() so the instructions
// generated for it can be told apart in the disassembly.
int main(int argc, char** argv)
{
  // Function-scope references to the same globals, for comparison with the
  // global references.
  byte_t& local_ref_byte = byte;
  dword_t& local_ref_dword = dword;

  // The value is derived from argv - presumably so that it is unknown at
  // compile time and the stores/loads below cannot be constant-folded.
  dword_t random = (dword_t)argv;

  byte = (byte_t)random;
  dword = (dword_t)random;
  SPLIT()

  // Global reference to byte_t - in the VS2010 listing below this is the only
  // load that still goes through a stored pointer (off_40301C).
  byte_t a = global_ref_byte;
  SPLIT()

  // Global reference to dword_t.
  dword_t b = global_ref_dword;
  SPLIT()

  // Global const pointer to byte_t.
  byte_t c = *global_ptrc_byte;
  SPLIT()

  // Global const pointer to dword_t.
  dword_t d = *global_ptrc_dword;
  SPLIT()

  // Local reference to byte_t.
  byte_t e = local_ref_byte;
  SPLIT()

  // Local reference to dword_t.
  dword_t f = local_ref_dword;
  SPLIT()

  // All six values feed the result (presumably so that none of the loads can
  // be discarded as unused).
  dword_t result = a+b+c+d+e+f;

  return result;
}

VS2010 disassembly

.text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp)
.text:00401000 _main           proc near               ; CODE XREF: ___tmainCRTStartup+11D
.text:00401000
.text:00401000 argc            = dword ptr  8
.text:00401000 argv            = dword ptr  0Ch
.text:00401000 envp            = dword ptr  10h
.text:00401000
.text:00401000                 push    ebp
.text:00401001                 mov     ebp, esp
.text:00401003                 mov     eax, [ebp+argv]
.text:00401006                 push    ebx
.text:00401007                 push    esi
.text:00401008                 push    edi
.text:00401009                 mov     byte_403374, al
.text:0040100E                 mov     dword_403378, eax
.text:00401013                 nop
.text:00401014                 nop
.text:00401015                 mov     eax, off_40301C
.text:0040101A                 mov     al, [eax]
.text:0040101C                 nop
.text:0040101D                 nop
.text:0040101E                 mov     ecx, dword_403378
.text:00401024                 nop
.text:00401025                 nop
.text:00401026                 mov     dl, byte_403374
.text:0040102C                 nop
.text:0040102D                 nop
.text:0040102E                 mov     esi, dword_403378
.text:00401034                 nop
.text:00401035                 nop
.text:00401036                 mov     bl, byte_403374
.text:0040103C                 nop
.text:0040103D                 nop
.text:0040103E                 mov     edi, dword_403378
.text:00401044                 nop
.text:00401045                 nop
.text:00401046                 movzx   edx, dl
.text:00401049                 movzx   ebx, bl
.text:0040104C                 add     edx, edi
.text:0040104E                 movzx   eax, al
.text:00401051                 add     edx, ebx
.text:00401053                 add     eax, edx
.text:00401055                 pop     edi
.text:00401056                 add     eax, esi
.text:00401058                 pop     esi
.text:00401059                 add     eax, ecx
.text:0040105B                 pop     ebx
.text:0040105C                 pop     ebp
.text:0040105D                 retn
.text:0040105D _main           endp

clang 3.1 disassembly

.text:004012E0 sub_4012E0      proc near               ; CODE XREF: sub_401020+91
.text:004012E0
.text:004012E0 arg_4           = dword ptr  0Ch
.text:004012E0
.text:004012E0                 push    ebp
.text:004012E1                 mov     ebp, esp
.text:004012E3                 call    sub_4014F0
.text:004012E8                 mov     eax, [ebp+arg_4]
.text:004012EB                 mov     byte_402000, al
.text:004012F0                 mov     dword_402004, eax
.text:004012F5                 nop
.text:004012F6                 nop
.text:004012F7                 movzx   eax, byte_402000
.text:004012FE                 nop
.text:004012FF                 nop
.text:00401300                 add     eax, dword_402004
.text:00401306                 nop
.text:00401307                 nop
.text:00401308                 movzx   ecx, byte_402000
.text:0040130F                 add     ecx, eax
.text:00401311                 nop
.text:00401312                 nop
.text:00401313                 add     ecx, dword_402004
.text:00401319                 nop
.text:0040131A                 nop
.text:0040131B                 movzx   eax, byte_402000
.text:00401322                 add     eax, ecx
.text:00401324                 nop
.text:00401325                 nop
.text:00401326                 add     eax, dword_402004
.text:0040132C                 nop
.text:0040132D                 nop
.text:0040132E                 pop     ebp
.text:0040132F                 retn
.text:0040132F sub_4012E0      endp

Without the nops both optimizers can produce better code – but clang's is still better.

VS2010 (more code because of the unresolved byte reference)

.text:00401003                 mov     eax, [ebp+argv]
.text:00401006                 movzx   ecx, al
.text:00401009                 lea     edx, [eax+eax*2]
.text:0040100C                 mov     byte_403374, al
.text:00401011                 mov     dword_403378, eax
.text:00401016                 lea     eax, [edx+ecx*2]
.text:00401019                 mov     ecx, off_40301C
.text:0040101F                 movzx   edx, byte ptr [ecx]
.text:00401022                 add     eax, edx

clang 3.1:

.text:004012E8                 mov     eax, [ebp+arg_4]
.text:004012EB                 mov     byte_402000, al
.text:004012F0                 mov     dword_402004, eax
.text:004012F5                 movzx   ecx, al
.text:004012F8                 add     ecx, eax
.text:004012FA                 lea     eax, [ecx+ecx*2]
like image 646
llm Avatar asked Jul 03 '12 09:07

llm


People also ask

Why do you need references in c++?

The main use of references is as function formal parameters, to support pass-by-reference. If a reference variable is passed into a function, the function works on the original object (instead of a cloned copy, as in pass-by-value). Changes made inside the function are reflected outside the function.

When to use a reference c++?

Use references when you can, and pointers when you have to. References are usually preferred over pointers whenever you don't need “reseating”. This usually means that references are most useful in a class's public interface. References typically appear on the skin of an object, and pointers on the inside.


1 Answers

Here's what I believe is happening. The reference is being treated similarly to a non-const global pointer. You can see this if you remove the const from the low_ptr declaration.

You can also see that if you move the reference to be local to the function the compiler is able to optimize the access through it without problem.

I'd guess that since global references are quite rare (a 'statistic' I'll admit I just made up), there's been little effort put into optimizing them.

like image 177
Michael Burr Avatar answered Oct 02 '22 03:10

Michael Burr