Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

GCC Aliasing Checks w/Restrict pointers

Consider the following two snippets:

/* Alignment, in bytes, that every array below is declared to satisfy. */
#define ALIGN_BYTES 32
/*
 * Re-assigns x to the value returned by __builtin_assume_aligned(x, ALIGN_BYTES).
 * Note that this expands to an assignment, so x must be a modifiable lvalue.
 */
#define ASSUME_ALIGNED(x) x = __builtin_assume_aligned(x, ALIGN_BYTES)

/*
 * fn0: element-wise sum, b[i] = a0[i] + a1[i] for i in [0, n).
 * Each array is passed as its own restrict-qualified pointer parameter.
 */
void fn0(const float *restrict a0, const float *restrict a1,
         float *restrict b, int n) {
    ASSUME_ALIGNED(a0); ASSUME_ALIGNED(a1); ASSUME_ALIGNED(b);

    for (int i = 0; i < n; ++i)
        b[i] = a0[i] + a1[i];
}

/*
 * fn1: same computation as fn0, but the two input arrays are reached through
 * a restrict-qualified array of restrict-qualified pointers (a[0] and a[1]).
 */
void fn1(const float *restrict *restrict a, float *restrict b, int n) {
    /* The first two expand to assignments to a[0] and a[1], i.e. stores through a. */
    ASSUME_ALIGNED(a[0]); ASSUME_ALIGNED(a[1]); ASSUME_ALIGNED(b);

    for (int i = 0; i < n; ++i)
        b[i] = a[0][i] + a[1][i];
}

When I compile these functions with gcc-4.7.2 -Ofast -march=native -std=c99 -ftree-vectorizer-verbose=5 -S test.c -Wall, I find that GCC inserts aliasing checks for the second function.

How can I prevent this so that the resulting assembly for fn1 is the same as that for fn0? (When the number of parameters increases from three to, say, 30, the argument-passing approach (fn0) becomes cumbersome, and the number of aliasing checks in the fn1 approach becomes ridiculous.)

Assembly (x86-64, AVX-capable chip); the aliasing checks are at the start of fn1 (.LFB10):

# ---------------------------------------------------------------------------
# fn0 -- compiler output for: b[i] = a0[i] + a1[i], i in [0, n)
# Register use below is consistent with the System V AMD64 argument order
# applied to the C prototype (presumably: rdi = a0, rsi = a1, rdx = b, ecx = n).
# No pointer-overlap checks are emitted for this function.
# ---------------------------------------------------------------------------
fn0:
.LFB9:
    .cfi_startproc
    testl   %ecx, %ecx
    jle .L1                             # n <= 0 (signed): nothing to do
    movl    %ecx, %r10d
    shrl    $3, %r10d                   # r10d = n >> 3 = number of 8-float vector iterations
    leal    0(,%r10,8), %r9d            # r9d = 8 * r10d = elements handled by the vector loop
    testl   %r9d, %r9d
    je  .L8                             # no full vector of 8: scalar loop only
    cmpl    $7, %ecx
    jbe .L8                             # n <= 7: scalar loop only
    xorl    %eax, %eax                  # rax = byte offset into the arrays
    xorl    %r8d, %r8d                  # r8d = vector iterations completed
    .p2align 4,,10
    .p2align 3
.L4:                                    # vector loop: 8 floats (32 bytes) per iteration
    vmovaps (%rsi,%rax), %ymm0          # aligned 32-byte load from rsi + offset
    addl    $1, %r8d
    vaddps  (%rdi,%rax), %ymm0, %ymm0   # add the 8 floats at rdi + offset
    vmovaps %ymm0, (%rdx,%rax)          # aligned 32-byte store to rdx + offset
    addq    $32, %rax
    cmpl    %r8d, %r10d
    ja  .L4                             # repeat while r10d > r8d (unsigned)
    cmpl    %r9d, %ecx
    je  .L1                             # n was a multiple of 8: done
.L3:                                    # set up the scalar remainder loop
    movslq  %r9d, %rax
    salq    $2, %rax                    # rax = r9d * 4 = byte offset of first remaining element
    addq    %rax, %rdi                  # advance all three pointers past the vectorized part
    addq    %rax, %rsi
    addq    %rax, %rdx
    xorl    %eax, %eax                  # rax = index within the remainder
    .p2align 4,,10
    .p2align 3
.L6:                                    # scalar loop: one float per iteration
    vmovss  (%rsi,%rax,4), %xmm0
    vaddss  (%rdi,%rax,4), %xmm0, %xmm0
    vmovss  %xmm0, (%rdx,%rax,4)
    addq    $1, %rax
    leal    (%r9,%rax), %r8d            # r8d = overall element index
    cmpl    %r8d, %ecx
    jg  .L6                             # repeat while n > r8d (signed)
.L1:
    vzeroupper
    ret
.L8:                                    # vector loop skipped: scalar loop starts at element 0
    xorl    %r9d, %r9d
    jmp .L3
    .cfi_endproc
.LFE9:
    .size   fn0, .-fn0
    .p2align 4,,15
    .globl  fn1
    .type   fn1, @function
# ---------------------------------------------------------------------------
# fn1 -- compiler output for: b[i] = a[0][i] + a[1][i]
# Register use below is consistent with rdi = a, rsi = b, rdx = n
# (presumably System V AMD64).
# NOTE(review): n is tested with 64-bit unsigned instructions here (testq / ja
# on %rdx), whereas the C listing declares `int n` -- this output was
# presumably produced from a variant with a 64-bit unsigned n; confirm.
# Everything between the loads of a[0]/a[1] and .L15 is the run-time overlap
# ("aliasing") check that fn0 does not have.
# ---------------------------------------------------------------------------
fn1:
.LFB10:
    .cfi_startproc
    testq   %rdx, %rdx                  # flags consumed by the je below (movq leaves flags intact)
    movq    (%rdi), %r8                 # r8 = a[0]
    movq    8(%rdi), %r9                # r9 = a[1]
    je  .L12                            # n == 0: nothing to do
    leaq    32(%rsi), %rdi              # rdi = b + 32 bytes
    movq    %rdx, %r10
    leaq    32(%r8), %r11               # r11 = a[0] + 32 bytes
    shrq    $3, %r10                    # r10 = n >> 3 = number of 8-float vector iterations
    cmpq    %rdi, %r8
    leaq    0(,%r10,8), %rax            # rax = 8 * r10 = elements handled by the vector loop
    setae   %cl                         # cl = (a[0] >= b + 32)
    cmpq    %r11, %rsi
    setae   %r11b                       # r11b = (b >= a[0] + 32)
    orl %r11d, %ecx                     # cl = a[0] and b are at least 32 bytes apart
    cmpq    %rdi, %r9
    leaq    32(%r9), %r11               # r11 = a[1] + 32 bytes
    setae   %dil                        # dil = (a[1] >= b + 32)
    cmpq    %r11, %rsi
    setae   %r11b                       # r11b = (b >= a[1] + 32)
    orl %r11d, %edi                     # dil = a[1] and b are at least 32 bytes apart
    andl    %edi, %ecx                  # cl = both inputs are clear of b
    cmpq    $7, %rdx
    seta    %dil                        # dil = (n > 7)
    testb   %dil, %cl
    je  .L19                            # overlap possible or n <= 7: scalar loop only
    testq   %rax, %rax
    je  .L19                            # no full vector of 8: scalar loop only
    xorl    %ecx, %ecx                  # rcx = byte offset into the arrays
    xorl    %edi, %edi                  # rdi = vector iterations completed
    .p2align 4,,10
    .p2align 3
.L15:                                   # vector loop: 8 floats (32 bytes) per iteration
    vmovaps (%r9,%rcx), %ymm0           # aligned 32-byte load from a[1] + offset
    addq    $1, %rdi
    vaddps  (%r8,%rcx), %ymm0, %ymm0    # add the 8 floats at a[0] + offset
    vmovaps %ymm0, (%rsi,%rcx)          # aligned 32-byte store to b + offset
    addq    $32, %rcx
    cmpq    %rdi, %r10
    ja  .L15                            # repeat while r10 > rdi (unsigned)
    cmpq    %rax, %rdx
    je  .L12                            # n was a multiple of 8: done
    .p2align 4,,10
    .p2align 3
.L20:                                   # scalar loop: one float per iteration, rax = element index
    vmovss  (%r9,%rax,4), %xmm0
    vaddss  (%r8,%rax,4), %xmm0, %xmm0
    vmovss  %xmm0, (%rsi,%rax,4)
    addq    $1, %rax
    cmpq    %rax, %rdx
    ja  .L20                            # repeat while n > rax (unsigned)
.L12:
    vzeroupper
    ret
.L19:                                   # vector loop skipped: scalar loop starts at element 0
    xorl    %eax, %eax
    jmp .L20
    .cfi_endproc
like image 681
Freddie Witherden Avatar asked Mar 25 '13 11:03

Freddie Witherden


People also ask

What is pointer aliasing?

If a function has two pointers pa and pb with the same value, we say the pointers alias each other. This introduces constraints on the order of instruction execution. If two write accesses that alias occur in program order, they must happen in the same order on the processor and cannot be reordered.

How do you get around strict aliasing?

The answer typically is to type pun, although the methods used often violate strict aliasing rules. Sometimes we want to circumvent the type system and interpret an object as a different type. This is called type punning: reinterpreting a segment of memory as another type.

Does c++ have restrict keyword?

C++ does not have standard support for restrict, but many compilers have equivalents that usually work in both C++ and C, such as GCC's and Clang's __restrict__, and Visual C++'s __declspec(restrict).

What is strict aliasing and why do we care?

"Strict aliasing is an assumption, made by the C (or C++) compiler, that dereferencing pointers to objects of different types will never refer to the same memory location (i.e. alias each other.)"


1 Answer

There is a way to tell the compiler to stop checking for aliasing:

Add the following line:

#pragma GCC ivdep 

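right in front of the loop you want to vectorize. Note that this pragma is only available in GCC 4.9 and later, so it will not be recognized by the GCC 4.7.2 used in the question. For more information, please read: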

https://gcc.gnu.org/onlinedocs/gcc-4.9.2/gcc/Loop-Specific-Pragmas.html

like image 165
PhD AP EcE Avatar answered Sep 28 '22 20:09

PhD AP EcE