Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

LLVM struct array iteration

While compiling this code with LLVM:

/* Array element type used by the example: two int members followed by
 * three char members, declared in that order. */
struct bar {
    int int1;
    int int2;
    char char1;
    char char2;
    char char3;   /* last member of this layout */
};


/* Wrapper holding a fixed-size array of 16 struct bar elements. */
struct foo {
    struct bar array[16];
};


/*
 * Fill the first `num` elements of f->array with fixed values
 * (int1 = 1, int2 = 2, char1 = 'a', char2 = 'b', char3 = 'c').
 *
 * f    - structure whose array is written through.
 * num  - number of leading elements to initialise; the loop body does not
 *        run when num <= 0.
 *
 * Returns num unchanged.
 *
 * NOTE(review): num is not checked against the 16-element size of
 * f->array, so keeping it in range is left to the caller.
 */
int func(struct foo *f, int num) {

    /* Each iteration stores to all five members of element i, in
     * declaration order. */
    for(int i = 0; i < num; i++){
        f->array[i].int1 = 1;
        f->array[i].int2 = 2;
        f->array[i].char1 = 'a';
        f->array[i].char2 = 'b';        
        f->array[i].char3 = 'c';        
    }
    return num;
}

For some reason the compiler decides to iterate through this array in a weird manner. First it selects a seemingly arbitrary point in the middle or at the end of the struct, and then it performs each store using an immediate offset relative to that arbitrary point.

I found out that the arbitrary point is selected from this IR code:

  %scevgep = getelementptr %struct.foo* %f, i32 0, i32 0, i32 0, i32 4

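Where the final index, 4, is the field index of char3 (the fifth member of struct bar), so the pointer refers to char3 of the first array element.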

In this example, the stores to int1, int2, char1, and char2 will have negative immediates, and the store to char3 will have immediate 0.

It seems that with different arrangements of struct bar you get different offsets but always inside or at the end of the struct.

For example changing struct bar to:

/* Alternative layout of struct bar for comparison: the three char members
 * are declared first, followed by the two int members. */
struct bar {
    char char1;
    char char2;
    char char3;
    int int1;
    int int2;
};

will result in the following IR line:

  %scevgep = getelementptr %struct.foo* %f, i32 0, i32 0, i32 0, i32 3

Which means that the stores to char1, char2, and char3 will have negative immediates, the store to int1 will have immediate 0, and the store to int2 will have a positive immediate.

Why does it iterate relative to a point other than the base of the struct?

like image 493
Eitan Ziv Avatar asked May 01 '16 18:05

Eitan Ziv


1 Answers

A recent build of Clang doesn't produce the getelementptr instruction you describe. It uses normal indexing. The weirdest thing it does is to produce a version with the body of the loop unrolled twice:

; Target description for this module: little-endian x86-64 Linux.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; %struct.foo wraps a 16-element array of %struct.bar.
%struct.foo = type { [16 x %struct.bar] }
; %struct.bar has two i32 fields (indices 0-1) followed by three i8 fields
; (indices 2-4).
%struct.bar = type { i32, i32, i8, i8, i8 }

; @func stores the constants 1, 2, 97, 98, 99 into fields 0..4 of the first
; %num elements of %f's array and returns %num. The loop appears twice:
; a main loop (for.body) that handles two elements per iteration, and an
; epilogue (for.body.epil) that handles one leftover element when %num is odd.
define i32 @func(%struct.foo* nocapture %f, i32 %num) {
entry:
  ; Skip everything when %num <= 0.
  %cmp25 = icmp sgt i32 %num, 0
  br i1 %cmp25, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  ; %xtraiter = %num & 1: number of elements left over for the epilogue.
  %xtraiter = and i32 %num, 1
  ; With exactly one element, bypass the two-at-a-time loop entirely.
  %0 = icmp eq i32 %num, 1
  br i1 %0, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new

for.body.preheader.new:                           ; preds = %for.body.preheader
  ; Element count handled by the main loop (%num minus the leftover).
  %unroll_iter = sub i32 %num, %xtraiter
  br label %for.body

for.cond.cleanup.loopexit.unr-lcssa.loopexit:     ; preds = %for.body
  %indvars.iv.next.1.lcssa = phi i64 [ %indvars.iv.next.1, %for.body ]
  br label %for.cond.cleanup.loopexit.unr-lcssa

for.cond.cleanup.loopexit.unr-lcssa:              ; preds = %for.cond.cleanup.loopexit.unr-lcssa.loopexit, %for.body.preheader
  ; Index of the first element not yet written: 0 if the main loop was
  ; bypassed, otherwise the index the main loop stopped at.
  %indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1.lcssa, %for.cond.cleanup.loopexit.unr-lcssa.loopexit ]
  ; Run the epilogue only if there is a leftover element.
  %lcmp.mod = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod, label %for.cond.cleanup.loopexit, label %for.body.epil.preheader

for.body.epil.preheader:                          ; preds = %for.cond.cleanup.loopexit.unr-lcssa
  br label %for.body.epil

for.body.epil:                                    ; preds = %for.body.epil.preheader
  ; Epilogue: write all five fields of the single element at %indvars.iv.unr.
  %int1.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 0
  store i32 1, i32* %int1.epil, align 4, !tbaa !1
  %int2.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 1
  store i32 2, i32* %int2.epil, align 4, !tbaa !6
  %char1.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 2
  store i8 97, i8* %char1.epil, align 4, !tbaa !7
  %char2.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 3
  store i8 98, i8* %char2.epil, align 1, !tbaa !8
  %char3.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 4
  store i8 99, i8* %char3.epil, align 2, !tbaa !9
  br label %for.cond.cleanup.loopexit.epilog-lcssa

for.cond.cleanup.loopexit.epilog-lcssa:           ; preds = %for.body.epil
  br label %for.cond.cleanup.loopexit

for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.cond.cleanup.loopexit.epilog-lcssa
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ; Single return point: the function returns %num unchanged.
  ret i32 %num

for.body:                                         ; preds = %for.body, %for.body.preheader.new
  ; Main loop: %indvars.iv is the element index (starts at 0, advances by 2);
  ; %niter counts the remaining elements down to 0 in steps of 2.
  %indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
  %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.1, %for.body ]
  ; First copy of the loop body: element %indvars.iv.
  %int1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 0
  store i32 1, i32* %int1, align 4, !tbaa !1
  %int2 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 1
  store i32 2, i32* %int2, align 4, !tbaa !6
  %char1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 2
  store i8 97, i8* %char1, align 4, !tbaa !7
  %char2 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 3
  store i8 98, i8* %char2, align 1, !tbaa !8
  %char3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 4
  store i8 99, i8* %char3, align 2, !tbaa !9
  ; Second copy of the loop body: the next element. %indvars.iv is always
  ; even here (0, 2, 4, ...), so `or 1` yields %indvars.iv + 1.
  %indvars.iv.next = or i64 %indvars.iv, 1
  %int1.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 0
  store i32 1, i32* %int1.1, align 4, !tbaa !1
  %int2.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 1
  store i32 2, i32* %int2.1, align 4, !tbaa !6
  %char1.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 2
  store i8 97, i8* %char1.1, align 4, !tbaa !7
  %char2.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 3
  store i8 98, i8* %char2.1, align 1, !tbaa !8
  %char3.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 4
  store i8 99, i8* %char3.1, align 2, !tbaa !9
  ; Advance the index by 2, decrement the remaining count by 2, and exit
  ; the main loop once the count reaches 0.
  %indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
  %niter.nsub.1 = add i32 %niter, -2
  %niter.ncmp.1 = icmp eq i32 %niter.nsub.1, 0
  br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa.loopexit, label %for.body
}

If you update your question with steps to reproduce the IR you saw, I'm happy to explain why LLVM produced it, but I don't want to guess based on the name of the instruction.

like image 155
Chandler Carruth Avatar answered Sep 28 '22 03:09

Chandler Carruth