While compiling this code with LLVM:
/* Array element type: two ints followed by three chars.
 * Field indices are 0..4 in declaration order, so char3 is field index 4. */
struct bar {
int int1;
int int2;
char char1;
char char2;
char char3;
};
/* Wrapper holding a fixed-size array of 16 struct bar elements. */
struct foo {
struct bar array[16];
};
/*
 * Writes the same five constants into every field of the first `num`
 * elements of f->array and returns `num` unchanged.
 *
 * f   - structure whose array is written through.
 * num - number of leading elements to fill; the loop does not run when
 *       num <= 0.
 *
 * NOTE(review): num is not checked against the 16-element array size, so
 * callers must pass num <= 16.
 */
int func(struct foo *f, int num) {
for(int i = 0; i < num; i++){
/* Five stores per element, issued in field declaration order. */
f->array[i].int1 = 1;
f->array[i].int2 = 2;
f->array[i].char1 = 'a';
f->array[i].char2 = 'b';
f->array[i].char3 = 'c';
}
return num;
}
For some reason the compiler decides to iterate through this array in a weird manner. First, it selects a seemingly arbitrary point in the middle or at the end of the struct, and then it stores the appropriate values using immediate offsets relative to that point.
I found out that the arbitrary point is selected from this IR code:
%scevgep = getelementptr %struct.foo* %f, i32 0, i32 0, i32 0, i32 4
Here 4 is the index of the field char3 within struct bar (a field index, not a byte offset).
In this example the stores for int1, int2, char1 and char2 will have negative immediates, and the store for char3 will have immediate 0.
It seems that with different arrangements of struct bar you get different offsets but always inside or at the end of the struct.
For example changing struct bar to:
/* Alternative layout of struct bar: the three chars come first, followed by
 * the two ints, so int1 is now field index 3. */
struct bar {
char char1;
char char2;
char char3;
int int1;
int int2;
};
will result in the following IR line:
%scevgep = getelementptr %struct.foo* %f, i32 0, i32 0, i32 0, i32 3
Which means that the stores for char1, char2 and char3 will have negative immediates, the store for int1 will have immediate 0, and the store for int2 will have a positive immediate.
Why does it iterate relative to a point other than the base of the struct?
A recent build of Clang doesn't produce the getelementptr instruction you describe; it uses normal indexing. The weirdest thing it does is to produce a version with the body of the loop unrolled by a factor of two:
; Module header: this IR was generated for an x86_64 Linux target.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; IR counterparts of the C types: struct foo wraps an array of 16 struct bar,
; and struct bar is two i32 fields followed by three i8 fields.
%struct.foo = type { [16 x %struct.bar] }
%struct.bar = type { i32, i32, i8, i8, i8 }
; @func(f, num): stores 1, 2, 97 ('a'), 98 ('b') and 99 ('c') into the five
; fields of each of the first %num elements of the array inside %f, then
; returns %num. The loop body appears twice per iteration of %for.body
; (unrolled by two); %for.body.epil handles the single leftover element when
; %num is odd. Every store address is computed by indexing from %f with the
; element index and a field index 0..4.
define i32 @func(%struct.foo* nocapture %f, i32 %num) {
entry:
; Nothing to do when num <= 0: branch straight to the return block.
%cmp25 = icmp sgt i32 %num, 0
br i1 %cmp25, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
; %xtraiter = num & 1, the number of leftover iterations (0 or 1).
%xtraiter = and i32 %num, 1
; With num == 1 there is no full pair, so skip the unrolled loop.
%0 = icmp eq i32 %num, 1
br i1 %0, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
for.body.preheader.new: ; preds = %for.body.preheader
; %unroll_iter = num rounded down to an even count for the unrolled loop.
%unroll_iter = sub i32 %num, %xtraiter
br label %for.body
for.cond.cleanup.loopexit.unr-lcssa.loopexit: ; preds = %for.body
%indvars.iv.next.1.lcssa = phi i64 [ %indvars.iv.next.1, %for.body ]
br label %for.cond.cleanup.loopexit.unr-lcssa
for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.cond.cleanup.loopexit.unr-lcssa.loopexit, %for.body.preheader
; %indvars.iv.unr is the element index for the leftover iteration: 0 when the
; unrolled loop was skipped, otherwise the index reached by the unrolled loop.
%indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1.lcssa, %for.cond.cleanup.loopexit.unr-lcssa.loopexit ]
; No leftover iteration when num was even.
%lcmp.mod = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod, label %for.cond.cleanup.loopexit, label %for.body.epil.preheader
for.body.epil.preheader: ; preds = %for.cond.cleanup.loopexit.unr-lcssa
br label %for.body.epil
for.body.epil: ; preds = %for.body.epil.preheader
; Epilogue: one copy of the loop body for element %indvars.iv.unr.
%int1.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 0
store i32 1, i32* %int1.epil, align 4, !tbaa !1
%int2.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 1
store i32 2, i32* %int2.epil, align 4, !tbaa !6
%char1.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 2
store i8 97, i8* %char1.epil, align 4, !tbaa !7
%char2.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 3
store i8 98, i8* %char2.epil, align 1, !tbaa !8
%char3.epil = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.unr, i32 4
store i8 99, i8* %char3.epil, align 2, !tbaa !9
br label %for.cond.cleanup.loopexit.epilog-lcssa
for.cond.cleanup.loopexit.epilog-lcssa: ; preds = %for.body.epil
br label %for.cond.cleanup.loopexit
for.cond.cleanup.loopexit: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.cond.cleanup.loopexit.epilog-lcssa
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
; Single return point: the function returns num unchanged.
ret i32 %num
for.body: ; preds = %for.body, %for.body.preheader.new
; Main loop, two elements per trip. %indvars.iv is the element index (starts
; at 0 and advances by 2); %niter is the number of iterations still to run.
%indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
%niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.1, %for.body ]
; First copy of the body: element %indvars.iv.
%int1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 0
store i32 1, i32* %int1, align 4, !tbaa !1
%int2 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 1
store i32 2, i32* %int2, align 4, !tbaa !6
%char1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 2
store i8 97, i8* %char1, align 4, !tbaa !7
%char2 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 3
store i8 98, i8* %char2, align 1, !tbaa !8
%char3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv, i32 4
store i8 99, i8* %char3, align 2, !tbaa !9
; Second copy of the body: the index is always even here, so OR-ing in 1
; yields the next element index.
%indvars.iv.next = or i64 %indvars.iv, 1
%int1.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 0
store i32 1, i32* %int1.1, align 4, !tbaa !1
%int2.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 1
store i32 2, i32* %int2.1, align 4, !tbaa !6
%char1.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 2
store i8 97, i8* %char1.1, align 4, !tbaa !7
%char2.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 3
store i8 98, i8* %char2.1, align 1, !tbaa !8
%char3.1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0, i64 %indvars.iv.next, i32 4
store i8 99, i8* %char3.1, align 2, !tbaa !9
; Advance the index by two elements and count down the remaining iterations;
; leave the loop once the counter reaches zero.
%indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
%niter.nsub.1 = add i32 %niter, -2
%niter.ncmp.1 = icmp eq i32 %niter.nsub.1, 0
br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa.loopexit, label %for.body
}
If you update your question with steps to reproduce the IR you saw, I'm happy to explain why LLVM produced it, but I don't want to guess based on the name of the instruction.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With