Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Which is faster for reverse iteration, for or while loops?

I am trying to implement the standard memmove function in Rust and I was wondering which method is faster for downwards iteration (where src < dest):

for i in (0..n).rev() {
    //Do copying
}

or

let mut i = n;
while i != 0 {
    i -= 1;
    // Do copying
}

Will the rev() in the for loops version significantly slow it down?

like image 510
EpicPotato Avatar asked Feb 21 '16 22:02

EpicPotato


1 Answers

TL;DR: Use the for loop.


Both should be equally fast. We can check the compiler's ability to peel away the layers of abstraction involved in the for loop quite simply:

#[inline(never)]
fn blackhole() {}

#[inline(never)]
fn with_for(n: usize) {
    for i in (0..n).rev() { blackhole(); }
}

#[inline(never)]
fn with_while(n: usize) {
    let mut i = n;
    while i > 0 {
        blackhole();
        i -= 1;
    }
}

This generates this LLVM IR:

; Function Attrs: noinline nounwind readnone uwtable
define internal void @_ZN8with_for20h645c385965fcce1fhaaE(i64) unnamed_addr #0 {
entry-block:
  ret void
}

; Function Attrs: noinline nounwind readnone uwtable
define internal void @_ZN10with_while20hc09c3331764a9434yaaE(i64) unnamed_addr #0 {
entry-block:
  ret void
}

Even if you are not versed in LLVM, it is obvious that both functions compiled down to the same IR (and thus obviously to the same assembly).

Since their performance is the same, one should prefer the more explicit for loop and reserve the while loop to cases where the iteration is irregular.


EDIT: to address starblue's concern of unfitness.

#[link(name = "snappy")]
extern {
    fn blackhole(i: libc::c_int) -> libc::c_int;
}

#[inline(never)]
fn with_for(n: i32) {
    for i in (0..n).rev() { unsafe { blackhole(i as libc::c_int); } }
}

#[inline(never)]
fn with_while(n: i32) {
    let mut i = n;
    while i > 0 {
        unsafe { blackhole(i as libc::c_int); }
        i -= 1;
    }
}

compiles down to:

; Function Attrs: noinline nounwind uwtable
define internal void @_ZN8with_for20h7cf06f33e247fa35maaE(i32) unnamed_addr #1 {
entry-block:
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %match_case.preheader, label %clean_ast_95_

match_case.preheader:                             ; preds = %entry-block
  br label %match_case

match_case:                                       ; preds = %match_case.preheader, %match_case
  %.in = phi i32 [ %2, %match_case ], [ %0, %match_case.preheader ]
  %2 = add i32 %.in, -1
  %3 = tail call i32 @blackhole(i32 %2)
  %4 = icmp sgt i32 %2, 0
  br i1 %4, label %match_case, label %clean_ast_95_.loopexit

clean_ast_95_.loopexit:                           ; preds = %match_case
  br label %clean_ast_95_

clean_ast_95_:                                    ; preds = %clean_ast_95_.loopexit, %entry-block
  ret void
}

; Function Attrs: noinline nounwind uwtable
define internal void @_ZN10with_while20hee8edd624cfe9293IaaE(i32) unnamed_addr #1 {
entry-block:
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %while_body.preheader, label %while_exit

while_body.preheader:                             ; preds = %entry-block
  br label %while_body

while_exit.loopexit:                              ; preds = %while_body
  br label %while_exit

while_exit:                                       ; preds = %while_exit.loopexit, %entry-block
  ret void

while_body:                                       ; preds = %while_body.preheader, %while_body
  %i.05 = phi i32 [ %3, %while_body ], [ %0, %while_body.preheader ]
  %2 = tail call i32 @blackhole(i32 %i.05)
  %3 = add nsw i32 %i.05, -1
  %4 = icmp sgt i32 %i.05, 1
  br i1 %4, label %while_body, label %while_exit.loopexit
}

The core loops are:

; -- for loop
match_case:                                       ; preds = %match_case.preheader, %match_case
  %.in = phi i32 [ %2, %match_case ], [ %0, %match_case.preheader ]
  %2 = add i32 %.in, -1
  %3 = tail call i32 @blackhole(i32 %2)
  %4 = icmp sgt i32 %2, 0
  br i1 %4, label %match_case, label %clean_ast_95_.loopexit

; -- while loop
while_body:                                       ; preds = %while_body.preheader, %while_body
  %i.05 = phi i32 [ %3, %while_body ], [ %0, %while_body.preheader ]
  %2 = tail call i32 @blackhole(i32 %i.05)
  %3 = add nsw i32 %i.05, -1
  %4 = icmp sgt i32 %i.05, 1
  br i1 %4, label %while_body, label %while_exit.loopexit

And the only difference is that:

  • for decrements before calling blackhole, while decrements after
  • for compares against 0, while compares against 1

otherwise, it's the same core loop.

like image 68
Matthieu M. Avatar answered Sep 30 '22 09:09

Matthieu M.