Why does rustc take over a minute to compile this 250 line program with optimization while clang takes under a second?

Tags:

Background: I made a silly code generation program for various programming languages, the goal being to compare how long it takes various compilers to compile absurdly long 100K+ LOC programs made of simple functions. But when compiling, the Rust version of a program never finished compiling when optimization flags were given to rustc.

I discovered that it is easy to make a pretty small Rust program (example below) that takes (to my sensibilities) far too long to compile with the -C opt-level=2 or -C opt-level=3 flags for rustc. I tried 1.16 stable, 1.30 stable, and 1.32.0-nightly on Linux, and 1.30 stable on macOS and Windows - all take what looks to me like far too long to compile. I say this because, in comparison, roughly equivalent C++ takes under a second to compile with clang++ -O3. This leads me to a couple of questions:

Is there an optimization rustc is doing (slowly) to this code that clang isn't?
If I wanted to look into this myself, are there good resources to profile rustc? It would be cool to see if a particular optimisation in opt-level=2 is the culprit.

The rustc guide tells how to turn on debug logs. Using that gets:

INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::<()>)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::{{closure}}::<(), i8, extern "rust-call" fn(()) -> i32, fn()>)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::sys::unix::process::process_common::ExitCode::as_i32)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim(vtable))
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::ptr::real_drop_in_place::<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()]> - shim(None))
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<() as std::process::Termination>::report)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<std::process::ExitCode as std::process::Termination>::report)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::fmt::ArgumentV1::new::<i32>)
INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: 5 symbols to preserve in this crate
INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: going for that thin, thin LTO

Notice the minute gap between the last rustc_codegen_ssa::base and rustc_codegen_llvm::back::lto - how should I interpret this?

Rust gist and C++ gist

//$ rustc -C opt-level=2 test_20.rs
//  takes over a minute to compile, rustc 1.32.0-nightly
//  see https://gist.github.com/ajdust/5e92cab52ffab5ea2a52edbd47aa348a
#![allow(unused_parens)]

/// Leaf of the generated call chain: combines `p` with integer constants
/// using only `i32` arithmetic and bitwise operators. Calls no other function.
fn f0(p: i32) -> i32 {
    let mask = ((21 | 1) | p) ^ 84;
    let offset = (48 ^ (52 | (p & (2 ^ 61)))) - 67;
    let base = p - (mask & offset);
    let low_bits = 77 & 39;
    let scaled = base * low_bits;
    let stretched = 88 * (8 + base);
    let bias = 54 + 60;
    // Same left-to-right operator chain as before; parentheses keep the
    // grouping explicit because `&`, `^` and `|` differ in precedence.
    ((((((32 * p) & base) ^ base) - bias) ^ 75) & low_bits) | scaled | stretched
}

fn f1(p: i32) -> i32 {
    let mut x1: i32 = f0(78);
    x1 = (x1 ^ p);
    let mut x2: i32 = f0(x1);
    x2 = (x2 * 3);
    let x3: i32 = f0(x1);
    let x4: i32 = ((21 & (x3 - ((93 * (x3 - (f0(x3) - (x2 - (f0(x1) | 43))))) | (f0(p) - f0(x1))))) * 41);
    ((((((f0(p) | x2) ^ p) & x1) ^ x2) | x3) - x4)
}

fn f2(p: i32) -> i32 {
    let mut x1: i32 = f1(50);
    x1 = (x1 * p);
    x1 = (x1 | f0(p));
    let mut x2: i32 = f1(x1);
    x2 = (x2 | f1(x2));
    let mut x3: i32 = (24 * f0(x1));
    x2 = (x2 & f0(p));
    x3 = (x3 ^ x1);
    let x4: i32 = x1;
    (((((x4 ^ p) | x1) * x2) + x3) | x4)
}

fn f3(p: i32) -> i32 {
    let mut x1: i32 = f2(75);
    let x2: i32 = x1;
    x1 = (x1 & x2);
    let x3: i32 = f0(p);
    let x4: i32 = ((f1(x3) ^ f1(x2)) + 92);
    x1 = (x1 | (x2 ^ 94));
    x1 = (x1 * x2);
    let x5: i32 = (f0(x1) & (3 ^ (f0(x1) * f2(x4))));
    x1 = (x1 + x2);
    (((((((x1 * x5) * p) - x1) | x2) * x3) - x4) - x5)
}

fn f4(p: i32) -> i32 {
    let mut x1: i32 = f3(14);
    x1 = (x1 + f0(p));
    let mut x2: i32 = f1(x1);
    x1 = (x1 - 41);
    x2 = (x2 ^ 61);
    let x3: i32 = f2(p);
    x2 = (x2 ^ p);
    let x4: i32 = x2;
    x1 = (x1 - p);
    x1 = (x1 * x4);
    ((((((88 & 11) & p) - x1) * x2) ^ x3) | x4)
}

fn f5(p: i32) -> i32 {
    let mut x1: i32 = f4(50);
    x1 = (x1 ^ 13);
    ((35 + p) | x1)
}

fn f6(p: i32) -> i32 {
    let mut x1: i32 = f5(51);
    x1 = (x1 + 27);
    let x2: i32 = (p + (p | f1(x1)));
    x1 = (x1 + f0(x2));
    let x3: i32 = f0(x1);
    let mut x4: i32 = 48;
    x1 = (x1 ^ f0(p));
    x1 = (x1 & 26);
    x4 = (x4 * f1(x4));
    (((((99 - p) * x1) ^ x2) & x3) + x4)
}

fn f7(p: i32) -> i32 {
    let mut x1: i32 = f6(71);
    x1 = (x1 & 66);
    x1 = (x1 & p);
    let x2: i32 = 57;
    x1 = (x1 * 26);
    let x3: i32 = (21 & p);
    let x4: i32 = (f0(x1) & (f3(p) * f2(p)));
    let x5: i32 = f6(x3);
    ((((((x5 + p) | x1) + x2) - x3) & x4) * x5)
}

fn f8(p: i32) -> i32 {
    let mut x1: i32 = f7(57);
    x1 = (x1 & f5(p));
    x1 = (x1 ^ (x1 & f1(p)));
    let x2: i32 = 25;
    let x3: i32 = f5(x1);
    ((((x1 - p) * x1) & x2) ^ x3)
}

fn f9(p: i32) -> i32 {
    let mut x1: i32 = f8(23);
    x1 = (x1 | (((26 | f4(x1)) - f0(p)) | f8(p)));
    let x2: i32 = x1;
    let mut x3: i32 = 58;
    x3 = (x3 - p);
    let x4: i32 = f7(x1);
    let x5: i32 = f7(x2);
    let x6: i32 = (f7(x1) & 79);
    (((((((33 | p) - x1) + x2) + x3) * x4) ^ x5) + x6)
}

fn f10(p: i32) -> i32 {
    let mut x1: i32 = f9(75);
    x1 = (x1 | 37);
    (((f8(x1) + f3(x1)) | p) * x1)
}

fn f11(p: i32) -> i32 {
    let mut x1: i32 = f10(8);
    x1 = (x1 ^ f6(x1));
    let mut x2: i32 = p;
    x2 = (x2 ^ 84);
    let x3: i32 = (f5(p) ^ f5(p));
    x1 = (x1 * f5(p));
    x1 = (x1 | f1(x2));
    x1 = (x1 * f8(p));
    ((((((f0(x3) | f9(p)) - f4(x1)) + p) & x1) & x2) - x3)
}

fn f12(p: i32) -> i32 {
    let mut x1: i32 = f11(33);
    x1 = (x1 * 84);
    let mut x2: i32 = (67 - f0(p));
    x2 = (x2 | x1);
    x1 = (x1 - 67);
    x2 = (x2 - f6(p));
    (((p - p) * x1) | x2)
}

fn f13(p: i32) -> i32 {
    let mut x1: i32 = f12(90);
    x1 = (x1 + (f6(x1) - f4(p)));
    x1 = (x1 - 19);
    let x2: i32 = 92;
    let mut x3: i32 = f9(x1);
    let mut x4: i32 = x3;
    x4 = (x4 - (87 | f5(x3)));
    x3 = (x3 | 49);
    let x5: i32 = 25;
    let x6: i32 = x3;
    (((((((2 & p) - x1) ^ x2) ^ x3) ^ x4) | x5) | x6)
}

fn f14(p: i32) -> i32 {
    let mut x1: i32 = f13(66);
    let x2: i32 = f2(p);
    x1 = (x1 - 11);
    let mut x3: i32 = 69;
    x3 = (x3 * x2);
    let x4: i32 = 91;
    (((((19 * p) + x1) | x2) ^ x3) & x4)
}

fn f15(p: i32) -> i32 {
    let mut x1: i32 = f14(79);
    x1 = (x1 + (f8(p) & p));
    let x2: i32 = p;
    x1 = (x1 | ((f5(p) & x2) ^ x2));
    let mut x3: i32 = x1;
    x1 = (x1 - p);
    x3 = (x3 * p);
    ((((40 * p) ^ x1) + x2) + x3)
}

fn f16(p: i32) -> i32 {
    let x1: i32 = f15(77);
    let mut x2: i32 = 5;
    let mut x3: i32 = x1;
    let x4: i32 = p;
    x2 = (x2 + p);
    let x5: i32 = x4;
    x3 = (x3 | f9(x4));
    let x6: i32 = (68 ^ (61 ^ (24 * f14(x4))));
    (((((((88 + p) - x1) & x2) | x3) & x4) ^ x5) | x6)
}

fn f17(p: i32) -> i32 {
    let mut x1: i32 = f16(41);
    x1 = (x1 | 4);
    let mut x2: i32 = x1;
    x1 = (x1 | 52);
    x1 = (x1 & 49);
    x2 = (x2 & (f8(x2) ^ p));
    let mut x3: i32 = x2;
    x3 = (x3 ^ ((x1 ^ x2) + f15(x2)));
    let mut x4: i32 = (f13(x2) ^ 73);
    x4 = (x4 - f12(x1));
    (((((x3 - p) + x1) ^ x2) + x3) | x4)
}

fn f18(p: i32) -> i32 {
    let mut x1: i32 = f17(3);
    x1 = (x1 & (p - ((33 * (95 | 87)) | (9 - f1(x1)))));
    x1 = (x1 & (80 - f16(x1)));
    x1 = (x1 & p);
    x1 = (x1 + p);
    x1 = (x1 | (82 - ((81 ^ p) - 97)));
    ((20 - p) * x1)
}

fn f19(p: i32) -> i32 {
    let x1: i32 = f18(24);
    let x2: i32 = (p & p);
    let mut x3: i32 = 82;
    let x4: i32 = (4 + x1);
    x3 = (x3 | ((f10(p) + (f16(x3) - 34)) - f10(x1)));
    let x5: i32 = (x4 | (x1 * (((f16(x1) + f4(x4)) - 43) & f7(x3))));
    (((((((f14(x3) | f9(x5)) - p) & x1) * x2) & x3) * x4) + x5)
}

fn f20(p: i32) -> i32 {
    let x1: i32 = f19(78);
    let x2: i32 = 81;
    let x3: i32 = (x2 + (59 & x1));
    (((((f9(x3) ^ f11(x3)) * p) * x1) - x2) ^ x3)
}

fn main() {
    let mut x0: i32 = f20(65);
    x0 = (x0 * (53 + 37));
    let mut x1: i32 = (x0 - ((41 | ((f20(x0) * f9(x0)) + ((((f20(x0) + (77 + (f14(x0) ^ 60))) * 27) & 62) + x0))) & f20(x0)));
    let x2: i32 = f15(x1);
    x1 = (x1 | (x0 * (4 ^ 37)));
    let m: i32 = (((x2 | x0) | x1) | x2);
    println!("{}", m);

}

785

asked Dec 09 '18 19:12

Aaron B

1 Answer

This was effectively answered in the comments - thanks @bluss. Passing -Cinline-threshold=1000 to rustc brings the compilation time down to the expected under a second or so.

As this can still be replicated 2.5 years later, I finally followed through with the suggestion to create a bug report issue in the rust-lang repo to see if there's a way Rust can be improved. You can see that bug report issue here: https://github.com/rust-lang/rust/issues/86870

Thanks to all who commented.

157

answered Oct 26 '22 07:10

Aaron B

Related questions
                            
                                Why does Rust disallow mutable aliasing?
                            
                                "Subclassing" traits in Rust
                            
                                When and why to use AsRef<T> instead of &T
                            
                                Why is my Rust program slower than the equivalent Java program?
                            
                                Is there a way to get the field names of a struct in a macro?
                            
                                Do all primitive types implement the Copy trait?
                            
                                Read XML file into struct
                            
                                How does the mechanism behind the creation of boxed traits work?
                            
                                Parsing a char to u32
                            
                                Execute a shell command
                            
                                Is it possible to overload a function with different numbers of arguments (using traits)
                            
                                How can I force `build.rs` to run again without cleaning my whole project?
                            
                                Does Rust have a debug macro?
                            
                                How to allocate space for a Vec<T> in Rust?
                            
                                How to pull a dependency with different features under Cargo.toml "dependencies" and "dev-dependencies"?
                            
                                Write::write_fmt causes page fault on a bare metal
                            
                                Closures and Higher-Ranked-Trait-Bounds lifetime issue
                            
                                How to create a function that creates a Cartesian product Iterator from an Iterator of Iterators?

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

Why does rustc take over a minute to compile this 250 line program with optimization while clang takes under a second?

Tags:

compiler-optimization

rust

Aaron B

People also ask

1 Answers

Aaron B

Recent Activity

Donate For Us