Background: I made a silly code generation program for various programming languages, the goal being to compare how long it takes various compilers to compile absurdly long 100K+ LOC programs made of simple functions. But when compiling, the Rust version of a program never finished compiling when optimization flags were given to rustc
.
I discovered that it is easy to make a pretty small Rust program (example below) that takes (to my sensibilities) far too long to compile with the -C opt-level=2
or -C opt-level=3
flags for rustc
. I tried in 1.16 stable, 1.30 stable, 1.32.0-nightly on Linux and 1.30 stable on macOS and Windows - all take what looks to me far too long to compile. I say this because in comparison, roughly equivalent C++ takes under a second to compile with clang++ -O3
. This leads me to a couple questions:
1. What is rustc doing (slowly) to this code that clang isn't?
2. How can I find out what is slowing down rustc? It would be cool to see if a particular optimisation in opt-level=2 is the culprit.
The rustc guide tells how to turn on debug logs. Using that gets:
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::<()>)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::{{closure}}::<(), i8, extern "rust-call" fn(()) -> i32, fn()>)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::sys::unix::process::process_common::ExitCode::as_i32)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim(vtable))
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::ptr::real_drop_in_place::<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()]> - shim(None))
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<() as std::process::Termination>::report)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<std::process::ExitCode as std::process::Termination>::report)
INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::fmt::ArgumentV1::new::<i32>)
INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: 5 symbols to preserve in this crate
INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: going for that thin, thin LTO
Notice the minute gap between the last rustc_codegen_ssa::base
and rustc_codegen_llvm::back::lto
- how should I interpret this?
Rust gist and C++ gist
//$ rustc -C opt-level=2 test_20.rs
// takes over a minute to compile, rustc 1.32.0-nightly
// see https://gist.github.com/ajdust/5e92cab52ffab5ea2a52edbd47aa348a
#![allow(unused_parens)]
/// Leaf of the generated call chain: pure integer arithmetic on `p`.
///
/// Calls no other function. The result is built by folding a sequence of
/// bitwise and arithmetic operations over values derived from `p` and
/// literal constants.
fn f0(p: i32) -> i32 {
    // Two masks derived from `p`, combined and subtracted from `p`.
    let left_mask = ((21 | 1) | p) ^ 84;
    let right_mask = (48 ^ (52 | (p & (2 ^ 61)))) - 67;
    let a = p - (left_mask & right_mask);
    let b = a;
    let mut c = 54_i32;
    let d = 75_i32;
    let e = 77_i32 & 39;
    let g = b * e;
    let h = 88 * (8 + a);
    c += 60;

    // Fold the final expression one operator at a time, left to right.
    let mut acc = 32 * p;
    acc &= a;
    acc ^= b;
    acc -= c;
    acc ^= d;
    acc &= e;
    acc |= g;
    acc |= h;
    acc
}
fn f1(p: i32) -> i32 {
let mut x1: i32 = f0(78);
x1 = (x1 ^ p);
let mut x2: i32 = f0(x1);
x2 = (x2 * 3);
let x3: i32 = f0(x1);
let x4: i32 = ((21 & (x3 - ((93 * (x3 - (f0(x3) - (x2 - (f0(x1) | 43))))) | (f0(p) - f0(x1))))) * 41);
((((((f0(p) | x2) ^ p) & x1) ^ x2) | x3) - x4)
}
fn f2(p: i32) -> i32 {
let mut x1: i32 = f1(50);
x1 = (x1 * p);
x1 = (x1 | f0(p));
let mut x2: i32 = f1(x1);
x2 = (x2 | f1(x2));
let mut x3: i32 = (24 * f0(x1));
x2 = (x2 & f0(p));
x3 = (x3 ^ x1);
let x4: i32 = x1;
(((((x4 ^ p) | x1) * x2) + x3) | x4)
}
fn f3(p: i32) -> i32 {
let mut x1: i32 = f2(75);
let x2: i32 = x1;
x1 = (x1 & x2);
let x3: i32 = f0(p);
let x4: i32 = ((f1(x3) ^ f1(x2)) + 92);
x1 = (x1 | (x2 ^ 94));
x1 = (x1 * x2);
let x5: i32 = (f0(x1) & (3 ^ (f0(x1) * f2(x4))));
x1 = (x1 + x2);
(((((((x1 * x5) * p) - x1) | x2) * x3) - x4) - x5)
}
fn f4(p: i32) -> i32 {
let mut x1: i32 = f3(14);
x1 = (x1 + f0(p));
let mut x2: i32 = f1(x1);
x1 = (x1 - 41);
x2 = (x2 ^ 61);
let x3: i32 = f2(p);
x2 = (x2 ^ p);
let x4: i32 = x2;
x1 = (x1 - p);
x1 = (x1 * x4);
((((((88 & 11) & p) - x1) * x2) ^ x3) | x4)
}
fn f5(p: i32) -> i32 {
let mut x1: i32 = f4(50);
x1 = (x1 ^ 13);
((35 + p) | x1)
}
fn f6(p: i32) -> i32 {
let mut x1: i32 = f5(51);
x1 = (x1 + 27);
let x2: i32 = (p + (p | f1(x1)));
x1 = (x1 + f0(x2));
let x3: i32 = f0(x1);
let mut x4: i32 = 48;
x1 = (x1 ^ f0(p));
x1 = (x1 & 26);
x4 = (x4 * f1(x4));
(((((99 - p) * x1) ^ x2) & x3) + x4)
}
fn f7(p: i32) -> i32 {
let mut x1: i32 = f6(71);
x1 = (x1 & 66);
x1 = (x1 & p);
let x2: i32 = 57;
x1 = (x1 * 26);
let x3: i32 = (21 & p);
let x4: i32 = (f0(x1) & (f3(p) * f2(p)));
let x5: i32 = f6(x3);
((((((x5 + p) | x1) + x2) - x3) & x4) * x5)
}
fn f8(p: i32) -> i32 {
let mut x1: i32 = f7(57);
x1 = (x1 & f5(p));
x1 = (x1 ^ (x1 & f1(p)));
let x2: i32 = 25;
let x3: i32 = f5(x1);
((((x1 - p) * x1) & x2) ^ x3)
}
fn f9(p: i32) -> i32 {
let mut x1: i32 = f8(23);
x1 = (x1 | (((26 | f4(x1)) - f0(p)) | f8(p)));
let x2: i32 = x1;
let mut x3: i32 = 58;
x3 = (x3 - p);
let x4: i32 = f7(x1);
let x5: i32 = f7(x2);
let x6: i32 = (f7(x1) & 79);
(((((((33 | p) - x1) + x2) + x3) * x4) ^ x5) + x6)
}
fn f10(p: i32) -> i32 {
let mut x1: i32 = f9(75);
x1 = (x1 | 37);
(((f8(x1) + f3(x1)) | p) * x1)
}
fn f11(p: i32) -> i32 {
let mut x1: i32 = f10(8);
x1 = (x1 ^ f6(x1));
let mut x2: i32 = p;
x2 = (x2 ^ 84);
let x3: i32 = (f5(p) ^ f5(p));
x1 = (x1 * f5(p));
x1 = (x1 | f1(x2));
x1 = (x1 * f8(p));
((((((f0(x3) | f9(p)) - f4(x1)) + p) & x1) & x2) - x3)
}
fn f12(p: i32) -> i32 {
let mut x1: i32 = f11(33);
x1 = (x1 * 84);
let mut x2: i32 = (67 - f0(p));
x2 = (x2 | x1);
x1 = (x1 - 67);
x2 = (x2 - f6(p));
(((p - p) * x1) | x2)
}
fn f13(p: i32) -> i32 {
let mut x1: i32 = f12(90);
x1 = (x1 + (f6(x1) - f4(p)));
x1 = (x1 - 19);
let x2: i32 = 92;
let mut x3: i32 = f9(x1);
let mut x4: i32 = x3;
x4 = (x4 - (87 | f5(x3)));
x3 = (x3 | 49);
let x5: i32 = 25;
let x6: i32 = x3;
(((((((2 & p) - x1) ^ x2) ^ x3) ^ x4) | x5) | x6)
}
fn f14(p: i32) -> i32 {
let mut x1: i32 = f13(66);
let x2: i32 = f2(p);
x1 = (x1 - 11);
let mut x3: i32 = 69;
x3 = (x3 * x2);
let x4: i32 = 91;
(((((19 * p) + x1) | x2) ^ x3) & x4)
}
fn f15(p: i32) -> i32 {
let mut x1: i32 = f14(79);
x1 = (x1 + (f8(p) & p));
let x2: i32 = p;
x1 = (x1 | ((f5(p) & x2) ^ x2));
let mut x3: i32 = x1;
x1 = (x1 - p);
x3 = (x3 * p);
((((40 * p) ^ x1) + x2) + x3)
}
fn f16(p: i32) -> i32 {
let x1: i32 = f15(77);
let mut x2: i32 = 5;
let mut x3: i32 = x1;
let x4: i32 = p;
x2 = (x2 + p);
let x5: i32 = x4;
x3 = (x3 | f9(x4));
let x6: i32 = (68 ^ (61 ^ (24 * f14(x4))));
(((((((88 + p) - x1) & x2) | x3) & x4) ^ x5) | x6)
}
fn f17(p: i32) -> i32 {
let mut x1: i32 = f16(41);
x1 = (x1 | 4);
let mut x2: i32 = x1;
x1 = (x1 | 52);
x1 = (x1 & 49);
x2 = (x2 & (f8(x2) ^ p));
let mut x3: i32 = x2;
x3 = (x3 ^ ((x1 ^ x2) + f15(x2)));
let mut x4: i32 = (f13(x2) ^ 73);
x4 = (x4 - f12(x1));
(((((x3 - p) + x1) ^ x2) + x3) | x4)
}
fn f18(p: i32) -> i32 {
let mut x1: i32 = f17(3);
x1 = (x1 & (p - ((33 * (95 | 87)) | (9 - f1(x1)))));
x1 = (x1 & (80 - f16(x1)));
x1 = (x1 & p);
x1 = (x1 + p);
x1 = (x1 | (82 - ((81 ^ p) - 97)));
((20 - p) * x1)
}
fn f19(p: i32) -> i32 {
let x1: i32 = f18(24);
let x2: i32 = (p & p);
let mut x3: i32 = 82;
let x4: i32 = (4 + x1);
x3 = (x3 | ((f10(p) + (f16(x3) - 34)) - f10(x1)));
let x5: i32 = (x4 | (x1 * (((f16(x1) + f4(x4)) - 43) & f7(x3))));
(((((((f14(x3) | f9(x5)) - p) & x1) * x2) & x3) * x4) + x5)
}
fn f20(p: i32) -> i32 {
let x1: i32 = f19(78);
let x2: i32 = 81;
let x3: i32 = (x2 + (59 & x1));
(((((f9(x3) ^ f11(x3)) * p) * x1) - x2) ^ x3)
}
fn main() {
let mut x0: i32 = f20(65);
x0 = (x0 * (53 + 37));
let mut x1: i32 = (x0 - ((41 | ((f20(x0) * f9(x0)) + ((((f20(x0) + (77 + (f14(x0) ^ 60))) * 27) & 62) + x0))) & f20(x0)));
let x2: i32 = f15(x1);
x1 = (x1 | (x0 * (4 ^ 37)));
let m: i32 = (((x2 | x0) | x1) | x2);
println!("{}", m);
}
That is because the languages are different and choose different tradeoffs. The tradeoffs might make a language inapplicable in your domain. Rust has always been marketed as a "systems programming language". C++ is Rust's closest competitor in this domain space and it suffers from terribly slow compiling as well.
But rustc is not a slow compiler — it implements the most advanced incremental compilation in industrial compilers, it takes advantage of a compilation model based on proper modules (crates), and it has been meticulously optimized. Fast-to-compile Rust projects are a reality, even if they are not common.
This was effectively answered in the comments - thanks @bluss. Passing -Cinline-threshold=1000 to rustc brings the compilation time down to the expected under a second or so.
As this can still be replicated 2.5 years later, I finally followed through with the suggestion to create a bug report issue in the rust-lang repo to see if there's a way Rust can be improved. You can see that bug report issue here: https://github.com/rust-lang/rust/issues/86870
Thanks to all who commented.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With