What's the most efficient way to implement GetDeleteObjects below?
// Holds a collection of Bar objects that a caller can take over in one call.
class Foo {
public:
    // Returns the objects currently stored in objects_ by value; the
    // out-of-class definition of Foo::GetDeleteObjects also empties objects_.
    std::vector<Bar> GetDeleteObjects();

private:
    // The stored objects handed out by GetDeleteObjects().
    std::vector<Bar> objects_;
};  // a class definition must be terminated with a semicolon
// Returns the objects stored in objects_ and leaves objects_ empty.
std::vector<Bar> Foo::GetDeleteObjects() {
// objects_ is an lvalue here, so this initialization copies the vector.
std::vector<Bar> result = objects_;
// Empty the member now that its contents have been duplicated into result.
objects_.clear();
return result;
}
Currently, at least the copy from objects_ to result is executed. Can this be made faster with std::move, for example?
You could swap the vectors:
// Hands the stored objects to the caller by swapping them into a fresh,
// default-constructed vector; objects_ ends up with that vector's empty state.
std::vector<Bar> Foo::GetDeleteObjects() {
    std::vector<Bar> taken;
    objects_.swap(taken);
    return taken;
}
You can use move construction for move-aware types like std::vector<T>:
// Transfers the contents of objects_ to the caller using move construction.
std::vector<Bar> Foo::GetDeleteObjects() {
    // Initializing from an rvalue of the same type selects the move constructor.
    std::vector<Bar> moved_out = std::move(objects_);
    // A moved-from object is only promised to be valid but unspecified,
    // so empty it explicitly before handing the result back.
    objects_.clear();
    return moved_out;
}
In many implementations, the transfer during move construction most likely already resets the pointers, so the call to clear() would not be needed. However, a moved-from object is only guaranteed to be in a valid but unspecified state. Thus, it is, unfortunately, necessary to call clear().
The other three answers are correct so there is nothing for me to add here in terms of answering the question, but since the OP is interested in efficiency I compiled up all suggestions in clang with -O3.
There's almost nothing in it between two of the solutions, but the std::exchange solution stands out as producing more efficient code on my compiler, with the added advantage that it's idiomatically perfect.
I thought the results were interesting:
given:
// Variant 1: swap objects_ into a default-constructed local and return the local.
std::vector<Bar> Foo::GetDeleteObjects1() {
std::vector<Bar> tmp;
// After the swap, tmp holds the former contents of objects_ and objects_
// holds the empty state tmp was constructed with.
tmp.swap(objects_);
return tmp;
}
results in:
__ZN3Foo17GetDeleteObjects1Ev:
.cfi_startproc
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
movq $0, 8(%rdi) ; construct tmp's allocator
movq $0, (%rdi) ;... shame this wasn't optimised away
movups (%rsi), %xmm0 ; swap
movups %xmm0, (%rdi)
xorps %xmm0, %xmm0 ;... but compiler has detected that
movups %xmm0, (%rsi) ;... LHS of swap will always be empty
movq 16(%rsi), %rax ;... so redundant fetch of LHS is elided
movq %rax, 16(%rdi)
movq $0, 16(%rsi) ;... same here
movq %rdi, %rax
popq %rbp
retq
given:
// Variant 2: move-construct the result from objects_, then clear the source.
std::vector<Bar>
Foo::GetDeleteObjects2() {
std::vector<Bar> tmp = std::move(objects_);
// Explicitly empty the moved-from member before returning.
objects_.clear();
return tmp;
}
results in:
__ZN3Foo17GetDeleteObjects2Ev:
.cfi_startproc
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
movq $0, 8(%rdi) ; move-construct ... shame about these
movq $0, (%rdi) ; ... redundant zero-writes
movups (%rsi), %xmm0 ; ... copy right to left ...
movups %xmm0, (%rdi)
movq 16(%rsi), %rax
movq %rax, 16(%rdi)
movq $0, 16(%rsi) ; zero out moved-from vector ...
movq $0, 8(%rsi) ; ... happens to be identical to clear()
movq $0, (%rsi) ; ... so clear() is optimised away
movq %rdi, %rax
popq %rbp
retq
Finally, given:
// Variant 3: std::exchange assigns a value-initialized (empty) vector to
// objects_ and returns the value objects_ held beforehand.
std::vector<Bar>
Foo::GetDeleteObjects3() {
return std::exchange(objects_, {});
}
results in a very pleasing:
__ZN3Foo17GetDeleteObjects3Ev:
.cfi_startproc
pushq %rbp
Ltmp6:
.cfi_def_cfa_offset 16
Ltmp7:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp8:
.cfi_def_cfa_register %rbp
movq $0, (%rdi) ; move-construct the result
movq (%rsi), %rax
movq %rax, (%rdi)
movups 8(%rsi), %xmm0
movups %xmm0, 8(%rdi)
movq $0, 16(%rsi) ; zero out the source
movq $0, 8(%rsi)
movq $0, (%rsi)
movq %rdi, %rax
popq %rbp
retq
Conclusion:
The std::exchange method is both idiomatically perfect and optimally efficient.
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With