What's the most efficient way to implement GetDeleteObjects below?
// Holds a collection of Bar objects that a caller can take away in one call.
class Foo {
 public:
  // Returns the stored objects by value; see the definition for how the
  // internal vector is emptied.
  std::vector<Bar> GetDeleteObjects();

 private:
  // The objects handed out by GetDeleteObjects().
  std::vector<Bar> objects_;
};  // a class definition must be terminated with ';'
// Returns the objects currently stored in objects_ and leaves objects_ empty.
std::vector<Bar> Foo::GetDeleteObjects() {
// Copy-initialization from an lvalue: every element of objects_ is copied into result.
std::vector<Bar> result = objects_;
// Remove the originals now that result holds the copies.
objects_.clear();
return result;
}
Currently, at least the copy from objects_ to result is executed. Can this be made faster with std::move, for example?
You could swap the vectors:
// Swap-based variant: exchanges the contents of objects_ with a fresh, empty
// vector and returns the vector that now holds the former contents.
std::vector<Bar> Foo::GetDeleteObjects() {
  std::vector<Bar> taken;  // default-constructed, i.e. empty
  objects_.swap(taken);    // objects_ becomes empty; taken gets the elements
  return taken;
}
You can use move construction for move-aware types like std::vector<T>:
// Move-based variant: builds the result with std::vector's move constructor
// instead of its copy constructor, then resets the source explicitly.
std::vector<Bar>
Foo::GetDeleteObjects() {
std::vector<Bar> result(std::move(objects_));
// objects_ is left in a valid but unspecified state after the move,
// so clear() it to make the empty state explicit.
objects_.clear();
return result;
}
In many implementations, the transfer during the move construction most likely already resets the pointers, and the call to clear() is not needed. However, a moved-from object is only guaranteed to be in a valid but unspecified state. Thus, it is, unfortunately, necessary to call clear().
The other three answers are correct, so there is nothing for me to add in terms of answering the question. However, since the OP is interested in efficiency, I compiled all of the suggestions with clang at -O3.
There's almost nothing in it between two of the solutions, but the std::exchange solution stands out as producing more efficient code on my compiler, with the added advantage that it's idiomatically perfect.
I thought the results were interesting:
given:
// Variant 1: swap objects_ into an empty local vector and return that local.
std::vector<Bar> Foo::GetDeleteObjects1() {
std::vector<Bar> tmp;
// tmp is empty at this point, so after the swap objects_ is empty
// and tmp holds the former contents of objects_.
tmp.swap(objects_);
return tmp;
}
results in:
__ZN3Foo17GetDeleteObjects1Ev:
.cfi_startproc
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
movq $0, 8(%rdi) ; construct tmp's allocator
movq $0, (%rdi) ;... shame this wasn't optimised away
movups (%rsi), %xmm0 ; swap
movups %xmm0, (%rdi)
xorps %xmm0, %xmm0 ;... but compiler has detected that
movups %xmm0, (%rsi) ;... LHS of swap will always be empty
movq 16(%rsi), %rax ;... so redundant fetch of LHS is elided
movq %rax, 16(%rdi)
movq $0, 16(%rsi) ;... same here
movq %rdi, %rax
popq %rbp
retq
given:
// Variant 2: move-construct the result from objects_, then clear objects_.
std::vector<Bar>
Foo::GetDeleteObjects2() {
// Initialization from an rvalue selects std::vector's move constructor.
std::vector<Bar> tmp = std::move(objects_);
// Explicitly empty the moved-from vector rather than relying on its state.
objects_.clear();
return tmp;
}
results in:
__ZN3Foo17GetDeleteObjects2Ev:
.cfi_startproc
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
movq $0, 8(%rdi) ; move-construct ... shame about these
movq $0, (%rdi) ; ... redundant zero-writes
movups (%rsi), %xmm0 ; ... copy right to left ...
movups %xmm0, (%rdi)
movq 16(%rsi), %rax
movq %rax, 16(%rdi)
movq $0, 16(%rsi) ; zero out moved-from vector ...
movq $0, 8(%rsi) ; ... happens to be identical to clear()
movq $0, (%rsi) ; ... so clear() is optimised away
movq %rdi, %rax
popq %rbp
retq
Finally, given:
// Variant 3: std::exchange assigns a new value to objects_ and returns the old one.
std::vector<Bar>
Foo::GetDeleteObjects3() {
// The {} argument yields an empty std::vector<Bar> as the new value of objects_.
return std::exchange(objects_, {});
}
results in a very pleasing:
__ZN3Foo17GetDeleteObjects3Ev:
.cfi_startproc
pushq %rbp
Ltmp6:
.cfi_def_cfa_offset 16
Ltmp7:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp8:
.cfi_def_cfa_register %rbp
movq $0, (%rdi) ; move-construct the result
movq (%rsi), %rax
movq %rax, (%rdi)
movups 8(%rsi), %xmm0
movups %xmm0, 8(%rdi)
movq $0, 16(%rsi) ; zero out the source
movq $0, 8(%rsi)
movq $0, (%rsi)
movq %rdi, %rax
popq %rbp
retq
Conclusion:
The std::exchange method is both idiomatically perfect and optimally efficient.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With