I want to know whether it is more efficient to use a local function (_drawBitmap in the example below), which requires only 3 parameters but can't be inlined because it accesses variables of the enclosing procedure, or to use a global function, which can be inlined (but will it really be inlined?) and requires 5 parameters.
I also don't know whether it matters, but this code is mostly compiled for Android/iOS.
Code With local function :
// Paint handler (local-function variant): all drawing is delegated to the
// nested helper _drawBitmap, which reads Canvas and ARect directly from this
// procedure's parameters instead of receiving them as arguments.
procedure TMyObject.onPaint(Sender: TObject; Canvas: TCanvas; const ARect: TRectF);
// Draws aBitmap with its top-left corner at aTopLeft and opacity aOpacity.
// Returns True when aBitmap is non-nil and its destination rectangle
// intersects ARect (a draw call is issued in that case), False otherwise.
// ScreenScale and AbsoluteOpacity are resolved from outside this block.
function _drawBitmap(const aBitmap: {$IFDEF _USE_TEXTURE}TTexture{$ELSE}Tbitmap{$ENDIF}; const aTopLeft: TpointF; Const aOpacity: Single): boolean;
var aDestRect: TrectF;
begin
Result := False;
if aBitmap <> nil then begin
// Compute aDestRect: the bitmap size divided by ScreenScale, anchored at
// aTopLeft and passed through canvas.AlignToPixel.
aDestRect := canvas.AlignToPixel(
TRectF.Create(
aTopLeft,
aBitmap.Width/ScreenScale,
aBitmap.Height/ScreenScale));
// Only draw when the destination rectangle intersects the rectangle being painted.
if ARect.IntersectsWith(aDestRect) then begin
Result := True;
{$IFDEF _USE_TEXTURE}
// NOTE(review): the first argument is the destination rectangle and the
// second is the full texture area; the trailing ATexRect/ARect labels look
// swapped relative to the DrawTexture declaration - confirm.
TCustomCanvasGpu(Canvas).DrawTexture(aDestRect, // ATexRect
TRectF.Create(0,
0,
aBitmap.Width,
aBitmap.Height), // ARect
ALPrepareColor(TCustomCanvasGpu.ModulateColor, aOpacity * AbsoluteOpacity), // https://quality.embarcadero.com/browse/RSP-15432
aBitmap);
{$ELSE}
// NOTE(review): aDestRect was sized as Width/ScreenScale, so the SameValue
// checks below compare a scaled size with the raw bitmap size; they
// presumably only hold when ScreenScale is 1 - confirm this is intended.
Canvas.DrawBitmap(aBitmap, // ABitmap
TRectF.Create(0,
0,
aBitmap.Width,
aBitmap.Height), // SrcRect
aDestRect, // DstRect
aOpacity * AbsoluteOpacity, // AOpacity
samevalue(aDestRect.Width, aBitmap.Width, Tepsilon.Position) and
samevalue(aDestRect.height, aBitmap.height, Tepsilon.Position)); // HighSpeed - set interpolation to none
{$ENDIF};
end;
end;
end;
begin
// Draw at full opacity (1); the Boolean result is not used here.
_drawBitmap(aBitmap, aPos, 1);
end;
ASM :
MyObject.pas.2632: _drawBitmap(fBtnFilterBitmap, // aBitmap
00B97511 55 push ebp
00B97512 680000803F push $3f800000
00B97517 8B45F8 mov eax,[ebp-$08]
00B9751A 8D90C4050000 lea edx,[eax+$000005c4]
00B97520 8B45F8 mov eax,[ebp-$08]
00B97523 8B80A8040000 mov eax,[eax+$000004a8]
00B97529 E882FDFFFF call _drawBitmap
00B9752E 59 pop ecx
MyObject.pas.2562: begin
00B972B0 55 push ebp
00B972B1 8BEC mov ebp,esp
00B972B3 83C4A0 add esp,-$60
00B972B6 53 push ebx
00B972B7 56 push esi
00B972B8 57 push edi
00B972B9 8955FC mov [ebp-$04],edx
00B972BC 8BF0 mov esi,eax
MyObject.pas.2563: Result := False;
00B972BE 33DB xor ebx,ebx
MyObject.pas.2564: if aBitmap <> nil then begin
00B972C0 85F6 test esi,esi
00B972C2 0F84B4010000 jz $00b9747c
MyObject.pas.2567: aDestRect := canvas.AlignToPixel(
00B972C8 8B450C mov eax,[ebp+$0c]
00B972CB 8B78FC mov edi,[eax-$04]
00B972CE 8BC6 mov eax,esi
00B972D0 E88F559BFF call TBitmap.GetWidth
...
and with global function :
// Global (inline-candidate) variant of the drawing helper: the canvas and the
// rectangle being painted are passed explicitly as Canvas and ARect.
// Draws aBitmap with its top-left corner at aTopLeft and opacity aOpacity.
// Returns True when aBitmap is non-nil and its destination rectangle
// intersects ARect (a draw call is issued in that case), False otherwise.
// NOTE(review): AbsoluteOpacity and ScreenScale are neither parameters nor
// locals of this routine - confirm they resolve at unit scope for a
// standalone function.
function drawBitmap(const Canvas: TCanvas; const ARect: TRectF; const aBitmap: {$IFDEF _USE_TEXTURE}TTexture{$ELSE}Tbitmap{$ENDIF}; const aTopLeft: TpointF; Const aOpacity: Single): boolean; inline;
var aDestRect: TrectF;
begin
Result := False;
if aBitmap <> nil then begin
// Compute aDestRect: the bitmap size divided by ScreenScale, anchored at
// aTopLeft and passed through canvas.AlignToPixel.
aDestRect := canvas.AlignToPixel(
TRectF.Create(
aTopLeft,
aBitmap.Width/ScreenScale,
aBitmap.Height/ScreenScale));
// Only draw when the destination rectangle intersects the rectangle being painted.
if ARect.IntersectsWith(aDestRect) then begin
Result := True;
{$IFDEF _USE_TEXTURE}
// NOTE(review): the first argument is the destination rectangle and the
// second is the full texture area; the trailing ATexRect/ARect labels look
// swapped relative to the DrawTexture declaration - confirm.
TCustomCanvasGpu(Canvas).DrawTexture(aDestRect, // ATexRect
TRectF.Create(0,
0,
aBitmap.Width,
aBitmap.Height), // ARect
ALPrepareColor(TCustomCanvasGpu.ModulateColor, aOpacity * AbsoluteOpacity), // https://quality.embarcadero.com/browse/RSP-15432
aBitmap);
{$ELSE}
// NOTE(review): aDestRect was sized as Width/ScreenScale, so the SameValue
// checks below compare a scaled size with the raw bitmap size; they
// presumably only hold when ScreenScale is 1 - confirm this is intended.
Canvas.DrawBitmap(aBitmap, // ABitmap
TRectF.Create(0,
0,
aBitmap.Width,
aBitmap.Height), // SrcRect
aDestRect, // DstRect
aOpacity * AbsoluteOpacity, // AOpacity
samevalue(aDestRect.Width, aBitmap.Width, Tepsilon.Position) and
samevalue(aDestRect.height, aBitmap.height, Tepsilon.Position)); // HighSpeed - set interpolation to none
{$ENDIF};
end;
end;
end;
// Paint handler (global-function variant): delegates to the unit-level
// drawBitmap routine, drawing aBitmap at aPos with full opacity (1).
procedure TMyObject.onPaint(Sender: TObject; Canvas: TCanvas; const ARect: TRectF);
begin
  // Unlike a nested helper, the global drawBitmap cannot see this procedure's
  // parameters, so Canvas and ARect must be passed explicitly ahead of the
  // bitmap, position and opacity (5 arguments in total).
  drawBitmap(Canvas, ARect, aBitmap, aPos, 1);
end;
ASM:
MyObject.pas.2636: drawBitmap(Canvas, aRect, fBtnFilterBitmap, // aBitmap
00B98F6D 8BFB mov edi,ebx
00B98F6F 8B83A8040000 mov eax,[ebx+$000004a8]
00B98F75 8945F0 mov [ebp-$10],eax
00B98F78 8D83C4050000 lea eax,[ebx+$000005c4]
00B98F7E 8945EC mov [ebp-$14],eax
00B98F81 C645EB00 mov byte ptr [ebp-$15],$00
00B98F85 8B75F0 mov esi,[ebp-$10]
00B98F88 85F6 test esi,esi
00B98F8A 0F840A020000 jz $00b9919a
00B98F90 8BC6 mov eax,esi
00B98F92 E8CD389BFF call TBitmap.GetWidth
...
Here, the function call itself is practically instantaneous compared with the TCanvas drawing calls it makes. So this is clearly premature optimization, and there is no performance difference in practice between the two. The global function may be more difficult to maintain (unless it is code that can actually be reused somewhere else in the unit). Anyway, even a global function is not a good idea: if you have some specific reusable process, define a class instead: it will be cleaner and easier to debug/extend/test.
Only for very small functions, which do not call any other functions, inlining may give some performance benefits. For instance:
// Returns the sum of n1 and n2. A tiny leaf routine like this, which calls
// nothing else, is the kind of function where inlining can pay off.
function Add(n1,n2: integer): integer; inline;
begin
  Exit(n1 + n2);
end;
But in your case, it won't make any sense.
And, as you suspected, it is up to the compiler whether to actually inline the code. If it determines that inlining won't bring any benefit (it may even be slower than calling a sub-function), it won't inline the function.
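For completeness: at the assembly level, when you call a local function nested within another function, access to the variables of the enclosing scope is implemented by passing the caller's "stack frame" pointer as an additional hidden parameter (this is the `push ebp` just before `call _drawBitmap` in your first listing).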
In pseudo code, it is like that:
function _drawBitmap(const stackframe: TLocalStackRecord; const aBitmap: {$IFDEF _USE_TEXTURE}TTexture{$ELSE}Tbitmap{$ENDIF}; const aTopLeft: TpointF; Const aOpacity: Single): boolean;
var aDestRect: TrectF;
begin
Result := False;
if aBitmap <> nil then begin
//calculate aDestRect
aDestRect := stackframe.canvas.AlignToPixel(
TRectF.Create(
aTopLeft,
aBitmap.Width/ScreenScale,
aBitmap.Height/ScreenScale));
...
Try to avoid premature optimization:
Programmers waste enormous amounts of time thinking about, or worrying about, the speed of noncritical parts of their programs, and these attempts at efficiency actually have a strong negative impact when debugging and maintenance are considered. We should forget about small efficiencies, say about 97% of the time: premature optimization is the root of all evil. Yet we should not pass up our opportunities in that critical 3%. Variant in Knuth, "Structured Programming with Goto Statements". Computing Surveys 6:4 (December 1974), pp. 261–301, §1.
To avoid wasting your time (and money), use a profiler - e.g. Eric's Sampling Profiler - to find out which part of your code will actually need to be optimized.
Make it right, then make it fast. And always keep it readable and maintainable.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With