This question is inspired by my attempts to answer another question: Converting decimal/integer to binary - how and why it works the way it does?
The only documentation for the bitwise shift operators that I can find says:
The operations x shl y and x shr y shift the value of x to the left or right by y bits, which (if x is an unsigned integer) is equivalent to multiplying or dividing x by 2^y; the result is of the same type as x. For example, if N stores the value 01101 (decimal 13), then N shl 1 returns 11010 (decimal 26). Note that the value of y is interpreted modulo the size of the type of x. Thus for example, if x is an integer, x shl 40 is interpreted as x shl 8 because an integer is 32 bits and 40 mod 32 is 8.
Consider this program:
{$APPTYPE CONSOLE}
program BitwiseShift;
// Demonstrates that shl/shr on small unsigned types are NOT evaluated
// in the declared type: the compiler widens the operand first, so bits
// shifted past the declared type's width survive for Byte and Word.
var
u8: Byte;
u16: Word;
u32: LongWord;
u64: UInt64;
begin
u8 := $ff;
// Byte operand: bits shifted past bit 7 are retained in the wider
// intermediate, so the round trip restores all 8 bits.
Writeln((u8 shl 7) shr 7);
// expects: 1 actual: 255
u16 := $ffff;
// Word operand: same effect — bits past bit 15 are not discarded.
Writeln((u16 shl 15) shr 15);
// expects: 1 actual: 65535
u32 := $ffffffff;
// LongWord matches the intermediate width, so bits really are lost here.
Writeln((u32 shl 31) shr 31);
// expects: 1 actual: 1
u64 := $ffffffffffffffff;
// UInt64 shifts are performed at 64 bits, so bits are lost as expected.
Writeln((u64 shl 63) shr 63);
// expects: 1 actual: 1
end.
I have run this with both XE3 and XE5, for both the 32 and 64 bit Windows compilers, and the outputs are consistent, as commented in the code above.
I expected that (u8 shl 7) shr 7
would be evaluated entirely in the context of an 8 bit type. So when bits are shifted beyond the end of that 8 bit type, those bits are lost.
My question is why the program behaves as it does.
Interestingly, I translated the program to C++ and, with my 64 bit mingw 4.6.3 compiler, obtained the same output.
#include <cstdint>
#include <iostream>
// C++ translation of the Delphi test. The integral promotions widen
// uint8_t/uint16_t operands to int before <<, so the same "unexpected"
// results appear (255 and 65535 rather than 1).
int main()
{
// uint8_t promotes to int: the left-shifted bits fit in the int and survive.
uint8_t u8 = 0xff;
std::cout << ((u8 << 7) >> 7) << std::endl;
// uint16_t also promotes to int; 0xffff << 15 still fits in 32 bits here.
uint16_t u16 = 0xffff;
std::cout << ((u16 << 15) >> 15) << std::endl;
// uint32_t is not promoted further; the high bits are genuinely discarded.
uint32_t u32 = 0xffffffff;
std::cout << ((u32 << 31) >> 31) << std::endl;
// uint64_t shifts are carried out at 64 bits.
uint64_t u64 = 0xffffffffffffffff;
std::cout << ((u64 << 63) >> 63) << std::endl;
}
The reason is type promotion:
One special case of implicit type conversion is type promotion, where the compiler automatically expands the binary representation of objects of integer or floating-point types. Promotions are commonly used with types smaller than the native type of the target platform's ALU prior to arithmetic and logical operations in order to make such operations possible, or more efficient if the ALU can work with more than one type. C and C++ perform such promotion for objects of boolean, character, wide character, enumeration, and short integer types which are promoted to int, and for objects of type float, which are promoted to double. Unlike some other type conversions, promotions never lose precision or modify the value stored in the object.
So in the following code
var
u8: Byte;
begin
u8 := $ff;
Writeln((u8 shl 7) shr 7);
..
the u8
value is promoted to a 32-bit value before shl
is applied; to fix the result you need an explicit type conversion:
Writeln(Byte(u8 shl 7) shr 7);
C++ Standard, Section 4.5 Integral promotions:
An rvalue of type char, signed char, unsigned char, short int, or unsigned short int can be converted to an rvalue of type int if int can represent all the values of the source type; otherwise, the source rvalue can be converted to an rvalue of type unsigned int.
To check if Delphi follows the same convention in type promotion I've written the following application:
// Uses overload resolution to reveal which type each small unsigned
// variable is promoted to when passed as an argument: Byte and Word
// resolve to the Integer overload, LongWord to the Cardinal one.
var
u8: Byte;
u16: Word;
u32: LongWord;
procedure Test(Value: Integer); overload;
begin
Writeln('Integer');
end;
procedure Test(Value: Cardinal); overload;
begin
Writeln('Cardinal');
end;
begin
u8 := $ff;
Test(u8); // 'Integer'
u16 := $ffff;
Test(u16); // 'Integer'
u32 := $ffffffff;
Test(u32); // 'Cardinal'
Readln;
end.
So I believe there should be no difference between Delphi and C++ here.
I modified your test to
// Shifts a Byte value left and back right by 7/15/31/63 bits and prints
// the operand, the intermediate, and the round-trip result in hex, to show
// at what width the shift is actually performed.
procedure TestByte;
var
u8 : Byte;
LShift : Integer;
begin
Writeln( 'Byte' );
u8 := $FF;
LShift := 7;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 15;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 31;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 63;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
end;
// Same experiment as TestByte but with a Word operand. Note the local is
// still named u8 (apparently copied from TestByte); its type is Word.
procedure TestWord;
var
u8 : Word;
LShift : Integer;
begin
Writeln( 'Word' );
u8 := $FF;
LShift := 7;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 15;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 31;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 63;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
end;
// Same experiment with a LongWord operand (the local is again named u8
// by copy-paste; its type is LongWord).
procedure TestLongWord;
var
u8 : LongWord;
LShift : Integer;
begin
Writeln( 'LongWord' );
u8 := $FF;
LShift := 7;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 15;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 31;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 63;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
end;
// Same experiment with a UInt64 operand (local again named u8 by
// copy-paste). Per the output quoted below, only this variant performs
// the 31- and 63-bit shifts at 64-bit width.
procedure TestUInt64;
var
u8 : UInt64;
LShift : Integer;
begin
Writeln( 'UInt64' );
u8 := $FF;
LShift := 7;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 15;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 31;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
LShift := 63;
Writeln( IntToHex( u8, 16 ), '-', LShift : 2, ' ', IntToHex( u8 shl LShift, 16 ), ' ', IntToHex( ( u8 shl LShift ) shr LShift, 16 ) );
end;
// Program entry: run the shift experiment for each unsigned width in turn.
begin
TestByte;
TestWord;
TestLongWord;
TestUInt64;
end.
and it gave me this result
Byte 00000000000000FF- 7 0000000000007F80 00000000000000FF 00000000000000FF-15 00000000007F8000 00000000000000FF 00000000000000FF-31 0000000080000000 0000000000000001 00000000000000FF-63 0000000080000000 0000000000000001 Word 00000000000000FF- 7 0000000000007F80 00000000000000FF 00000000000000FF-15 00000000007F8000 00000000000000FF 00000000000000FF-31 0000000080000000 0000000000000001 00000000000000FF-63 0000000080000000 0000000000000001 LongWord 00000000000000FF- 7 0000000000007F80 00000000000000FF 00000000000000FF-15 00000000007F8000 00000000000000FF 00000000000000FF-31 0000000080000000 0000000000000001 00000000000000FF-63 0000000080000000 0000000000000001 UInt64 00000000000000FF- 7 0000000000007F80 00000000000000FF 00000000000000FF-15 00000000007F8000 00000000000000FF 00000000000000FF-31 0000007F80000000 00000000000000FF 00000000000000FF-63 8000000000000000 0000000000000001
So internally the values are not handled in the type in which they are declared.
What's happening behind the scenes is actually quite interesting.
Given the following Delphi app:
program BitwiseShift;
// Contrasts the one-expression form (shift performed at the compiler's
// default width) with the step-by-step form where each intermediate is
// stored back into the Byte variable, truncating it to 8 bits.
// NOTE: the disassembly quoted below refers to these exact source lines.
var
u8: Byte;
begin
//all in one go
u8 := $ff;
Writeln((u8 shl 7) shr 7);
// expects: 1 actual: 255
//step by step
u8 := $ff;
u8:= u8 shl 7;  // storing into u8 truncates the result to 8 bits
u8:= u8 shr 7;
WriteLn(u8);
// expects: 1 actual: 1
end.
The following assembly is produced (in XE2)
BitwiseShift.dpr.10: Writeln((u8 shl 7) shr 7);
004060D3 33D2 xor edx,edx
004060D5 8A1594AB4000 mov dl,[$0040ab94]
004060DB C1E207 shl edx,$07
004060DE C1EA07 shr edx,$07
004060E1 A114784000 mov eax,[$00407814] <<--- The result is NOT a byte!!
004060E6 E895D6FFFF call @Write0Long
004060EB E864D9FFFF call @WriteLn
004060F0 E8A7CCFFFF call @_IOTest
BitwiseShift.dpr.13: u8 := $ff;
004060F5 C60594AB4000FF mov byte ptr [$0040ab94],$ff
BitwiseShift.dpr.14: u8:= u8 shl 7;
004060FC C02594AB400007 shl byte ptr [$0040ab94],$07
BitwiseShift.dpr.15: u8:= u8 shr 7;
00406103 33C0 xor eax,eax
00406105 A094AB4000 mov al,[$0040ab94]
0040610A C1E807 shr eax,$07
0040610D A294AB4000 mov [$0040ab94],al
BitwiseShift.dpr.16: WriteLn(u8);
00406112 33D2 xor edx,edx
00406114 8A1594AB4000 mov dl,[$0040ab94]
0040611A A114784000 mov eax,[$00407814]
0040611F E85CD6FFFF call @Write0Long
00406124 E82BD9FFFF call @WriteLn
00406129 E86ECCFFFF call @_IOTest
The rule as far as I can make out is:
Rule
The narrowness of the shift being performed (8/16/32 bits) depends on the size of the result of the shift, not the size of variables used in the shift. In the original case you do not reserve a variable to hold the result and thus Delphi chooses a default (integer) for you.
How to get the expected result
In my altered case the result is byte sized and hence the data gets chopped to that size.
If you alter your case to force the use of bytes, your original expectations are met:
Writeln(byte(byte(u8 shl 7) shr 7));
// expects: 1 actual: 1
Project24.dpr.19: Writeln(byte(byte(u8 shl 7) shr 7));
00406135 8A1594AB4000 mov dl,[$0040ab94]
0040613B C1E207 shl edx,$07
0040613E 81E2FF000000 and edx,$000000ff
00406144 C1EA07 shr edx,$07
00406147 81E2FF000000 and edx,$000000ff
0040614D A114784000 mov eax,[$00407814]
00406152 E829D6FFFF call @Write0Long
00406157 E8F8D8FFFF call @WriteLn
0040615C E83BCCFFFF call @_IOTest
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With