If I take the following Objective-C source file:
// test.m
#import <objc/Object.h>
// MySuperClass: subclass of Object that declares no instance variables and
// a single instance method, myMessage1.
@interface MySuperClass: Object {
}
-(void) myMessage1;
@end
// The method body is empty; the example only needs the method to exist so
// that it shows up in the metadata the compiler emits for the class.
@implementation MySuperClass
-(void) myMessage1 {
}
@end
// MyClass: subclass of MySuperClass that declares no instance variables and
// adds one further instance method, myMessage2.
@interface MyClass: MySuperClass {
}
-(void) myMessage2;
@end
// As with MySuperClass, the method body is empty.
@implementation MyClass
-(void) myMessage2 {
}
@end
// Entry point. Creates no objects and sends no messages; it just returns 0.
int main() {
return 0;
}
and try to generate an assembly file from it with clang -fobjc-nonfragile-abi -fnext-runtime -S test.m, I get the following assembly code:
.file "test.m"
.text
# Compiled body of -[MySuperClass myMessage1] (see the compiler's own comment
# on the label). The symbol name is the method name with '-', '[', ' ' and ']'
# written as _2D_, _5B_, _20_ and _5D_.
.align 16, 0x90
.type _2D__5B_MySuperClass_20_myMessage1_5D_,@function
_2D__5B_MySuperClass_20_myMessage1_5D_: # @"\01-[MySuperClass myMessage1]"
.Ltmp0:
.cfi_startproc
# BB#0:
# The two incoming register arguments are spilled below %rsp and the function
# returns immediately.
# NOTE(review): presumably %rdi/%rsi carry self and _cmd - confirm against the ABI.
movq %rdi, -8(%rsp)
movq %rsi, -16(%rsp)
ret
.Ltmp1:
.size _2D__5B_MySuperClass_20_myMessage1_5D_, .Ltmp1-_2D__5B_MySuperClass_20_myMessage1_5D_
.Ltmp2:
.cfi_endproc
.Leh_func_end0:
# Compiled body of -[MyClass myMessage2] (see the compiler's own comment on the
# label). Same instruction sequence as the myMessage1 function in this listing.
.align 16, 0x90
.type _2D__5B_MyClass_20_myMessage2_5D_,@function
_2D__5B_MyClass_20_myMessage2_5D_: # @"\01-[MyClass myMessage2]"
.Ltmp3:
.cfi_startproc
# BB#0:
# The two incoming register arguments are spilled below %rsp and the function
# returns immediately.
# NOTE(review): presumably %rdi/%rsi carry self and _cmd - confirm against the ABI.
movq %rdi, -8(%rsp)
movq %rsi, -16(%rsp)
ret
.Ltmp4:
.size _2D__5B_MyClass_20_myMessage2_5D_, .Ltmp4-_2D__5B_MyClass_20_myMessage2_5D_
.Ltmp5:
.cfi_endproc
.Leh_func_end1:
# Compiled body of main. It is the only function in this listing exported with
# .globl; the two method functions are not exported by name and are referenced
# only from the method-list data further down.
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.Ltmp6:
.cfi_startproc
# BB#0:
# Puts 0 in %eax, stores a 0 below %rsp, and returns.
movl $0, %eax
movl $0, -4(%rsp)
ret
.Ltmp7:
.size main, .Ltmp7-main
.Ltmp8:
.cfi_endproc
.Leh_func_end2:
# NUL-terminated class name "MySuperClass", placed in the __objc_classname section.
.type L_OBJC_CLASS_NAME_,@object # @"\01L_OBJC_CLASS_NAME_"
.section "__TEXT,__objc_classname,cstring_literals","aw",@progbits
L_OBJC_CLASS_NAME_:
.asciz "MySuperClass"
.size L_OBJC_CLASS_NAME_, 13
# 72-byte read-only record for the MySuperClass metaclass: three .long values,
# 4 bytes of padding, then seven .quad slots. The only non-zero quad is the
# pointer to the class-name string.
# NOTE(review): the field sequence is the same as l_OBJC_CLASS_RO_$_MySuperClass
# below, so this is presumably also a struct class_ro_t - confirm against
# objc-runtime-new.h before naming individual fields.
.type l_OBJC_METACLASS_RO_$_MySuperClass,@object # @"\01l_OBJC_METACLASS_RO_$_MySuperClass"
.section "__DATA, __objc_const","aw",@progbits
.align 8
l_OBJC_METACLASS_RO_$_MySuperClass:
.long 1 # 0x1
.long 40 # 0x28
.long 40 # 0x28
.zero 4
.quad 0
.quad L_OBJC_CLASS_NAME_
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.size l_OBJC_METACLASS_RO_$_MySuperClass, 72
# 40-byte metaclass object for MySuperClass: five pointers. The first two both
# reference OBJC_METACLASS_$_Object, the next two reference the
# _objc_empty_cache and _objc_empty_vtable symbols, and the last references
# the read-only record defined just above.
.type OBJC_METACLASS_$_MySuperClass,@object # @"OBJC_METACLASS_$_MySuperClass"
.section "__DATA, __objc_data","aw",@progbits
.globl OBJC_METACLASS_$_MySuperClass
.align 8
OBJC_METACLASS_$_MySuperClass:
.quad OBJC_METACLASS_$_Object
.quad OBJC_METACLASS_$_Object
.quad _objc_empty_cache
.quad _objc_empty_vtable
.quad l_OBJC_METACLASS_RO_$_MySuperClass
.size OBJC_METACLASS_$_MySuperClass, 40
# NUL-terminated selector name "myMessage1", placed in the __objc_methname section.
.type L_OBJC_METH_VAR_NAME_,@object # @"\01L_OBJC_METH_VAR_NAME_"
.section "__TEXT,__objc_methname,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_NAME_:
.asciz "myMessage1"
.size L_OBJC_METH_VAR_NAME_, 11
# Method type string "v16@0:8", placed in the __objc_methtype section. The
# method list of MyClass later in this listing references this same label.
.type L_OBJC_METH_VAR_TYPE_,@object # @"\01L_OBJC_METH_VAR_TYPE_"
.section "__TEXT,__objc_methtype,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_TYPE_:
.asciz "v16@0:8"
.size L_OBJC_METH_VAR_TYPE_, 8
# Instance method list for MySuperClass (32 bytes): an 8-byte header of two
# .long values followed by one 24-byte entry made of three pointers - selector
# name, type string, and the function that implements the method.
# NOTE(review): the header values 24 and 1 are presumably the entry size and
# the entry count of a struct method_list_t - confirm against objc-runtime-new.h.
.type l_OBJC_$_INSTANCE_METHODS_MySuperClass,@object # @"\01l_OBJC_$_INSTANCE_METHODS_MySuperClass"
.section "__DATA, __objc_const","aw",@progbits
.align 8
l_OBJC_$_INSTANCE_METHODS_MySuperClass:
.long 24 # 0x18
.long 1 # 0x1
.quad L_OBJC_METH_VAR_NAME_
.quad L_OBJC_METH_VAR_TYPE_
.quad _2D__5B_MySuperClass_20_myMessage1_5D_
.size l_OBJC_$_INSTANCE_METHODS_MySuperClass, 32
# 72-byte read-only record for the MySuperClass class. It has the same field
# sequence as the metaclass record, but here two quads are non-zero: the
# class-name string and the instance method list.
# NOTE(review): presumably a struct class_ro_t whose baseMethods field is the
# method-list pointer - confirm against objc-runtime-new.h.
.type l_OBJC_CLASS_RO_$_MySuperClass,@object # @"\01l_OBJC_CLASS_RO_$_MySuperClass"
.align 8
l_OBJC_CLASS_RO_$_MySuperClass:
.long 0 # 0x0
.long 8 # 0x8
.long 8 # 0x8
.zero 4
.quad 0
.quad L_OBJC_CLASS_NAME_
.quad l_OBJC_$_INSTANCE_METHODS_MySuperClass
.quad 0
.quad 0
.quad 0
.quad 0
.size l_OBJC_CLASS_RO_$_MySuperClass, 72
# 40-byte class object for MySuperClass: five pointers referencing, in order,
# its metaclass, OBJC_CLASS_$_Object (the class it inherits from in test.m),
# _objc_empty_cache, _objc_empty_vtable, and the read-only record above.
# Exported with .globl, so other object files can reference the class.
.type OBJC_CLASS_$_MySuperClass,@object # @"OBJC_CLASS_$_MySuperClass"
.section "__DATA, __objc_data","aw",@progbits
.globl OBJC_CLASS_$_MySuperClass
.align 8
OBJC_CLASS_$_MySuperClass:
.quad OBJC_METACLASS_$_MySuperClass
.quad OBJC_CLASS_$_Object
.quad _objc_empty_cache
.quad _objc_empty_vtable
.quad l_OBJC_CLASS_RO_$_MySuperClass
.size OBJC_CLASS_$_MySuperClass, 40
# NUL-terminated class name "MyClass", placed in the __objc_classname section.
.type L_OBJC_CLASS_NAME_1,@object # @"\01L_OBJC_CLASS_NAME_1"
.section "__TEXT,__objc_classname,cstring_literals","aw",@progbits
L_OBJC_CLASS_NAME_1:
.asciz "MyClass"
.size L_OBJC_CLASS_NAME_1, 8
# 72-byte read-only record for the MyClass metaclass. The only non-zero quad
# is the pointer to the class-name string.
# NOTE(review): presumably a struct class_ro_t, like l_OBJC_CLASS_RO_$_MyClass
# below - confirm against objc-runtime-new.h before naming individual fields.
.type l_OBJC_METACLASS_RO_$_MyClass,@object # @"\01l_OBJC_METACLASS_RO_$_MyClass"
.section "__DATA, __objc_const","aw",@progbits
.align 8
l_OBJC_METACLASS_RO_$_MyClass:
.long 1 # 0x1
.long 40 # 0x28
.long 40 # 0x28
.zero 4
.quad 0
.quad L_OBJC_CLASS_NAME_1
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.size l_OBJC_METACLASS_RO_$_MyClass, 72
# 40-byte metaclass object for MyClass: five pointers. The first references
# OBJC_METACLASS_$_Object and the second references the metaclass of
# MySuperClass, followed by _objc_empty_cache, _objc_empty_vtable, and the
# read-only record above.
.type OBJC_METACLASS_$_MyClass,@object # @"OBJC_METACLASS_$_MyClass"
.section "__DATA, __objc_data","aw",@progbits
.globl OBJC_METACLASS_$_MyClass
.align 8
OBJC_METACLASS_$_MyClass:
.quad OBJC_METACLASS_$_Object
.quad OBJC_METACLASS_$_MySuperClass
.quad _objc_empty_cache
.quad _objc_empty_vtable
.quad l_OBJC_METACLASS_RO_$_MyClass
.size OBJC_METACLASS_$_MyClass, 40
# NUL-terminated selector name "myMessage2", placed in the __objc_methname section.
.type L_OBJC_METH_VAR_NAME_2,@object # @"\01L_OBJC_METH_VAR_NAME_2"
.section "__TEXT,__objc_methname,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_NAME_2:
.asciz "myMessage2"
.size L_OBJC_METH_VAR_NAME_2, 11
# Instance method list for MyClass (32 bytes): an 8-byte header of two .long
# values followed by one 24-byte entry made of three pointers - selector name,
# type string, and the function that implements the method. The type string
# label is the one emitted for myMessage1; no second copy is emitted.
# NOTE(review): the header values 24 and 1 are presumably the entry size and
# the entry count of a struct method_list_t - confirm against objc-runtime-new.h.
.type l_OBJC_$_INSTANCE_METHODS_MyClass,@object # @"\01l_OBJC_$_INSTANCE_METHODS_MyClass"
.section "__DATA, __objc_const","aw",@progbits
.align 8
l_OBJC_$_INSTANCE_METHODS_MyClass:
.long 24 # 0x18
.long 1 # 0x1
.quad L_OBJC_METH_VAR_NAME_2
.quad L_OBJC_METH_VAR_TYPE_
.quad _2D__5B_MyClass_20_myMessage2_5D_
.size l_OBJC_$_INSTANCE_METHODS_MyClass, 32
# 72-byte read-only record for the MyClass class. Two quads are non-zero: the
# class-name string and the instance method list. This is how the method list
# is reachable from the class: class object -> this record -> method list.
# NOTE(review): presumably a struct class_ro_t whose baseMethods field is the
# method-list pointer - confirm against objc-runtime-new.h.
.type l_OBJC_CLASS_RO_$_MyClass,@object # @"\01l_OBJC_CLASS_RO_$_MyClass"
.align 8
l_OBJC_CLASS_RO_$_MyClass:
.long 0 # 0x0
.long 8 # 0x8
.long 8 # 0x8
.zero 4
.quad 0
.quad L_OBJC_CLASS_NAME_1
.quad l_OBJC_$_INSTANCE_METHODS_MyClass
.quad 0
.quad 0
.quad 0
.quad 0
.size l_OBJC_CLASS_RO_$_MyClass, 72
# 40-byte class object for MyClass: five pointers referencing, in order, its
# metaclass, the class object of MySuperClass (its superclass in test.m),
# _objc_empty_cache, _objc_empty_vtable, and the read-only record above.
.type OBJC_CLASS_$_MyClass,@object # @"OBJC_CLASS_$_MyClass"
.section "__DATA, __objc_data","aw",@progbits
.globl OBJC_CLASS_$_MyClass
.align 8
OBJC_CLASS_$_MyClass:
.quad OBJC_METACLASS_$_MyClass
.quad OBJC_CLASS_$_MySuperClass
.quad _objc_empty_cache
.quad _objc_empty_vtable
.quad l_OBJC_CLASS_RO_$_MyClass
.size OBJC_CLASS_$_MyClass, 40
# Class list: one pointer per class defined in this file, placed in the
# __objc_classlist section. Note that the section holds pointers to the class
# objects; the class objects themselves were emitted into __objc_data.
.type L_OBJC_LABEL_CLASS_$,@object # @"\01L_OBJC_LABEL_CLASS_$"
.section "__DATA, __objc_classlist, regular, no_dead_strip","aw",@progbits
.align 8
L_OBJC_LABEL_CLASS_$:
.quad OBJC_CLASS_$_MySuperClass
.quad OBJC_CLASS_$_MyClass
.size L_OBJC_LABEL_CLASS_$, 16
# Image info record: two 32-bit values (0 and 16) in the __objc_imageinfo section.
# NOTE(review): the meaning of the two values is not visible in this listing -
# confirm against the runtime sources.
.type L_OBJC_IMAGE_INFO,@object # @"\01L_OBJC_IMAGE_INFO"
.section "__DATA, __objc_imageinfo, regular, no_dead_strip","a",@progbits
.align 4
L_OBJC_IMAGE_INFO:
.long 0 # 0x0
.long 16 # 0x10
.size L_OBJC_IMAGE_INFO, 8
.section ".note.GNU-stack","",@progbits
My question is: how does the Objective-C runtime library, which must be linked against test.o
so that the executable file can be successfully created, retrieve the method list in order to create, for example, a vtable? Is it possible to use the .type ..., @function
, .type ..., @object
or .section ..., @progbits
assembly directives to get this information, at least at link time?
The Objective-C runtime is a runtime library that provides support for the dynamic properties of the Objective-C language, and as such is linked to by all Objective-C apps. Objective-C runtime library support functions are implemented in the shared library found at /usr/lib/libobjc.A.dylib.
Categories provide the ability to add functionality to an object without subclassing or changing the actual object. A handy tool, they are often used to add methods to existing classes, such as NSString or your own custom objects.
An Objective-C method declaration includes the parameters as part of its name, using colons, like this: - (void)someMethodWithValue:(SomeType)value; As with the return type, the parameter type is specified in parentheses, just like a standard C type-cast.
The compiler, linker, and runtime work together.
First, the compiler parses the source code for each class and emits directives like .long
, .zero
, and .quad
describing the class's instance variables, properties, selectors, and methods. The assembler turns these directives into raw data.
The data is in a format that the runtime understands. For example, the data starting at symbol OBJC_CLASS_$_MyClass
matches the layout of the runtime's struct class_t
(defined in objc-runtime-new.h
). The data at symbol l_OBJC_CLASS_RO_$_MyClass
matches the layout of the runtime's struct class_ro_t
(although most of the fields are 0 because the runtime updates them when it loads the class). The struct class_ro_t
has a baseMethods
field of type method_list_t *
, which in the case of l_OBJC_CLASS_RO_$_MyClass
is initialized to l_OBJC_$_INSTANCE_METHODS_MyClass
. At l_OBJC_$_INSTANCE_METHODS_MyClass
you will find data laid out like a struct method_list_t
, which ends with an array of struct method_t
- one for each method in the class. In your example, it's not very interesting because each of your classes has only one method.
The compiler uses the .section
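directives to tell the linker how to group chunks of that data together. For example, pointers to all of the struct class_t
chunks will be put together in a section named __objc_classlist
(in your listing, these are the two .quad entries at L_OBJC_LABEL_CLASS_$; the struct class_t chunks themselves go into the __objc_data section). This way, the runtime can just look up the section named __objc_classlist
, and then process the entire section as an array of pointers to struct class_t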
. Take a look at the GETSECT
macro in objc-file.mm
.
The linker arranges for the function _objc_init
(in objc-os.mm
) to run very early in the lifetime of your process, before main
. The _objc_init
function registers some callbacks with the dynamic loader. In particular, it tells the loader to call map_images
(in objc-runtime-new.mm
), which calls map_images_nolock
, which eventually calls _read_images
. The _read_images
function actually parses those chunks of data emitted by the compiler and turns them into the data structures that objc_msgSend
uses to actually send messages to objects.
You can download an archive of the Mac OS X 10.8 Objective-C runtime source code to learn more. This archive also contains source files for iOS/ARM (and even Windows!), although it might not correspond exactly to any version of iOS.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With