Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How does the Objective-C runtime retrieve the list of classes and methods?

If I take the following Objective-C source file:

// test.m
#import <objc/Object.h>

// Base class of the example hierarchy. Inherits from Object (declared in
// <objc/Object.h>), has no instance variables of its own, and declares a
// single instance method.
@interface MySuperClass: Object {

}
// Instance method with no explicit arguments and no return value.
-(void) myMessage1;
@end

// Implementation of MySuperClass. The method body is empty: the example is
// concerned with how the method is described to the runtime, not with what
// it does.
@implementation MySuperClass
-(void) myMessage1 {

}
@end

// Subclass of MySuperClass. Adds no instance variables and declares one
// additional instance method.
@interface MyClass: MySuperClass {

}
// Instance method with no explicit arguments and no return value.
-(void) myMessage2;
@end

// Implementation of MyClass. As with MySuperClass, the method body is empty;
// only the existence of the method matters for the example.
@implementation MyClass
-(void) myMessage2 {

}
@end

// Entry point. Creates no objects and sends no messages; it returns 0
// immediately, so neither class is ever used explicitly by the program.
int main() {

    return 0;
}

and try to generate an assembly file from it with clang -fobjc-nonfragile-abi -fnext-runtime -S test.m, I get the following assembly code:

    .file   "test.m"
    .text
    # Machine code for the instance method -[MySuperClass myMessage1] (the
    # original method name is shown in the clang-emitted comment on the label
    # line). The symbol is not declared .globl; within this listing its only
    # reference is the entry in l_OBJC_$_INSTANCE_METHODS_MySuperClass.
    .align  16, 0x90
    .type   _2D__5B_MySuperClass_20_myMessage1_5D_,@function
_2D__5B_MySuperClass_20_myMessage1_5D_: # @"\01-[MySuperClass myMessage1]"
.Ltmp0:
    .cfi_startproc
# BB#0:
    # Copy %rdi and %rsi into stack slots at negative offsets from %rsp.
    # NOTE(review): presumably these registers carry the implicit self and _cmd
    # arguments of the method -- confirm against the x86-64 calling convention.
    movq    %rdi, -8(%rsp)
    movq    %rsi, -16(%rsp)
    # The method body is empty, so return straight away.
    ret
.Ltmp1:
    # Symbol size = distance from the entry label to .Ltmp1.
    .size   _2D__5B_MySuperClass_20_myMessage1_5D_, .Ltmp1-_2D__5B_MySuperClass_20_myMessage1_5D_
.Ltmp2:
    .cfi_endproc
.Leh_func_end0:

    # Machine code for the instance method -[MyClass myMessage2] (see the
    # clang-emitted comment on the label line). Not declared .globl; within this
    # listing its only reference is the entry in l_OBJC_$_INSTANCE_METHODS_MyClass.
    .align  16, 0x90
    .type   _2D__5B_MyClass_20_myMessage2_5D_,@function
_2D__5B_MyClass_20_myMessage2_5D_:      # @"\01-[MyClass myMessage2]"
.Ltmp3:
    .cfi_startproc
# BB#0:
    # Copy %rdi and %rsi into stack slots at negative offsets from %rsp.
    # NOTE(review): presumably these registers carry the implicit self and _cmd
    # arguments of the method -- confirm against the x86-64 calling convention.
    movq    %rdi, -8(%rsp)
    movq    %rsi, -16(%rsp)
    # The method body is empty, so return straight away.
    ret
.Ltmp4:
    # Symbol size = distance from the entry label to .Ltmp4.
    .size   _2D__5B_MyClass_20_myMessage2_5D_, .Ltmp4-_2D__5B_MyClass_20_myMessage2_5D_
.Ltmp5:
    .cfi_endproc
.Leh_func_end1:

    # Machine code for main(). Unlike the two method bodies, this symbol is
    # exported with .globl.
    .globl  main
    .align  16, 0x90
    .type   main,@function
main:                                   # @main
.Ltmp6:
    .cfi_startproc
# BB#0:
    # Return value 0 in %eax, matching "return 0;" in the source.
    movl    $0, %eax
    # Also writes 0 to a stack slot below %rsp.
    # NOTE(review): presumably the unoptimised return-value slot -- confirm.
    movl    $0, -4(%rsp)
    ret
.Ltmp7:
    # Symbol size = distance from the entry label to .Ltmp7.
    .size   main, .Ltmp7-main
.Ltmp8:
    .cfi_endproc
.Leh_func_end2:

    # NUL-terminated name of the first class: 12 characters plus the terminator
    # = 13 bytes. Both read-only records emitted for MySuperClass (class and
    # metaclass) point at this string.
    .type   L_OBJC_CLASS_NAME_,@object # @"\01L_OBJC_CLASS_NAME_"
    .section    "__TEXT,__objc_classname,cstring_literals","aw",@progbits
L_OBJC_CLASS_NAME_:
    .asciz   "MySuperClass"
    .size   L_OBJC_CLASS_NAME_, 13

    # Read-only record describing the metaclass of MySuperClass: three 32-bit
    # words, 4 bytes of zero padding, then seven 8-byte slots (72 bytes total).
    # NOTE(review): the layout appears to be the runtime's struct class_ro_t;
    # field names are not visible in this listing -- confirm against
    # objc-runtime-new.h.
    .type   l_OBJC_METACLASS_RO_$_MySuperClass,@object # @"\01l_OBJC_METACLASS_RO_$_MySuperClass"
    .section    "__DATA, __objc_const","aw",@progbits
    .align  8
l_OBJC_METACLASS_RO_$_MySuperClass:
    # This word is 1 here but 0 in l_OBJC_CLASS_RO_$_MySuperClass; presumably a
    # flags word whose low bit marks a metaclass -- TODO confirm.
    .long   1                       # 0x1
    # 40 equals the .size of the class and metaclass objects in this file;
    # presumably the start offset and size of a class object -- TODO confirm.
    .long   40                      # 0x28
    .long   40                      # 0x28
    .zero   4
    .quad   0
    # Pointer to the class-name string "MySuperClass".
    .quad   L_OBJC_CLASS_NAME_
    # This is the slot that holds the instance-method list pointer in the
    # class's own read-only record; it is zero here, consistent with the source
    # declaring no class (+) methods.
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .size   l_OBJC_METACLASS_RO_$_MySuperClass, 72

    # Exported (.globl) metaclass object for MySuperClass: five pointers, 40 bytes.
    # NOTE(review): appears to follow the runtime's struct class_t; by position
    # the first two slots look like isa and superclass -- confirm against the
    # runtime headers.
    .type   OBJC_METACLASS_$_MySuperClass,@object # @"OBJC_METACLASS_$_MySuperClass"
    .section    "__DATA, __objc_data","aw",@progbits
    .globl  OBJC_METACLASS_$_MySuperClass
    .align  8
OBJC_METACLASS_$_MySuperClass:
    # Both leading slots reference the metaclass of the root class Object, which
    # is not defined in this file.
    .quad   OBJC_METACLASS_$_Object
    .quad   OBJC_METACLASS_$_Object
    # External symbols not defined in this file; presumably empty cache/vtable
    # placeholders supplied by the runtime library.
    .quad   _objc_empty_cache
    .quad   _objc_empty_vtable
    # Pointer to the read-only record emitted for this metaclass.
    .quad   l_OBJC_METACLASS_RO_$_MySuperClass
    .size   OBJC_METACLASS_$_MySuperClass, 40

    # Selector name of the first method as a NUL-terminated string
    # (10 characters + terminator = 11 bytes).
    .type   L_OBJC_METH_VAR_NAME_,@object # @"\01L_OBJC_METH_VAR_NAME_"
    .section    "__TEXT,__objc_methname,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_NAME_:
    .asciz   "myMessage1"
    .size   L_OBJC_METH_VAR_NAME_, 11

    # Method signature string. It is emitted once and referenced by the method
    # lists of both classes, since both methods are declared as -(void) with no
    # explicit arguments.
    # NOTE(review): in the Objective-C type-encoding notation this presumably
    # reads "void return, 16 bytes of arguments, object (self) at offset 0,
    # selector (_cmd) at offset 8" -- confirm against the Type Encodings docs.
    .type   L_OBJC_METH_VAR_TYPE_,@object # @"\01L_OBJC_METH_VAR_TYPE_"
    .section    "__TEXT,__objc_methtype,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_TYPE_:
    .asciz   "v16@0:8"
    .size   L_OBJC_METH_VAR_TYPE_, 8

    # Instance-method list for MySuperClass: an 8-byte header followed by one
    # 24-byte entry (32 bytes total).
    # NOTE(review): appears to follow the runtime's struct method_list_t with a
    # trailing array of struct method_t -- confirm against the runtime headers.
    .type   l_OBJC_$_INSTANCE_METHODS_MySuperClass,@object # @"\01l_OBJC_$_INSTANCE_METHODS_MySuperClass"
    .section    "__DATA, __objc_const","aw",@progbits
    .align  8
l_OBJC_$_INSTANCE_METHODS_MySuperClass:
    # 24 = size in bytes of one entry (three 8-byte pointers).
    .long   24                      # 0x18
    # 1 = number of entries that follow.
    .long   1                       # 0x1
    # The single entry: selector name, type string, and address of the code.
    .quad   L_OBJC_METH_VAR_NAME_
    .quad   L_OBJC_METH_VAR_TYPE_
    .quad   _2D__5B_MySuperClass_20_myMessage1_5D_
    .size   l_OBJC_$_INSTANCE_METHODS_MySuperClass, 32

    # Read-only record describing the class MySuperClass itself; same 72-byte
    # shape as the metaclass record. No .section directive precedes it, so it is
    # emitted into the section already selected for the method list
    # ("__DATA, __objc_const").
    # NOTE(review): the layout appears to be the runtime's struct class_ro_t --
    # confirm field names against objc-runtime-new.h.
    .type   l_OBJC_CLASS_RO_$_MySuperClass,@object # @"\01l_OBJC_CLASS_RO_$_MySuperClass"
    .align  8
l_OBJC_CLASS_RO_$_MySuperClass:
    # 0 here versus 1 in the metaclass record; presumably the flags word.
    .long   0                       # 0x0
    # NOTE(review): presumably instance start offset and instance size in bytes
    # (8 would be a single pointer-sized slot) -- TODO confirm.
    .long   8                       # 0x8
    .long   8                       # 0x8
    .zero   4
    .quad   0
    # Pointer to the class-name string "MySuperClass".
    .quad   L_OBJC_CLASS_NAME_
    # Pointer to the instance-method list for this class.
    .quad   l_OBJC_$_INSTANCE_METHODS_MySuperClass
    # Remaining slots are all zero in this example.
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .size   l_OBJC_CLASS_RO_$_MySuperClass, 72

    # Exported (.globl) class object for MySuperClass: five pointers, 40 bytes.
    # NOTE(review): appears to follow the runtime's struct class_t; by position
    # the first two slots look like isa and superclass -- confirm against the
    # runtime headers.
    .type   OBJC_CLASS_$_MySuperClass,@object # @"OBJC_CLASS_$_MySuperClass"
    .section    "__DATA, __objc_data","aw",@progbits
    .globl  OBJC_CLASS_$_MySuperClass
    .align  8
OBJC_CLASS_$_MySuperClass:
    # First slot: this class's own metaclass object.
    .quad   OBJC_METACLASS_$_MySuperClass
    # Second slot: the class object of the superclass Object (external symbol).
    .quad   OBJC_CLASS_$_Object
    # External symbols not defined in this file; presumably empty cache/vtable
    # placeholders supplied by the runtime library.
    .quad   _objc_empty_cache
    .quad   _objc_empty_vtable
    # Pointer to the read-only record for this class (name, method list, ...).
    .quad   l_OBJC_CLASS_RO_$_MySuperClass
    .size   OBJC_CLASS_$_MySuperClass, 40

    # NUL-terminated name of the second class: 7 characters plus the terminator
    # = 8 bytes. Both read-only records emitted for MyClass point at this string.
    .type   L_OBJC_CLASS_NAME_1,@object # @"\01L_OBJC_CLASS_NAME_1"
    .section    "__TEXT,__objc_classname,cstring_literals","aw",@progbits
L_OBJC_CLASS_NAME_1:
    .asciz   "MyClass"
    .size   L_OBJC_CLASS_NAME_1, 8

    # Read-only record describing the metaclass of MyClass: three 32-bit words,
    # 4 bytes of zero padding, then seven 8-byte slots (72 bytes total).
    # NOTE(review): the layout appears to be the runtime's struct class_ro_t;
    # field names are not visible in this listing -- confirm against
    # objc-runtime-new.h.
    .type   l_OBJC_METACLASS_RO_$_MyClass,@object # @"\01l_OBJC_METACLASS_RO_$_MyClass"
    .section    "__DATA, __objc_const","aw",@progbits
    .align  8
l_OBJC_METACLASS_RO_$_MyClass:
    # This word is 1 here but 0 in l_OBJC_CLASS_RO_$_MyClass; presumably a flags
    # word whose low bit marks a metaclass -- TODO confirm.
    .long   1                       # 0x1
    # 40 equals the .size of the class and metaclass objects in this file;
    # presumably the start offset and size of a class object -- TODO confirm.
    .long   40                      # 0x28
    .long   40                      # 0x28
    .zero   4
    .quad   0
    # Pointer to the class-name string "MyClass".
    .quad   L_OBJC_CLASS_NAME_1
    # This is the slot that holds the instance-method list pointer in the
    # class's own read-only record; it is zero here, consistent with the source
    # declaring no class (+) methods.
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .size   l_OBJC_METACLASS_RO_$_MyClass, 72

    # Exported (.globl) metaclass object for MyClass: five pointers, 40 bytes.
    # NOTE(review): appears to follow the runtime's struct class_t; by position
    # the first two slots look like isa and superclass -- confirm against the
    # runtime headers.
    .type   OBJC_METACLASS_$_MyClass,@object # @"OBJC_METACLASS_$_MyClass"
    .section    "__DATA, __objc_data","aw",@progbits
    .globl  OBJC_METACLASS_$_MyClass
    .align  8
OBJC_METACLASS_$_MyClass:
    # First slot: the metaclass of the root class Object (external symbol).
    .quad   OBJC_METACLASS_$_Object
    # Second slot: the metaclass of the direct superclass, MySuperClass.
    .quad   OBJC_METACLASS_$_MySuperClass
    # External symbols not defined in this file; presumably empty cache/vtable
    # placeholders supplied by the runtime library.
    .quad   _objc_empty_cache
    .quad   _objc_empty_vtable
    # Pointer to the read-only record emitted for this metaclass.
    .quad   l_OBJC_METACLASS_RO_$_MyClass
    .size   OBJC_METACLASS_$_MyClass, 40

    # Selector name of the second method as a NUL-terminated string
    # (10 characters + terminator = 11 bytes). No separate type string is
    # emitted for it; the method list below reuses L_OBJC_METH_VAR_TYPE_.
    .type   L_OBJC_METH_VAR_NAME_2,@object # @"\01L_OBJC_METH_VAR_NAME_2"
    .section    "__TEXT,__objc_methname,cstring_literals","aw",@progbits
L_OBJC_METH_VAR_NAME_2:
    .asciz   "myMessage2"
    .size   L_OBJC_METH_VAR_NAME_2, 11

    # Instance-method list for MyClass: an 8-byte header followed by one 24-byte
    # entry (32 bytes total). Only the method declared by MyClass itself is
    # listed; the inherited myMessage1 does not appear here.
    # NOTE(review): appears to follow the runtime's struct method_list_t with a
    # trailing array of struct method_t -- confirm against the runtime headers.
    .type   l_OBJC_$_INSTANCE_METHODS_MyClass,@object # @"\01l_OBJC_$_INSTANCE_METHODS_MyClass"
    .section    "__DATA, __objc_const","aw",@progbits
    .align  8
l_OBJC_$_INSTANCE_METHODS_MyClass:
    # 24 = size in bytes of one entry (three 8-byte pointers).
    .long   24                      # 0x18
    # 1 = number of entries that follow.
    .long   1                       # 0x1
    # The single entry: selector name, type string (shared with myMessage1),
    # and address of the code.
    .quad   L_OBJC_METH_VAR_NAME_2
    .quad   L_OBJC_METH_VAR_TYPE_
    .quad   _2D__5B_MyClass_20_myMessage2_5D_
    .size   l_OBJC_$_INSTANCE_METHODS_MyClass, 32

    # Read-only record describing the class MyClass itself; same 72-byte shape
    # as the metaclass record. No .section directive precedes it, so it is
    # emitted into the section already selected for the method list
    # ("__DATA, __objc_const").
    # NOTE(review): the layout appears to be the runtime's struct class_ro_t --
    # confirm field names against objc-runtime-new.h.
    .type   l_OBJC_CLASS_RO_$_MyClass,@object # @"\01l_OBJC_CLASS_RO_$_MyClass"
    .align  8
l_OBJC_CLASS_RO_$_MyClass:
    # 0 here versus 1 in the metaclass record; presumably the flags word.
    .long   0                       # 0x0
    # NOTE(review): presumably instance start offset and instance size in bytes
    # (8 would be a single pointer-sized slot) -- TODO confirm.
    .long   8                       # 0x8
    .long   8                       # 0x8
    .zero   4
    .quad   0
    # Pointer to the class-name string "MyClass".
    .quad   L_OBJC_CLASS_NAME_1
    # Pointer to the instance-method list for this class.
    .quad   l_OBJC_$_INSTANCE_METHODS_MyClass
    # Remaining slots are all zero in this example.
    .quad   0
    .quad   0
    .quad   0
    .quad   0
    .size   l_OBJC_CLASS_RO_$_MyClass, 72

    # Exported (.globl) class object for MyClass: five pointers, 40 bytes.
    # NOTE(review): appears to follow the runtime's struct class_t; by position
    # the first two slots look like isa and superclass -- confirm against the
    # runtime headers.
    .type   OBJC_CLASS_$_MyClass,@object # @"OBJC_CLASS_$_MyClass"
    .section    "__DATA, __objc_data","aw",@progbits
    .globl  OBJC_CLASS_$_MyClass
    .align  8
OBJC_CLASS_$_MyClass:
    # First slot: this class's own metaclass object.
    .quad   OBJC_METACLASS_$_MyClass
    # Second slot: the class object of the direct superclass, MySuperClass.
    .quad   OBJC_CLASS_$_MySuperClass
    # External symbols not defined in this file; presumably empty cache/vtable
    # placeholders supplied by the runtime library.
    .quad   _objc_empty_cache
    .quad   _objc_empty_vtable
    # Pointer to the read-only record for this class (name, method list, ...).
    .quad   l_OBJC_CLASS_RO_$_MyClass
    .size   OBJC_CLASS_$_MyClass, 40

    # Table with one 8-byte pointer per class defined in this file (2 x 8 = 16
    # bytes), placed in its own __objc_classlist section. Note that it holds the
    # addresses of the class objects, not the class objects themselves, which
    # live in "__DATA, __objc_data".
    # NOTE(review): no_dead_strip presumably keeps the linker from discarding
    # this otherwise unreferenced table -- confirm.
    .type   L_OBJC_LABEL_CLASS_$,@object # @"\01L_OBJC_LABEL_CLASS_$"
    .section    "__DATA, __objc_classlist, regular, no_dead_strip","aw",@progbits
    .align  8
L_OBJC_LABEL_CLASS_$:
    .quad   OBJC_CLASS_$_MySuperClass
    .quad   OBJC_CLASS_$_MyClass
    .size   L_OBJC_LABEL_CLASS_$, 16

    # Two 32-bit words (8 bytes) in a dedicated __objc_imageinfo section.
    # NOTE(review): presumably a version word followed by a flags word describing
    # how this object file was compiled; the meaning of the value 16 cannot be
    # determined from this listing -- confirm against the runtime sources.
    .type   L_OBJC_IMAGE_INFO,@object # @"\01L_OBJC_IMAGE_INFO"
    .section    "__DATA, __objc_imageinfo, regular, no_dead_strip","a",@progbits
    .align  4
L_OBJC_IMAGE_INFO:
    .long   0                       # 0x0
    .long   16                      # 0x10
    .size   L_OBJC_IMAGE_INFO, 8


    # Empty section with no contents.
    # NOTE(review): by GNU toolchain convention this presumably marks the object
    # as not requiring an executable stack -- confirm.
    .section    ".note.GNU-stack","",@progbits

My question is: how does the Objective-C runtime library, which must be linked against test.o so that the executable file can be successfully created, retrieve the method list in order to create, for example, a vtable? Is it possible to use the .section ..., @function, .section ..., @object or .section ..., @progbits assembly directives to get this information, at least at link time?

like image 515
LuisABOL Avatar asked Aug 11 '12 15:08

LuisABOL


People also ask

What does the Objective-C runtime do?

The Objective-C runtime is a runtime library that provides support for the dynamic properties of the Objective-C language, and as such is linked to by all Objective-C apps. Objective-C runtime library support functions are implemented in the shared library found at /usr/lib/libobjc.A.dylib.

How does Objective-C categories work?

Categories provide the ability to add functionality to an object without subclassing or changing the actual object. A handy tool, they are often used to add methods to existing classes, such as NSString or your own custom objects.

How do you declare method in Objective-C?

An Objective-C method declaration includes the parameters as part of its name, using colons, like this: - (void)someMethodWithValue:(SomeType)value; As with the return type, the parameter type is specified in parentheses, just like a standard C type-cast.


1 Answer

The compiler, linker, and runtime work together.

First, the compiler parses the source code for each class and emits directives like .long, .zero, and .quad describing the class's instance variables, properties, selectors, and methods. The assembler turns these directives into raw data.

The data is in a format that the runtime understands. For example, the data starting at symbol OBJC_CLASS_$_MyClass matches the layout of the runtime's struct class_t (defined in objc-runtime-new.h). The data at symbol l_OBJC_CLASS_RO_$_MyClass matches the layout of the runtime's struct class_ro_t (although most of the fields are 0 because the runtime updates them when it loads the class). The struct class_ro_t has a baseMethods field of type method_list_t *, which in the case of l_OBJC_CLASS_RO_$_MyClass is initialized to l_OBJC_$_INSTANCE_METHODS_MyClass. At l_OBJC_$_INSTANCE_METHODS_MyClass you will find data laid out like a struct method_list_t, which ends with an array of struct method_t - one for each method in the class. In your example, it's not very interesting because each of your classes has only one method.

The compiler uses the .section directives to tell the linker how to group chunks of that data together. For example, a pointer to each struct class_t chunk is put in a section named __objc_classlist (in the listing above, L_OBJC_LABEL_CLASS_$ holds the addresses of OBJC_CLASS_$_MySuperClass and OBJC_CLASS_$_MyClass, while the class_t data itself lives in __objc_data). This way, the runtime can just look up the section named __objc_classlist, and then process the entire section as an array of pointers to struct class_t. Take a look at the GETSECT macro in objc-file.mm.

The linker arranges for the function _objc_init (in objc-os.mm) to run very early in the lifetime of your process, before main. The _objc_init function registers some callbacks with the dynamic loader. In particular, it tells the loader to call map_images (in objc-runtime-new.mm), which calls map_images_nolock, which eventually calls _read_images. The _read_images function actually parses those chunks of data emitted by the compiler and turns them into the data structures that objc_msgSend uses to actually send messages to objects.

You can download an archive of the Mac OS X 10.8 Objective-C runtime source code to learn more. This archive also contains source files for iOS/ARM (and even Windows!), although it might not correspond exactly to any version of iOS.

like image 149
rob mayoff Avatar answered Nov 15 '22 19:11

rob mayoff