Translating assembly to pseudocode

Question

I'm working on a homework project involving a "bomb" written in compiled C, which I have to reverse-engineer to come up with 5 strings that will disarm each of the five "phases" of the bomb. I'm stuck on phase 3 right now, trying to translate the assembly (x86, AT&T syntax I believe) produced by gdb for that function. What I've been able to figure out so far is that it is trying to take a string of six numbers as user input and judge them on some criteria, but that's where I'm losing it. The function is as follows (with my attempted pseudocode translation next to it).

0x08048816 <phase_3+0>: push   %ebp
0x08048817 <phase_3+1>: mov    %esp,%ebp
0x08048819 <phase_3+3>: push   %edi
0x0804881a <phase_3+4>: push   %ebx
0x0804881b <phase_3+5>: sub    $0x30,%esp
0x0804881e <phase_3+8>: lea    -0x24(%ebp),%eax                             
0x08048821 <phase_3+11>:    mov    %eax,0x4(%esp)                           
0x08048825 <phase_3+15>:    mov    0x8(%ebp),%eax                           
0x08048828 <phase_3+18>:    mov    %eax,(%esp)                              
0x0804882b <phase_3+21>:    call   0x8048d2c <read_six_numbers>
0x08048830 <phase_3+26>:    mov    -0x24(%ebp),%eax                     eax = p1
0x08048833 <phase_3+29>:    cmp    $0x1,%eax                             if eax != 1
0x08048836 <phase_3+32>:    je     0x804883d <phase_3+39>                   explode bomb
0x08048838 <phase_3+34>:    call   0x8048fec <explode_bomb>             else
0x0804883d <phase_3+39>:    movl   $0x1,-0xc(%ebp)                       ebp[-12] = 1
0x08048844 <phase_3+46>:    jmp    0x804888a <phase_3+116>              while ebp[-12] < 5 {
0x08048846 <phase_3+48>:    mov    -0xc(%ebp),%eax                          eax = ebp[-12]
0x08048849 <phase_3+51>:    mov    -0x24(%ebp,%eax,4),%eax                  {magic}
0x0804884d <phase_3+55>:    mov    %eax,%ebx                                ebx = eax
0x0804884f <phase_3+57>:    mov    -0xc(%ebp),%eax                          eax = ebp[-12]
0x08048852 <phase_3+60>:    sub    $0x1,%eax                                 eax -= 1
0x08048855 <phase_3+63>:    mov    -0x24(%ebp,%eax,4),%eax                  {magic}
0x08048859 <phase_3+67>:    mov    %eax,%edx                                edx = eax
0x0804885b <phase_3+69>:    mov    0x804a6d8,%eax                           eax = 0x804a6d8
0x08048860 <phase_3+74>:    mov    $0xffffffff,%ecx                      ecx = 255
0x08048865 <phase_3+79>:    mov    %eax,-0x2c(%ebp)                         ebp[-12] = eax
0x08048868 <phase_3+82>:    mov    $0x0,%eax                                 eax = 0
0x0804886d <phase_3+87>:    cld                         
0x0804886e <phase_3+88>:    mov    -0x2c(%ebp),%edi                         edi = ebp[-12]
0x08048871 <phase_3+91>:    repnz scas %es:(%edi),%al                       {deep magic}
0x08048873 <phase_3+93>:    mov    %ecx,%eax                                eax = ecx
0x08048875 <phase_3+95>:    not    %eax                                     eax = -eax
0x08048877 <phase_3+97>:    sub    $0x1,%eax                                 eax -= 1
0x0804887a <phase_3+100>:   imul   %edx,%eax                                eax *= edx
0x0804887d <phase_3+103>:   cmp    %eax,%ebx                                if (eax != ebx)
0x0804887f <phase_3+105>:   je     0x8048886 <phase_3+112>                      explode_bomb
0x08048881 <phase_3+107>:   call   0x8048fec <explode_bomb>                 else
0x08048886 <phase_3+112>:   addl   $0x1,-0xc(%ebp)                           ebp[-12] += 1
0x0804888a <phase_3+116>:   cmpl   $0x5,-0xc(%ebp)
0x0804888e <phase_3+120>:   jle    0x8048846 <phase_3+48>               }
0x08048890 <phase_3+122>:   add    $0x30,%esp
0x08048893 <phase_3+125>:   pop    %ebx
0x08048894 <phase_3+126>:   pop    %edi
0x08048895 <phase_3+127>:   pop    %ebp
0x08048896 <phase_3+128>:   ret

I am at least a little bit (though not a lot) confident in most of this; the lines that I'm absolutely sure are wrong are the three lines currently marked as "magic" -- phase_3+51, phase_3+63, and phase_3+91 (the two mov lines with weird syntax and the repnz). I haven't seen either syntax around much and I can't figure out what search terms to use to look them up.

Any general (and/or scathing) critiques of my attempt at this? Obvious places where I'm going off the rails? Obviously, since this is homework I don't need someone to give me the answer; I just want to know if my interpretation is generally sound (and what those three lines mean that I'm baffled by).

Thanks much for any help!

**EDIT**

The read_six_numbers function disassembles as follows:

0x08048d2c <read_six_numbers+0>:    push   %ebp
0x08048d2d <read_six_numbers+1>:    mov    %esp,%ebp
0x08048d2f <read_six_numbers+3>:    push   %esi
0x08048d30 <read_six_numbers+4>:    push   %ebx
0x08048d31 <read_six_numbers+5>:    sub    $0x30,%esp
0x08048d34 <read_six_numbers+8>:    mov    0xc(%ebp),%eax
0x08048d37 <read_six_numbers+11>:   add    $0x14,%eax
0x08048d3a <read_six_numbers+14>:   mov    0xc(%ebp),%edx
0x08048d3d <read_six_numbers+17>:   add    $0x10,%edx
0x08048d40 <read_six_numbers+20>:   mov    0xc(%ebp),%ecx
0x08048d43 <read_six_numbers+23>:   add    $0xc,%ecx
0x08048d46 <read_six_numbers+26>:   mov    0xc(%ebp),%ebx
0x08048d49 <read_six_numbers+29>:   add    $0x8,%ebx
0x08048d4c <read_six_numbers+32>:   mov    0xc(%ebp),%esi
0x08048d4f <read_six_numbers+35>:   add    $0x4,%esi
0x08048d52 <read_six_numbers+38>:   mov    %eax,0x1c(%esp)
0x08048d56 <read_six_numbers+42>:   mov    %edx,0x18(%esp)
0x08048d5a <read_six_numbers+46>:   mov    %ecx,0x14(%esp)
0x08048d5e <read_six_numbers+50>:   mov    %ebx,0x10(%esp)
0x08048d62 <read_six_numbers+54>:   mov    %esi,0xc(%esp)
0x08048d66 <read_six_numbers+58>:   mov    0xc(%ebp),%eax
0x08048d69 <read_six_numbers+61>:   mov    %eax,0x8(%esp)
0x08048d6d <read_six_numbers+65>:   movl   $0x804965d,0x4(%esp)
0x08048d75 <read_six_numbers+73>:   mov    0x8(%ebp),%eax
0x08048d78 <read_six_numbers+76>:   mov    %eax,(%esp)
0x08048d7b <read_six_numbers+79>:   call   0x80485a4 <sscanf@plt>
0x08048d80 <read_six_numbers+84>:   mov    %eax,-0xc(%ebp)
0x08048d83 <read_six_numbers+87>:   cmpl   $0x5,-0xc(%ebp)
0x08048d87 <read_six_numbers+91>:   jg     0x8048d8e <read_six_numbers+98>
0x08048d89 <read_six_numbers+93>:   call   0x8048fec <explode_bomb>
0x08048d8e <read_six_numbers+98>:   add    $0x30,%esp
0x08048d91 <read_six_numbers+101>:  pop    %ebx
0x08048d92 <read_six_numbers+102>:  pop    %esi
0x08048d93 <read_six_numbers+103>:  pop    %ebp
0x08048d94 <read_six_numbers+104>:  ret

ninjalj · Accepted Answer

mov    -0x24(%ebp,%eax,4),%eax

The above instruction is accessing an element of an array. This is called SIB addressing in x86, for Scale, Index, Base. There is also an Offset (displacement) component, -0x24 here. The array is based at an address determined by the Base register (EBP here) plus the offset (when using a frame pointer, local variables, including arrays, are addressed as an offset from the frame pointer). The element number is in the Index register (EAX here). The size of each element is determined by the Scale (4 here).

mov    0x804a6d8,%eax
mov    $0xffffffff,%ecx
mov    %eax,-0x2c(%ebp)
mov    $0x0,%eax
cld                         
mov    -0x2c(%ebp),%edi
repnz scas %es:(%edi),%al
mov    %ecx,%eax
not    %eax
sub    $0x1,%eax

This is just strlen() of the string whose pointer is stored at 0x804a6d8 (in AT&T syntax an operand without a $ prefix is a memory reference, so the first mov loads the pointer held at that address rather than the constant itself). ES:EDI points to the string to scan (compare against a reference byte). AL contains the character to scan for: 0 - ASCII NUL. cld sets the direction for the scan: ascending (std would make the scan descending). ECX is initialized to ~0 = -1: all bits 1. repnz repeats the scas (SCAN STRING) instruction, decrementing ECX each time, while ECX is not zero (which will not happen since ECX is big enough to prevent that) and the scan is not successful (NZ: while the compare between the string byte and the reference AL didn't set the zero flag). After that, ECX contains -1-(steps in the scan). NOT makes that (steps in the scan). SUB makes that (steps in the scan) - 1 = (length of string not including the terminating NUL). Also explained at http://www.int80h.org/strlen/.

Translating assembly to pseudocode

Tags:

c

assembly

reverse-engineering

pseudocode

disassembly

rosalindwills

1 Answers

ninjalj

Recent Activity

Donate For Us

Translating assembly to pseudocode

Tags:

c

assembly

reverse-engineering

pseudocode

disassembly

rosalindwills

1 Answers

ninjalj

Related questions

Recent Activity

Donate For Us