Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Counting character frequencies in an array of characters - x86 Assembly

Tags:

x86

assembly

I'm trying to count the occurrences of characters in a string. My code is below:

; Character-frequency counter as posted in the question (16-bit DOS program).
; NOTE(review): this listing does not assemble as written; the two
; instructions inside the L1 loop are the ones the question is about.
data segment 'DSEG'
    text        db  "This is a sentence.",0     ; string to analyse, null-terminated
    textSize    dw  $ - text - 1                ; size of string, - 1 to account for null-termination character
    freqTable   dd  256 DUP(0)                  ; 256 dword counters, all initialised to 0
ends 'DSEG'

code segment 'CSEG'
start:                          
mov ax, data        ; set segment registers: DS and ES both point at the data segment
mov ds, ax
mov es, ax
;---------------------------------------

sub cx, cx              ; zeroes CX; redundant, the next instruction overwrites all of CX
mov cx, textSize        ; number of times to loop
L1:
    ; NOTE(review): CX is not a valid base/index register in 16-bit addressing
    ; (only BX, BP, SI and DI are), and a 16-bit AX destination would load two
    ; bytes rather than a single character.
    mov ax, [OFFSET text + cx - 1]  ; loop from back using cx, put character in ax
    ; NOTE(review): AX cannot be used as a scaled index in 16-bit addressing,
    ; and the memory operand carries no size (byte/word/dword) for INC.
    inc [OFFSET freqTable + 4*ax]   ; increment table's index at the ascii value of character
    LOOP L1                         ; decrement CX and repeat while CX is not zero

;---------------------------------------
mov ax, 4c00h       ; return to OS (INT 21h function 4Ch, exit code 0)
int 21h

ends 'CSEG'
end start           ; set entry point

I made an array of DWORDS where each index would represent a character. I then loop through the string and tried to increment the array at the ascii value of each character.

However, I get a wrong parameters error when I try to increment in the loop. I'm not sure what is causing this error. I am guessing I can't just increment the way I'm trying to. How do I properly create the frequency table? Am I missing something small?

like image 723
aanrv Avatar asked May 21 '15 20:05

aanrv


2 Answers

X86 doesn't allow for memory addressing through use of the AX and CX registers as you did. You can however use EAX and ECX for this purpose.
Throughout the next code snippet, bits EAX[8,31] and ECX[16,31] will remain zero.

;---------------------------------------
; Count every character of `text` into `freqTable` (256 dword counters).
; Walks the string from its last character to its first.
; EAX[8,31] and ECX[16,31] stay zero for the whole loop, so EAX and ECX
; can be used directly inside the 32-bit addressing forms below.
 xor  eax, eax            ; clear EAX so AL alone forms a clean table index
 xor  ecx, ecx            ; clear ECX so CX alone forms a clean string index
 mov  cx, textSize        ; number of times to loop
L1:
 mov  al, [OFFSET text + ecx - 1]           ; AL = character at index ECX-1
 inc  dword ptr [OFFSET freqTable + eax*4]  ; explicit size: OFFSET drops the symbol's type,
                                            ; so INC cannot infer that the counters are dwords
 loop L1
 ;---------------------------------------
like image 149
Sep Roland Avatar answered Oct 08 '22 23:10

Sep Roland


To help you understand how to count characters, I created the following little program with EMU8086 (compatible with your assembly): the program asks the user for a filename, opens the file, reads all characters and counts them, and closes the file.

The next image shows how it works: there is an array of frequencies ("freq_array") with 256 positions. Each position is the counter of the corresponding char; for example, position 65 is the counter for 'A' (chr(65)).

enter image description here

Every time a char is read from the file, the char itself is used as the offset to reach its counter. For example, if the char 48 ('0') is read from the file, the number 48 is added to the array offset (offset + 48, scaled by the size of one counter in the code below), and that position is incremented. When the file ends, all its chars have been counted.

Now the code :

.model small
.stack 100h

;-----------------------------------------
;DATA USED BY THE WHOLE PROGRAM.

.data

freq_array   dw 256 dup(0) ;ONE 16-BIT COUNTER FOR EACH POSSIBLE BYTE VALUE (0..255).

msj          db 13,10,'Enter name of file: $'

;INPUT BUFFER IN THE LAYOUT EXPECTED BY INT 21H, AH=0AH.
filename     db 99        ;BUFFER CAPACITY (UP TO 98 CHARACTERS PLUS CHR(13)).
             db ?         ;FILLED BY DOS: HOW MANY CHARACTERS WERE TYPED.
             db 99 dup(0) ;THE TYPED CHARACTERS, TERMINATED BY CHR(13).

filehandler  dw ?         ;HANDLE RETURNED WHEN THE FILE IS OPENED.

the_char     db ?         ;ONE-BYTE BUFFER FOR THE CHARACTER JUST READ.

;-----------------------------------------
;PROGRAM ENTRY POINT: ASK FOR A FILE, COUNT ITS CHARACTERS,
;WAIT FOR A KEY AND RETURN TO DOS.

.code
start:

;MAKE DS POINT TO THE DATA SEGMENT.
  mov  ax, @data
  mov  ds, ax

  call get_source_file        ;ASK THE USER FOR THE FILE NAME.
  call count_chars            ;COUNT EVERY CHARACTER OF THE FILE INTO FREQ_ARRAY.

;PAUSE UNTIL A KEY IS PRESSED (INPUT WITHOUT ECHO).
  mov  ah, 7
  int  21h

;TERMINATE WITH EXIT CODE 0.
  mov  ah, 4ch
  mov  al, 0
  int  21h

;-----------------------------------------

;ASK THE USER FOR A FILE NAME AND STORE IT, ZERO-TERMINATED,
;STARTING AT FILENAME + 2.
;IN:      NOTHING.
;OUT:     FILENAME BUFFER FILLED, CHR(13) REPLACED BY CHR(0).
;CLOBBERS: AX, CX, DX, SI, FLAGS.
get_source_file proc
;SHOW THE PROMPT ('$'-TERMINATED STRING).
  mov  ah, 9
  mov  dx, offset msj
  int  21h

;BUFFERED KEYBOARD INPUT INTO FILENAME.
  mov  dx, offset filename
  mov  ah, 0Ah
  int  21h

;DOS ENDS THE TYPED TEXT WITH CHR(13), BUT THE OPEN-FILE SERVICE
;NEEDS A CHR(0) TERMINATOR, SO OVERWRITE THE CHR(13).
  mov  si, offset filename + 1 ;SI -> LENGTH BYTE.
  xor  cx, cx                  ;CH = 0 SO CX HOLDS THE LENGTH ALONE.
  mov  cl, [ si ]              ;CX = NUMBER OF CHARACTERS TYPED.
  inc  cx                      ;SKIP THE LENGTH BYTE ITSELF.
  add  si, cx                  ;SI -> CHR(13) RIGHT AFTER THE TEXT.
  mov  byte ptr [ si ], 0      ;TERMINATE THE NAME WITH CHR(0).

  ret
get_source_file endp

;-----------------------------------------
;READ ALL CHARACTERS FROM FILE INCREASING THE COUNTER OF
;EACH CHARACTER IN THE ARRAY OF FREQUENCIES. EACH CHARACTER
;IS USED AS THE OFFSET OF ITS OWN COUNTER, EXAMPLE: THE
;COUNTER FOR 'A' IS THE POSITION 65 OF FREQ_ARRAY.

;IN:      FILENAME + 2 HOLDS A ZERO-TERMINATED FILE NAME.
;OUT:     FREQ_ARRAY UPDATED; LEFT UNTOUCHED IF THE FILE CANNOT BE OPENED.
;CLOBBERS: AX, BX, CX, DX, SI, FLAGS.
;NOTE:    COUNTERS ARE 16 BITS, SO EACH ONE WRAPS AROUND AFTER 65535.
count_chars proc
;OPEN FILE.
  mov  ah, 3dh          ;SERVICE TO OPEN FILE.
  mov  al, 0            ;OPEN AS READ ONLY.
  mov  dx, offset filename + 2
  int  21h  
  jc   count_done       ;CARRY SET = OPEN FAILED: AX IS AN ERROR CODE,
                        ;NOT A HANDLE, SO THERE IS NOTHING TO READ OR CLOSE.
  mov  filehandler, ax ;NECESSARY FOR OPERATIONS ON FILE.

;COUNT CHARACTERS.
reading:  
;READ ONE CHAR FROM FILE.
  mov  ah, 3fh          ;SERVICE TO READ FROM FILE.
  mov  bx, filehandler
  mov  cx, 1            ;HOW MANY BYTES TO READ.
  mov  dx, offset the_char ;WHERE TO STORE THE READ BYTES.  
  int  21h              
  jc   end_reading      ;CARRY SET = READ ERROR: STOP COUNTING AND CLOSE.

;CHECK END OF FILE.
  cmp  ax, 0
  je   end_reading      ;IF READ ZERO BYTES, FINISH.

;INCREASE COUNTER. THE CHAR ITSELF IS BEEN USED AS INDEX: THE
;COUNTER FOR CHAR 65 ('A') IS IN THE 65th POSITION OF THE ARRAY.
  mov  si, offset freq_array
  mov  al, the_char     ;USE CHAR AS OFFSET OF ITS OWN COUNTER.
  mov  ah, 0            ;CLEAR AH TO USE AX.
  shl  ax, 1            ;AX * 2, BECAUSE EVERY COUNTER IS 2 BYTES.
  add  si, ax           ;SI POINTS TO COUNTER POSITION.
  inc  [ word ptr si ]  ;INCREMENT COUNTER FOR CURRENT CHAR.
  jmp  reading          ;REPEAT PROCESS.

end_reading:           
;CLOSE FILE.
  mov  ah, 3eh          ;SERVICE TO CLOSE FILE.
  mov  bx, filehandler
  int  21h

count_done:
  ret
count_chars endp

;-----------------------------------------

end start

Hope this helps you.

This version uses 16-bit counters, because the array is declared with DW. To use 32-bit counters (array declared with DD), change the following lines:

freq_array   dd 256 dup(0)

shl  ax, 2             ;AX * 4, BECAUSE EVERY COUNTER IS 4 BYTES.

inc  [ dword ptr si ]  ;INCREMENT COUNTER FOR CURRENT CHAR.
like image 30
Jose Manuel Abarca Rodríguez Avatar answered Oct 08 '22 23:10

Jose Manuel Abarca Rodríguez