Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Python library for converting plain text (ASCII) into GSM 7-bit character set?

Is there a Python library for encoding ASCII text into the GSM 7-bit character set (for sending SMS)?

like image 781
M K Saravanan Avatar asked Mar 16 '10 08:03

M K Saravanan


2 Answers

I took some hints from gnibbler's answer. Here is a script I put together after studying an online converter (http://smstools3.kekekasvi.com/topic.php?id=288); it works correctly for me, for both encoding and decoding.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

gsm = (u"@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
   u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ`¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
   u"|````````````````````````````````````€``````````````````````````")

def get_encode(currentByte, index, bitRightCount, position, nextPosition, leftShiftCount, bytesLength, bytes):
    """Return one packed output octet as two uppercase hex digits.

    The octet is built from the remaining high bits of the current septet
    plus the low bits of the following septet (if there is one).

    Args:
        currentByte: septet value being packed.
        index: slot number of this septet; slots >= 8 produce no output
            (gsm_encode passes 1..8, the 8th septet of every group having
            already been folded into the previous octet).
        bitRightCount: number of low bits of currentByte already emitted.
        position: unused; kept so existing callers keep working.
        nextPosition: index of the following septet in ``bytes``.
        leftShiftCount: how far the following septet is shifted left before
            being merged in.
        bytesLength: number of septets in ``bytes``.
        bytes: full list of septet values.

    Returns:
        A two-character uppercase hex string, or '' when index >= 8.
    """
    if index >= 8:
        return ''
    octet = currentByte >> bitRightCount
    if nextPosition < bytesLength:
        octet |= bytes[nextPosition] << leftShiftCount
    # '%02X' gives the same text as chr(x).encode('hex').upper() but also
    # works on Python 3, where the 'hex' text codec no longer exists.
    return '%02X' % (octet & 0xFF)

def getBytes(plaintext):
    """Translate text into a list of GSM septet values (ints).

    Args:
        plaintext: text to encode. Unicode text is used as is, byte strings
            are decoded as UTF-8, anything else is converted with str() first.

    Returns:
        A list of ints: the position in ``gsm`` for characters found there,
        or 27 (escape) followed by the position in ``ext`` for characters
        found there. Characters present in neither table are dropped.
    """
    text_type = type(u'')
    if not isinstance(plaintext, (bytes, text_type)):
        plaintext = str(plaintext)
    if isinstance(plaintext, bytes):
        # Only byte strings need decoding; calling str() on unicode text would
        # fail for non-ASCII characters on Python 2.
        plaintext = plaintext.decode('utf-8')

    septets = []
    for char in plaintext:
        idx = gsm.find(char)
        if idx != -1:
            septets.append(idx)
            continue
        idx = ext.find(char)
        # '`' is the filler used for unused slots of ext, not a real
        # extension character, so it must never be emitted from that table.
        if idx != -1 and char != u'`':
            septets.extend((27, idx))
    return septets

def gsm_encode(plaintext):
    """Pack ``plaintext`` into GSM 7-bit form and return it as a hex string.

    The text is first turned into septets by getBytes(); get_encode() then
    produces the hex digits of each output octet, and the pieces are
    concatenated in order.
    """
    septets = getBytes(plaintext)
    total = len(septets)
    pieces = []
    for pos, septet in enumerate(septets):
        # Slot of this septet inside its group of eight, counted from 1.
        slot = (pos % 8) + 1
        pieces.append(
            get_encode(septet, slot, slot - 1, pos, pos + 1, 8 - slot, total, septets)
        )
    return "".join(pieces)


def chunks(l, n):
    """Cut ``l`` into consecutive slices of ``n`` items and return them as a list.

    A size below 1 is treated as 1; the final slice may be shorter than ``n``.
    """
    step = 1 if n < 1 else n
    pieces = []
    for start in range(0, len(l), step):
        pieces.append(l[start:start + step])
    return pieces

def _gsm_septet_to_char(septet, escaped):
    """Map one septet to ``(text, escaped_next)``.

    Septet 27 is the escape: it yields no text and selects ``ext`` for the
    next septet. Any other septet is looked up in ``ext`` when ``escaped`` is
    true and in ``gsm`` otherwise, and clears the escape state.
    """
    if septet == 27:
        return u'', True
    table = ext if escaped else gsm
    return table[septet], False


def gsm_decode(codedtext):
    """Decode a hex string of packed GSM 7-bit data into text.

    Args:
        codedtext: hex digits, two per octet, as produced by gsm_encode().

    Returns:
        The decoded text.

    Note:
        After every 7th octet the 7 buffered bits are always decoded as one
        more septet, so zero padding in that position yields a trailing '@'.
    """
    buffered = 0          # bits read but not yet emitted, lowest bit first
    octets_in_group = 0   # octets consumed in the current group of seven
    escaped = False
    decoded = []
    for hexpair in chunks(codedtext, 2):
        # Append the new octet above the bits left over from earlier octets.
        buffered += int(hexpair, 16) << octets_in_group
        octets_in_group += 1

        # Emit the lowest 7 bits, then discard them.
        char, escaped = _gsm_septet_to_char(buffered % 128, escaped)
        decoded.append(char)
        buffered >>= 7

        # Seven octets carry eight septets: the buffer now holds a full one.
        if octets_in_group == 7:
            char, escaped = _gsm_septet_to_char(buffered % 128, escaped)
            decoded.append(char)
            octets_in_group = 0
            buffered = 0
    return u''.join(decoded)
like image 132
Jimmy Ilenloa Avatar answered Oct 23 '22 09:10

Jimmy Ilenloa


There is now :)

Thanks to Chad for pointing out that this wasn't quite right

Python2 version

# -*- coding: utf8 -*- 
# Basic character table: gsm_encode() uses a character's position in this
# string as its code.
gsm = (u"@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
       u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
# Extension table: characters found here are emitted as code 27 followed by
# their position in this string. '`' appears to be a filler for unused slots.
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
       u"|````````````````````````````````````€``````````````````````````")

def gsm_encode(plaintext):
    """Return the GSM 7-bit codes of ``plaintext`` as a lowercase hex string.

    Each character found in ``gsm`` becomes one code (its position in the
    table); each character found in ``ext`` becomes the escape code 27
    followed by its position there. Characters in neither table are dropped.
    The codes are not bit-packed: every code takes two hex digits.
    """
    codes = []
    for c in plaintext:
        idx = gsm.find(c)
        if idx != -1:
            codes.append(idx)
            continue
        idx = ext.find(c)
        # '`' is the filler for unused slots in ext; without this check a
        # backtick would be emitted as escape + code 0.
        if idx != -1 and c != u'`':
            codes.extend((27, idx))
    # Formatting each code directly gives the same text as the Python 2
    # 'hex' codec without building an intermediate byte string.
    return "".join("%02x" % code for code in codes)

# Demo: print the hex encoding of a sample message (Python 2 print statement).
print gsm_encode(u"Hello World")

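The output is hex-encoded; you can skip the hex step if you want the raw binary stream. Note that the septets are not bit-packed here: each character code occupies a full byte of the output.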

Python3 version

# -*- coding: utf8 -*- 
import binascii
# Basic character table: gsm_encode() uses a character's position in this
# string as its code.
gsm = ("@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?"
       "¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
# Extension table: characters found here are emitted as code 27 followed by
# their position in this string. '`' appears to be a filler for unused slots.
ext = ("````````````````````^```````````````````{}`````\\````````````[~]`"
       "|````````````````````````````````````€``````````````````````````")

def gsm_encode(plaintext):
    """Return the GSM 7-bit codes of ``plaintext`` as hex digits (bytes).

    Each character found in ``gsm`` becomes one code (its position in the
    table); each character found in ``ext`` becomes the escape code 27
    followed by its position there. Characters in neither table are dropped.
    The codes are not bit-packed: every code takes two hex digits.
    """
    codes = []
    for c in plaintext:
        idx = gsm.find(c)
        if idx != -1:
            codes.append(idx)
            continue
        idx = ext.find(c)
        # '`' is the filler for unused slots in ext; without this check a
        # backtick would be emitted as escape + code 0.
        if idx != -1 and c != '`':
            codes.extend((27, idx))
    # All codes are below 128, so bytes(codes) equals the UTF-8 encoding of
    # the chr()-built string used previously.
    return binascii.b2a_hex(bytes(codes))

# Demo: print the hex encoding of a sample message (shown as a bytes literal).
print(gsm_encode("Hello World"))
like image 37
John La Rooy Avatar answered Oct 23 '22 09:10

John La Rooy