python display unicode in html

Question

I'm writing script to export my links and their titles from chrome to html.
Chrome bookmarks stored as json, in utf encoding
Some titles are on Russian therefore they stored like that:
"name": "\u0425\u0430\u0431\u0440\ ..."

import codecs
f = codecs.open("chrome.json","r", "utf-8")
data = f.readlines()

urls = [] # for links
names = [] # for link titles

ind = 0

for i in data:
    if i.find('"url":') != -1:
        urls.append(i.split('"')[3])
        names.append(data[ind-2].split('"')[3])
    ind += 1

fw = codecs.open("chrome.html","w","utf-8")
fw.write("<html><body>
")
for n in names:
    fw.write(n + '<br>')
    # print type(n) # this will return <type 'unicode'> for each url!
fw.write("</body></html>")

Now, in chrome.html I got those displayed as \u0425\u0430\u0431...
How I can turn them back to Russian?
using python 2.5

Edit: Solved!

s = '\u041f\u0440\u0438\u0432\u0435\u0442 world!'
type(s)
<type 'str'>

print s.decode('raw-unicode-escape').encode('utf-8')
Привет world!

That's what I needed, to convert str of \u041f... into unicode.

f = open("chrome.json", "r")
data = f.readlines()
f.close()

urls = [] # for links
names = [] # for link titles

ind = 0

for i in data:
    if i.find('"url":') != -1:
        urls.append(i.split('"')[3])
        names.append(data[ind-2].split('"')[3])
    ind += 1

fw = open("chrome.html","w")
fw.write("<html><body>
")
for n in names:
    fw.write(n.decode('raw-unicode-escape').encode('utf-8') + '<br>')
fw.write("</body></html>")

John Machin · Accepted Answer

By the way, it's not just Russian; non-ASCII characters are quite common in page names. Example:

name=u'Python Programming Language \u2013 Official Website'
url=u'http://www.python.org/'

As an alternative to fragile code like

urls.append(i.split('"')[3])
names.append(data[ind-2].split('"')[3])
# (1) relies on name being 2 lines before url
# (2) fails if there is a `"` in the name
# example: "name": "The \"Fubar\" website",

you could process the input file using the json module. For Python 2.5, you can get simplejson.

Here's a script that emulates yours:

try:
    import json
except ImportError: 
    import simplejson as json
import sys

def convert_file(infname, outfname):

    def explore(folder_name, folder_info):
        for child_dict in folder_info['children']:
            ctype = child_dict.get('type')
            name = child_dict.get('name')
            if ctype == 'url':
                url = child_dict.get('url')
                # print "name=%r url=%r" % (name, url)
                fw.write(name.encode('utf-8') + '<br>
')
            elif ctype == 'folder':
                explore(name, child_dict)
            else:
                print "*** Unexpected ctype=%r ***" % ctype

    f = open(infname, 'rb')
    bmarks = json.load(f)
    f.close()
    fw = open(outfname, 'w')
    fw.write("<html><body>
")
    for folder_name, folder_info in bmarks['roots'].iteritems():
        explore(folder_name, folder_info)
    fw.write("</body></html>")
    fw.close()    

if __name__ == "__main__":
    convert_file(sys.argv[1], sys.argv[2])

Tested using Python 2.5.4 on Windows 7 Pro.

python display unicode in html

Tags:

python

json

unicode

Edit: Solved!

psycat

1 Answers

John Machin

Recent Activity

Donate For Us

python display unicode in html

Tags:

python

json

unicode

**Edit: Solved!**

psycat

1 Answers

John Machin

Related questions

Recent Activity

Donate For Us

Edit: Solved!