Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

JavaScript function to convert UTF8 string between fullwidth and halfwidth forms

EDIT: Thanks to GOTO 0, I now know exactly what my question is called.

I need a JavaScript function to convert from UTF-8 fullwidth form to halfwidth form.

like image 291
xpt Avatar asked Dec 10 '13 05:12

xpt


2 Answers

Apparently, you want to convert halfwidth and fullwidth form characters to their equivalent basic latin forms. If this is correct, you can do a replacement using a regular expression. Something like this should work:

// Sample input, written with \u escapes so the fullwidth characters survive
// copy/paste and Unicode normalization: fullwidth "!abc", an ordinary space,
// then fullwidth "ABC!".
var x = "\uFF01\uFF41\uFF42\uFF43 \uFF21\uFF22\uFF23\uFF01";
// The fullwidth ASCII variants occupy U+FF01..U+FF5E, exactly 0xFEE0 above
// their Basic Latin counterparts U+0021..U+007E, so subtracting 0xFEE0 from
// each matched code unit yields the ordinary character.
var y = x.replace(
    /[\uff01-\uff5e]/g,
    function(ch) { return String.fromCharCode(ch.charCodeAt(0) - 0xfee0); }
    );

Where x is your input string and y is the output.

like image 168
GOTO 0 Avatar answered Nov 09 '22 22:11

GOTO 0


Year 2018 answer

Many years later – and it’s still impossible to find on the Internet a function that does this. So I wrote mine. (Nearly learned Japanese and Korean to get to this point.)

Simple version

Latin range only.

/**
 * Builds a `String.prototype.replace` callback that shifts the first UTF-16
 * code unit of each match by `delta`.
 * @param {number} delta - Signed offset added to the code unit.
 * @returns {function(string): string} Replacement callback.
 */
var shiftCharCode = delta => c => String.fromCharCode(c.charCodeAt(0) + delta);

/**
 * Converts printable ASCII (U+0021..U+007E) to its fullwidth form
 * (U+FF01..U+FF5E). Every other character, including the space, is left as is.
 * @param {string} str
 * @returns {string}
 */
var toFullWidth = str => str.replace(/[!-~]/g, shiftCharCode(0xFEE0));

/**
 * Converts fullwidth ASCII variants (U+FF01..U+FF5E) back to printable ASCII.
 * The range is written with \u escapes so it cannot be silently folded into
 * the ASCII range by Unicode normalization of the source text; matching the
 * ASCII range here would shift ordinary characters by -0xFEE0 and corrupt them.
 * @param {string} str
 * @returns {string}
 */
var toHalfWidth = str => str.replace(/[\uFF01-\uFF5E]/g, shiftCharCode(-0xFEE0));

Complete version

Let me know if I missed any character.

/**
 * Installs two global helpers:
 *
 *   toFullWidth(str) - replaces every character that has a fullwidth (wide)
 *                      counterpart with that counterpart.
 *   toHalfWidth(str) - the inverse: replaces every character that has a
 *                      halfwidth (narrow) counterpart with that counterpart.
 *
 * Both take a string and return a new string; characters without a counterpart
 * are returned unchanged. A non-string argument throws, because the
 * implementation calls `str.replace`.
 *
 * The tables follow the <wide>/<narrow> mappings of the Unicode "Halfwidth and
 * Fullwidth Forms" block and are written with numeric code points or \u
 * escapes, so the half/full distinction cannot be lost if the source text is
 * Unicode-normalized.
 *
 * Known limitations:
 *  - A halfwidth kana followed by a halfwidth (semi-)voiced sound mark is
 *    converted character by character (e.g. to KA + U+309B), not composed into
 *    the single voiced kana.
 *  - Halfwidth Hangul maps to Hangul Compatibility Jamo (U+3131..U+3164);
 *    conjoining jamo (U+1100 block) and precomposed syllables are not touched.
 */
(function () {
    // Prefer `globalThis` so the helpers can also be installed outside a
    // browser; fall back to `window` on engines that predate `globalThis`.
    const root = typeof globalThis !== "undefined" ? globalThis : window;

    // Contiguous runs: `count` consecutive code points starting at `half`
    // correspond one-to-one to the run starting at `full`.
    const runs = [
        {half: 0x0021, full: 0xFF01, count: 94}, // ASCII '!'..'~'
        {half: 0xFFA1, full: 0x3131, count: 30}, // Hangul consonants
        // The halfwidth Hangul vowels come in four groups separated by
        // unassigned code points, so a single offset cannot cover them.
        {half: 0xFFC2, full: 0x314F, count: 6},
        {half: 0xFFCA, full: 0x3155, count: 6},
        {half: 0xFFD2, full: 0x315B, count: 6},
        {half: 0xFFDA, full: 0x3161, count: 3},
        {half: 0xFFE9, full: 0x2190, count: 4}   // arrows: left, up, right, down
    ];

    // Wide counterparts of the halfwidth CJK punctuation and katakana run
    // U+FF61..U+FF9F, listed in the same order as that run (63 characters).
    // The two sound marks are escaped because they are not normalization-stable.
    const kanaHalfStart = 0xFF61;
    const kanaFull =
        "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシ" +
        "スセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" +
        "\u309B\u309C";

    // Isolated [half, full] pairs that do not belong to any run.
    const pairs = [
        ["\u0020", "\u3000"], // space / ideographic space
        ["\u00A2", "\uFFE0"], // cent sign
        ["\u00A3", "\uFFE1"], // pound sign
        ["\u00AC", "\uFFE2"], // not sign
        ["\u00AF", "\uFFE3"], // macron
        ["\u00A6", "\uFFE4"], // broken bar
        ["\u00A5", "\uFFE5"], // yen sign
        ["\u20A9", "\uFFE6"], // won sign
        ["\u2985", "\uFF5F"], // left white parenthesis
        ["\u2986", "\uFF60"], // right white parenthesis
        ["\uFFA0", "\u3164"], // Hangul filler
        ["\uFFE8", "\u2502"], // light vertical line
        ["\uFFED", "\u25A0"], // black square
        ["\uFFEE", "\u25CB"]  // white circle
    ];

    const halfToFull = new Map();
    const fullToHalf = new Map();
    const link = (half, full) => {
        halfToFull.set(half, full);
        fullToHalf.set(full, half);
    };

    for (const {half, full, count} of runs) {
        for (let i = 0; i < count; i++) {
            link(String.fromCharCode(half + i), String.fromCharCode(full + i));
        }
    }
    [...kanaFull].forEach((full, i) => link(String.fromCharCode(kanaHalfStart + i), full));
    pairs.forEach(([half, full]) => link(half, full));

    // Single pass over the string: each UTF-16 code unit is looked up once, so
    // an already converted character can never be converted a second time.
    // All table entries are BMP characters; surrogate halves are simply copied.
    const converter = table => str =>
        str.replace(/[\s\S]/g, ch => (table.has(ch) ? table.get(ch) : ch));

    root.toFullWidth = converter(halfToFull);
    root.toHalfWidth = converter(fullToHalf);
})();

/* Example starts here: */
// Ask for comma-separated sample strings. `prompt` returns null when the
// dialog is cancelled, so fall back to an empty string before splitting.
var set = (prompt("Enter a couple of comma-separated strings (half or full-width):",
    ["aouäöü123", "'\"?:", "¢£¥₩↑→", "コンニチハ", "ᄀ까ᅢ"].join()) || "").split(",");
// Three stages, one per table row: the raw input, the input converted to
// full width, and that result converted back to half width.
var steps = [set, set.map(toFullWidth), set.map(toFullWidth).map(toHalfWidth)];
// The text comes from the user, so HTML-significant characters are replaced by
// numeric character references before the cell markup is built.
var tdHTML = str => `<td>${str.replace(/[&<>"']/g, ch => `&#${ch.charCodeAt(0)};`)}</td>`;
var stepsHTML = steps.map(step => step.map(tdHTML).join(""));
// Append each stage's cells to the table row with the same index.
var rows = document.getElementsByTagName("tr");
[...rows].forEach((row,i) => row.insertAdjacentHTML("beforeEnd", stepsHTML[i]));
/* Every cell gets a thin light-grey outline and a little inner spacing. */
th,
td {
    border: 1px solid lightgrey;
    padding: 0.2em;
}

/* Header cells are left-aligned. */
th {
    text-align: left;
}

/* Adjacent cell borders are merged into single lines. */
table {
    border-collapse: collapse;
}
<!-- Result table: one labelled row per stage. The example script appends one
     <td> per input string to each <tr>, matching rows by document order
     (input, full-width, half-width). -->
<table>
    <tr><th scope="row">Input:</th></tr>
    <tr><th scope="row">Full-width:</th></tr>
    <tr><th scope="row">Half-width:</th></tr>
</table>
like image 28
7vujy0f0hy Avatar answered Nov 09 '22 22:11

7vujy0f0hy