Javascript: Unicode string to hex

People also ask

How do I encode a hex?

Hex encoding is performed by converting the 8 bit data to 2 hex characters. The hex characters are then stored as the two byte string representation of the characters. Often, some kind of separator is used to make the encoded data easier for human reading.

How do you use hexadecimal in JavaScript?

Uses of Hexadecimal Numbers in JavaScriptJavaScript supports the use of hexadecimal notation in place of any integer, but not decimals. As an example, the number 2514 in hex is 0x9D2, but there is no language-supported way of representing 25.14 as a hex number.

Remember that a JavaScript code unit is 16 bits wide. Therefore the hex string form will be 4 digits per code unit.

usage:

var str = "\u6f22\u5b57"; // "\u6f22\u5b57" === "漢字"
alert(str.hexEncode().hexDecode());

String to hex form:

String.prototype.hexEncode = function(){
    var hex, i;

    var result = "";
    for (i=0; i<this.length; i++) {
        hex = this.charCodeAt(i).toString(16);
        result += ("000"+hex).slice(-4);
    }

    return result
}

Back again:

String.prototype.hexDecode = function(){
    var j;
    var hexes = this.match(/.{1,4}/g) || [];
    var back = "";
    for(j = 0; j<hexes.length; j++) {
        back += String.fromCharCode(parseInt(hexes[j], 16));
    }

    return back;
}

Here is a tweak of McDowell's algorithm that doesn't pad the result:

  function toHex(str) {
    var result = '';
    for (var i=0; i<str.length; i++) {
      result += str.charCodeAt(i).toString(16);
    }
    return result;
  }

It depends on what encoding you use. If you want to convert utf-8 encoded hex to string, use this:

function fromHex(hex,str){
  try{
    str = decodeURIComponent(hex.replace(/(..)/g,'%$1'))
  }
  catch(e){
    str = hex
    console.log('invalid hex input: ' + hex)
  }
  return str
}

For the other direction use this:

function toHex(str,hex){
  try{
    hex = unescape(encodeURIComponent(str))
    .split('').map(function(v){
      return v.charCodeAt(0).toString(16)
    }).join('')
  }
  catch(e){
    hex = str
    console.log('invalid text input: ' + str)
  }
  return hex
}

A more up to date solution, for encoding:

// This is the same for all of the below, and
// you probably won't need it except for debugging
// in most cases.
function bytesToHex(bytes) {
  return Array.from(
    bytes,
    byte => byte.toString(16).padStart(2, "0")
  ).join("");
}

// You almost certainly want UTF-8, which is
// now natively supported:
function stringToUTF8Bytes(string) {
  return new TextEncoder().encode(string);
}

// But you might want UTF-16 for some reason.
// .charCodeAt(index) will return the underlying
// UTF-16 code-units (not code-points!), so you
// just need to format them in whichever endian order you want.
function stringToUTF16Bytes(string, littleEndian) {
  const bytes = new Uint8Array(string.length * 2);
  // Using DataView is the only way to get a specific
  // endianness.
  const view = new DataView(bytes.buffer);
  for (let i = 0; i != string.length; i++) {
    view.setUint16(i, string.charCodeAt(i), littleEndian);
  }
  return bytes;
}

// And you might want UTF-32 in even weirder cases.
// Fortunately, iterating a string gives the code
// points, which are identical to the UTF-32 encoding,
// though you still have the endianess issue.
function stringToUTF32Bytes(string, littleEndian) {
  const codepoints = Array.from(string, c => c.codePointAt(0));
  const bytes = new Uint8Array(codepoints.length * 4);
  // Using DataView is the only way to get a specific
  // endianness.
  const view = new DataView(bytes.buffer);
  for (let i = 0; i != codepoints.length; i++) {
    view.setUint32(i, codepoints[i], littleEndian);
  }
  return bytes;
}

Examples:

bytesToHex(stringToUTF8Bytes("hello 漢字 👍"))
// "68656c6c6f20e6bca2e5ad9720f09f918d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", false))
// "00680065006c006c006f00206f225b570020d83ddc4d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", true))
// "680065006c006c006f002000226f575b20003dd84ddc"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", false))
// "00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", true))
// "68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"

For decoding, it's generally a lot simpler, you just need:

function hexToBytes(hex) {
    const bytes = new Uint8Array(hex.length / 2);
    for (let i = 0; i !== bytes.length; i++) {
        bytes[i] = parseInt(hex.substr(i * 2, 2), 16);
    }
    return bytes;
}

then use the encoding parameter of TextDecoder:

// UTF-8 is default
new TextDecoder().decode(hexToBytes("68656c6c6f20e6bca2e5ad9720f09f918d"));
// but you can also use:
new TextDecoder("UTF-16LE").decode(hexToBytes("680065006c006c006f002000226f575b20003dd84ddc"))
new TextDecoder("UTF-16BE").decode(hexToBytes("00680065006c006c006f00206f225b570020d83ddc4d"));
// "hello 漢字 👍"

Here's the list of allowed encoding names: https://www.w3.org/TR/encoding/#names-and-labels

You might notice UTF-32 is not on that list, which is a pain, so:

function bytesToStringUTF32(bytes, littleEndian) {
  const view = new DataView(bytes.buffer);
  const codepoints = new Uint32Array(view.byteLength / 4);
  for (let i = 0; i !== codepoints.length; i++) {
    codepoints[i] = view.getUint32(i * 4, littleEndian);
  }
  return String.fromCodePoint(...codepoints);
}

Then:

bytesToStringUTF32(hexToBytes("00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"), false)
bytesToStringUTF32(hexToBytes("68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"), true)
// "hello 漢字 👍"

how do you get "\u6f22\u5b57" from 漢字 in JavaScript?

These are JavaScript Unicode escape sequences e.g. \u12AB. To convert them, you could iterate over every code unit in the string, call .toString(16) on it, and go from there.

However, it is more efficient to also use hexadecimal escape sequences e.g. \xAA in the output wherever possible.

Also note that ASCII symbols such as A, b, and - probably don’t need to be escaped.

I’ve written a small JavaScript library that does all this for you, called jsesc. It has lots of options to control the output.

Here’s an online demo of the tool in action: http://mothereff.in/js-escapes#1%E6%BC%A2%E5%AD%97

Your question was tagged as utf-8. Reading the rest of your question, UTF-8 encoding/decoding didn’t seem to be what you wanted here, but in case you ever need it: use utf8.js (online demo).

Here you go. :D

"漢字".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")

"6f225b57"

for non unicode

"hi".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")

"6869"

ASCII (utf-8) binary HEX string to string

"68656c6c6f20776f726c6421".match(/.{1,2}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")

String to ASCII (utf-8) binary HEX string

"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")

--- unicode ---

String to UNICODE (utf-16) binary HEX string

"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")

UNICODE (utf-16) binary HEX string to string

"00680065006c006c006f00200077006f0072006c00640021".match(/.{1,4}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")

Here is my take: these functions convert a UTF8 string to a proper HEX without the extra zeroes padding. A real UTF8 string has characters with 1, 2, 3 and 4 bytes length.

While working on this I found a couple key things that solved my problems:

str.split('') doesn't handle multi-byte characters like emojis correctly. The proper/modern way to handle this is with Array.from(str)
encodeURIComponent() and decodeURIComponent() are great tools to convert between string and hex. They are pretty standard, they handle UTF8 correctly.
(Most) ASCII characters (codes 0 - 127) don't get URI encoded, so they need to handled separately. But c.charCodeAt(0).toString(16) works perfectly for those

    function utf8ToHex(str) {
      return Array.from(str).map(c => 
        c.charCodeAt(0) < 128 ? c.charCodeAt(0).toString(16) : 
        encodeURIComponent(c).replace(/\%/g,'').toLowerCase()
      ).join('');
    },
    function hexToUtf8: function(hex) {
      return decodeURIComponent('%' + hex.match(/.{1,2}/g).join('%'));
    }

Demo: https://jsfiddle.net/lyquix/k2tjbrvq/

Related questions
                            
                                How to debug web workers
                            
                                How to change CSS :root color variables in JavaScript
                            
                                JQuery get all elements by class name
                            
                                Reload an IFRAME without adding to the history
                            
                                'await Unexpected identifier' on Node.js 7.5
                            
                                How to iterate through property names of Javascript object?
                            
                                Are arrow functions faster (more performant, lighter) than ordinary standalone function declaration in v8?
                            
                                Dynamically loading css stylesheet doesn't work on IE
                            
                                how to compare two string dates in javascript?
                            
                                Copying an array of objects into another array in javascript [duplicate]
                            
                                .day() returns wrong day of month with Moment.js
                            
                                ReactJs: Prevent multiple times button press
                            
                                React.js -- How to pass properties object to child component?
                            
                                Open links made by createObjectURL in IE11
                            
                                TypeScript - Take object out of array based on attribute value
                            
                                How to change Gregorian date to Persian date in JavaScript?
                            
                                Javascript event e.which?
                            
                                How to add Google Maps Autocomplete search box?
                            
                                How to go from jQuery to React.js? [closed]
                            
                                fetch - Missing boundary in multipart/form-data POST

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

Javascript: Unicode string to hex

Tags:

javascript

hex

unicode

utf-8

People also ask

Recent Activity

Donate For Us