This block ranges from U+0080 to U+00FF, contains 128 characters and includes the C1 controls, Latin-1 punctuation and symbols, 30 pairs of majuscule and minuscule accented Latin characters and 2 mathematical operators.
btoa() The btoa() method creates a Base64-encoded ASCII string from a binary string (i.e., a string in which each character in the string is treated as a byte of binary data).
Definition and Usage The btoa() method encodes a string in base-64. The btoa() method uses the "A-Z", "a-z", "0-9", "+", "/" and "=" characters to encode the string.
In order to encode/decode a string in JavaScript, We are using built-in functions provided by JavaScript. btoa(): This method encodes a string in base-64 and uses the “A-Z”, “a-z”, “0-9”, “+”, “/” and “=” characters to encode the provided string.
If you have UTF8, use this (actually works with SVG source), like:
btoa(unescape(encodeURIComponent(str)))
example:
var imgsrc = 'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(markup)));
var img = new Image(1, 1); // width, height values are optional params
img.src = imgsrc;
If you need to decode that base64, use this:
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Example:
var str = "äöüÄÖÜçéèñ";
var b64 = window.btoa(unescape(encodeURIComponent(str)))
console.log(b64);
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Note: if you need to get this to work in mobile-safari, you might need to strip all the white-space from the base64 data...
function b64_to_utf8( str ) {
str = str.replace(/\s/g, '');
return decodeURIComponent(escape(window.atob( str )));
}
2017 Update
This problem has been bugging me again.
The simple truth is, atob doesn't really handle UTF8-strings - it's ASCII only.
Also, I wouldn't use bloatware like js-base64.
But webtoolkit does have a small, nice and very maintainable implementation:
/**
*
* Base64 encode / decode
* http://www.webtoolkit.info
*
**/
var Base64 = {
// private property
_keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
// public method for encoding
, encode: function (input)
{
var output = "";
var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
var i = 0;
input = Base64._utf8_encode(input);
while (i < input.length)
{
chr1 = input.charCodeAt(i++);
chr2 = input.charCodeAt(i++);
chr3 = input.charCodeAt(i++);
enc1 = chr1 >> 2;
enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
enc4 = chr3 & 63;
if (isNaN(chr2))
{
enc3 = enc4 = 64;
}
else if (isNaN(chr3))
{
enc4 = 64;
}
output = output +
this._keyStr.charAt(enc1) + this._keyStr.charAt(enc2) +
this._keyStr.charAt(enc3) + this._keyStr.charAt(enc4);
} // Whend
return output;
} // End Function encode
// public method for decoding
,decode: function (input)
{
var output = "";
var chr1, chr2, chr3;
var enc1, enc2, enc3, enc4;
var i = 0;
input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
while (i < input.length)
{
enc1 = this._keyStr.indexOf(input.charAt(i++));
enc2 = this._keyStr.indexOf(input.charAt(i++));
enc3 = this._keyStr.indexOf(input.charAt(i++));
enc4 = this._keyStr.indexOf(input.charAt(i++));
chr1 = (enc1 << 2) | (enc2 >> 4);
chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
chr3 = ((enc3 & 3) << 6) | enc4;
output = output + String.fromCharCode(chr1);
if (enc3 != 64)
{
output = output + String.fromCharCode(chr2);
}
if (enc4 != 64)
{
output = output + String.fromCharCode(chr3);
}
} // Whend
output = Base64._utf8_decode(output);
return output;
} // End Function decode
// private method for UTF-8 encoding
,_utf8_encode: function (string)
{
var utftext = "";
string = string.replace(/\r\n/g, "\n");
for (var n = 0; n < string.length; n++)
{
var c = string.charCodeAt(n);
if (c < 128)
{
utftext += String.fromCharCode(c);
}
else if ((c > 127) && (c < 2048))
{
utftext += String.fromCharCode((c >> 6) | 192);
utftext += String.fromCharCode((c & 63) | 128);
}
else
{
utftext += String.fromCharCode((c >> 12) | 224);
utftext += String.fromCharCode(((c >> 6) & 63) | 128);
utftext += String.fromCharCode((c & 63) | 128);
}
} // Next n
return utftext;
} // End Function _utf8_encode
// private method for UTF-8 decoding
,_utf8_decode: function (utftext)
{
var string = "";
var i = 0;
var c, c1, c2, c3;
c = c1 = c2 = 0;
while (i < utftext.length)
{
c = utftext.charCodeAt(i);
if (c < 128)
{
string += String.fromCharCode(c);
i++;
}
else if ((c > 191) && (c < 224))
{
c2 = utftext.charCodeAt(i + 1);
string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
i += 2;
}
else
{
c2 = utftext.charCodeAt(i + 1);
c3 = utftext.charCodeAt(i + 2);
string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
i += 3;
}
} // Whend
return string;
} // End Function _utf8_decode
}
https://www.fileformat.info/info/unicode/utf8.htm
For any character equal to or below 127 (hex 0x7F), the UTF-8 representation is one byte. It is just the lowest 7 bits of the full unicode value. This is also the same as the ASCII value.
For characters equal to or below 2047 (hex 0x07FF), the UTF-8 representation is spread across two bytes. The first byte will have the two high bits set and the third bit clear (i.e. 0xC2 to 0xDF). The second byte will have the top bit set and the second bit clear (i.e. 0x80 to 0xBF).
For all characters equal to or greater than 2048 but less that 65535 (0xFFFF), the UTF-8 representation is spread across three bytes.
We don't have to reinvent the wheel. Just use a library to save the time and headache.
https://github.com/dankogai/js-base64 is good and I confirm it supports unicode very well.
Base64.encode('dankogai'); // ZGFua29nYWk=
Base64.encode('小飼弾'); // 5bCP6aO85by+
Base64.encodeURI('小飼弾'); // 5bCP6aO85by-
Base64.decode('ZGFua29nYWk='); // dankogai
Base64.decode('5bCP6aO85by+'); // 小飼弾
// note .decodeURI() is unnecessary since it accepts both flavors
Base64.decode('5bCP6aO85by-'); // 小飼弾
Using btoa
with unescape
and encodeURIComponent
didn't work for me. Replacing all the special characters with XML/HTML entities and then converting to the base64 representation was the only way to solve this issue for me. Some code:
base64 = btoa(str.replace(/[\u00A0-\u2666]/g, function(c) {
return '&#' + c.charCodeAt(0) + ';';
}));
I just thought I should share how I actually solved the problem and why I think this is the right solution (provided you don't optimize for old browser).
data: ...
)var blob = new Blob(
// I'm using page innerHTML as data
// note that you can use the array
// to concatenate many long strings EFFICIENTLY
[document.body.innerHTML],
// Mime type is important for data url
{type : 'text/html'}
);
// This FileReader works asynchronously, so it doesn't lag
// the web application
var a = new FileReader();
a.onload = function(e) {
// Capture result here
console.log(e.target.result);
};
a.readAsDataURL(blob);
Apart from obvious solution - opening new window with your dataURL as URL you can do two other things.
File saver can create actual fileSave dialog with predefined filename. It can also fallback to normal dataURL approach.
URL.createObjectURL
This is great for reusing base64 encoded data. It creates a short URL for your dataURL:
console.log(URL.createObjectURL(blob));
//Prints: blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42
Don't forget to use the URL including the leading blob
prefix. I used document.body
again:
You can use this short URL as AJAX target, <script>
source or <a>
href location. You're responsible for destroying the URL though:
URL.revokeObjectURL('blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42')
As an complement to Stefan Steiger answer: (as it doesn't look nice as a comment)
Extending String prototype:
String.prototype.b64encode = function() {
return btoa(unescape(encodeURIComponent(this)));
};
String.prototype.b64decode = function() {
return decodeURIComponent(escape(atob(this)));
};
Usage:
var str = "äöüÄÖÜçéèñ";
var encoded = str.b64encode();
console.log( encoded.b64decode() );
NOTE:
As stated in the comments, using unescape
is not recommended as it may be removed in the future:
Warning: Although unescape() is not strictly deprecated (as in "removed from the Web standards"), it is defined in Annex B of the ECMA-262 standard, whose introduction states: … All of the language features and behaviours specified in this annex have one or more undesirable characteristics and in the absence of legacy usage would be removed from this specification.
Note: Do not use unescape to decode URIs, use decodeURI or decodeURIComponent instead.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With