Given either a Unicode symbol as a String or its XML/HTML entity, how could one generate its Unicode number? For example, if you're given the string "෴" and you can generate its HTML code (`&#xDF4;`), how could you then generate its Unicode number (`U+0DF4`)?
I am currently generating the HTML entities by using the `CFStringTransform` API with `kCFStringTransformToXMLHex` as the transform, but there isn't a transform for the Unicode number itself.
update: Xcode 11.4 • Swift 5.2
extension String {
    /// The string's UTF-8 code units wrapped in a `Data` value.
    var data: Data { Data(utf8) }

    /// An attributed string built by reading this string's UTF-8 bytes as an HTML document.
    ///
    /// If the initializer throws, the error is printed and `nil` is returned.
    var html2AttributedString: NSAttributedString? {
        do {
            let parsed = try NSAttributedString(
                data: data,
                options: [
                    .documentType: NSAttributedString.DocumentType.html,
                    .characterEncoding: String.Encoding.utf8.rawValue
                ],
                documentAttributes: nil
            )
            return parsed
        } catch {
            print(error)
            return nil
        }
    }

    /// The plain text of `html2AttributedString`, or an empty string when that is `nil`.
    var html2String: String {
        guard let attributed = html2AttributedString else { return "" }
        return attributed.string
    }

    /// The numeric value of every Unicode scalar in the string, in order.
    var unicodes: [UInt32] { unicodeScalars.map { $0.value } }
}
// Example: reduce an HTML fragment to its plain text, then list the scalar
// values of that text (8364 is 0x20AC, the euro sign).
let str = "<span>€€</span>".html2String // "€€"
str.unicodes // [8364, 8364]
extension StringTransform {
    /// Transform identified by the raw value "Hex/Unicode".
    static let toUnicodeHex: StringTransform = .init("Hex/Unicode")
    /// Transform identified by the raw value "Hex/Java".
    static let toJavaHex: StringTransform = .init("Hex/Java")
    /// Transform identified by the raw value "Hex/Perl".
    static let toPerlHex: StringTransform = .init("Hex/Perl")
}
extension String {
    /// Runs `transform` in the forward direction; yields "" when the transform returns `nil`.
    private func forwardTransformed(by transform: StringTransform) -> String {
        guard let output = applyingTransform(transform, reverse: false) else { return "" }
        return output
    }

    /// The string after the `.toUnicodeHex` transform, or "" if it cannot be applied.
    var convertedToUnicodeHex: String { forwardTransformed(by: .toUnicodeHex) }

    /// The string after the `.toJavaHex` transform, or "" if it cannot be applied.
    var convertedToJavaHex: String { forwardTransformed(by: .toJavaHex) }

    /// The string after the `.toXMLHex` transform, or "" if it cannot be applied.
    var convertedToXMLHex: String { forwardTransformed(by: .toXMLHex) }

    /// The string after the `.toPerlHex` transform, or "" if it cannot be applied.
    var convertedToPerlHex: String { forwardTransformed(by: .toPerlHex) }
}
// Example results for the single-scalar string "෴" (U+0DF4):
"෴".convertedToUnicodeHex // U+0DF4
"෴".convertedToJavaHex // \u0DF4
"෴".convertedToXMLHex // &#xDF4;
"෴".convertedToPerlHex // \x{DF4}
"෴".unicodes // [3572]
// The same code point as a hex literal: its decimal value matches the `unicodes` result.
0x0DF4 // 3572
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With