I am trying to detect whether a text input field contains any character that doesn't belong to the GSM 7-bit alphabet. The table of characters is here: http://www.dreamfabric.com/sms/default_alphabet.html
After a lot of searching I found this (What regular expression do I need to check for some non-latin characters?), which is pretty close to what I want to accomplish because it detects non-Latin characters. How can I alter the regular expression so that it accepts exactly the GSM 7-bit alphabet?
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<title>test foreign chars</title>
</head>
<body>
<input id="foreign_characters" size="12" type="text" name="foreign_characters" value="test">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js"></script>
<script type="text/javascript">
(function () {
  // Matches any single character outside the range U+0000–U+007F.
  const nonAsciiPattern = /[^\u0000-\u007f]/;
  const $field = $('#foreign_characters');

  // After every key release, read the field's current value and report
  // whether it contains a character matched by the pattern above.
  $field.on("keyup", function () {
    const text = $field.val();
    alert(
      nonAsciiPattern.test(text)
        ? "This is non-Latin Characters"
        : "This is Latin Characters"
    );
  });
})();
</script>
</body>
</html>
Modified on: Tue, 15 Feb, 2022 at 9:53 AM. Some languages have characters that are not included in the GSM character set. For example, Greek (θ, Ι, ι, κ, λ) and Polish (ń, ś, ż) use characters outside it. Note that the Greek capitals Θ and Λ are themselves part of the GSM set, while their lowercase forms θ and λ are not.
GSM-7 is a character encoding standard which packs the most commonly used letters and symbols in many languages into 7 bits each for usage on GSM networks. As SMS messages are transmitted 140 8-bit octets at a time, GSM-7 encoded SMS messages can carry up to 160 characters.
The accepted answers will work, but they suffer from complexity (using a regex) and performance (needing to search through two arrays). Here's a solution that will perform better, thanks to a lookup Set and a loop that short-circuits as soon as a non-GSM-7 character is found. Unicode code points are used instead of literal characters so that differing file encodings are not a problem when copying and pasting this code.
/**
 * Unicode code points of every character in the GSM 7-bit alphabet
 * (default table plus the extension characters FF ^ { } \ [ ~ ] | €).
 * The entries are written as numeric code points rather than literal
 * characters so the list is unaffected by the encoding of this file.
 */
const gsmCodePoints = new Set([
  // Line feed, form feed, carriage return
  0x000a, 0x000c, 0x000d,
  // Space and ASCII punctuation
  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
  // Digits and more punctuation
  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
  // @ and uppercase Latin letters, then [ \ ] ^ _
  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d,
  0x004e, 0x004f,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
  // Lowercase Latin letters (U+0060, the backtick, is deliberately absent), then { | } ~
  0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
  0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e,
  // Latin-1 symbols: ¡ £ ¤ ¥ § ¿
  0x00a1, 0x00a3, 0x00a4, 0x00a5, 0x00a7,
  0x00bf,
  // Accented Latin letters: Ä Å Æ Ç É Ñ Ö Ø Ü ß à ä å æ è é ì ñ ò ö ø ù ü
  0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c9,
  0x00d1, 0x00d6, 0x00d8, 0x00dc, 0x00df,
  0x00e0, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00e9, 0x00ec,
  0x00f1, 0x00f2, 0x00f6, 0x00f8, 0x00f9, 0x00fc,
  // Greek capitals: Γ Δ Θ Λ Ξ Π Σ Φ Ψ Ω
  0x0393, 0x0394, 0x0398, 0x039b, 0x039e, 0x03a0, 0x03a3, 0x03a6, 0x03a8, 0x03a9,
  // Euro sign
  0x20ac,
]);

/**
 * Reports whether every character of a message is in the GSM 7-bit alphabet.
 *
 * @param {string} message - Text to check. It is walked one Unicode code
 *   point at a time, so characters outside the BMP are examined as a whole.
 * @returns {boolean} `true` when all characters are in `gsmCodePoints`
 *   (an empty string is vacuously `true`); `false` as soon as the first
 *   character outside the set is found.
 */
function isGsmMessage(message) {
  for (const char of message) {
    // Iterating a string never yields an empty element, so codePointAt(0)
    // is always a number here. It must be looked up unconditionally: NUL
    // (U+0000) has the falsy code point 0 and is not a GSM character, so a
    // truthiness guard on the code point would wrongly accept it.
    if (!gsmCodePoints.has(char.codePointAt(0))) {
      return false;
    }
  }
  return true;
}
// Usage examples; the value each call returns is shown after the arrow.
// Plain ASCII letters are all GSM characters.
isGsmMessage('foo'); // -> true
// Emoji are not in the GSM alphabet, so the message is rejected.
isGsmMessage('⚡️ bar 🔥'); // -> false
// All GSM characters, including the extension characters (\f ^ { } \ [ ~ ] | €)
isGsmMessage('@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞÆæßÉ\x20!"#¤%&\'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà\f^{}\\[~]|€'); // -> true
If you love what we do, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With