I am trying to write a custom string splitting function, and it is harder than I would have expected.
Basically, I pass in a string and an array of the values that the string will split on, and it will return an array of substrings, removing empty ones and including the values it splits on. If the string can be split at the same place by two different values, the longer one has precedence.
That is,
split("Go ye away, I want some peace && quiet. & Thanks.", ["Go ", ",", "&&", "&", "."]);
should return
["Go ", "ye away", ",", " I want some peace ", "&&", " quiet", ".", " ", "&", " Thanks", "."]
Can you think of a reasonably simple algorithm for this? If there is a built-in way to do this in JavaScript (I don't think there is), that would be even nicer.
Something like this?
/**
 * Splits `input` on any of the given delimiters, keeping the delimiters
 * themselves in the output and dropping empty pieces.
 *
 * The input is scanned left to right. At each position the delimiters are
 * tried longest first, so when two delimiters match at the same place
 * (e.g. "&&" and "&") the longer one wins.
 *
 * @param {string} input - Text to split.
 * @param {string[]} delimiters - Strings to split on. Not modified.
 * @returns {string[]} The non-empty text chunks and the matched delimiters,
 *     in the order they appear in `input`.
 */
function mySplit(input, delimiters) {
    // Sort a copy (longest first) so the caller's array is left untouched.
    var ordered = delimiters.slice().sort(function(a, b) {
        return b.length - a.length;
    });
    var result = [];
    var chunkStart = 0; // index where the pending non-delimiter text begins
    var i = 0;

    while (i < input.length) {
        var matched = null;
        for (var j = 0; j < ordered.length; j++) {
            var candidate = ordered[j];
            // An empty delimiter would match everywhere without advancing.
            if (candidate.length > 0 &&
                    input.slice(i, i + candidate.length) === candidate) {
                matched = candidate;
                break;
            }
        }

        if (matched === null) {
            i++;
            continue;
        }

        // Flush the text collected since the previous delimiter, if any.
        if (i > chunkStart) {
            result.push(input.slice(chunkStart, i));
        }
        result.push(matched);
        i += matched.length;
        chunkStart = i;
    }

    // Text after the last delimiter is part of the result too.
    if (chunkStart < input.length) {
        result.push(input.slice(chunkStart));
    }
    return result;
}
// Demo: run mySplit on the sample sentence from the question and print the pieces.
var delimiters = [
    "Go ",
    ",",
    "&&",
    "&",
    "."
];
var input = "Go ye away, I want some peace && quiet. & Thanks.";
console.log(
    mySplit(input, delimiters)
);
// Expected output:
// ["Go ", "ye away", ",", " I want some peace ",
//  "&&", " quiet", ".", " ", "&", " Thanks", "."]
Exact solution asked for:
/**
 * Splits `toSplit` on every string in `splitters`, re-inserting the
 * splitters into the result and dropping empty pieces.
 *
 * Splitters are applied longest first: the text is split on the longest
 * splitter, and only the resulting segments (never the re-inserted
 * splitter itself) are split further on the remaining, shorter ones. That
 * is why "&&" stays intact even when "&" is also a splitter.
 *
 * @param {string} toSplit - Text to split.
 * @param {string[]} splitters - Strings to split on. Not modified.
 * @returns {string[]} Non-empty segments and splitters in source order.
 *     Always an array, including when `splitters` is empty.
 */
function megasplit(toSplit, splitters) {
    // Sort by length, longest first; done once on a copy rather than at
    // every recursion level.
    var ordered = splitters.slice().sort(function(a, b) {
        return b.length - a.length;
    });

    // Splits `text` on ordered[index], recursing into each segment with the
    // remaining (shorter) splitters.
    function splitFrom(text, index) {
        if (index >= ordered.length) {
            return text ? [text] : []; // drop empty segments
        }
        var token = ordered[index];
        var pieces = [];
        text.split(token).forEach(function(segment, position) {
            // Re-insert the token between segments; an empty token would
            // only add an empty piece, so it is skipped.
            if (position > 0 && token) {
                pieces.push(token);
            }
            pieces.push.apply(pieces, splitFrom(segment, index + 1));
        });
        return pieces;
    }

    return splitFrom(toSplit, 0);
}
Demo:
> megasplit(
"Go ye away, I want some peace && quiet. & Thanks.",
["Go ", ",", "&&", "&", "."]
)
["Go ", "ye away", ",", " I want some peace ", "&&", " quiet", ".", " ", "&", " Thanks", "."]
Machinery (reusable!):
// Returns a shallow copy of the array.
// Defined as non-enumerable so it does not appear in `for...in` loops over
// arrays; it stays writable and configurable like a plain assignment.
Object.defineProperty(Array.prototype, 'copy', {
    value: function() {
        return this.slice();
    },
    writable: true,
    configurable: true,
    enumerable: false
});
// Returns a sorted shallow copy of the array, leaving the original untouched.
// Any arguments (e.g. a comparator) are forwarded to Array.prototype.sort.
// Defined as non-enumerable so it does not appear in `for...in` loops over
// arrays.
Object.defineProperty(Array.prototype, 'sorted', {
    value: function() {
        var duplicate = this.slice();
        duplicate.sort.apply(duplicate, arguments);
        return duplicate;
    },
    writable: true,
    configurable: true,
    enumerable: false
});
// Flattens the array by exactly one level: array elements are spliced in,
// non-array elements are kept as they are.
// Defined as non-enumerable so it does not appear in `for...in` loops over
// arrays.
Object.defineProperty(Array.prototype, 'flatten', {
    value: function() {
        return [].concat.apply([], this);
    },
    writable: true,
    configurable: true,
    enumerable: false
});
// Maps the array (arguments are forwarded to Array.prototype.map) and then
// flattens the mapped result by one level.
// Defined as non-enumerable so it does not appear in `for...in` loops over
// arrays.
Object.defineProperty(Array.prototype, 'mapFlatten', {
    value: function() {
        var mapped = this.map.apply(this, arguments);
        return [].concat.apply([], mapped);
    },
    writable: true,
    configurable: true,
    enumerable: false
});
// Returns a new array with `token` inserted between consecutive elements:
// [1,2,3].intersperse('x') -> [1,'x',2,'x',3]
// Elements that are themselves arrays are kept as single elements.
// Defined as non-enumerable so it does not appear in `for...in` loops over
// arrays.
Object.defineProperty(Array.prototype, 'intersperse', {
    value: function(token) {
        var result = [];
        for (var i = 0; i < this.length; i++) {
            if (i > 0) {
                result.push(token);
            }
            result.push(this[i]);
        }
        return result;
    },
    writable: true,
    configurable: true,
    enumerable: false
});
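Notes:
- Re-inserted tokens must not be split again by shorter tokens (otherwise "&&" would come back as "&", "&"). This made use of reduce impossible and necessitated recursion.
- Without the final filter(Boolean) step, empty strings would be kept, as with the built-in .split, and the result would be like ["", "Go ", "ye away", ",", " I want some peace ", "&&", " quiet", ".", " ", "&", " Thanks", ".", ""]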
1-liner if one follows canonical splitting behavior:
// Helper used by the megasplit variant that follows.
Array.prototype.mapFlatten = function() {
// NOTE(review): body elided in this snippet; presumably the same as the
// mapFlatten shown in the "Machinery" section -- confirm before use.
...
}
/**
 * Canonical-splitting variant: splits `toSplit` on every string in
 * `splitters` (longest first) and returns only the segments. The splitters
 * are not re-inserted and empty segments are kept, as with String.split.
 *
 * Relies on the Array.prototype.sorted and Array.prototype.mapFlatten
 * helpers defined elsewhere in this file.
 *
 * @param {string} toSplit - Text to split.
 * @param {string[]} splitters - Strings to split on.
 * @returns {string[]} The segments left after splitting on every splitter.
 */
function megasplit(toSplit, splitters) {
    // Longest first, so "&&" is consumed before "&" can split it in two.
    return splitters.sorted(function(a, b) { return b.length - a.length; }).reduce(function(strings, token) {
        return strings.mapFlatten(function(s) { return s.split(token); });
    }, [toSplit]);
}
3-liner, if the above was hard to read:
// Helper used by the megasplit variant that follows.
Array.prototype.mapFlatten = function() {
// NOTE(review): body elided in this snippet; presumably the same as the
// mapFlatten shown in the "Machinery" section -- confirm before use.
...
}
/**
 * Canonical-splitting variant written as an explicit loop: splits `toSplit`
 * on every string in `splitters` (longest first) and returns only the
 * segments. The splitters are not re-inserted and empty segments are kept,
 * as with String.split.
 *
 * Relies on the Array.prototype.sorted and Array.prototype.mapFlatten
 * helpers defined elsewhere in this file.
 *
 * @param {string} toSplit - Text to split.
 * @param {string[]} splitters - Strings to split on.
 * @returns {string[]} The segments left after splitting on every splitter.
 */
function megasplit(toSplit, splitters) {
    var strings = [toSplit];
    // Longest first, so "&&" is consumed before "&" can split it in two.
    var longestFirst = splitters.sorted(function(a, b) { return b.length - a.length; });
    longestFirst.forEach(function(token) {
        // Replace every current segment by its pieces after splitting on token.
        strings = strings.mapFlatten(function(s) { return s.split(token); });
    });
    return strings;
}
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With