I need a fast method to find all indices of a search term that might occur in a string. I tried this 'brute force' String
extension method:
// Note: makes use of ExSwift
extension String
{
var length: Int { return count(self) }
func indicesOf(searchTerm:String) -> [Int] {
var indices = [Int]()
for i in 0 ..< self.length {
let segment = self[i ... (i + searchTerm.length - 1)]
if (segment == searchTerm) {
indices.append(i)
}
}
return indices;
}
}
... But it's ridiculously slow, especially the shorter the search term is. What would be a better method to find all indices fast?
As Martin said you can implement some of the well known fastest algorithms in String Matching, The Knuth–Morris–Pratt string searching algorithm (or KMP algorithm) searches for occurrences of a "word" W
within a main "text string" S
.
The algorithm has complexity O(n), where n is the length of S
and the O is big-O notation.
extension String {
// Build pi function of prefixes
private func build_pi(str: String) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(pattern)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || pattern[k] != self[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
subscript (i: Int) -> Character {
return self[advance(self.startIndex, i)]
}
}
Then you can use it in the following way:
var string = "apurba mandal loves ayoshi loves"
var pattern = "loves"
println(string.searchPattern(pattern))
An the output should be :
[14, 27]
That belong to the start index of the pattern occurrences inside the the string. I hope this help you.
EDIT:
As Martin said in his comment you need to avoid the use of the advance
function to index an String
by an Int
because it's O(position to index).
One possible solution is to convert the String
to an array of Character
and then access to the indexes is O(1).
Then the extension
can be changed to this one :
extension String {
// Build pi function of prefixes
private func build_pi(str: [Character]) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
// Convert to Character array to index in O(1)
var patt = Array(pattern)
var S = Array(self)
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(patt)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || patt[k] != S[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
}
Instead of checking for the search term at each position of the string
you could use rangeOfString()
to find the next occurrence (hoping
that rangeOfString()
uses more advanced algorithms):
extension String {
func indicesOf(searchTerm:String) -> [Int] {
var indices = [Int]()
var pos = self.startIndex
while let range = self.rangeOfString(searchTerm, range: pos ..< self.endIndex) {
indices.append(distance(self.startIndex, range.startIndex))
pos = range.startIndex.successor()
}
return indices
}
}
Generally, it depends on the size of the input string and the size of the search string which algorithm is "the fastest". You'll find an overview with links to various algorithms in String searching algorithm.
Update for Swift 3:
extension String {
func indices(of searchTerm:String) -> [Int] {
var indices = [Int]()
var pos = self.startIndex
while let range = range(of: searchTerm, range: pos ..< self.endIndex) {
indices.append(distance(from: startIndex, to: range.lowerBound))
pos = index(after: range.lowerBound)
}
return indices
}
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With