I want to write a procedure that finds out how many words there are in a string, separated by a space, a comma, or some other character, and then adds up the total later.
I'm making an average calculator, so I want the total count of the data items and then the sum of all the words.
update: Xcode 10.2.x • Swift 5 or later
Using Foundation method enumerateSubstrings(in: Range)
and setting .byWords
as options:
let sentence = "I want to an algorithm that could help find out how many words are there in a string separated by space or comma or some character. And then append each word separated by a character to an array which could be added up later I'm making an average calculator so I want the total count of data and then add up all the words. By words I mean the numbers separated by a character, preferably space Thanks in advance"

// Gather every substring that Foundation reports when enumerating the whole sentence by words.
var words: [Substring] = []
let wholeSentence = sentence.startIndex..<sentence.endIndex
sentence.enumerateSubstrings(in: wholeSentence, options: .byWords) { _, wordRange, _, _ in
    words.append(sentence[wordRange])
}

// Punctuation is dropped and the contraction "I'm" is kept as a single word.
print(words)       // ["I", "want", "to", "an", "algorithm", ..., "I'm", "making", ..., "Thanks", "in", "advance"]
print(words.count) // 79
Or using native Swift 5 new Character
property isLetter
and the split method:
// Pure standard-library variant: every character that is not a letter acts as a separator.
let words = sentence.split(whereSeparator: { character in !character.isLetter })

// The apostrophe is a separator too, so "I'm" contributes two elements: "I" and "m".
print(words)       // ["I", "want", "to", "an", "algorithm", ..., "I", "m", "making", ..., "Thanks", "in", "advance"]
print(words.count) // 80
Extending StringProtocol
to support Substrings as well:
extension StringProtocol {
    /// The maximal runs of letters in the receiver.
    ///
    /// Every character that is not a letter is treated as a separator, and
    /// empty pieces are omitted.
    var words: [SubSequence] {
        return split(whereSeparator: { character in !character.isLetter })
    }

    /// The substrings reported by Foundation when the whole receiver is
    /// enumerated with the `.byWords` option, in enumeration order.
    var byWords: [SubSequence] {
        var collected: [SubSequence] = []
        let everything = startIndex..<endIndex
        enumerateSubstrings(in: everything, options: .byWords) { _, wordRange, _, _ in
            collected.append(self[wordRange])
        }
        return collected
    }
}
// `words` splits on every non-letter character, so the apostrophe in "I'm" yields "I" and "m".
sentence.words // ["I", "want", "to", "an", "algorithm", ..., "I", "m", "making", ..., "Thanks", "in", "advance"]
let sentences = "Let there be light!"

// The exact characters that end a word: comma, full stop, exclamation mark and space.
let separators: Set<Character> = [",", ".", "!", " "]

// Empty pieces (for example the one after the trailing "!") are dropped by `split`.
let separatedCount = sentences.split(whereSeparator: separators.contains).count
print(separatedCount) // prints out 4 (drop ".count" above to keep the array of words instead)
If you want to split on a specific set of punctuation characters, you can use this code. It also suits you if you prefer to use pure Swift code only :).
You may want to try componentsSeparatedByCharactersInSet
:
// Split on any of the listed separator characters, then drop the empty
// strings that appear between adjacent separators or at either end.
// `components(separatedBy:)` is the Swift 3+ spelling of
// `componentsSeparatedByCharactersInSet`, and `CharacterSet` replaces
// `NSCharacterSet(charactersInString:)`.
let s = "Let there be light"
let c = CharacterSet(charactersIn: " ,.")
let a = s.components(separatedBy: c).filter { !$0.isEmpty }
// a == ["Let", "there", "be", "light"]
You can use regular expression
and extension
to simplify your code like this:
extension String {
    /// The number of non-overlapping matches of the pattern `\w+` in the receiver.
    ///
    /// Returns 0 if the regular expression cannot be created.
    var wordCount: Int {
        guard let wordPattern = try? NSRegularExpression(pattern: "\\w+") else {
            return 0
        }
        // NSRegularExpression works in UTF-16 code units, so the range length is the UTF-16 count.
        let entireString = NSRange(location: 0, length: utf16.count)
        return wordPattern.numberOfMatches(in: self, range: entireString)
    }
}
// Sample input: eight words separated by single spaces.
let text = "I live in iran and i love Here"
print(text.wordCount) // 8
If you are targeting recent operating systems (such as iOS 13), there is no need to reinvent the wheel by counting words yourself. You can benefit from a powerful API dedicated to this purpose. It can split text into words for many languages you may not even know, classify parts of speech, show lemmas, detect the script, and more. Try this in a playground.
import NaturalLanguage
// Tagger configured with both schemes queried below: lexical class and lemma.
let taggerLexical = NLTagger(tagSchemes: [.lexicalClass, .lemma])

// The emoji are written as Unicode scalar escapes so the sample survives re-encoding:
// man office worker with light skin tone, family (man, woman, girl, boy), flag of England.
let txt = "I'm an architector \u{1F468}\u{1F3FB}\u{200D}\u{1F4BC} by 90%. My family \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466} and I live in \u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}."
taggerLexical.string = txt

// Tally of how many tokens received each lexical tag.
let lexicalTags = NSCountedSet()
taggerLexical.enumerateTags(in: txt.startIndex..<txt.endIndex, unit: .word, scheme: .lexicalClass, options: [.omitPunctuation, .omitWhitespace]) { tag, tokenRange in
    if let tag = tag {
        lexicalTags.add(tag)
        // Look up the lemma of the same token; fall back to "" when the tagger has none.
        let lemma = taggerLexical.tag(at: tokenRange.lowerBound, unit: .word, scheme: .lemma).0?.rawValue ?? ""
        let word = String(txt[tokenRange])
        print("\(word): \(tag.rawValue)\(word == lemma ? "" : " | Lemma: \(lemma) " )")
    }
    // Keep enumerating until the end of the text.
    return true
}

// (tag, occurrences) pairs, most frequent first. Only NLTag values are ever added to the
// set, so the conditional cast never drops anything; it just avoids a force cast.
let sortedLexicalTagCount = lexicalTags.allObjects
    .compactMap { object in (object as? NLTag).map { ($0, lexicalTags.count(for: object)) } }
    .sorted(by: { $0.1 > $1.1 })
print("Total word count: \(sortedLexicalTagCount.map({ $0.1}).reduce(0, +)) \nTotal word count without grapheme clusters: \(sortedLexicalTagCount.compactMap({ $0.0 == NLTag.otherWord ? nil : $0.1 }).reduce(0, +)) \nDetails: \(sortedLexicalTagCount.map {($0.0.rawValue, $0.1)})")
// Output:
I: Pronoun
'm: Verb | Lemma: be
an: Determiner
architector: Adjective | Lemma:
👨🏻‍💼: OtherWord | Lemma:
by: Preposition
90: Number | Lemma:
My: Determiner | Lemma: I
family: Noun
👨‍👩‍👧‍👦: OtherWord | Lemma:
and: Conjunction
I: Pronoun
live: Verb
in: Preposition
🏴󠁧󠁢󠁥󠁮󠁧󠁿: OtherWord | Lemma:
Total word count: 15
Total word count without grapheme clusters: 12
Details: [("OtherWord", 3), ("Pronoun", 2), ("Determiner", 2), ("Verb", 2), ("Preposition", 2), ("Number", 1), ("Noun", 1), ("Conjunction", 1), ("Adjective", 1)]
For older Apple operating systems, using the earlier linguisticTags
API is an option.
import Foundation
// Token-type tags for the whole of `txt`. The range bounds must come from `txt` itself:
// a String.Index is only meaningful for the string it was obtained from.
let linguisticTags = txt.linguisticTags(in: txt.startIndex..., scheme: NSLinguisticTagScheme.tokenType.rawValue)
// First total counts tokens tagged `word` or `other`; second total counts `word` tokens only.
print("Total word count: \(linguisticTags.filter({ [NSLinguisticTag.word.rawValue, NSLinguisticTag.other.rawValue].contains($0) }).count)\nTotal word count without grapheme clusters: \(linguisticTags.filter({ [NSLinguisticTag.word.rawValue].contains($0) }).count)")
// Output:
Total word count: 15
Total word count without grapheme clusters: 12
Another option is to use NSRegularExpression
. It knows how to match word boundaries (\\b
), word (\\w
) and non-word (\\W
) symbols.
Using .numberOfMatches(in: , range:..)
looks better from a computational-efficiency point of view, since it returns only the number of matches rather than the matches themselves. However, this approach has issues with strings that contain emoji.
extension String {
    /// Shared `\w+` matcher, compiled once instead of on every property access.
    /// `nil` only if the pattern fails to compile.
    private static let regexMatchWords = try? NSRegularExpression(pattern: "\\w+")

    /// The raw number of `\w+` matches in the receiver.
    ///
    /// Cheaper than `wordCount` because no match objects are created, but it
    /// also counts matches whose range does not convert to a
    /// `Range<String.Index>`, which `wordCount` and `words` skip.
    var aproxWordCount: Int {
        guard let regex = String.regexMatchWords else { return 0 }
        return regex.numberOfMatches(in: self, range: NSRange(startIndex..., in: self))
    }

    /// The number of `\w+` matches whose range converts to a valid
    /// `Range<String.Index>` in the receiver.
    var wordCount: Int {
        guard let regex = String.regexMatchWords else { return 0 }
        return regex.matches(in: self, range: NSRange(startIndex..., in: self))
            .filter { Range($0.range, in: self) != nil }
            .count
    }

    /// The text of every `\w+` match whose range converts to a valid
    /// `Range<String.Index>`, in order of appearance.
    var words: [String] {
        guard let regex = String.regexMatchWords else { return [] }
        return regex.matches(in: self, range: NSRange(startIndex..., in: self)).compactMap { match in
            // Subscripting a String yields a Substring; copy it into a String for the result.
            Range(match.range, in: self).map { String(self[$0]) }
        }
    }
}
// The emoji are written as Unicode scalar escapes so the sample survives re-encoding:
// family (man, woman, girl, boy) and the flag of Greece (regional indicators G + R).
let text = "We're a family \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466} of 4. Next week we'll go to \u{1F1EC}\u{1F1F7}."
print("Arpoximate word count: \(text.aproxWordCount)\nWord count: \(text.wordCount)\nWords:\(text.words)")
// Output:
Arpoximate word count: 15
Word count: 12
Words:["We", "re", "a", "family", "of", "4", "Next", "week", "we", "ll", "go", "to"]
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With