Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Extract links from string optimization

Tags:

string

ios

swift

I get data (an HTML string) from a website and want to extract all the links from it. I wrote a function that works, but it is very slow.

Can you help me optimize it? Which standard functions can I use? Function logic: find the "http://" string in the text, then read the string character by character until a "\"" is reached.

extension String {

    /// Returns the character at the given character offset from the start of the string.
    ///
    /// - Parameter i: Zero-based offset counted in `Character`s; must be less than `count`,
    ///   otherwise the access traps.
    /// - Complexity: O(`i`). `String` is not random-access, so every call walks from
    ///   `startIndex`; calling this inside a loop over the whole string is quadratic.
    subscript (i: Int) -> Character {
        return self[index(startIndex, offsetBy: i)]
    }

    /// Returns the character at the given character offset as a one-character `String`.
    ///
    /// - Parameter i: Zero-based offset counted in `Character`s; must be less than `count`.
    subscript (i: Int) -> String {
        return String(self[i] as Character)
    }

    /// Returns the substring covering the given half-open range of character offsets.
    ///
    /// - Parameter r: Character offsets; both bounds must lie within `0...count`.
    subscript (r: Range<Int>) -> String {
        let lower = index(startIndex, offsetBy: r.lowerBound)
        // Continue from `lower` instead of walking from the start a second time.
        let upper = index(lower, offsetBy: r.upperBound - r.lowerBound)
        return String(self[lower..<upper])
    }
}



/// Extracts every `http://` link from `text`.
///
/// A link starts at an occurrence of the literal prefix `http://` and runs up to, but not
/// including, the next double quote (`"`), which is how links appear inside quoted HTML
/// attributes. Scanning resumes after that closing quote, so a second `http://` inside an
/// already extracted link is not reported separately.
///
/// - Parameter text: The text (typically HTML) to scan.
/// - Returns: The links in the order they appear, each including its `http://` prefix.
///   A link with no closing quote runs to the end of `text`. Returns an empty array when
///   nothing matches.
/// - Complexity: O(n) in the length of `text`; the text is walked once with `String.Index`
///   values instead of integer offsets.
func extractAllLinks(text: String) -> [String] {
    let prefix = "http://"
    let terminator: Character = "\""
    var links: [String] = []
    var cursor = text.startIndex

    // Jump straight to the next "h": only those positions can start the prefix.
    while let candidate = text[cursor...].firstIndex(of: "h") {
        let tail = text[candidate...]

        // Require the complete prefix; a partial match such as "http:/x" is not a link.
        guard tail.starts(with: prefix) else {
            cursor = text.index(after: candidate)
            continue
        }

        // The prefix contains no quote, so the first quote in `tail` ends the link.
        let linkEnd = tail.firstIndex(of: terminator) ?? text.endIndex
        links.append(String(text[candidate..<linkEnd]))

        // An unterminated link consumed the rest of the text; nothing is left to scan.
        guard linkEnd < text.endIndex else { break }
        cursor = text.index(after: linkEnd)
    }

    return links
}
like image 620
Vasily Bodnarchuk Avatar asked Apr 07 '15 16:04

Vasily Bodnarchuk


4 Answers

As AdamPro13 said above, you can easily get all the URLs using NSDataDetector. See the following code:

// Sample input: two links, the first one followed by a sentence-ending period.
let text = "http://www.google.com. http://www.bla.com"
let types: NSTextCheckingType = .Link
var error : NSError?

let detector = NSDataDetector(types: types.rawValue, error: &error)
// NSRange is measured in UTF-16 code units, so take the length from NSString instead of
// counting Characters. A Character count is too short for text containing emoji or other
// multi-unit characters, and links near the end would not be scanned.
var matches = detector!.matchesInString(text, options: nil, range: NSMakeRange(0, (text as NSString).length))

// Print the URL of every detected link.
for match in matches {
   println(match.URL!)
}

It outputs :

http://www.google.com
http://www.bla.com

Updated to Swift 2.0

// Sample input: two links, the first one followed by a sentence-ending period.
let text = "http://www.google.com. http://www.bla.com"
let types: NSTextCheckingType = .Link

// `try?` turns a failure to create the detector into nil.
let detector = try? NSDataDetector(types: types.rawValue)

guard let detect = detector else {
   return
}

// NSRange is measured in UTF-16 code units, so use the UTF-16 length rather than the
// Character count. The Character count is too short for text containing emoji or other
// multi-unit characters, and links near the end would not be scanned.
let matches = detect.matchesInString(text, options: .ReportCompletion, range: NSMakeRange(0, text.utf16.count))

// Print the URL of every detected link, skipping any result that carries no URL.
for match in matches {
    if let url = match.URL {
        print(url)
    }
}

Note that the guard statement above must be inside a function (or inside a loop, using break or continue instead of return), because its else branch has to exit the enclosing scope.

I hope this helps.

like image 122
Victor Sigler Avatar answered Oct 28 '22 16:10

Victor Sigler


And that is the answer for Swift 5.0

// Sample input: two links, the first one followed by a sentence-ending period.
let text = "http://www.google.com. http://www.bla.com"

/// Finds every link in `text` using `NSDataDetector`.
///
/// - Parameter text: The text to scan.
/// - Returns: The URLs of all detected links, in match order. Returns an empty array when
///   no link is found or when the detector cannot be created (the error is printed with
///   `debugPrint`).
func checkForUrls(text: String) -> [URL] {
    let types: NSTextCheckingResult.CheckingType = .link

    do {
        let detector = try NSDataDetector(types: types.rawValue)

        // NSRange is measured in UTF-16 code units, not Characters. Building it from the
        // string's index range covers the whole text even when it contains emoji or other
        // multi-unit characters; a Character count would stop short and miss trailing links.
        let fullRange = NSRange(text.startIndex..<text.endIndex, in: text)
        let matches = detector.matches(in: text, options: [], range: fullRange)

        return matches.compactMap { $0.url }
    } catch {
        // Deliberately best-effort: report the failure and fall back to "no links".
        debugPrint(error.localizedDescription)
        return []
    }
}

// Run the detector on the sample text; the returned [URL] is not used here.
checkForUrls(text: text)
like image 21
Matthias Nagel Avatar answered Oct 28 '22 14:10

Matthias Nagel


Very helpful thread! Here's an example that worked in Swift 2 (it relies on do/try/catch and text.characters, which Swift 1.2 does not have), based on Victor Sigler's answer.

    // extract first link (if available) and open it!
    let text = "How technology is changing our relationships to each other: http://t.ted.com/mzRtRfX"
    let types: NSTextCheckingType = .Link

    do {
        let detector = try NSDataDetector(types: types.rawValue)
        // NSRange is measured in UTF-16 code units, so use the UTF-16 length rather than the
        // Character count; otherwise a link after emoji or other multi-unit characters can
        // fall outside the scanned range.
        let matches = detector.matchesInString(text, options: .ReportCompletion, range: NSMakeRange(0, text.utf16.count))
        // Finding no link is not an error: the block below is simply skipped.
        if let url = matches.first?.URL {
            print("Opening URL: \(url)")
            UIApplication.sharedApplication().openURL(url)
        }

    } catch {
        // Reached only when creating the detector throws.
        print ("error in findAndOpenURL detector")
    }
like image 29
xke Avatar answered Oct 28 '22 14:10

xke


Details

  • Swift 5.2, Xcode 11.4 (11E146)

Solution

// MARK: DataDetector

/// Thin wrapper around `NSDataDetector` that returns the matched text as Swift strings.
class DataDetector {

    /// Runs an `NSDataDetector` for `type` over `string` and passes each matched substring
    /// to `iterationClosure`, in the order the detector reports the matches.
    ///
    /// - Parameters:
    ///   - type: The kind of data to detect (for example `.link`).
    ///   - string: The text to scan.
    ///   - iterationClosure: Called once per matched range with the matched text. Return
    ///     `true` to continue or `false` to stop the whole search.
    ///
    /// Does nothing when the detector cannot be created.
    private class func _find(all type: NSTextCheckingResult.CheckingType,
                             in string: String, iterationClosure: (String) -> Bool) {
        guard let detector = try? NSDataDetector(types: type.rawValue) else { return }
        let fullRange = NSRange(string.startIndex ..< string.endIndex, in: string)
        let matches = detector.matches(in: string, options: [], range: fullRange)
        loop: for match in matches {
            for i in 0 ..< match.numberOfRanges {
                // NSRange offsets are UTF-16 code units, while String index offsets count
                // Characters. Convert through Range(_:in:) so text containing emoji or
                // combining sequences maps to the right substring. The conversion returns
                // nil for ranges that do not map into the string (for example a location
                // of NSNotFound), and those are skipped.
                guard let range = Range(match.range(at: i), in: string) else { continue }
                guard iterationClosure(String(string[range])) else { break loop }
            }
        }
    }

    /// Returns the text of every match of `type` found in `string`, in match order.
    ///
    /// - Returns: The matched substrings; empty when nothing is found.
    class func find(all type: NSTextCheckingResult.CheckingType, in string: String) -> [String] {
        var results = [String]()
        _find(all: type, in: string) {
            results.append($0)
            return true
        }
        return results
    }

    /// Returns the text of the first match of `type` found in `string`.
    ///
    /// - Returns: The first matched substring, or `nil` when nothing is found.
    class func first(type: NSTextCheckingResult.CheckingType, in string: String) -> String? {
        var result: String?
        _find(all: type, in: string) {
            result = $0
            // Stop after the first match.
            return false
        }
        return result
    }
}

// MARK: String extension

extension String {
    /// The text of every link `DataDetector` finds in this string, in match order.
    var detectedLinks: [String] {
        return DataDetector.find(all: .link, in: self)
    }

    /// The text of the first link `DataDetector` finds in this string, or `nil` if there is none.
    var detectedFirstLink: String? {
        return DataDetector.first(type: .link, in: self)
    }

    /// The detected links converted with `URL(string:)`; links that do not convert are dropped.
    var detectedURLs: [URL] {
        var urls: [URL] = []
        for link in detectedLinks {
            if let url = URL(string: link) {
                urls.append(url)
            }
        }
        return urls
    }

    /// The first detected link converted with `URL(string:)`, or `nil` if there is no link
    /// or it does not convert.
    var detectedFirstURL: URL? {
        if let link = detectedFirstLink {
            return URL(string: link)
        }
        return nil
    }
}

Usage

// Sample paragraph containing three link-like fragments: "apple.com/", "http://gooogle.com." and "yahoo.com".
let text = """
Lorm Ipsum is simply dummy text of the printing and typesetting industry. apple.com/ Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. http://gooogle.com. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. yahoo.com It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
"""

// Every detected link, as the matched text.
print(text.detectedLinks)
// The first detected link; the value is optional, so it prints wrapped in Optional(...).
print(text.detectedFirstLink)
// Every detected link converted to a URL.
print(text.detectedURLs)
// The first detected link converted to a URL; also optional.
print(text.detectedFirstURL)

Console output

["apple.com/", "http://gooogle.com", "yahoo.com"]
Optional("apple.com/")
[apple.com/, http://gooogle.com, yahoo.com]
Optional(apple.com/)
like image 45
Vasily Bodnarchuk Avatar answered Oct 28 '22 16:10

Vasily Bodnarchuk