I wrote a piece of code that imitates the standard grep command in Go, but its speed is
far behind grep's. Could someone give me any advice? Here is the code:
package main
import (
    "bufio"
    "fmt"
    "io"
    "log"
    "os"
    "runtime"
    "strings"
    "sync"
)
// parse_args reads the input file name and the search pattern from the
// first two command-line arguments (os.Args[1] and os.Args[2]).
// When fewer than two arguments are supplied it prints a usage message
// through log.Fatal, which terminates the program.
func parse_args() (file, pat string) {
    args := os.Args
    if len(args) >= 3 {
        return args[1], args[2]
    }
    log.Fatal("usage: gorep2 <file_name> <pattern>")
    return
}
// readFile streams the contents of file to the channel to, one line per
// message, and closes the channel once reading has finished.
//
// Every line is sent with its trailing '\n' intact. A final line that is not
// newline-terminated is delivered as well. If the file cannot be opened the
// program exits through log.Fatal; a read error part-way through the file is
// logged and ends the stream early.
func readFile(file string, to chan<- string) {
    f, err := os.Open(file)
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()
    // Always close the channel so the consumer's range loop terminates.
    defer close(to)

    r := bufio.NewReader(f)
    for {
        line, err := r.ReadBytes('\n')
        // ReadBytes returns the bytes read so far together with the error,
        // so an unterminated last line arrives alongside io.EOF and must
        // be forwarded before the error is examined.
        if len(line) > 0 {
            to <- string(line)
        }
        if err != nil {
            if err != io.EOF {
                log.Printf("reading %s: %v", file, err)
            }
            return
        }
    }
}
// grepLine receives lines from the channel from and sends true on result
// once for every line that contains pat. It returns after from has been
// closed and drained, and closes result before returning.
//
// Matching is spread over a fixed pool of runtime.GOMAXPROCS(0) workers.
// Starting a goroutine per line costs far more than the substring search
// it performs and leaves the number of live goroutines unbounded.
func grepLine(pat string, from <-chan string, result chan<- bool) {
    workers := runtime.GOMAXPROCS(0)

    var wg sync.WaitGroup
    wg.Add(workers)
    for i := 0; i < workers; i++ {
        go func() {
            defer wg.Done()
            for line := range from {
                if strings.Contains(line, pat) {
                    result <- true
                }
            }
        }()
    }

    // Only close result after every worker has finished sending.
    wg.Wait()
    close(result)
}
// main wires the pipeline together: readFile feeds lines into one channel,
// grepLine turns matching lines into true values on a second channel, and
// main counts those values and prints the total.
func main() {
    path, needle := parse_args()

    lines := make(chan string, 10)
    hits := make(chan bool, 10)
    go readFile(path, lines)
    go grepLine(needle, lines, hits)

    // The loop ends when grepLine closes the hits channel.
    var count uint
    for hit := range hits {
        if hit {
            count++
        }
    }
    fmt.Printf("Total %d\n", count)
}
The time in Go:
>>> time gogrep /var/log/task.log DEBUG 
Total 21089
real    0m0.156s
user    0m0.156s
sys 0m0.015s
The time in grep:
>>> time grep DEBUG /var/log/task.log | wc -l
21089
real    0m0.069s
user    0m0.046s
sys 0m0.064s
For an easily reproducible benchmark, I counted the number of occurrences of the text "and" in Shakespeare.
gogrep:
$ go build gogrep.go && time ./gogrep /home/peter/shakespeare.txt and
Total 21851
real 0m0.613s
user 0m0.651s
sys 0m0.068s
grep:
$ time grep and /home/peter/shakespeare.txt | wc -l
21851
real 0m0.108s
user 0m0.107s
sys 0m0.014s
petergrep:
$ go build petergrep.go && time ./petergrep /home/peter/shakespeare.txt and
Total 21851
real 0m0.098s
user 0m0.092s
sys 0m0.008s
petergrep is written in Go. It's fast.
package main
import (
    "bufio"
    "bytes"
    "fmt"
    "log"
    "os"
)
// parse_args reads the input file name and the search pattern from the
// first two command-line arguments (os.Args[1] and os.Args[2]).
// When fewer than two arguments are supplied it prints a usage message
// through log.Fatal, which terminates the program.
func parse_args() (file, pat string) {
    args := os.Args
    if len(args) >= 3 {
        return args[1], args[2]
    }
    log.Fatal("usage: petergrep <file_name> <pattern>")
    return
}
// grepFile returns the number of lines in file that contain pat.
//
// The file is read line by line with a bufio.Scanner and each line is
// searched as raw bytes, so no per-line string is allocated. If the file
// cannot be opened the program exits through log.Fatal. A scan error is
// written to standard error and the count gathered so far is returned.
func grepFile(file string, pat []byte) int64 {
    f, err := os.Open(file)
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    // A Scanner rejects tokens longer than bufio.MaxScanTokenSize (64 KiB)
    // by default, which would abort the scan at the first long line and
    // under-count every match after it. Raise the ceiling; the buffer only
    // grows as far as the longest line actually seen.
    const maxLineSize = 64 << 20
    scanner := bufio.NewScanner(f)
    scanner.Buffer(make([]byte, 0, 64<<10), maxLineSize)

    patCount := int64(0)
    for scanner.Scan() {
        if bytes.Contains(scanner.Bytes(), pat) {
            patCount++
        }
    }
    if err := scanner.Err(); err != nil {
        fmt.Fprintln(os.Stderr, err)
    }
    return patCount
}
// main reads the file name and pattern from the command line, counts the
// matching lines with grepFile, and prints the total.
func main() {
    path, needle := parse_args()
    fmt.Printf("Total %d\n", grepFile(path, []byte(needle)))
}
Data: Shakespeare: pg100.txt
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With