Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to use Golang custom scanner string literals and expand memory to load entire file into memory?

I have been trying to figure out how to implement what I originally thought would be a simple program. I have a text file of quotations that are all separated by ‘$$’

I want the program to parse the quotation file and randomly select 3 quotes to display on standard output.

There are 1022 quotes in the file.

When I attempt to split the file I get this error: missing '

I can’t seem to figure out how to assign $$ with a string literal, I keep getting:
missing '

This is the custom scanner:

onDollarSign := func(data []byte, atEOF bool) (advance int, token []byte, err error) {  
    for i := 0; i < len(data); i++ { 
        //if data[i] == "$$" {              # this is what I did originally
        //if data[i:i+2] == "$$" {    # (mismatched types []byte and string)
        //if data[i:i+2] == `$$` {    # throws (mismatched types []byte and string)
        // below throws syntax error: unexpected $ AND missing '
        if data[1:i+2] == '$$' {   
            return i + 1, data[:i], nil  
        }  
    }  

The string literal works fine if I only use one $.

For some reason only 71 quotations are loaded into the quotes slice. I'm not sure how to expand it to allow all 1022 quotes to be stored in memory.

I've been having a really difficult time trying to figure out how to do this. This is what I have right now:

package main
import (  
    "bufio"  
    "fmt"  
    "log"  
    "math/rand"  
    "os"  
    "time"  
)  

// main opens the quotes file, splits it into quotes with a custom bufio
// split function, and prints three randomly chosen quotes.
//
// NOTE(review): as written this block does not compile; see the note on the
// '$$' comparison below.
func main() {  
    rand.Seed(time.Now().UnixNano()) // Seed the generator with the current time.  
    // NOTE(review): quote_file is never closed in this function.
    quote_file, err := os.Open("/Users/bryan/Dropbox/quotes_file.txt")  
    if err != nil {  
        log.Fatal(err)  
    }  
    scanner := bufio.NewScanner(quote_file)  
    // define split function  
    onDollarSign := func(data []byte, atEOF bool) (advance int, token []byte, err error) {  
        for i := 0; i < len(data); i++ {  
            // NOTE(review): '$$' is not a valid rune literal (a rune literal
            // holds exactly one character), which produces the "missing '"
            // error. data[i] is a single byte, so it can only be compared
            // with a one-character literal such as '$'.
            if data[i] == '$$' {  
                return i + 1, data[:i], nil  
            }  
        }  
        // Debug output: prints the remaining bytes when no separator was found.
        fmt.Print(data)  
        // NOTE(review): ErrFinalToken is returned whenever the current buffer
        // holds no separator, even when atEOF is false, so scanning stops
        // after this token instead of reading more input.
        return 0, data, bufio.ErrFinalToken  
    }  
    scanner.Split(onDollarSign)  
    var quotes []string  

    // I think this will scan the file and append all the parsed quotes into quotes  
    for scanner.Scan() {  
        quotes = append(quotes, scanner.Text())  

    }  
    if err := scanner.Err(); err != nil {  
        fmt.Fprintln(os.Stderr, "reading input:", err)  
    }  
    fmt.Print(len(quotes))  
    // Each pick is independent, so the same quote may be printed more than once.
    // NOTE(review): rand.Intn panics when its argument is 0, i.e. if no quotes
    // were loaded.
    fmt.Println("quote 1:", quotes[rand.Intn(len(quotes))])  
    fmt.Println("quote 2:", quotes[rand.Intn(len(quotes))])  
    fmt.Println("quote 3:", quotes[rand.Intn(len(quotes))])  
}  
like image 556
BryanWheelock Avatar asked Jan 29 '23 10:01

BryanWheelock


2 Answers

Using a scanner if you end up reading the whole file anyway is kind of convoluted. I'd read the whole file and then simply split it into the list of quotes:

package main

import (
    "bytes"
    "io/ioutil"
    "log"
    "math/rand"
    "os"
)

// main reads the whole quotes file into memory, splits it on the "$$"
// separator, and writes up to three distinct, randomly chosen quotes to
// standard output, one per line.
//
// Note: before Go 1.20 the global math/rand source is deterministic unless
// it is seeded, so the same quotes are selected on every run.
func main() {
    // Slurp file.
    contents, err := ioutil.ReadFile("/Users/bryan/Dropbox/quotes_file.txt")
    if err != nil {
        log.Fatal(err)
    }

    // Split the quotes, dropping fragments that are empty once surrounding
    // whitespace is trimmed (for example the one after a trailing "$$").
    separator := []byte("$$") // Convert string to []byte
    var quotes [][]byte
    for _, quote := range bytes.Split(contents, separator) {
        if quote = bytes.TrimSpace(quote); len(quote) > 0 {
            quotes = append(quotes, quote)
        }
    }
    if len(quotes) == 0 {
        log.Fatal("no quotes found")
    }

    // Never ask for more quotes than the file contains.
    count := 3
    if len(quotes) < count {
        count = len(quotes)
    }

    // rand.Perm returns every index exactly once, so taking its first
    // count entries selects distinct quotes.
    for _, n := range rand.Perm(len(quotes))[:count] {
        if _, err := os.Stdout.Write(quotes[n]); err != nil {
            log.Fatal(err)
        }
        if _, err := os.Stdout.Write([]byte{'\n'}); err != nil {
            log.Fatal(err)
        }
    }
}

Using a scanner would make sense if you selected three quotes before reading the file; then you can stop reading after you have reached the last quote.

like image 185
Peter Avatar answered Feb 02 '23 09:02

Peter


In Go, single quotes ' delimit a single character (a so-called "rune" — internally an int32 holding a Unicode code point), while double quotes delimit strings, which can be longer than one character: "$$".

So the parser expects a closing single quote ' right after the first dollar sign.

Here's a good article: https://blog.golang.org/strings

UPDATE: If you want to avoid converting all the data to a string, you can check it this way:

...
   // onDollarSign is a bufio.SplitFunc that returns the text between "$$"
   // separators as tokens.
   //
   // data is the unprocessed input buffered so far; atEOF reports whether
   // the reader has no more data. It returns the number of bytes to consume
   // and the next token, or (0, nil, nil) to ask the Scanner to read more
   // input before trying again.
   var onDollarSign = func(data []byte, atEOF bool) (advance int, token []byte, err error) {
        // Stop one byte early so that data[i+1] is always in range.
        for i := 0; i+1 < len(data); i++ {
            if data[i] == '$' && data[i+1] == '$' {
                // Consume both separator bytes; advancing by only one would
                // leave a stray '$' at the start of the next token.
                return i + 2, data[:i], nil
            }
        }
        if !atEOF {
            // No complete separator in the buffer yet: request more data
            // instead of ending the scan, otherwise only the quotes in the
            // first buffers are ever returned.
            return 0, nil, nil
        }
        if len(data) == 0 {
            // Input ended right after a separator; nothing left to return.
            return 0, nil, nil
        }
        // At EOF the remaining bytes form the final quote.
        return len(data), data, nil
    }
...
like image 35
Eugene Lisitsky Avatar answered Feb 02 '23 09:02

Eugene Lisitsky