Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Splitting a string at Space, except inside quotation marks

Tags:

string

go

I was wondering if there is any way I could easily split a string at spaces, except when the space is inside quotation marks?

For example, changing

Foo bar random "letters lol" stuff

into

Foo, bar, random, "letters lol", stuff

like image 366
MOBlox Avatar asked Nov 25 '17 19:11

MOBlox


3 Answers

  1. Using strings.FieldsFunc try this:
package main

import (
    "fmt"
    "strings"
)

// main splits a sample sentence on the spaces that lie outside double quotes
// and prints the resulting pieces joined by ", ".
func main() {
    const input = `Foo bar random "letters lol" stuff`

    // insideQuotes flips on every '"', so a space between a pair of quotes
    // is not reported as a separator.
    // NOTE(review): the strings.FieldsFunc documentation makes no guarantee
    // about the order in which the predicate is called and assumes it returns
    // the same value for a given rune; this toggle depends on in-order,
    // once-per-rune calls.
    insideQuotes := false
    isSeparator := func(c rune) bool {
        switch c {
        case '"':
            insideQuotes = !insideQuotes
            return false
        case ' ':
            return !insideQuotes
        default:
            return false
        }
    }

    fields := strings.FieldsFunc(input, isSeparator)
    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, "letters lol", stuff
}

  1. Using a simple strings.Builder while ranging over the string, and keeping or dropping the " characters as you prefer, try this:
package main

import (
    "fmt"
    "strings"
)

// main walks the sample sentence rune by rune, accumulating the current
// field in a strings.Builder and cutting a new field at every space that is
// not enclosed in double quotes, then prints the fields joined by ", ".
func main() {
    const input = `Foo bar random "letters lol" stuff`

    var fields []string
    var current strings.Builder
    inQuotes := false
    for _, c := range input {
        switch {
        case c == '"':
            inQuotes = !inQuotes
            current.WriteRune(c) // remove this line to strip the '"' characters
        case c == ' ' && !inQuotes:
            // An unquoted space ends the field collected so far.
            fields = append(fields, current.String())
            current.Reset()
        default:
            current.WriteRune(c)
        }
    }
    // Flush whatever was collected after the last separator.
    if current.Len() != 0 {
        fields = append(fields, current.String())
    }

    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, "letters lol", stuff
    // without the '"' characters:          // Foo, bar, random, letters lol, stuff
}


  1. Using scanner.Scanner, try this:
package main

import (
    "fmt"
    "strings"
    "text/scanner"
)

// main tokenizes the sample sentence with text/scanner, collects the text
// of every token up to EOF, and prints the tokens joined by ", ".
func main() {
    var sc scanner.Scanner
    sc.Init(strings.NewReader(`Foo bar random "letters lol" stuff`))

    tokens := make([]string, 0, 5)
    for sc.Scan() != scanner.EOF {
        tokens = append(tokens, sc.TokenText())
    }

    fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}

  1. Using csv.NewReader which removes " itself, try this:
package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "strings"
)

// main parses the sample sentence as a single CSV record whose delimiter is
// a space, so a double-quoted section stays one field, and prints the fields
// joined by ", ". A read error terminates the program via log.Fatal.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' ' // split on spaces instead of commas

    fields, err := reader.Read()
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, letters lol, stuff
}

  1. Using regexp, try this:
package main

import (
    "fmt"
    "regexp"
    "strings"
)

// main extracts every token of the sample sentence with a regular
// expression and prints the tokens joined by ", ". A token is either a run
// of characters that are neither whitespace nor '"', or a double-quoted
// section including its quotes.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    tokenRe := regexp.MustCompile(`[^\s"]+|"([^"]*)"`)
    tokens := tokenRe.FindAllString(input, -1)

    fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}
like image 77
wasmup Avatar answered Oct 22 '22 12:10

wasmup


Think about it. You have a string in comma separated values (CSV) file format, RFC4180, except that your separator, outside quote pairs, is a space (instead of a comma). For example,

package main

import (
    "encoding/csv"
    "fmt"
    "strings"
)

// main prints the sample sentence in quoted form, reads it as one CSV record
// delimited by spaces, and prints each resulting field in quoted form on its
// own line. If the record cannot be read, the error is printed and main
// returns without listing any fields.
func main() {
    const input = `Foo bar random "letters lol" stuff`
    fmt.Printf("String:\n%q\n", input)

    // Treat the sentence as CSV with a space as the field delimiter.
    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' '
    fields, err := reader.Read()
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Printf("\nFields:\n")
    for i := range fields {
        fmt.Printf("%q\n", fields[i])
    }
}

Playground: https://play.golang.org/p/Ed4IV97L7H

Output:

String:
"Foo bar random \"letters lol\" stuff"

Fields:
"Foo"
"bar"
"random"
"letters lol"
"stuff"
like image 44
peterSO Avatar answered Oct 22 '22 13:10

peterSO


You could use regex

This (go playground) will cover all use cases for multiple words inside quotes and multiple quoted entries in your array:

package main

import (
    "fmt"
    "regexp"
)

// quotedFieldRe matches one field: a run of characters that are neither
// whitespace nor quote characters, a double-quoted section, or a
// single-quoted section. Quoted sections are matched including their quotes.
// The single-quote alternative requires the closing quote, mirroring the
// double-quote alternative; without it a closing "'" would start a bogus
// new match that swallows the text that follows.
var quotedFieldRe = regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)

// splitQuoted returns the fields of s in order of appearance. Whitespace
// separates fields except inside a pair of matching single or double quotes.
// It returns nil when s contains no field.
func splitQuoted(s string) []string {
    return quotedFieldRe.FindAllString(s, -1)
}

// main splits a sample sentence containing several quoted sections and
// prints the resulting slice.
func main() {
    s := `Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"`
    arr := splitQuoted(s)
    fmt.Println("your array: ", arr)
}

Output will be:

your array:  [Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"]

If you want to learn more about regex here is a great SO answer with super handy resources at the end - Learning Regular Expressions

Hope this helps

like image 2
Blue Bot Avatar answered Oct 22 '22 13:10

Blue Bot