Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

golang sync.WaitGroup never completes

Tags:

go

I have the below code that fetches a list of URL's and then conditionally downloads a file and saves it to the filesystem. The files are fetched concurrently and the main goroutine waits for all the files to be fetched. But, the program never exits (and there are no errors) after completing all the requests.

What I think is happening is that somehow the amount of go routines in the WaitGroup is either incremented by too many to begin with (via Add) or not decremented by enough (a Done call is not happening).

Is there something I am obviously doing wrong? How would I inspect how many go routines are presently in the WaitGroup so I can better debug what's happening?

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "sync"
)

func main() {
    links := parseLinks()

    var wg sync.WaitGroup

    for _, url := range links {
        if isExcelDocument(url) {
            wg.Add(1)
            go downloadFromURL(url, wg)
        } else {
            fmt.Printf("Skipping: %v \n", url)
        }
    }
    wg.Wait()
}

func downloadFromURL(url string, wg sync.WaitGroup) error {
    tokens := strings.Split(url, "/")
    fileName := tokens[len(tokens)-1]
    fmt.Printf("Downloading %v to %v \n", url, fileName)

    content, err := os.Create("temp_docs/" + fileName)
    if err != nil {
        fmt.Printf("Error while creating %v because of %v", fileName, err)
        return err
    }

    resp, err := http.Get(url)
    if err != nil {
        fmt.Printf("Could not fetch %v because %v", url, err)
        return err
    }
    defer resp.Body.Close()

    _, err = io.Copy(content, resp.Body)
    if err != nil {
        fmt.Printf("Error while saving %v from %v", fileName, url)
        return err
    }

    fmt.Printf("Download complete for %v \n", fileName)

    defer wg.Done()
    return nil
}

func isExcelDocument(url string) bool {
    return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}

func parseLinks() []string {
    linksData, err := ioutil.ReadFile("links.txt")
    if err != nil {
        fmt.Printf("Trouble reading file: %v", err)
    }

    links := strings.Split(string(linksData), ", ")

    return links
}
like image 851
srt32 Avatar asked Jan 11 '15 23:01

srt32


3 Answers

There are two problems with this code. First, you have to pass a pointer to the WaitGroup to downloadFromURL(), otherwise the object will be copied and Done() will not be visible in main().

See:

func main() {
    ...
    go downloadFromURL(url, &wg)
    ...
}

Second, defer wg.Done() should be one of the first statements in downloadFromURL(), otherwise if you return from the function before that statement, it won't get "registered" and won't get called.

func downloadFromURL(url string, wg *sync.WaitGroup) error {
    defer wg.Done()
    ...
}
like image 100
Bartosz Avatar answered Nov 05 '22 21:11

Bartosz


Arguments in Go are always passed by value. Use a pointer when an argument may be modified. Also, make sure that you always execute wg.Done().For example,

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "sync"
)

func main() {
    links := parseLinks()

    wg := new(sync.WaitGroup)

    for _, url := range links {
        if isExcelDocument(url) {
            wg.Add(1)
            go downloadFromURL(url, wg)
        } else {
            fmt.Printf("Skipping: %v \n", url)
        }
    }
    wg.Wait()
}

func downloadFromURL(url string, wg *sync.WaitGroup) error {
    defer wg.Done()
    tokens := strings.Split(url, "/")
    fileName := tokens[len(tokens)-1]
    fmt.Printf("Downloading %v to %v \n", url, fileName)

    content, err := os.Create("temp_docs/" + fileName)
    if err != nil {
        fmt.Printf("Error while creating %v because of %v", fileName, err)
        return err
    }

    resp, err := http.Get(url)
    if err != nil {
        fmt.Printf("Could not fetch %v because %v", url, err)
        return err
    }
    defer resp.Body.Close()

    _, err = io.Copy(content, resp.Body)
    if err != nil {
        fmt.Printf("Error while saving %v from %v", fileName, url)
        return err
    }

    fmt.Printf("Download complete for %v \n", fileName)

    return nil
}

func isExcelDocument(url string) bool {
    return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}

func parseLinks() []string {
    linksData, err := ioutil.ReadFile("links.txt")
    if err != nil {
        fmt.Printf("Trouble reading file: %v", err)
    }

    links := strings.Split(string(linksData), ", ")

    return links
}
like image 42
peterSO Avatar answered Nov 05 '22 22:11

peterSO


As @Bartosz mentioned, you will need to pass a reference to your WaitGroup object. He did a great job discussing the importance of defer ws.Done()

I like WaitGroup's simplicity. However, I do not like that we need to pass the reference to the goroutine because that would mean that the concurrency logic would be mixed with your business logic.

So I came up with this generic function to solve this problem for me:

// Parallelize parallelizes the function calls
func Parallelize(functions ...func()) {
    var waitGroup sync.WaitGroup
    waitGroup.Add(len(functions))

    defer waitGroup.Wait()

    for _, function := range functions {
        go func(copy func()) {
            defer waitGroup.Done()
            copy()
        }(function)
    }
}

So your example could be solved this way:

func main() {
    links := parseLinks()

    functions := []func(){}
    for _, url := range links {
        if isExcelDocument(url) {
            function := func(url string){
                return func() { downloadFromURL(url) }
            }(url)

            functions = append(functions, function)
        } else {
            fmt.Printf("Skipping: %v \n", url)
        }
    }

    Parallelize(functions...)
}

func downloadFromURL(url string) {
    ...
}

If you would like to use it, you can find it here https://github.com/shomali11/util

like image 1
Raed Shomali Avatar answered Nov 05 '22 21:11

Raed Shomali