diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c2f1673..12e21086 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: OUT="$(gofmt -l -d ./)"; test -z "$OUT" || (echo "$OUT" && return 1) golint -set_exit_status go vet -v ./... - go test -race -v -coverprofile=coverage.txt -covermode=atomic ./ + go test -race -v -coverprofile=coverage.txt -covermode=atomic ./... build: name: Build ${{matrix.go}} diff --git a/colly_test.go b/colly_test.go index 91f1441e..4358b63e 100644 --- a/colly_test.go +++ b/colly_test.go @@ -918,6 +918,19 @@ func TestRedirect(t *testing.T) { c.Visit(ts.URL + "/redirect") } +func TestIssue594(t *testing.T) { + // This is a regression test for a data race bug. There's no + // assertions because it's meant to be used with race detector + ts := newTestServer() + defer ts.Close() + + c := NewCollector() + // if timeout is set, this bug is not triggered + c.SetClient(&http.Client{Timeout: 0 * time.Second}) + + c.Visit(ts.URL) +} + func TestRedirectWithDisallowedURLs(t *testing.T) { ts := newTestServer() defer ts.Close() diff --git a/http_backend.go b/http_backend.go index 0b201d23..f48df628 100644 --- a/http_backend.go +++ b/http_backend.go @@ -186,10 +186,12 @@ func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc c return nil, err } defer res.Body.Close() + + finalRequest := request if res.Request != nil { - *request = *res.Request + finalRequest = res.Request } - if !checkHeadersFunc(request, res.StatusCode, res.Header) { + if !checkHeadersFunc(finalRequest, res.StatusCode, res.Header) { // closing res.Body (see defer above) without reading it aborts // the download return nil, ErrAbortedAfterHeaders @@ -200,7 +202,7 @@ func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc c bodyReader = io.LimitReader(bodyReader, int64(bodySize)) } contentEncoding := strings.ToLower(res.Header.Get("Content-Encoding")) - if !res.Uncompressed && (strings.Contains(contentEncoding, "gzip") || (contentEncoding == "" && strings.Contains(strings.ToLower(res.Header.Get("Content-Type")), "gzip")) || strings.HasSuffix(strings.ToLower(request.URL.Path), ".xml.gz")) { + if !res.Uncompressed && (strings.Contains(contentEncoding, "gzip") || (contentEncoding == "" && strings.Contains(strings.ToLower(res.Header.Get("Content-Type")), "gzip")) || strings.HasSuffix(strings.ToLower(finalRequest.URL.Path), ".xml.gz")) { bodyReader, err = gzip.NewReader(bodyReader) if err != nil { return nil, err diff --git a/queue/queue_test.go b/queue/queue_test.go index e239c22a..1d10f837 100644 --- a/queue/queue_test.go +++ b/queue/queue_test.go @@ -4,6 +4,7 @@ import ( "math/rand" "net/http" "net/http/httptest" + "sync" "sync/atomic" "testing" "time" @@ -16,6 +17,8 @@ func TestQueue(t *testing.T) { defer server.Close() rng := rand.New(rand.NewSource(12387123712321232)) + var rngMu sync.Mutex + var ( items uint32 requests uint32 @@ -28,7 +31,9 @@ func TestQueue(t *testing.T) { panic(err) } put := func() { + rngMu.Lock() t := time.Duration(rng.Intn(50)) * time.Microsecond + rngMu.Unlock() url := server.URL + "/delay?t=" + t.String() atomic.AddUint32(&items, 1) q.AddURL(url) @@ -49,7 +54,9 @@ func TestQueue(t *testing.T) { } else { atomic.AddUint32(&failure, 1) } + rngMu.Lock() toss := rng.Intn(2) == 0 + rngMu.Unlock() if toss { put() }