Fix refresh of imdb ratings

The connection to the IMDB server used to be cut because the
~1 million upserts took too long.
We now download the whole file into memory before beginning the upserts.
This commit is contained in:
Lucas BEE 2019-04-22 20:31:14 +02:00
parent 1ecdfc2ba5
commit 49a0dbfea9

View File

@ -2,7 +2,9 @@ package ratings
import ( import (
"bufio" "bufio"
"bytes"
"compress/gzip" "compress/gzip"
"io/ioutil"
"net/http" "net/http"
"strconv" "strconv"
"strings" "strings"
@ -32,8 +34,17 @@ func Refresh(env *web.Env) error {
} }
defer resp.Body.Close() defer resp.Body.Close()
// Read all the file (~5MB) in memory
// We do that because the ~1 000 000 upserts take too long, and the IMDB
// server will cut our connection after ~2h
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
readerContent := bytes.NewReader(content)
// Unzip it // Unzip it
r, err := gzip.NewReader(resp.Body) r, err := gzip.NewReader(readerContent)
if err != nil { if err != nil {
return err return err
} }
@ -43,18 +54,18 @@ func Refresh(env *web.Env) error {
for scanner.Scan() { for scanner.Scan() {
elmts := strings.Split(scanner.Text(), "\t") elmts := strings.Split(scanner.Text(), "\t")
if len(elmts) != 3 { if len(elmts) != 3 {
log.Debugf("got %d elements weird\n", len(elmts)) log.Debugf("got %d elements weird", len(elmts))
continue continue
} }
rating, err := strconv.ParseFloat(elmts[1], 64) rating, err := strconv.ParseFloat(elmts[1], 64)
if err != nil { if err != nil {
log.Debugf("failed to parse rating %s\n", elmts[1]) log.Debugf("failed to parse rating %s", elmts[1])
continue continue
} }
numVote, err := strconv.ParseInt(elmts[2], 10, 64) numVote, err := strconv.ParseInt(elmts[2], 10, 64)
if err != nil { if err != nil {
log.Debugf("failed to parse numVote %q\n", elmts[2]) log.Debugf("failed to parse numVote %q", elmts[2])
continue continue
} }