package ratings import ( "bufio" "bytes" "compress/gzip" "fmt" "io/ioutil" "net/http" "strconv" "strings" "time" "github.com/sirupsen/logrus" "git.quimbo.fr/odwrtw/canape/backend/models" "git.quimbo.fr/odwrtw/canape/backend/web" ) const imdbRatingsURL = "https://datasets.imdbws.com/title.ratings.tsv.gz" // Refresh will refresh the ImdbRatings func Refresh(env *web.Env) error { log := env.Log.WithFields(logrus.Fields{ "function": "imdbRating.Refresh", }) // Download the data var httpClient = &http.Client{ Timeout: time.Second * 10, } resp, err := httpClient.Get(imdbRatingsURL) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return fmt.Errorf("got HTTP error %s", resp.Status) } // Read all the file (~5MB) in memory // We do that because the ~1 000 000 upserts take too long, and the IMDB // server will cut our connection after ~2h content, err := ioutil.ReadAll(resp.Body) if err != nil { return err } readerContent := bytes.NewReader(content) // Unzip it r, err := gzip.NewReader(readerContent) if err != nil { return err } tx, err := env.Database.Beginx() if err != nil { return err } // Read it scanner := bufio.NewScanner(r) for scanner.Scan() { elmts := strings.Split(scanner.Text(), "\t") if len(elmts) != 3 { log.Debugf("got %d elements weird", len(elmts)) continue } rating, err := strconv.ParseFloat(elmts[1], 64) if err != nil { log.Debugf("failed to parse rating %s", elmts[1]) continue } numVote, err := strconv.ParseInt(elmts[2], 10, 64) if err != nil { log.Debugf("failed to parse numVote %q", elmts[2]) continue } videoRating := &models.ImdbRating{ ImdbID: elmts[0], Rating: float32(rating), Votes: int(numVote), } err = models.TxUpsertImdbRating(tx, videoRating) if err != nil { log.WithFields(logrus.Fields{ "error": err, }).Error("got error while upserting rating, rollback!") if rollbackErr := tx.Rollback(); err != nil { log.WithFields(logrus.Fields{ "error": rollbackErr, }).Error("unable to rollack") } return err } } return tx.Commit() }