Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add image scraping support #370

Merged
merged 12 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
go.mod text eol=lf
go.sum text eol=lf
go.sum text eol=lf
ui/v2.5/** -text
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/h2non/filetype v1.0.8
// this is required for generate
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a
github.com/jmoiron/sqlx v1.2.0
github.com/mattn/go-sqlite3 v1.10.0
github.com/rs/cors v1.6.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ=
github.com/jackc/pgx v3.2.0+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I=
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a h1:zPPuIq2jAWWPTrGt70eK/BSch+gFAGrNzecsoENgu2o=
github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a/go.mod h1:yL958EeXv8Ylng6IfnvG4oflryUi3vgA3xPs9hmII1s=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmoiron/sqlx v0.0.0-20180614180643-0dae4fefe7c0/go.mod h1:IiEW3SEiiErVyFdH8NTuWjSifiEQKUoyK3LNqr2kCHU=
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
Expand Down
2 changes: 2 additions & 0 deletions graphql/documents/data/scrapers.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ fragment ScrapedPerformerData on ScrapedPerformer {
tattoos
piercings
aliases
image
}

fragment ScrapedScenePerformerData on ScrapedScenePerformer {
Expand Down Expand Up @@ -75,6 +76,7 @@ fragment ScrapedSceneData on ScrapedScene {
details
url
date
image

file {
size
Expand Down
5 changes: 5 additions & 0 deletions graphql/schema/types/scraped-performer.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type ScrapedPerformer {
tattoos: String
piercings: String
aliases: String

"""This should be base64 encoded"""
image: String
}

input ScrapedPerformerInput {
Expand All @@ -33,4 +36,6 @@ input ScrapedPerformerInput {
tattoos: String
piercings: String
aliases: String

# not including image for the input
}
3 changes: 3 additions & 0 deletions graphql/schema/types/scraper.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ type ScrapedScene {
url: String
date: String

"""This should be base64 encoded"""
image: String

file: SceneFileType # Resolver

studio: ScrapedSceneStudio
Expand Down
34 changes: 34 additions & 0 deletions pkg/models/model_scraped_item.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,54 @@ type ScrapedPerformer struct {
Tattoos *string `graphql:"tattoos" json:"tattoos"`
Piercings *string `graphql:"piercings" json:"piercings"`
Aliases *string `graphql:"aliases" json:"aliases"`
Image *string `graphql:"image" json:"image"`
}

// ScrapedPerformerStash is the performer shape used as the result type of the
// findPerformer query sent to a remote stash instance. Unlike ScrapedPerformer
// this type has no Image field, so the image is never requested as part of the
// query. Every field is an optional string pointer.
type ScrapedPerformerStash struct {
Name *string `graphql:"name" json:"name"`
URL *string `graphql:"url" json:"url"`
Twitter *string `graphql:"twitter" json:"twitter"`
Instagram *string `graphql:"instagram" json:"instagram"`
Birthdate *string `graphql:"birthdate" json:"birthdate"`
Ethnicity *string `graphql:"ethnicity" json:"ethnicity"`
Country *string `graphql:"country" json:"country"`
EyeColor *string `graphql:"eye_color" json:"eye_color"`
Height *string `graphql:"height" json:"height"`
Measurements *string `graphql:"measurements" json:"measurements"`
FakeTits *string `graphql:"fake_tits" json:"fake_tits"`
CareerLength *string `graphql:"career_length" json:"career_length"`
Tattoos *string `graphql:"tattoos" json:"tattoos"`
Piercings *string `graphql:"piercings" json:"piercings"`
Aliases *string `graphql:"aliases" json:"aliases"`
}

// ScrapedScene holds the scene data produced by a scraper, together with the
// related studio, movies, tags and performers.
type ScrapedScene struct {
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
// Image is documented in the GraphQL schema as base64 encoded. A value
// starting with "http" is treated as a URL by the scraper post-processing
// and replaced with the downloaded image data.
Image *string `graphql:"image" json:"image"`
File *SceneFileType `graphql:"file" json:"file"`
Studio *ScrapedSceneStudio `graphql:"studio" json:"studio"`
Movies []*ScrapedSceneMovie `graphql:"movies" json:"movies"`
Tags []*ScrapedSceneTag `graphql:"tags" json:"tags"`
Performers []*ScrapedScenePerformer `graphql:"performers" json:"performers"`
}

// ScrapedSceneStash is the scene shape used as the result type of the
// findScene query sent to a remote stash instance. Stash doesn't return the
// image in that query, so there is no Image field; instead the (non-optional)
// ID is requested so the screenshot can be fetched separately by scene id.
// NOTE(review): unlike ScrapedScene there is no Movies field here — confirm
// that is intentional.
type ScrapedSceneStash struct {
ID string `graphql:"id" json:"id"`
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
File *SceneFileType `graphql:"file" json:"file"`
Studio *ScrapedSceneStudio `graphql:"studio" json:"studio"`
Tags []*ScrapedSceneTag `graphql:"tags" json:"tags"`
Performers []*ScrapedScenePerformer `graphql:"performers" json:"performers"`
}

type ScrapedScenePerformer struct {
// Set if performer matched
ID *string `graphql:"id" json:"id"`
Expand Down
84 changes: 84 additions & 0 deletions pkg/scraper/image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package scraper

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"time"

	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/utils"
)

// imageGetTimeout is the timeout applied to the HTTP client that getImage uses
// to download an image. It covers the whole request, including the time spent
// transferring the body. May want to make this configurable at some point.
const imageGetTimeout = time.Second * 30

// setPerformerImage swaps an image URL on the scraped performer for the image
// data fetched from that URL. A nil performer, a missing image, or an image
// value that does not begin with "http" is left untouched and is not an error.
// If the download fails the error is returned and the Image field keeps its
// original value.
func setPerformerImage(p *models.ScrapedPerformer) error {
	// only values that look like a URL need to be fetched
	looksLikeURL := p != nil && p.Image != nil && strings.HasPrefix(*p.Image, "http")
	if !looksLikeURL {
		return nil
	}

	encoded, err := getImage(*p.Image)
	if err == nil {
		p.Image = encoded
	}

	return err
}

// setSceneImage swaps an image URL on the scraped scene for the image data
// fetched from that URL. Nothing happens (and nil is returned) when the scene
// is nil, has no image, or the image value does not begin with "http". On a
// download failure the error is returned and the Image field is unchanged.
func setSceneImage(s *models.ScrapedScene) error {
	switch {
	case s == nil:
		// no scene to update
		return nil
	case s.Image == nil:
		// scraper did not provide an image
		return nil
	case !strings.HasPrefix(*s.Image, "http"):
		// value doesn't appear to be a URL, so leave it as it is
		return nil
	}

	fetched, fetchErr := getImage(*s.Image)
	if fetchErr != nil {
		return fetchErr
	}

	s.Image = fetched
	return nil
}

// getImage downloads the resource at url and returns it as a data URI of the
// form "data:<content type>;base64,<encoded body>".
//
// The content type is taken from the response's Content-Type header, falling
// back to sniffing the body when the header is absent. The whole request is
// bounded by imageGetTimeout.
//
// An error is returned if the request fails, if the server answers with a
// non-2xx status, or if the body cannot be read. In all of these cases the
// returned string pointer is nil.
func getImage(url string) (*string, error) {
	client := &http.Client{
		Timeout: imageGetTimeout,
	}

	// assume url is an HTTP(S) URL for now
	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}

	defer resp.Body.Close()

	// client.Get only reports transport-level failures; a 404 or 500 still
	// produces a response. Without this check the error page would be encoded
	// and handed back as if it were the image.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("getting image from %s: unexpected status %s", url, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// determine the image type and set the base64 type
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		contentType = http.DetectContentType(body)
	}

	img := "data:" + contentType + ";base64," + utils.GetBase64StringFromData(body)
	return &img, nil
}

// getStashPerformerImage fetches the image of the performer with the given id
// from the stash instance at stashURL, by requesting
// <stashURL>/performer/<performerID>/image through getImage.
func getStashPerformerImage(stashURL string, performerID string) (*string, error) {
	imageURL := stashURL + "/performer/" + performerID + "/image"
	return getImage(imageURL)
}

// getStashSceneImage fetches the screenshot of the scene with the given id
// from the stash instance at stashURL, by requesting
// <stashURL>/scene/<sceneID>/screenshot through getImage.
func getStashSceneImage(stashURL string, sceneID string) (*string, error) {
	screenshotURL := stashURL + "/scene/" + sceneID + "/screenshot"
	return getImage(screenshotURL)
}
29 changes: 27 additions & 2 deletions pkg/scraper/scrapers.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,17 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
// find scraper with the provided id
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformer(scrapedPerformer)
ret, err := s.ScrapePerformer(scrapedPerformer)
if err != nil {
return nil, err
}

// post-process - set the image if applicable
if err := setPerformerImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return ret, nil
}

return nil, errors.New("Scraper with ID " + scraperID + " not found")
Expand All @@ -117,7 +127,17 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
for _, s := range scrapers {
if s.matchesPerformerURL(url) {
return s.ScrapePerformerURL(url)
ret, err := s.ScrapePerformerURL(url)
if err != nil {
return nil, err
}

// post-process - set the image if applicable
if err := setPerformerImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return ret, nil
}
}

Expand Down Expand Up @@ -228,6 +248,11 @@ func postScrapeScene(ret *models.ScrapedScene) error {
}
}

// post-process - set the image if applicable
if err := setSceneImage(ret); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
}

return nil
}

Expand Down
39 changes: 34 additions & 5 deletions pkg/scraper/stash.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"strconv"

"github.com/jinzhu/copier"
"github.com/shurcooL/graphql"

"github.com/stashapp/stash/pkg/models"
Expand Down Expand Up @@ -67,20 +68,35 @@ func scrapePerformerFragmentStash(c scraperTypeConfig, scrapedPerformer models.S
client := getStashClient(c)

var q struct {
FindPerformer *models.ScrapedPerformer `graphql:"findPerformer(id: $f)"`
FindPerformer *models.ScrapedPerformerStash `graphql:"findPerformer(id: $f)"`
}

performerID := *scrapedPerformer.URL

// get the id from the URL field
vars := map[string]interface{}{
"f": *scrapedPerformer.URL,
"f": performerID,
}

err := client.Query(context.Background(), &q, vars)
if err != nil {
return nil, err
}

return q.FindPerformer, nil
// need to copy back to a scraped performer
ret := models.ScrapedPerformer{}
err = copier.Copy(&ret, q.FindPerformer)
if err != nil {
return nil, err
}

// get the performer image directly
ret.Image, err = getStashPerformerImage(c.scraperConfig.StashServer.URL, performerID)
if err != nil {
return nil, err
}

return &ret, nil
}

func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
Expand All @@ -99,7 +115,7 @@ func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput
}

var q struct {
FindScene *models.ScrapedScene `graphql:"findScene(checksum: $c)"`
FindScene *models.ScrapedSceneStash `graphql:"findScene(checksum: $c)"`
}

checksum := graphql.String(storedScene.Checksum)
Expand Down Expand Up @@ -128,5 +144,18 @@ func scrapeSceneFragmentStash(c scraperTypeConfig, scene models.SceneUpdateInput
}
}

return q.FindScene, nil
// need to copy back to a scraped scene
ret := models.ScrapedScene{}
err = copier.Copy(&ret, q.FindScene)
if err != nil {
return nil, err
}

// get the scene image directly
ret.Image, err = getStashSceneImage(c.scraperConfig.StashServer.URL, q.FindScene.ID)
if err != nil {
return nil, err
}

return &ret, nil
}
Loading