Skip to content

Commit

Permalink
scraper: SinsVR cast fix (#1823)
Browse files Browse the repository at this point in the history
* SinsVR Fix for model not present on scrape

Occasionally SinsVR leaves the model off the scene page, resulting in no model being listed for the scene. This change falls back to the trailer URL, which contains the names of the models in the scene.

* Debug prompt removal

* Some more tweaks. 

Check that the profile URL is valid. Noticed that the actor name is occasionally misspelled in the URL.

* Format
  • Loading branch information
pops64 committed Aug 28, 2024
1 parent 402436b commit 7ca0840
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions pkg/scrape/sinsvr.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package scrape

import (
"encoding/json"
"net/http"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -66,15 +67,18 @@ func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<
sc.ActorDetails = make(map[string]models.ActorDetails)
e.ForEach(`.video-detail__specs div.cell`, func(id int, e *colly.HTMLElement) {
c := strings.TrimSpace(e.Text)

// Cast
if strings.Contains(c, "Starring") {
e.ForEach(`.cell a`, func(id int, e *colly.HTMLElement) {

cast := strings.Split(e.Text, ",")
sc.Cast = append(sc.Cast, cast...)
if len(cast) > 1 {
sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
}
sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}

})
} else {
// Released - Date Oct 19, 2019
Expand All @@ -85,6 +89,30 @@ func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<
}
})

// Fallback in case SinsVR forgets to add the actor to the scene. This uses the trailer URL, which contains the models in the scene.
if len(sc.Cast) == 0 {
trailerURL := e.ChildAttr("div.video-player-container__player source", "src")
//The cast is the first part of the trailer file name in the URL
re := regexp.MustCompile(`https:\/\/public\.xsinsvr\.com\/video\/.+\/(?P<cast>[A-Za-z_-]+)_trailer`)
r := re.FindStringSubmatch(trailerURL)
castIndex := re.SubexpIndex("cast")
//sinsVR uses _ for whitespace and - to separate cast members
cast := strings.Split(strings.ReplaceAll(r[castIndex], "_", " "), "-")
sc.Cast = append(sc.Cast, cast...)

// This should produce the correct model URL for SinsVR, but occasionally SinsVR has not yet created the model page and the request will result in a 404.
for _, name := range cast {
profileUrl := `https://xsinsvr.com/model/` + strings.ToLower(strings.ReplaceAll(name, " ", "-"))
profileUrlResp, err := http.Head(profileUrl)
if err != nil {
log.Errorf("Method Head Failed on profileUrlResp %s with error %s", profileUrlResp, err)
} else if profileUrlResp.StatusCode == 200 { //The url is not valid don't bother adding it to the ActorDetails
sc.ActorDetails[name] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: profileUrl}
}
defer profileUrlResp.Body.Close()
}
}

// Duration
durationText := e.ChildText(`div.video-player-container__info div.tn-video-props span`)
for _, regex := range durationRegexes {
Expand Down

0 comments on commit 7ca0840

Please sign in to comment.