Skip to content

Commit

Permalink
Fix For SexbabesVR Scraper
Browse files Browse the repository at this point in the history
The scene ID in the webpage now seems to be 614 for all scenes, causing all scenes to be rescraped and new scenes never to be added.

This pulls the poster URL, which appears to have a unique identifier in the second-to-last directory.

Also updated the cover URL to pull the image used for the thumbnail on the index page, as the latest scene has an SBS image for the cover whereas the thumbnail contains a more useful image.

All appears functional
  • Loading branch information
pops64 committed Sep 27, 2024
1 parent 402436b commit 28e93ab
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions pkg/scrape/sexbabesvr.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,19 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
sc.Site = siteID
sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

// Scene ID -
// Scene ID
e.ForEach(`dl8-video`, func(id int, e *colly.HTMLElement) {
sc.SiteID = e.Attr("data-scene")
posterURL := e.Request.AbsoluteURL(e.Attr("poster"))
tmp := strings.Split(posterURL, "/")
sc.SiteID = tmp[len(tmp)-2]
sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
sc.Covers = append(sc.Covers, strings.Replace(e.Attr("poster"), "/videoDetail2x", "", -1))
})

// Cover Url
coverURL := e.Request.Ctx.GetAny("coverURL").(string)
log.Infoln(coverURL)
sc.Covers = append(sc.Covers, coverURL)

// Title
e.ForEach(`div.video-detail__description--container h1`, func(id int, e *colly.HTMLElement) {
sc.Title = strings.TrimSpace(e.Text)
Expand Down Expand Up @@ -105,10 +111,14 @@ func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out c
})

siteCollector.OnHTML(`div.videos__content`, func(e *colly.HTMLElement) {
e.ForEach(`a.video-container__description--title`, func(cnt int, e *colly.HTMLElement) {
e.ForEach(`a.video-container__image`, func(cnt int, e *colly.HTMLElement) {
sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
if !funk.ContainsString(knownScenes, sceneURL) {
sceneCollector.Visit(sceneURL)
coverURL := e.ChildAttr("a.video-container__image img", "data-src")
log.Infoln("Scraped Cover", coverURL)
ctx := colly.NewContext()
ctx.Put("coverURL", coverURL)
sceneCollector.Request("GET", sceneURL, nil, ctx, nil)
}
})
})
Expand Down

0 comments on commit 28e93ab

Please sign in to comment.