Skip to content

Commit

Permalink
Merge pull request #1533 from smcallah/patch-1
Browse files — browse the repository at this point in the history
Updated AVJiali.yml and Tenshigao.yml with sceneByName scrapers and cleaned up the code
  • Loading branch information
Maista6969 authored Nov 7, 2023
2 parents beeaadd + 6b73583 commit 50a4f64
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 17 deletions.
51 changes: 38 additions & 13 deletions scrapers/AVJiali.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ performerByURL:
url:
- avjiali.com
scraper: performerScraper
# Scene lookup by name: requests the site's search URL ({} is the placeholder
# for the search text) and parses the response with the sceneSearch scraper.
sceneByName:
  scraper: sceneSearch
  queryURL: 'https://avjiali.com/?s={}'
  action: scrapeXPath
# Resolves a scene fragment (such as a chosen search result) by loading the
# fragment's own URL unchanged and running the regular sceneScraper on it.
sceneByQueryFragment:
  scraper: sceneScraper
  queryURL: '{url}'
  action: scrapeXPath
xPathScrapers:
sceneScraper:
scene:
Expand Down Expand Up @@ -38,6 +46,7 @@ xPathScrapers:
- replace:
- regex: ^//
with: "https://"
URL: //link[@rel="canonical"]/@href
Studio:
Name:
fixed: AV Jiali
Expand Down Expand Up @@ -101,43 +110,59 @@ xPathScrapers:
with: $1
- regex: None
with:
Details:
selector: //*[@id="main"]/div[1]/div[2]/p/text()
Details:
selector: //p[span[@class="readmore"]]
postProcess:
- replace:
- regex: ... Read More
with:
Piercings:
selector: $profile[contains(strong, "Piercings:")]//text()
postProcess:
- replace:
- regex: .*Piercings:\s*(.*)\s*$
with: $1
- regex: None
with:
- regex: No Piercings
- regex: None|^No$|No Piercing.*
with:
Tattoos:
selector: $profile[contains(strong, "Tattoo:")]//text()
postProcess:
- replace:
- regex: .*Tattoo:\s*(.*)\s*$
with: $1
- regex: None
- regex: None|^No$|No Tattoo.*
with:
HairColor:
selector: $profile[contains(strong, "Hair color:")]//text()
postProcess:
- replace:
- regex: .*Hair Color:?\s*(.*)\s*$
with: $1
- regex: .*Hair color:?\s*(.*)\s*$
- regex: (?i).*hair color:?\s*(.*)\s*$
with: $1
- regex: None
with:
URL: //link[@rel="canonical"][1]/@href
Image:
selector: //img[@class="rounded"]/@src
selector: //div[@class="model-photo"]/img[@class="rounded"]/@src
postProcess:
- replace:
- regex: ^//
with: https://
- regex: 160x160
with: 500x500
# Last Updated October 03, 2023
- regex: 460x640
with: 690x960
Ethnicity:
fixed: Asian
Gender:
fixed: Female
Country:
fixed: Taiwan
# Parses the site's search results page; used by the sceneByName lookup.
sceneSearch:
  common:
    # Matches the <a> children of div.thumb elements (presumably one per hit).
    $videos: //div[@class="thumb"]/a
  scene:
    Title: $videos/@title
    URL: $videos/@href
    Image: $videos/img/@src
    Studio:
      Name:
        # Spelled exactly like sceneScraper's Studio Name ("AV Jiali") so that
        # search results and full scrapes resolve to the same studio.
        fixed: AV Jiali
# Last Updated November 07, 2023
26 changes: 22 additions & 4 deletions scrapers/Tenshigao.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ performerByURL:
url:
- tenshigao.com
scraper: performerScraper
# Scene lookup by name: requests the site's search URL ({} is the placeholder
# for the search text) and parses the response with the sceneSearch scraper.
sceneByName:
  scraper: sceneSearch
  queryURL: 'https://tenshigao.com/?s={}'
  action: scrapeXPath
# Resolves a scene fragment (such as a chosen search result) by loading the
# fragment's own URL unchanged and running the regular sceneScraper on it.
sceneByQueryFragment:
  scraper: sceneScraper
  queryURL: '{url}'
  action: scrapeXPath
xPathScrapers:
sceneScraper:
common:
Expand Down Expand Up @@ -105,7 +113,7 @@ xPathScrapers:
- regex: None
with:
Details:
selector: //p[span[@class="readmore"]]
selector: //p[span[@class="readmore"]] | //div[@class="intro"]/p
postProcess:
- replace:
- regex: ... Read More
Expand All @@ -116,15 +124,15 @@ xPathScrapers:
- replace:
- regex: .*Piercings:\s*(.*)\s*$
with: $1
- regex: None|^No$|No Piercings
- regex: None|^No$|No Piercing.*
with:
Tattoos:
selector: $profile[contains(strong, "Tattoo:")]//text()
postProcess:
- replace:
- regex: .*Tattoo:\s*(.*)\s*$
with: $1
- regex: None|^No$|No Tattoos
- regex: None|^No$|No Tattoo.*
with:
HairColor:
selector: $profile[contains(strong, "Hair color:")]//text()
Expand All @@ -149,4 +157,14 @@ xPathScrapers:
with: https://
- regex: 160x160
with: 500x500
# Last Updated October 24, 2023
# Search-results scraper backing the sceneByName lookup.
sceneSearch:
  common:
    # Shared XPath prefix: the <a> children of div.thumb elements.
    $videos: '//div[@class="thumb"]/a'
  scene:
    Title:
      selector: $videos/@title
    URL:
      selector: $videos/@href
    Image:
      selector: $videos/img/@src
    Studio:
      Name:
        fixed: Tenshigao
# Last Updated November 07, 2023

0 comments on commit 50a4f64

Please sign in to comment.