From 602a7131f794541be338d82c7e854e7dd313bf61 Mon Sep 17 00:00:00 2001 From: smcallah Date: Tue, 7 Nov 2023 14:17:25 -0500 Subject: [PATCH 1/2] Update Tenshigao.yml - Added sceneByName scraper and some cleanup Update Tenshigao.yml - Added sceneByName scraper and some cleanup --- scrapers/Tenshigao.yml | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/scrapers/Tenshigao.yml b/scrapers/Tenshigao.yml index 6fc935be6..265b53fe6 100644 --- a/scrapers/Tenshigao.yml +++ b/scrapers/Tenshigao.yml @@ -9,6 +9,14 @@ performerByURL: url: - tenshigao.com scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://tenshigao.com/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: common: @@ -105,7 +113,7 @@ xPathScrapers: - regex: None with: Details: - selector: //p[span[@class="readmore"]] + selector: //p[span[@class="readmore"]] | //div[@class="intro"]/p postProcess: - replace: - regex: ... 
Read More @@ -116,7 +124,7 @@ xPathScrapers: - replace: - regex: .*Piercings:\s*(.*)\s*$ with: $1 - - regex: None|^No$|No Piercings + - regex: None|^No$|No Piercing.* with: Tattoos: selector: $profile[contains(strong, "Tattoo:")]//text() @@ -124,7 +132,7 @@ xPathScrapers: - replace: - regex: .*Tattoo:\s*(.*)\s*$ with: $1 - - regex: None|^No$|No Tattoos + - regex: None|^No$|No Tattoo.* with: HairColor: selector: $profile[contains(strong, "Hair color:")]//text() @@ -149,4 +157,14 @@ xPathScrapers: with: https:// - regex: 160x160 with: 500x500 -# Last Updated October 24, 2023 + sceneSearch: + common: + $videos: //div[@class="thumb"]/a + scene: + Title: $videos/@title + URL: $videos/@href + Image: $videos/img/@src + Studio: + Name: + fixed: Tenshigao +# Last Updated November 07, 2023 From 6b73583f3d2f22d3cc5a65d07166ddf3c52f5853 Mon Sep 17 00:00:00 2001 From: smcallah Date: Tue, 7 Nov 2023 14:19:47 -0500 Subject: [PATCH 2/2] Update AVJiali.yml - Added sceneByName scraper and clean up Update AVJiali.yml - Added sceneByName scraper and clean up --- scrapers/AVJiali.yml | 51 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/scrapers/AVJiali.yml b/scrapers/AVJiali.yml index 704be50d2..03ff3c85f 100644 --- a/scrapers/AVJiali.yml +++ b/scrapers/AVJiali.yml @@ -9,6 +9,14 @@ performerByURL: url: - avjiali.com scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://avjiali.com/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: scene: @@ -38,6 +46,7 @@ xPathScrapers: - replace: - regex: ^// with: "https://" + URL: //link[@rel="canonical"]/@href Studio: Name: fixed: AV Jiali @@ -101,17 +110,19 @@ xPathScrapers: with: $1 - regex: None with: - Details: - selector: //*[@id="main"]/div[1]/div[2]/p/text() + Details: + selector: //p[span[@class="readmore"]] + postProcess: + - replace: + - regex: ... 
Read More + with: Piercings: selector: $profile[contains(strong, "Piercings:")]//text() postProcess: - replace: - regex: .*Piercings:\s*(.*)\s*$ with: $1 - - regex: None - with: - - regex: No Piercings + - regex: None|^No$|No Piercing.* with: Tattoos: selector: $profile[contains(strong, "Tattoo:")]//text() @@ -119,25 +130,39 @@ xPathScrapers: - replace: - regex: .*Tattoo:\s*(.*)\s*$ with: $1 - - regex: None + - regex: None|^No$|No Tattoo.* with: HairColor: selector: $profile[contains(strong, "Hair color:")]//text() postProcess: - replace: - - regex: .*Hair Color:?\s*(.*)\s*$ - with: $1 - - regex: .*Hair color:?\s*(.*)\s*$ + - regex: (?i).*hair color:?\s*(.*)\s*$ with: $1 - regex: None with: URL: //link[@rel="canonical"][1]/@href Image: - selector: //img[@class="rounded"]/@src + selector: //div[@class="model-photo"]/img[@class="rounded"]/@src postProcess: - replace: - regex: ^// with: https:// - - regex: 160x160 - with: 500x500 -# Last Updated October 03, 2023 + - regex: 460x640 + with: 690x960 + Ethnicity: + fixed: Asian + Gender: + fixed: Female + Country: + fixed: Taiwan + sceneSearch: + common: + $videos: //div[@class="thumb"]/a + scene: + Title: $videos/@title + URL: $videos/@href + Image: $videos/img/@src + Studio: + Name: + fixed: AV Jiali +# Last Updated November 07, 2023