Skip to content

Commit

Permalink
Merge pull request #1389 from nrg101/add-galleryByFragment-to-Adulttime
Browse files: browse the repository at this point in the history
script, improvement: add galleryByFragment to Algolia.py / Algolia_Adultime.yml
  • Loading branch information
Maista6969 authored Jul 27, 2023
2 parents 0e88eca + ef449ba commit f6dad36
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
5 changes: 3 additions & 2 deletions SCRAPERS-LIST.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
asmrfantasy.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
Expand Down Expand Up @@ -961,7 +962,7 @@ onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
Expand Down Expand Up @@ -1352,7 +1353,7 @@ transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Trans
transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
transgressivexxx.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans
Expand Down
59 changes: 40 additions & 19 deletions scrapers/Algolia.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,10 @@ def write_config(date, app_id, api_key):
# API Search Data
def api_search_req(type_search, query, url):
api_request = None
if type_search == "query":
api_request = api_search_query(query, url)
if type_search == "query_all_scenes":
api_request = api_search_query("all_scenes", query, url)
if type_search == "query_all_photosets":
api_request = api_search_query("all_photosets", query, url)
if type_search == "id":
api_request = api_search_id(query, url)
if api_request:
Expand Down Expand Up @@ -304,15 +306,15 @@ def api_search_gallery_id(p_id, url):
return req


def api_search_query(query, url):
def api_search_query(index_name, query, url):
request_api = {
"requests": [{
"indexName": "all_scenes",
"indexName": index_name,
"params": "query=" + query + "&hitsPerPage=40&page=0"
}]
}
req = send_request(url, HEADERS, request_api)
return req
res = send_request(url, HEADERS, request_api)
return res


# Searching Result
Expand Down Expand Up @@ -814,6 +816,8 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict:
SCENE_TITLE = FRAGMENT.get("title")
SCENE_URL = FRAGMENT.get("url")

# log.trace(f"fragment: {FRAGMENT}")

# ACCESS API
# Check existing API keys
CURRENT_TIME = datetime.datetime.now()
Expand Down Expand Up @@ -910,7 +914,7 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict:
if SEARCH_TITLE:
SEARCH_TITLE = SEARCH_TITLE.replace(".", " ")
log.debug(f"[API] Searching for: {SEARCH_TITLE}")
api_search = api_search_req("query", SEARCH_TITLE, api_url)
api_search = api_search_req("query_all_scenes", SEARCH_TITLE, api_url)
final_json = None
if api_search:
result_search = []
Expand All @@ -936,13 +940,13 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict:
log.warning("[API] No result")
if url_title and api_json is None:
log.debug("[API] Searching using URL_TITLE")
api_search = api_search_req("query", url_title, api_url)
api_search = api_search_req("query_all_scenes", url_title, api_url)
if api_search:
log.info(f"[API] Search gives {len(api_search)} result(s)")
api_json = json_parser(api_search)
if SCENE_TITLE and api_json is None:
log.debug("[API] Searching using STASH_TITLE")
api_search = api_search_req("query", SCENE_TITLE, api_url)
api_search = api_search_req("query_all_scenes", SCENE_TITLE, api_url)
if api_search:
log.info(f"[API] Search gives {len(api_search)} result(s)")
api_json = json_parser(api_search)
Expand All @@ -966,13 +970,30 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict:
#log.debug(scraped_movie)
print(json.dumps(scraped_movie))
elif "gallery" in sys.argv:
log.debug("Scraping gallery")
gallery_id = get_id_from_url(SCENE_URL)
if gallery_id:
gallery_results = api_search_gallery_id(gallery_id, api_url)
gallery = gallery_results.json()["results"][0].get("hits")
if gallery:
#log.debug(gallery[0])
scraped_gallery = parse_gallery_json(gallery[0])
#log.debug(scraped_gallery)
print(json.dumps(scraped_gallery))
scraped_gallery = None
if SCENE_URL:
log.debug("Scraping gallery by URL")
gallery_id = get_id_from_url(SCENE_URL)
if gallery_id:
gallery_results = api_search_gallery_id(gallery_id, api_url)
gallery = gallery_results.json()["results"][0].get("hits")
if gallery:
#log.debug(gallery[0])
scraped_gallery = parse_gallery_json(gallery[0])
#log.debug(scraped_gallery)
elif SCENE_TITLE:
log.debug("Scraping gallery by fragment")
# log.debug(f"[API] Searching using SCENE_TITLE: {SCENE_TITLE}")
api_search = api_search_req("query_all_photosets", SCENE_TITLE, api_url)
if api_search:
log.info(f"[API] Search gives {len(api_search)} result(s)")
# log.trace(f"api_search: {api_search}")
log.debug(f"Galleries found: {'; '.join([g['title'] for g in api_search])}")
scraped_gallery = parse_gallery_json(api_search[0])
# Scraping the JSON
if scraped_gallery:
print(json.dumps(scraped_gallery))
else:
log.error("Can't find the gallery")
print(json.dumps({}))
sys.exit()
10 changes: 9 additions & 1 deletion scrapers/Algolia_Adultime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ sceneByURL:
- adulttimepilots.com/en/video/
- agentredgirl.com/en/video/
- analteenangels.com/en/video/
- asmrfantasy.com/en/video/
- assholefever.com/en/video/
- beingtrans247.com/en/video/
- blowmepov.com/en/video/
Expand Down Expand Up @@ -76,6 +77,13 @@ sceneByQueryFragment:
- Algolia.py
- girlsway
- validName
galleryByFragment:
action: script
script:
- python
- Algolia.py
- puretaboo
- gallery
galleryByURL:
- action: script
url:
Expand Down Expand Up @@ -104,4 +112,4 @@ movieByURL:
- Algolia.py
- puretaboo
- movie
# Last Updated May 25, 2023
# Last Updated July 18, 2023

0 comments on commit f6dad36

Please sign in to comment.