Skip to content

Commit

Permalink
[instagram] add 'cursor' option (#1149)
Browse files Browse the repository at this point in the history
This provides at least some way to continue downloading from the middle
of a user profile listing.
  • Loading branch information
mikf committed Dec 11, 2020
1 parent 0d406c8 commit b88c97b
Showing 1 changed file with 30 additions and 10 deletions.
40 changes: 30 additions & 10 deletions gallery_dl/extractor/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self, match):
self.www_claim = "0"
self.csrf_token = util.generate_csrf_token()
self._find_tags = re.compile(r"#\w+").findall
self._cursor = None

def items(self):
self.login()
Expand Down Expand Up @@ -68,6 +69,9 @@ def posts(self):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
if response.history and "/accounts/login/" in response.request.url:
if self._cursor:
self.log.info("Use '-o cursor=%s' to continue downloading "
"from the current position", self._cursor)
raise exception.StopExtraction(
"Redirected to login page (%s)", response.request.url)
www_claim = response.headers.get("x-ig-set-www-claim")
Expand Down Expand Up @@ -314,6 +318,18 @@ def _extract_post_page(self, url):
raise exception.NotFoundError("post")
return data["PostPage"][0]["graphql"]["shortcode_media"]

def _get_edge_data(self, user, key):
cursor = self.config("cursor")
if cursor:
return {
"edges": (),
"page_info": {
"end_cursor": cursor,
"has_next_page": True,
},
}
return user[key]

def _pagination(self, query_hash, variables, data):
while True:
for edge in data["edges"]:
Expand All @@ -322,7 +338,9 @@ def _pagination(self, query_hash, variables, data):
info = data["page_info"]
if not info["has_next_page"]:
return
variables["after"] = info["end_cursor"]

variables["after"] = self._cursor = info["end_cursor"]
self.log.debug("Cursor: %s", self._cursor)
data = next(iter(self._graphql_request(
query_hash, variables)["user"].values()))

Expand Down Expand Up @@ -354,7 +372,6 @@ def __init__(self, match):
def posts(self):
url = "{}/{}/".format(self.root, self.user)
user = self._extract_profile_page(url)
edge = user["edge_owner_to_timeline_media"]

if user.get("highlight_reel_count") and self.config("highlights"):
query_hash = "d4d88dc1500312af6f937f7b804c68c3"
Expand All @@ -379,7 +396,8 @@ def posts(self):
highlights = None

query_hash = "003056d32c2554def87228bc3fd9668a"
variables = {"id": user["id"], "first": 12}
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
posts = self._pagination(query_hash, variables, edge)

return itertools.chain(highlights, posts) if highlights else posts
Expand All @@ -403,10 +421,10 @@ def __init__(self, match):
def posts(self):
url = "{}/{}/channel/".format(self.root, self.user)
user = self._extract_profile_page(url)
edge = user["edge_felix_video_timeline"]

query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
variables = {"id": user["id"], "first": 12}
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_felix_video_timeline")
return self._pagination(query_hash, variables, edge)


Expand All @@ -425,10 +443,10 @@ def __init__(self, match):
def posts(self):
url = "{}/{}/saved/".format(self.root, self.user)
user = self._extract_profile_page(url)
edge = user["edge_saved_media"]

query_hash = "2ce1d673055b99250e93b6f88f878fde"
variables = {"id": user["id"], "first": 12}
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_saved_media")
return self._pagination(query_hash, variables, edge)


Expand All @@ -454,10 +472,10 @@ def posts(self):
url = "{}/explore/tags/{}/".format(self.root, self.tag)
data = self._extract_shared_data(url)
hashtag = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
edge = hashtag["edge_hashtag_to_media"]

query_hash = "9b498c08113f1e09617a1703c22b2f32"
variables = {"tag_name": hashtag["name"], "first": 12}
variables = {"tag_name": hashtag["name"], "first": 50}
edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
return self._pagination(query_hash, variables, edge)

def _pagination(self, query_hash, variables, data):
Expand All @@ -468,7 +486,9 @@ def _pagination(self, query_hash, variables, data):
info = data["page_info"]
if not info["has_next_page"]:
return
variables["after"] = info["end_cursor"]

variables["after"] = self._cursor = info["end_cursor"]
self.log.debug("Cursor: %s", self._cursor)
data = self._graphql_request(
query_hash, variables)["hashtag"]["edge_hashtag_to_media"]

Expand Down

0 comments on commit b88c97b

Please sign in to comment.