From cc3498af87d7df711ee42490cbac01d5831ac4ae Mon Sep 17 00:00:00 2001 From: minamotorin <76122224+minamotorin@users.noreply.github.com> Date: Sun, 2 Jan 2022 15:00:44 +0000 Subject: [PATCH] Fix RefreshTokenException('Could not find the Guest token in HTML') ABOUT THE PROBLEM This problem has recently begun to occur in some environments. It doesn't happen every time, so if you are lucky, you don't get the error. The cause is literally that twint could not find the Guest token in the HTML. Recently, the token is sometimes not included in the HTML at all. #!/usr/bin/env python3 # This program is WTFPL. import requests res = requests.get('https://twitter.com') print(res.text.split('\n')[-1]) twint requires the result of running the above code to contain the Guest token followed by })();. However, sometimes the result is only })(); and the Guest token is missing. ABOUT THE SOLUTION With this patch, twint gets the Guest token from https://api.twitter.com/1.1/guest/activate.json if it could not find one in the HTML. The author referred to the code of gallery-dl: https://github.com/mikf/gallery-dl/blob/47eae4c393f09937a5dbcc2cb978702fb173e747/gallery_dl/extractor/twitter.py#L780-L783 Author's note: > I don't understand session of requests, so the code may be not good. > I hope someone rewrite the patch better and create a pull request. This commit was adopted from: https://github.com/twintproject/twint/issues/1320#issuecomment-1003094346 Closes https://github.com/twintproject/twint/issues/1320. 
--- twint/token.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/twint/token.py b/twint/token.py index ae66a24a..2eedcee4 100644 --- a/twint/token.py +++ b/twint/token.py @@ -65,5 +65,30 @@ def refresh(self): logme.debug('Found guest token in HTML') self.config.Guest_token = str(match.group(1)) else: - self.config.Guest_token = None - raise RefreshTokenException('Could not find the Guest token in HTML') + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0', + 'authority': 'api.twitter.com', + 'content-length': '0', + 'authorization': self.config.Bearer_token, + 'x-twitter-client-language': 'en', + 'x-csrf-token': res.cookies.get("ct0"), + 'x-twitter-active-user': 'yes', + 'content-type': 'application/x-www-form-urlencoded', + 'accept': '*/*', + 'sec-gpc': '1', + 'origin': 'https://twitter.com', + 'sec-fetch-site': 'same-site', + 'sec-fetch-mode': 'cors', + 'sec-fetch-dest': 'empty', + 'referer': 'https://twitter.com/', + 'accept-language': 'en-US', + } + self._session.headers.update(headers) + req = self._session.prepare_request(requests.Request('POST', 'https://api.twitter.com/1.1/guest/activate.json')) + res = self._session.send(req, allow_redirects=True, timeout=self._timeout) + match = re.search(r'{"guest_token":"(\d+)"}', res.text) + if match: + self.config.Guest_token = str(match.group(1)) + else: + self.config.Guest_token = None + raise RefreshTokenException('Could not find the Guest token in HTML')