Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix search #14

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions twint/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def initialize(args):
c.Media = args.media
c.Replies = args.replies
c.Pandas_clean = args.pandas_clean
c.Auth_token = args.auth_token
c.Proxy_host = args.proxy_host
c.Proxy_port = args.proxy_port
c.Proxy_type = args.proxy_type
Expand Down Expand Up @@ -190,6 +191,7 @@ def options():
ap.add_argument("--followers", help="Scrape a person's followers.", action="store_true")
ap.add_argument("--following", help="Scrape a person's follows", action="store_true")
ap.add_argument("--favorites", help="Scrape Tweets a user has liked.", action="store_true")
ap.add_argument("--auth-token", help="Twitter login cookie.")
ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.")
ap.add_argument("--proxy-host", help="Proxy hostname or IP.")
ap.add_argument("--proxy-port", help="The port of the proxy server.")
Expand Down
3 changes: 3 additions & 0 deletions twint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class Config:
Favorites: bool = False
TwitterSearch: bool = False
User_full: bool = False
Utc: bool = False
Full_text: bool = False
# Profile_full: bool = False
Store_object: bool = False
Store_object_tweets_list: list = None
Expand All @@ -58,6 +60,7 @@ class Config:
Pandas_clean: bool = True
Lowercase: bool = True
Pandas_au: bool = True
Auth_token: Optional[str] = None
Proxy_host: str = ""
Proxy_port: int = 0
Proxy_type: object = None
Expand Down
25 changes: 21 additions & 4 deletions twint/get.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from async_timeout import timeout
from datetime import datetime
from bs4 import BeautifulSoup
import os
import sys
import socket
import aiohttp
Expand Down Expand Up @@ -108,11 +109,25 @@ def get_connector(config):

async def RequestUrl(config, init):
logme.debug(__name__ + ':RequestUrl')
csrf_token = random.randbytes(16).hex() # Looks like any random string works
Copy link
Author

@9ary 9ary May 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: `random.randbytes` requires Python 3.9+ (released in late 2020). I don't know whether you want to support older versions; if you do, it can be made to work, and hardcoding a fixed string might even be feasible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I'd like to run the code on Python 3.6. It seems there would be enough options available to make that work, though.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is definitely feasible.
It's just that Python 3.6 is already 7 years old and already EOL. Python 3.7 is going to be EOL'd in 6 weeks, according to https://devguide.python.org/versions/. Debian Bullseye (current stable) ships 3.9, and I usually find that to be a reasonable reference for setting the cutoff point.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LinqLover Python 3.6 has already reached end of support, and Python 3.7 reaches end of support on 2023-06-27 (1 month 17 days away). TWINT should not care about Python versions that have reached end of support.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point 👍

_connector = get_connector(config)
_serialQuery = ""
params = []
cookies = {
"ct0": csrf_token,
}
_url = ""
_headers = [("authorization", config.Bearer_token), ("x-guest-token", config.Guest_token)]
_headers = {
"authorization": config.Bearer_token,
"x-guest-token": config.Guest_token,
"x-csrf-token": csrf_token,
}

auth_token = config.Auth_token
if auth_token is None:
auth_token = os.getenv('TWITTER_AUTH_TOKEN')
if auth_token:
cookies["auth_token"] = auth_token

# TODO : do this later
if config.Profile:
Expand All @@ -133,7 +148,7 @@ async def RequestUrl(config, init):
_url = await url.Favorites(config.Username, init)
_serialQuery = _url

response = await Request(_url, params=params, connector=_connector, headers=_headers)
response = await Request(_url, params=params, connector=_connector, headers=_headers, cookies=cookies)

if config.Debug:
print(_serialQuery, file=open("twint-request_urls.log", "a", encoding="utf-8"))
Expand All @@ -156,9 +171,9 @@ def ForceNewTorIdentity(config):
sys.stderr.write('If you want to rotate Tor ports automatically - enable Tor control port\n')


async def Request(_url, connector=None, params=None, headers=None):
async def Request(_url, connector=None, params=None, headers=None, cookies=None):
logme.debug(__name__ + ':Request:Connector')
async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
async with aiohttp.ClientSession(connector=connector, headers=headers, cookies=cookies) as session:
return await Response(session, _url, params)


Expand All @@ -173,6 +188,8 @@ async def Response(session, _url, params=None):
resp = await response.text()
if response.status == 429: # 429 implies Too many requests i.e. Rate Limit Exceeded
raise TokenExpiryException(loads(resp)['errors'][0]['message'])
if response.status == 403:
raise ConnectionError("Access forbidden, try passing --auth-token.")
return resp
except aiohttp.client_exceptions.ClientConnectorError as exc:
if attempt < retries:
Expand Down
3 changes: 1 addition & 2 deletions twint/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ async def Feed(self):
if len(self.feed) == 0 and len(self.init) == 0:
while (len(self.feed) == 0 or len(self.init) == 0) and favorite_err_cnt < 5:
self.user_agent = await get.RandomUserAgent(wa=False)
response = await get.RequestUrl(self.config, self.init,
headers=[("User-Agent", self.user_agent)])
response = await get.RequestUrl(self.config, self.init)
self.feed, self.init = feed.MobileFav(response)
favorite_err_cnt += 1
time.sleep(1)
Expand Down
5 changes: 3 additions & 2 deletions twint/storage/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def init(db):
tweet_id integer not null,
user_id integer not null,
username text not null,
name text not null,
CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
);
Expand Down Expand Up @@ -289,8 +290,8 @@ def tweets(conn, Tweet, config):

if Tweet.reply_to:
for reply in Tweet.reply_to:
query = 'INSERT INTO replies VALUES(?,?,?)'
cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username']))
query = 'INSERT INTO replies VALUES(?,?,?,?)'
cursor.execute(query, (Tweet.id, int(reply['id']), reply['screen_name'], reply['name']))

conn.commit()
except sqlite3.IntegrityError:
Expand Down