Spaces:
Runtime error
Runtime error
| import snscrape.modules.twitter as sntwitter | |
| import pandas as pd | |
| import datetime as dt | |
| from tqdm import tqdm | |
| import requests | |
def get_tweets(
    query: str,
) -> list:
    """
    Run a Twitter search through snscrape and collect every result.

    Args:
        query (str): Search query handed to ``sntwitter.TwitterSearchScraper``.

    Returns:
        A list holding one ``extract_tweet_info`` result per scraped item, in
        the order the scraper yields them. No upper bound is applied here.
    """
    print(f"Fetching tweets with query: {query}")
    scraper = sntwitter.TwitterSearchScraper(query)
    collected = []
    # tqdm wraps the item stream so progress is reported while scraping.
    for item in tqdm(scraper.get_items()):
        collected.append(extract_tweet_info(item))
    return collected
def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
    """
    Fetch tweets addressed to a user that are newer than a given tweet ID.

    Args:
        username (str): Account the tweets are addressed to (``to:`` operator).
        conversation_id (str): Tweet ID used as the lower bound of the search
            via the ``since_id:`` operator.
        max_tweets (int): Maximum number of tweets to collect. A value of zero
            or less returns an empty list without running a search.

    Returns:
        A list of at most ``max_tweets`` dictionaries produced by
        ``extract_tweet_info``.
    """
    print(
        f"Fetching replies for username {username} and conversation {conversation_id}"
    )
    # Nothing to collect: skip building the scraper entirely.
    if max_tweets <= 0:
        return []
    # NOTE(review): this matches every tweet sent to `username` after the given
    # ID, not only tweets belonging to that conversation - confirm this is the
    # intended filter.
    query = f"to:{username} since_id:{conversation_id} filter:safe"
    tweets_list = []
    for tweet in tqdm(sntwitter.TwitterSearchScraper(query).get_items()):
        tweets_list.append(extract_tweet_info(tweet))
        # Stop as soon as the cap is reached so exactly `max_tweets` items are
        # kept and no extra item is pulled from the scraper.
        if len(tweets_list) >= max_tweets:
            break
    return tweets_list
def get_tweet_by_id_and_username(username: str, tweet_id: str):
    """
    Look up a tweet by building its status URL and searching for that URL.

    Args:
        username (str): The username of the Twitter user who posted the tweet.
        tweet_id (str): The ID of the tweet to look up.

    Returns:
        The value returned by ``TwitterSearchScraper(...).get_items()`` for
        the status URL. The result is handed back as-is and is not consumed
        or unwrapped here.
    """
    # NOTE(review): the status URL is used as a *search query*; confirm the
    # search actually yields the tweet itself rather than tweets linking to it.
    scraper = sntwitter.TwitterSearchScraper(
        f"https://twitter.com/{username}/status/{tweet_id}"
    )
    return scraper.get_items()
def extract_tweet_info(tweet):
    """
    Flatten a tweet object into a plain dictionary.

    Args:
        tweet: An object exposing the attributes ``date``, ``user.username``,
            ``rawContent``, ``retweetCount``, ``id``, ``likeCount``,
            ``replyCount``, ``inReplyToTweetId``, ``conversationId`` and
            ``viewCount``.

    Returns:
        A dictionary with the keys ``date``, ``username``, ``content``,
        ``retweet_count``, ``tweet_id``, ``like_count``, ``reply_count``,
        ``in_reply_to_tweet_id``, ``conversation_id`` and ``view_count``,
        inserted in that order.
    """
    # The author's handle lives on the nested user object, so it is read
    # explicitly; the remaining fields are direct attributes of the tweet.
    record = {"date": tweet.date, "username": tweet.user.username}
    direct_fields = (
        ("content", "rawContent"),
        ("retweet_count", "retweetCount"),
        ("tweet_id", "id"),
        ("like_count", "likeCount"),
        ("reply_count", "replyCount"),
        ("in_reply_to_tweet_id", "inReplyToTweetId"),
        ("conversation_id", "conversationId"),
        ("view_count", "viewCount"),
    )
    for key, attribute in direct_fields:
        record[key] = getattr(tweet, attribute)
    return record
def get_follower_ids(username: str, limit: int = 20):
    """
    Fetch one tweet by a user from a fixed date window plus tweets sent to them.

    Despite its name, this function does not return follower IDs. It searches
    the user's tweets between 2023-03-10 and 2023-03-22, takes the last item of
    that result list, and then calls ``get_replies`` using that tweet's ID.

    Args:
        username (str): The Twitter handle whose tweets are searched.
        limit (int): Currently unused; kept so existing callers keep working.

    Returns:
        A tuple ``(one_tweet, replies)`` where ``one_tweet`` is the last
        dictionary returned by ``get_tweets`` and ``replies`` is the list
        returned by ``get_replies`` (capped at 1000). When the search finds no
        tweets, ``(None, [])`` is returned.
    """
    start_date = dt.date(year=2023, month=3, day=10)
    end_date = dt.date(year=2023, month=3, day=22)
    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = get_tweets(query=query)
    # An empty search result has no "last tweet" to anchor the reply lookup.
    if not tweets:
        return None, []
    one_tweet = tweets[-1]
    one_tweet_id = one_tweet["tweet_id"]
    replies = get_replies(
        username=username, conversation_id=one_tweet_id, max_tweets=1000
    )
    return one_tweet, replies
def get_twitter_account_info(twitter_handle: str) -> dict:
    """
    Read basic profile fields of a Twitter account through snscrape.

    Args:
        twitter_handle (str): The Twitter username to retrieve information for.

    Returns:
        dict: A dictionary with the keys ``name``, ``username``, ``user_id``,
        ``follower_count``, ``friends_count`` and ``verified``, taken from the
        scraper's ``entity`` object.
    """
    # NOTE(review): presumably `entity` can be missing for unknown handles, in
    # which case the attribute reads below would fail - confirm with snscrape.
    profile = sntwitter.TwitterUserScraper(twitter_handle).entity
    account_info = {}
    account_info["name"] = profile.displayname
    account_info["username"] = profile.username
    account_info["user_id"] = profile.id
    account_info["follower_count"] = profile.followersCount
    account_info["friends_count"] = profile.friendsCount
    account_info["verified"] = profile.verified
    return account_info
if __name__ == "__main__":
    ## Testing extracting tweets from an account
    # Set the search variables (dates for when the account tweeted; replies
    # to the account are not taken into account).
    account = "taylorlorenz"
    start_date = dt.date(year=2023, month=2, day=1)
    end_date = dt.date(year=2023, month=3, day=11)
    # Format the query string
    query = f"from:{account} since:{start_date} until:{end_date}"
    print(f"query: {query}")
    tweets = get_tweets(query=query)
    df_tweets = pd.DataFrame(data=tweets)
    # An empty result produces a frame without columns, so sorting by a
    # column name would raise KeyError; only sort when the column exists.
    if "in_reply_to_tweet_id" in df_tweets.columns:
        df_tweets = df_tweets.sort_values("in_reply_to_tweet_id")
    # Save output (comment out to skip writing the CSV)
    df_tweets.to_csv("df_tweets.csv")
    print(df_tweets.head(2))
    print(df_tweets.tail(2))
    print(f"Total Tweets: {len(tweets)}")

    ## Testing extracting conversation threads from a conversation ID
    conversation_id = (
        1620650202305798144  # A tweet from elon musk about turbulent times
    )
    max_tweets = 3000
    tweets = get_replies(
        username="elonmusk", conversation_id=conversation_id, max_tweets=max_tweets
    )
    df_replies = pd.DataFrame(data=tweets)
    # Uncomment to save output
    # df_replies.to_csv("df_replies.csv")
    print(
        f"Number of extracted tweets from conversation_id: {conversation_id}, {len(tweets)}"
    )