I am attempting to create a Pandas dataframe that looks like:
| user_name | followers | following | retweets | likes |  tweet date |     tweet    |
|:---------:|:---------:|:---------:|:--------:|:-----:|:-----------:|:------------:|
|   user1   |     50    |    100    |    25    |   10  |  Oct-1-2019 | lorem ipsum… |
|   user1   |     50    |    100    |    25    |   10  |  Oct-6-2019 | lorem ipsum… |
|   user1   |     50    |    100    |    25    |   10  | Oct-19-2019 | lorem ipsum… |
|   user1   |     50    |    100    |    25    |   10  |  Oct-4-2019 | lorem ipsum… |
|   user1   |     50    |    100    |    25    |   10  | Oct-16-2019 | lorem ipsum… |
|   user2   |    321    |   12151   |   2017   |   0   | Sep-12-2018 | lorem ipsum… |
|   user2   |    321    |   12151   |   2017   |   0   | Sep-15-2018 | lorem ipsum… |
|   user2   |    321    |   12151   |   2017   |   0   | Sep-17-2018 | lorem ipsum… |
|   user2   |    321    |   12151   |   2017   |   0   | Sep-17-2018 | lorem ipsum… |
|   user2   |    321    |   12151   |   2017   |   0   | Sep-17-2019 | lorem ipsum… |
|   user3   |    122    |    124    |    11    | 38337 |  Nov-1-2019 |    foobar    |
(The values here are arbitrary)
What I am trying to do is start with a Twitter profile, scrape through the followers of that profile, and extract the following features for each follower profile:
{username (@), follower count, following count, # of retweets, # of likes}
I am using Tweepy to try and accomplish this.
So far, my current code can grab followers, but it prints out the whole _json for each follower rather than the specific details I am looking for.
import tweepy
import time
# Insert your Twitter keys here
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
#twitter_handle='TimBarbalace'

# Authenticate and build the API client used for every request below
auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
users = []

# verify_credentials has to be *called*: the bare bound method is always
# truthy, so the un-called check reported success even with invalid keys.
if api.verify_credentials():
    print("Logged In Successfully")
else:
    print("Error -- Could not log in with your credentials")

# Let the cursor enforce the cap instead of counting by hand and calling
# exit() from inside the loop.
max_followers = 99
followers = tweepy.Cursor(api.followers).items(max_followers)
for follower in followers:
    print(follower)
And here is an example of the JSON
User(_api=<tweepy.api.API object at 0x0000028E4D3C8F60>, _json={'id': 1898321922, 'id_str': '1898321922', 'name': 'Creator Support', 'screen_name': 'GamerGrowthHQ', 'location': 'Global', 'description': 'Supporting Creators through advice, shout-outs, and daily support. Managed by @adron_foe', 'url': 'https://www.twitch.tv/adron_foe', 'entities': {'url': {'urls': [{'url': 'https://www.twitch.tv/adron_foe', 'expanded_url': 'https://twitch.tv/adron_foe', 'display_url': 'twitch.tv/adron_foe', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 130539, 'friends_count': 73691, 'listed_count': 157, 'created_at': 'Mon Sep 23 20:37:10 +0000 2013', 'favourites_count': 2001, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 1540, 'lang': None, 'status': {'created_at': 'Sun Sep 29
23:49:54 +0000 2019', 'id': 1178456902491131909, 'id_str': '1178456902491131909', 'text': 'RT @zFakes_: Looking for an editor to make My first twitch emote', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Sun Sep 29 10:36:55 +0000 2019', 'id': 1178257339499110401, 'id_str': '1178257339499110401', 'text': 'Looking for an editor to make My first twitch emote', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 23, 'favorited': False, 'retweeted': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': '000000', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme12/bg.gif', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme12/bg.gif', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1120067816118521856/PxOWQ_Qe_normal.png', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1120067816118521856/PxOWQ_Qe_normal.png', 
'profile_banner_url': 'https://pbs.twimg.com/profile_banners/1898321922/1554732991', 'profile_link_color': '1B95E0', 'profile_sidebar_border_color': '000000', 'profile_sidebar_fill_color': '000000', 'profile_text_color': '000000', 'profile_use_background_image': False, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'can_media_tag': True, 'followed_by': True, 'following': False, 'live_following': False, 'follow_request_sent': False, 'notifications': False, 'muting': False, 'blocking': False, 'blocked_by': False, 'translator_type': 'none'}, id=1898321922, id_str='1898321922', name='Creator Support', screen_name='GamerGrowthHQ', location='Global', description='Supporting Creators through advice, shout-outs, and daily support. Managed by @adron_foe', url='https://www.twitch.tv/adron_foe', entities={'url':
{'urls': [{'url': 'https://www.twitch.tv/adron_foe', 'expanded_url': 'https://twitch.tv/adron_foe', 'display_url': 'twitch.tv/adron_foe', 'indices': [0, 23]}]}, 'description': {'urls': []}}, protected=False, followers_count=130539, friends_count=73691, listed_count=157, created_at=datetime.datetime(2013, 9, 23, 20, 37, 10), favourites_count=2001, utc_offset=None, time_zone=None, geo_enabled=False, verified=False, statuses_count=1540, lang=None, status=Status(_api=<tweepy.api.API object at 0x0000028E4D3C8F60>, _json={'created_at': 'Sun Sep 29 23:49:54 +0000 2019', 'id':
1178456902491131909, 'id_str': '1178456902491131909', 'text': 'RT @zFakes_: Looking for an editor to make My first twitch emote', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twitter.com/download/iphone"
rel="nofollow">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Sun Sep 29 10:36:55 +0000 2019', 'id': 1178257339499110401, 'id_str': '1178257339499110401', 'text': 'Looking for an editor to make My first twitch emote', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 23, 'favorited': False, 'retweeted': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}, created_at=datetime.datetime(2019, 9, 29, 23, 49, 54), id=1178456902491131909, id_str='1178456902491131909', text='RT @zFakes_: Looking for an editor to make My first twitch emote', truncated=False, entities={'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, source='Twitter for iPhone', source_url='http://twitter.com/download/iphone', in_reply_to_status_id=None, in_reply_to_status_id_str=None, in_reply_to_user_id=None, in_reply_to_user_id_str=None, in_reply_to_screen_name=None, geo=None, coordinates=None, place=None, contributors=None, retweeted_status=Status(_api=<tweepy.api.API object at 0x0000028E4D3C8F60>, _json={'created_at': 'Sun Sep 29 10:36:55 +0000 2019', 'id': 1178257339499110401, 'id_str': '1178257339499110401', 'text': 'Looking for an editor to make My first twitch emote', 'truncated': False, 'entities': {'hashtags': [], 
'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for
Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None,
'in_reply_to_screen_name': None, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 23, 'favorited': False, 'retweeted': False, 'lang': 'en'}, created_at=datetime.datetime(2019, 9, 29, 10, 36, 55), id=1178257339499110401, id_str='1178257339499110401', text='Looking for an editor to make My first twitch emote', truncated=False, entities={'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, source='Twitter for Android', source_url='http://twitter.com/download/android',
in_reply_to_status_id=None, in_reply_to_status_id_str=None, in_reply_to_user_id=None, in_reply_to_user_id_str=None, in_reply_to_screen_name=None, geo=None, coordinates=None, place=None, contributors=None, is_quote_status=False, retweet_count=1, favorite_count=23, favorited=False, retweeted=False, lang='en'), is_quote_status=False, retweet_count=1, favorite_count=0, favorited=False, retweeted=False, lang='en'), contributors_enabled=False, is_translator=False, is_translation_enabled=False, profile_background_color='000000', profile_background_image_url='http://abs.twimg.com/images/themes/theme12/bg.gif', profile_background_image_url_https='https://abs.twimg.com/images/themes/theme12/bg.gif', profile_background_tile=False, profile_image_url='http://pbs.twimg.com/profile_images/1120067816118521856/PxOWQ_Qe_normal.png', profile_image_url_https='https://pbs.twimg.com/profile_images/1120067816118521856/PxOWQ_Qe_normal.png', profile_banner_url='https://pbs.twimg.com/profile_banners/1898321922/1554732991', profile_link_color='1B95E0', profile_sidebar_border_color='000000', profile_sidebar_fill_color='000000', profile_text_color='000000', profile_use_background_image=False, has_extended_profile=False, default_profile=False, default_profile_image=False, can_media_tag=True, followed_by=True, following=False, live_following=False, follow_request_sent=False, notifications=False, muting=False, blocking=False, blocked_by=False, translator_type='none')
I am trying to find a repeatable method that allows me to:
Take 200 followers from the signed in Twitter account, parse their account details (including tweets), and create a (large) Python Pandas dataframe object containing the mentioned details.
I have tried this link and this link, but I have not understood how to properly implement them to accomplish what I am looking for.
Another example is me being able to access the location of a user account, with the following:
import tweepy
import time
# Insert your Twitter keys here
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
#twitter_handle='TimBarbalace'

# Authenticate and build the API client used for every request below
auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
users = []

# verify_credentials has to be *called*: the bare bound method is always
# truthy, so the un-called check reported success even with invalid keys.
if api.verify_credentials():
    print("Logged In Successfully")
else:
    print("Error -- Could not log in with your credentials")

# Let the cursor enforce the cap instead of counting by hand and calling
# exit() from inside the loop.
max_followers = 99
followers = tweepy.Cursor(api.followers).items(max_followers)
for follower in followers:
    # Individual fields are plain attributes on each follower object
    print(follower.screen_name, follower.location)
Results in:
crzyazn888 Washington, DC
narutouz16
GamerGrowthHQ Global
pleasantemma Hell, Pennsylvania
karadise_art in a galaxy far, far away
webdivaloper
Maurer_Ranger The Internet
megliebsch Philadelphia, Pennyslvania
hoang_le_96 Philadelphia, PA
lasallephilo Philadelphia, PA
brianmaxwell33
BobbyJPolitics Philadelphia, PA
_nadcas
JPower96IsTaken
crypticsmystic
ZacharyFlair Washington, DC
thegierczaks1
KFlahertyRN
cbars68
kaitlyndmcd Philadelphia, PA
illMELt_withyou
jesskidding07
BetaRayJohn
tew_dedicatesd Baltimore, MD
hbthen3rd Redmond, WA
g_laubenstein Philadelphia, PA
tewsaucey
leahgarloff Philadelphia, PA
TheCage52
softballkenz13
zyocard
josephsilvestr5 Mays Chapel, MD
jerry_ooooo
karadevanney Point Place, Wisconsin
omgitsfranipher New Jersey, USA
PaigeBuckworth
LSU_studyabroad
jcaskerr
Process_Pete Towson, MD
lexyandiknowiit Maryland, USA
lawoqTr
sucreidesc83 Казань
LaSalleSGA Philadelphia, PA
N_Pilny1
Kaileyminkk
allyssapingul HOBY MD
cgarvss
ubertev
beckwoodworth
lmgeee22
nosayslion Philadelphia, PA
CoreyRayEid Los Angeles
s0_krispy
aimeemarierose3 La Salle University
where_is_harry_ La Salle University
OfficialDriscoe Baltimore, MD
THEchubby_messi
Sera_Numquam Philadelphia, PA
3dBeddingsets
CelanoScott
alixleto1
dzhuzham4 Missouri, USA
tayyheath D(M)V
50ShadesOfGlaze
Deidre_Mc
nicole_wickizer
Thomasmedia2019 California, USA
water2142
DurkinSays Philadelphia, PA
tavia_overton Baltimore, MD
NotKTLeu
CornHub35 West Palm Beach, FL
The0kayJosh cincinnati zoo
sherree_wale
XavierRivera_ Baltimore, MD
phinguyen_163
dannywess83
okweightlossdna
cd_somers Baltimore, MD
OscarOr85985212
LawAbidingHuman London Town
LorenzoTanoueAK Durham, NC
cdvsmith
StephanieeLynn0
MrAlphonsoJones Virginia
baltiMAURA
keondra281
yagirlmels
HBroughaha
mi_erna
mike_wieczorek
chase_brennan13
Maryjs93 Phoenixville, PA
Brady_McKinney Baltimore... UMD Alumni
akbashor Philadelphia, PA
LinzJustin
cabarca_14
013MG
B_kroner82
NOTE - After reading some Stack Overflow posts, I think only the newest 200 tweets per user can suffice.
I also found this Github link for extracting just tweets?
I have added a Bounty to this question.
To be able to interact with the Twitter API using Python, we'll use a library called Tweepy. To be able to hide the credentials from the source code and load them as environment variables, we'll use python-dotenv. First, create a . env file to hold your credentials.
You can use the loc and iloc functions to access columns in a Pandas DataFrame. Let's see how. If we wanted to access a certain column in our DataFrame, for example the Grades column, we could simply use the loc function and specify the name of the column in order to retrieve it.
followers is a generator containing User(...), which is a tweepy.models.User type.
Wrap followers in list() to unpack the generator, or just iterate through the followers without unpacking it. A list is used here in case there's some need to inspect the content. Extract the _json for each user with def jsonify_tweepy.
This returns the _json for each follower, in a JSON format, which is then converted to a dataframe with json_normalize.
import tweepy
import json
import pandas as pd
from pandas.io.json import json_normalize
# Twitter API credentials -- fill in your own keys
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# OAuth handshake, then an API client configured with wait_on_rate_limit
auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

# Materialise the follower cursor into a list so it can be inspected/reused
follower_cursor = tweepy.Cursor(api.followers)
followers = list(follower_cursor.items())
# Convert a Tweepy object's _json payload into plain JSON data
def jsonify_tweepy(tweepy_object):
    """Return a JSON round-tripped copy of ``tweepy_object._json``.

    The payload is serialised to a JSON string and parsed straight back, so
    the result is a new structure built only from JSON-native types.
    """
    raw_payload = tweepy_object._json
    return json.loads(json.dumps(raw_payload))
# Run every follower through jsonify_tweepy to collect the raw payloads
followers_list = list(map(jsonify_tweepy, followers))
# Flatten the collected payloads into a pandas dataframe
df = json_normalize(followers_list)
Use class TweetMiner, as shown in the link at the top. Note that bare except clauses are a no-no.
from datetime import datetime
class TweetMiner(object):
    """Download a user's timeline through Tweepy and flatten each tweet to a dict."""

    # Class-level defaults, kept so existing attribute access keeps working;
    # __init__ sets per-instance ``api``, ``twitter_keys`` and ``result_limit``.
    result_limit = 20
    data = list()
    api = False
    twitter_keys = {'consumer_key': 'your consumer_key',
                    'consumer_secret': 'your consumer_secret',
                    'access_token_key': 'your access_token',
                    'access_token_secret': 'your access_token_secret'}

    def __init__(self, keys_dict=twitter_keys, api=api, result_limit=20):
        """Authenticate with Twitter and store the per-request tweet count.

        Args:
            keys_dict: Mapping with the keys ``consumer_key``,
                ``consumer_secret``, ``access_token_key`` and
                ``access_token_secret``.
            api: Unused; kept for backward compatibility. A new
                ``tweepy.API`` client is always created.
            result_limit: Value passed as ``count`` on every timeline request.
        """
        self.twitter_keys = keys_dict
        auth = tweepy.OAuthHandler(keys_dict['consumer_key'],
                                   keys_dict['consumer_secret'])
        auth.set_access_token(keys_dict['access_token_key'],
                              keys_dict['access_token_secret'])
        self.api = tweepy.API(auth, wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)
        self.result_limit = result_limit

    def mine_user_tweets(self, user, mine_rewteets=False, max_pages=5):
        """Fetch up to ``max_pages`` pages of ``user``'s timeline.

        Args:
            user: Screen name whose timeline is requested.
            mine_rewteets: Unused; kept (including its spelling) so existing
                keyword callers do not break.
            max_pages: Maximum number of timeline requests to make.

        Returns:
            A list with one dict per tweet. ``retweet_text``, ``quote_text``
            and ``quote_screen_name`` hold the string ``'None'`` when the
            tweet has no retweeted/quoted status.
        """
        data = []
        last_tweet_id = None
        page = 1
        while page <= max_pages:
            # NOTE(review): Twitter's documented flag appears to be
            # ``include_rts``; ``include_retweets`` is left untouched here --
            # confirm against the API reference before renaming.
            request = {'screen_name': user,
                       'count': self.result_limit,
                       'tweet_mode': 'extended',
                       'include_retweets': True}
            if last_tweet_id is not None:
                # Continue strictly below the oldest tweet already collected.
                request['max_id'] = last_tweet_id - 1
            statuses = self.api.user_timeline(**request)
            if not statuses:
                # An empty page leaves the cursor where it was, so every
                # further request would be identical; stop early.
                break
            for item in statuses:
                mined = {'tweet_id': item.id,
                         'name': item.user.name,
                         'screen_name': item.user.screen_name,
                         'retweet_count': item.retweet_count,
                         'text': item.full_text,
                         'mined_at': datetime.now(),
                         'created_at': item.created_at,
                         'favourite_count': item.favorite_count,
                         'hashtags': item.entities['hashtags'],
                         'status_count': item.user.statuses_count,
                         'location': item.place,
                         'source_device': item.source}
                # Missing attributes (or a None status) fall back to 'None'.
                retweeted = getattr(item, 'retweeted_status', None)
                mined['retweet_text'] = getattr(retweeted, 'full_text', 'None')
                quoted = getattr(item, 'quoted_status', None)
                try:
                    # Read both fields from the quoted status itself; the
                    # previous code used an undefined name here, so quote
                    # data was always discarded.
                    quote_text = quoted.full_text
                    quote_screen_name = quoted.user.screen_name
                except AttributeError:
                    quote_text = 'None'
                    quote_screen_name = 'None'
                mined['quote_text'] = quote_text
                mined['quote_screen_name'] = quote_screen_name
                last_tweet_id = item.id
                data.append(mined)
            page += 1
        return data
Using df from above, get all the followers and use class TweetMiner to download the tweets for each user. The results are stored in mined_tweets_dict, where each key is a user.
miner=TweetMiner(result_limit=200)
# One DataFrame of mined tweets per follower, keyed by screen name
mined_tweets_dict = {}
for screen_name in df['screen_name'].unique():
    try:
        user_tweets = miner.mine_user_tweets(user=screen_name, max_pages=17)
    except tweepy.TweepError as err:
        # Report the failure for this user and move on to the next one
        print(f'{screen_name} could not be processed because {err}')
    else:
        mined_tweets_dict[screen_name] = pd.DataFrame(user_tweets)
# Save every user's tweets to a single CSV file with .to_csv:
# newline='' leaves the line endings written by pandas untouched; without it
# text-mode newline translation can produce blank rows on Windows.
with open('follower_tweets.csv', mode='a', encoding='utf-8', newline='') as f:
    # A distinct loop name keeps the followers DataFrame `df` from being
    # overwritten by the last per-user frame.
    for i, tweets_df in enumerate(mined_tweets_dict.values()):
        # Only the first frame writes the column header row.
        tweets_df.to_csv(f, header=(i == 0), index=False)
                        If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With