I have a big dictionary that stores the data of a tweet. Many fields in it have the value `None` (or `null`). When I use the `get()` function, I get an `AttributeError: 'NoneType' object has no attribute 'get'`.
My dictionary is quite large and it is not possible to handle each case individually. Any ideas on how to do this? My code is:
from twython import TwythonStreamer
import json
from pymongo import MongoClient
import os
from datetime import *
# Create the MongoDB client; MongoClient() is called with no arguments.
client = MongoClient()
# NOTE(review): db1..db4 are four names bound to the same database,
# client.PWSocial; they are not four separate databases.
db1 = client.PWSocial
db2 = client.PWSocial
db3 = client.PWSocial
db4 = client.PWSocial
# Credentials (redacted placeholders) passed to the MyStreamer constructor below.
APP_KEY = 'XXXXX'
APP_SECRET = 'XXXXX'
OAUTH_TOKEN = 'XXXXX'
OAUTH_TOKEN_SECRET = 'XXXXXX'
# TwythonStreamer subclass: on_success copies fields of each received item
# into flat dictionaries and inserts some of them into MongoDB collections.
# NOTE(review): the indentation of this listing was lost; the statements
# between each `def` and the next are that method's body.
class MyStreamer(TwythonStreamer):
# Handle one received item `data` (used as a dict via .get()).
# NOTE(review): self.n and self.stop_time are read in this method but are
# not initialized anywhere in the visible code.
def on_success(self, data):
self.n=self.n+1
print self.n
print data
user=data.get("user")
# NOTE(review): these four collection handles are assigned but never used
# afterwards; the inserts below go through db1.posts / db3.posts instead.
tweets = db1.tweets
user_mentions = db2.user_mentions
hash_tags = db3.hash_tags
users = db4.users
# Flat copy of the tweet-level fields.
# NOTE(review): in the sample payload shown in this post, 'symbols' and 'urls'
# appear under data['entities'], and the 'user' object has an 'id' key but no
# 'user_id' key, so those three lookups return None for that sample.
# user.get(...) also raises AttributeError if `data` has no 'user' key.
dict1 = {'id':data.get("id"),'contributors':data.get("contributors"),'truncated':data.get('truncated'),'text':data.get('text'),'in_reply_to_status_id':data.get("in_reply_to_status_id"),'favorite_count':data.get('favorite_count'),'source':data.get('source'),'retweeted':data.get('retweeted'),'coordinates':data.get('coordinates'),'symbols':data.get('symbols'),'urls':data.get('urls'),'in_reply_to_screen_name':data.get('in_reply_to_screen_name'),'id_str':data.get('id_str'),'retweet_count':data.get('retweet_count'),'in_reply_to_user_id':data.get('in_reply_to_user_id'),'favorited':data.get('favorited'),'geo':data.get('geo'),'in_reply_to_user_id_str':data.get('in_reply_to_user_id_str'),'lang':data.get('lang'),'created_at':data.get('created_at'),'filter_level':data.get('filter_level'),'in_reply_to_status_id_str':data.get('in_reply_to_status_id_str'),'place':data.get('place'),'user_id':user.get('user_id')}
posts1 = db1.posts
post_id = posts1.insert(dict1)
# NOTE(review): the sample payload has 'user_mentions' under data['entities']
# (as a list), not at the top level, so `um` is None for that sample.
um = data.get("user_mentions")
#dict2 = {'tweet_id':data.get('id'),'id':um.get('id'),'indices':um.get('indices'),'id_str':um.get('id_str'),'screen_name':um.get('screen_name'),'name':um.get('name')}
#posts2 = db2.posts
#post_id2 = posts2.insert(dict2)
# data.get('hashtags') returns None when the key is missing; the traceback in
# this post points at the dict3 line below, where ht.get() is then called.
# NOTE(review): in the sample payload 'hashtags' is a list under
# data['entities'], not a dict at the top level.
ht = data.get('hashtags')
dict3 = {'tweet_id':data.get('id'),'indices':ht.get('indices'),'text':ht.get('text')}
posts3 = db3.posts
post_id3 = posts3.insert(dict3)
usr = data.get("user")
# Flat copy of the user fields.
# NOTE(review): dict4 is built but never inserted in the visible code.
dict4 = {'follow_request_sent':usr.get('follow_request_sent'),'profile_use_background_image':usr.get('profile_use_background_image'),'default_profile_image':usr.get('default_profile_image'),'id':usr.get('id'),'verified':usr.get('verified'),'profile_image_url_https':usr.get('profile_image_url_https'),'profile_sidebar_fill_color':usr.get("profile_sidebar_fill_color"),'profile_text_color':usr.get('profile_text_color'),'followers_count':usr.get('followers_count'),'profile_sidebar_border_color':usr.get('profile_sidebar_border_color'),'id_str':usr.get('id_str'),'profile_background_color':usr.get('profile_background_color'),'listed_count':usr.get('listed_count'),'profile_background_image_url_https':usr.get('profile_background_image_url_https'),'utc_offset':usr.get('utc_offset'),'statuses_count':usr.get('statuses_count'),'description':usr.get('description'),'friends_count':usr.get('friends_count'),'location':usr.get('location'),'profile_link_color':usr.get('profile_link_color'),'profile_image_url':usr.get('profile_image_url'),'following':usr.get('following'),'geo_enabled':usr.get('geo_enabled'),'profile_banner_url':usr.get('profile_banner_url'),'profile_background_image_url':usr.get('profile_background_image_url'),'name':usr.get('name'),'lang':usr.get('lang'),'profile_background_tile':usr.get('profile_background_tile'),'favourites_count':usr.get('favourites_count'),'screen_name':usr.get('screen_name'),'notifications':usr.get('notifications'),'url':usr.get('url'),'created_at':usr.get('created_at'),'contributors_enabled':usr.get('contributors_enabled'),'time_zone':usr.get('time_zone'),'protected':usr.get('protected'),'default_profile':usr.get('default_profile'),'is_translator':usr.get('is_translator')}
# Disconnect once the current time has passed self.stop_time.
if (datetime.now() > self.stop_time):
self.disconnect()
# Print the status code of an error and return True.
def on_error(self, status_code, data):
print status_code
return True # Don't kill the stream
# Report a timeout on stderr and return True.
# NOTE(review): `sys` is used here but is not among the imports shown above.
def on_timeout(self):
print >> sys.stderr, 'Timeout...'
return True # Don't kill the stream
# Build the streamer from the four credential constants defined above.
stream = MyStreamer(APP_KEY, APP_SECRET,
OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
#stream.statuses.filter(follow = [57947109, 183093247, 89443197, 431336956])
# Start the filtered stream with a single `follow` value (presumably a user
# id); per the traceback in this post, on_success is invoked from inside
# this call.
stream.statuses.filter(follow = 95995660)
A sample of the dictionary I am dealing with is:
data = {
"contributors": null,
"truncated": false,
"text": "@mipaltan #MI4LakhStronger if i become admin,i will hold trivia based contest which will held fans refresh old memories",
"in_reply_to_status_id": 420452758068539392,
"id": 420455226563231744,
"favorite_count": 0,
"source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Mobile Web (M2)</a>",
"retweeted": false,
"coordinates": null,
"entities": {
"symbols": [],
"user_mentions": [
{
"id": 106345557,
"indices": [
0,
9
],
"id_str": "106345557",
"screen_name": "mipaltan",
"name": "Mumbai Indians"
}
],
"hashtags": [
{
"indices": [
10,
26
],
"text": "MI4LakhStronger"
}
],
"urls": []
},
"in_reply_to_screen_name": "mipaltan",
"id_str": "420455226563231744",
"retweet_count": 0,
"in_reply_to_user_id": 106345557,
"favorited": false,
"user": {
"follow_request_sent": null,
"profile_use_background_image": true,
"default_profile_image": false,
"id": 87174680,
"verified": false,
"profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000220301249/a0c7b8c5766de83b65a42ca52196c4b3_normal.jpeg",
"profile_sidebar_fill_color": "EADEAA",
"profile_text_color": "333333",
"followers_count": 348,
"profile_sidebar_border_color": "D9B17E",
"id_str": "87174680",
"profile_background_color": "8B542B",
"listed_count": 5,
"profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/350203578/Photo0003.jpg",
"utc_offset": 19800,
"statuses_count": 20119,
"description": "Sports Lover",
"friends_count": 708,
"location": "India",
"profile_link_color": "9D582E",
"profile_image_url": "http://pbs.twimg.com/profile_images/378800000220301249/a0c7b8c5766de83b65a42ca52196c4b3_normal.jpeg",
"following": null,
"geo_enabled": true,
"profile_background_image_url": "http://a0.twimg.com/profile_background_images/350203578/Photo0003.jpg",
"name": "Ronak Baj",
"lang": "en",
"profile_background_tile": true,
"favourites_count": 17,
"screen_name": "ronakbaj",
"notifications": null,
"url": null,
"created_at": "Tue Nov 03 12:02:56 +0000 2009",
"contributors_enabled": false,
"time_zone": "New Delhi",
"protected": false,
"default_profile": false,
"is_translator": false
},
"geo": null,
"in_reply_to_user_id_str": "106345557",
"lang": "en",
"created_at": "Tue Jan 07 07:21:52 +0000 2014",
"filter_level": "medium",
"in_reply_to_status_id_str": "420452758068539392",
"place": null
}
The complete traceback is:
File "new.py", line 51, in <module>
stream.statuses.filter(follow = 95995660)
File "/usr/lib/python2.7/site-packages/twython/streaming/types.py", line 65, in filter
self.streamer._request(url, 'POST', params=params)
File "/usr/lib/python2.7/site-packages/twython/streaming/api.py", line 148, in _request
if self.on_success(data): # pragma: no cover
File "new.py", line 33, in on_success
dict3 = {'tweet_id':data.get('id'),'indices':ht.get('indices'),'text':ht.get('text')}
AttributeError: 'NoneType' object has no attribute 'get'
PS: Please comment if any data I have given is unnecessary, and I will edit the question.
You are trying to call the `.get()` method on the result of these `data.get()` calls:
ht = data.get('hashtags')
and
usr = data.get("user")
but if there is no `'hashtags'` or `'user'` key in the `data` dictionary, then `ht` or `usr` is set to `None`, and the `ht.get()` or `usr.get()` calls fail.
Either use a better default or guard against `ht` or `usr` being `None`:
ht = data.get('hashtags', {}) # return an empty dictionary if missing
and
usr = data.get('user', {}) # return an empty dictionary if missing
If it is an error for a key not to be present, don't use `.get()`; use direct access instead, so that a missing key raises a `KeyError`:
usr = data['user']
Note that your `data` object has no `hashtags` key in any case. There is, however, a `hashtags` key in the `data['entities']` dictionary:
ht = data['entities']['hashtags']
This is a list of hashtags, so `ht.get('indices')` will now fail with an `AttributeError: 'list' object has no attribute 'get'`; you need to loop over all the hashtags or pick one from the list.
I am answering this one myself, based on the final solution I found.
What @Martijn Pieters said above is absolutely correct: the tweet that was returned had no data in the hashtags entry under the entities dictionary, so there was no data in `ht`. In such a case, the `.get()` function cannot be used.
However, the solution to this is simple; just use
ht = data.get('hashtags', {})
Now, before the `ht` dictionary is used further, a simple `if` condition will do the trick, like this:
.
.
.#above code
if um:
dict2 = um[0]
posts2 = db2.posts
post_id2 = posts2.insert(dict2)
.
.
.#other code
Implement this for every such dictionary for which no value may be returned. In my example, this has to be done for `ht` and `um`.
Hope this helps if anyone in the future encounters a similar problem.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With