I want to log in to the website Reddit.com, navigate to a particular area of the page, and submit a comment. I don't see what's wrong with this code, but it is not working: no change is reflected on the Reddit site.
import mechanize
import cookielib
def main():
    """Log in to reddit through its HTML login form, then submit a comment form.

    Drives a cookie-enabled mechanize browser: opens the front page, fills
    the second form on it with the credentials, opens the comment thread,
    fills the 'text' field of the eighth form and submits it.
    """
    front_page = 'http://www.reddit.com'
    thread_url = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'

    # Browser backed by a cookie jar so the login session is kept.
    browser = mechanize.Browser()
    jar = cookielib.LWPCookieJar()
    browser.set_cookiejar(jar)

    # Switch on the handlers we want; robots.txt handling is switched off.
    for enable in (browser.set_handle_equiv,
                   browser.set_handle_gzip,
                   browser.set_handle_redirect,
                   browser.set_handle_referer):
        enable(True)
    browser.set_handle_robots(False)

    # Follow refresh 0, but do not hang on refresh > 0.
    refresh_handler = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_handler, max_time=1)

    # Load the site and consume the response body.
    browser.open(front_page).read()

    # The login form is the second form on the page (index one).
    browser.select_form(nr=1)
    login_form = browser.form
    login_form['user'] = 'DUMMYUSERNAME'
    login_form['passwd'] = 'DUMMYPASSWORD'
    browser.submit()

    # Load the comment page and consume the response body.
    browser.open(thread_url).read()

    # The comment box is believed to be the eighth form on the page.
    browser.select_form(nr=7)
    browser.form['text'] = "this is an automated test"
    browser.submit()
What's wrong with this?
I would definitely suggest trying to use the API if possible, but this works for me (not for your example post, which has been deleted, but for any active one):
#!/usr/bin/env python
import mechanize
import cookielib
import urllib
import logging
import sys
def main():
    """Log in to reddit and post a comment by calling /api/comment directly.

    The comment form on the page is not submitted as-is. Instead the values
    it would send are collected from the page ('uh' from the first form,
    'thing_id' and the form id from the eighth form) and POSTed to
    http://www.reddit.com/api/comment with the browser's cookies attached.
    """
    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    br.open('http://www.reddit.com')

    # Select the second (index one) form and log in.
    br.select_form(nr=1)
    br.form['user'] = 'user'
    br.form['passwd'] = 'passwd'
    br.submit()

    # Open up the comment page.
    posting = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
    # You can get the rval in other ways, but this will work for testing.
    rval = 'PoopSandwiches'
    br.open(posting)

    # You need the 'uh' value from the first form.
    br.select_form(nr=0)
    uh = br.form['uh']

    br.select_form(nr=7)
    thing_id = br.form['thing_id']
    # The id that gets posted is the form id with a '#' prepended.
    form_id = '#' + br.form.attrs['id']

    # urlencode percent-escapes each value and encodes spaces as '+', so no
    # manual quote()/replace() pass is needed. A list of pairs keeps the
    # field order fixed.
    fields = [
        ('thing_id', thing_id),
        ('text', "Your text here!"),
        ('id', form_id),
        ('r', rval),
        ('uh', uh),
        ('renderstyle', 'html'),
    ]
    new_data = urllib.urlencode(fields)

    # Not sure which of these headers are really needed, but it works with
    # all of them, so why not just include them.
    req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
    req.add_header('Referer', posting)
    req.add_header('Accept', ' application/json, text/javascript, */*')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    # The request is sent with mechanize.urlopen rather than br.open, so the
    # session cookies are attached to it explicitly.
    cj.add_cookie_header(req)
    mechanize.urlopen(req)


main()
It would be interesting to turn JavaScript off and see how the reddit comments are handled then. Right now there is a bunch of magic that happens in an onsubmit function called when making your post. This is where the `uh` and `id` values get added.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With