I use lxml to parse pages. When I run my code with the App Engine SDK it works, but when I deploy my application to the cloud, I get the following error message:
Traceback (most recent call last):
  File "/base/data/home/apps/s~testparsercyka/1.356245976008257055/handler_info.py", line 2, in <module>
    import lxml.html
  File "/base/data/home/apps/s~testparsercyka/1.356245976008257055/lxml/html/__init__.py", line 12, in <module>
    from lxml import etree
ImportError: cannot import name etree
Code:
app.yaml
application: testparsercyka
version: 1
runtime: python27
api_version: 1
threadsafe: false

handlers:
# Files under stylesheets/ are served as static content.
- url: /stylesheets
  static_dir: stylesheets
# Every other URL is handled by handler_info.py.
- url: /.*
  script: handler_info.py

libraries:
- name: lxml
  version: "2.3" # I thought this would allow me to use lxml.etree
handler_info.py
import lxml
import lxml.html
import urllib
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext.webapp import template
import os
import cgi
class MainPage(webapp.RequestHandler):
    """Handler that answers GET requests with the rendered ``index.html``."""

    def get(self):
        """Render ``index.html`` from this module's directory and send it."""
        # The template is looked up next to this source file.
        template_path = os.path.join(os.path.dirname(__file__), 'index.html')
        # No template variables are supplied.
        rendered = template.render(template_path, {})
        self.response.out.write(rendered)
class Handlers(webapp.RequestHandler):
    """Fetch a numbered series of pages and echo the text of matching elements.

    Request parameters read by ``post``:
      url  -- URL template; ``str.format`` is applied with the page number
      teg  -- tag name an element must have to be reported
      attr -- ``name:value`` attribute filter an element must satisfy
      n0   -- first page number (inclusive)
      n    -- last page number (exclusive)
    """

    def post(self):
        """Scrape pages ``n0`` .. ``n - 1`` and write every match to the response.

        Responds with HTTP 400 when ``attr`` is not of the form ``name:value``
        or when ``n0`` / ``n`` are not integers.
        """
        url_template = str(self.request.get('url'))
        teg = str(self.request.get('teg'))
        attr = str(self.request.get('attr'))

        # Split on the first colon only, so attribute values that themselves
        # contain ':' are kept whole.
        a = attr.split(':', 1)
        if len(a) != 2:
            # Without this guard getn() would fail with IndexError on a[1].
            self.error(400)
            return

        try:
            first = int(str(self.request.get('n0')))
            last = int(str(self.request.get('n')))
        except ValueError:
            # Non-numeric bounds are a client error, not a server error.
            self.error(400)
            return

        for i in range(first, last):
            # Format from the untouched template every time; no need to
            # overwrite and then restore a shared variable.
            page_url = url_template.format(str(i))
            # The URL is user input echoed into the response: escape it.
            self.response.out.write(cgi.escape(page_url))

            # NOTE(review): page_url is user-controlled and fetched from the
            # server side; consider restricting the allowed hosts.
            page = urllib.urlopen(page_url)
            try:
                html = page.read()
            finally:
                # Release the connection even if read() fails.
                page.close()

            # document_fromstring() already returns the root element.
            doc = lxml.html.document_fromstring(html)
            self.getn(doc, teg, a)

    def getn(self, node, teg, a):
        """Depth-first walk from ``node``, writing the text of each match.

        An element matches when its tag equals ``teg`` and its attribute
        ``a[0]`` has the value ``a[1]``.
        """
        if node.tag == teg and node.get(a[0]) == a[1]:
            # node.text is None for elements without leading text; the text
            # comes from a remote page, so escape it before writing.
            self.response.out.write(cgi.escape(node.text or ''))
            # NOTE(review): this literal was split across two lines in the
            # original listing; it may have been an HTML line break that was
            # lost when the code was copied -- confirm.
            self.response.out.write('\n')
        for child in node:
            self.getn(child, teg, a)
# WSGI application: '/' is served by MainPage, '/sign' by Handlers.
application = webapp.WSGIApplication(
    [('/', MainPage), ('/sign', Handlers)],
    debug=True)
def main():
    """Hand the module-level ``application`` to ``run_wsgi_app``."""
    run_wsgi_app(application)


# Only run the application when this file is executed as a script.
if __name__ == "__main__":
    main()
Any ideas why this does not work?
I know this is an old question but here is an answer that I have confirmed to work when deployed to App Engine:
app.yaml
application: lxml-test
version: 1
runtime: python27
api_version: 1
threadsafe: false

handlers:
# Every URL is routed to the object named app in app.py.
- url: /.*
  script: app.app

libraries:
- name: lxml
  version: "2.3"
- name: webapp2
  version: "latest"
app.py
import webapp2
import lxml.etree
class MainPage(webapp2.RequestHandler):
    """Handler that replies to GET with a small XML document built by lxml."""

    def get(self):
        """Parse a fixed XML snippet with ``lxml.etree`` and send it back."""
        document = lxml.etree.XML('<top><content>Hello world!</content></top>')
        self.response.content_type = 'text/xml'
        # Serialise with an XML declaration at the top of the output.
        payload = lxml.etree.tostring(document, xml_declaration=True)
        self.response.write(payload)
# WSGI application object; '/' is served by MainPage.
app = webapp2.WSGIApplication(
    routes=[('/', MainPage)],
    debug=True)
Comparing the above with your code, some of the following changes might help:
- Change `script: handler_info.py` to `script: handler_info.application`.
- Use `webapp2` instead of `webapp`.
It is also possible that the issue has simply resolved itself since 2012, when this question was asked.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With