I'm using PyQt 4.9.1 on Ubuntu 12.04 (amd64) (tried with both Python 2.6 and 2.7) to make a headless browser, but I'm getting: Program received signal SIGSEGV, Segmentation fault. Here is a simplified version of the program (still a bit long):
# -*- coding: utf-8 -*-
# Start a non-visible virtual display; the QApplication below is created
# only after the display has been started.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()
from PyQt4.QtGui import QApplication
#from PySide.QtGui import QApplication
# Create the Qt application object, then install the Qt-based reactor.
# Both happen before any twisted module is imported below.
app = QApplication([])
import qt4reactor
qt4reactor.install()
from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, QByteArray, QTimer
class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores SSL errors and prints them."""

    def __init__(self, *args, **kwargs):
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        # Every sslErrors emission is handled by _ssl_errors.
        self.sslErrors.connect(self._ssl_errors)

    def _ssl_errors(self, reply, errors):
        """Tell ``reply`` to ignore its SSL errors, then print each one."""
        reply.ignoreSslErrors()
        template = 'Ignored SSL Error: {0} - {1}'
        for ssl_error in errors:
            print(template.format(ssl_error.error(), ssl_error.errorString()))
class CustomQWebPage(QWebPage):
    """QWebPage that reports one fixed user agent string for every URL."""

    def __init__(self, *args, **kwargs):
        super(CustomQWebPage, self).__init__(*args, **kwargs)

    def userAgentForUrl(self, url):
        """Return the same Firefox 5 user agent regardless of ``url``."""
        user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'
        return user_agent
class WebkitWrapper(QWebView):
    """QWebView that loads a URL and delivers the page HTML via a Deferred."""

    def __init__(self, *args, **kwargs):
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        self.network_manager = CustomQNetworkAccessManager()
        # The page is held only in a local variable here; no attribute on
        # ``self`` refers to it.
        page = CustomQWebPage()
        page.setNetworkAccessManager(self.network_manager)
        self.setPage(page)
        web_settings = self.settings()
        # Images are loaded; Java, JavaScript, popups and plugins are off.
        for attribute, enabled in (
                (QWebSettings.AutoLoadImages, True),
                (QWebSettings.JavaEnabled, False),
                (QWebSettings.JavascriptEnabled, False),
                (QWebSettings.JavascriptCanOpenWindows, False),
                (QWebSettings.PluginsEnabled, False),
        ):
            web_settings.setAttribute(attribute, enabled)
        self.loadFinished.connect(self._load_finished)

    def perform(self, request_data, timeout=15):
        """Start loading ``request_data['url']`` and return a Deferred.

        The Deferred is fired by ``_load_finished``. ``timeout`` is accepted
        but not used by this method.
        """
        pending = defer.Deferred()
        self._deferred_request = pending
        url = request_data.get('url', '')
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        self.load(request)
        print('getting: {0}'.format(url))
        return pending

    def _load_finished(self, ok):
        """Fire the pending Deferred with the main frame's HTML."""
        print('load finished: {0}'.format(ok))
        html = self.page().mainFrame().toHtml()
        self._deferred_request.callback(html)
class HeadlessBrowser(object):
    """Pairs one WebkitWrapper with the id it is tracked under."""

    def __init__(self, instance_id):
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Return ``(instance_id, result)`` so later callbacks know the owner."""
        tagged = (self.instance_id, result)
        return tagged

    def _request_failed(self, failure):
        """Print the failure and return its message as the result."""
        failure.trap(Exception)
        message = failure.getErrorMessage()
        print(message)
        print(failure.getTraceback())
        return message

    def shutdown(self):
        """Close the wrapped web view."""
        self.webkit_wrapper.close()

    def get_request(self, request_data):
        """Start the request; the Deferred yields ``(instance_id, result)``."""
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        # addBoth: the id is attached whether the chain is in a success or
        # a failure state at this point.
        pending.addBoth(self._return_intance_id)
        return pending
class TestXMLRPCServer(XMLRPC):
    """XML-RPC resource that creates one HeadlessBrowser per ``open`` call."""

    def __init__(self):
        XMLRPC.__init__(self, allowNone=True)
        # Browsers currently in use, keyed by their integer instance id.
        self.browser_instances = {}
        self.instance_counter = 0

    def _result_returned(self, result):
        """Shut down the browser that produced ``result`` and return its data."""
        instance_id, browser_result = result
        print('killing instance: {0}'.format(instance_id))
        finished_browser = self.browser_instances.pop(instance_id)
        finished_browser.shutdown()
        return browser_result

    def xmlrpc_open(self, request_data):
        """Load the requested URL in a new browser; return a Deferred."""
        print('requested: {0}'.format(request_data))
        self.instance_counter += 1
        new_browser = HeadlessBrowser(self.instance_counter)
        self.browser_instances[self.instance_counter] = new_browser
        pending = new_browser.get_request(request_data)
        pending.addCallback(self._result_returned)
        return pending
def start_server(port=8297):
    """Serve TestXMLRPCServer over TCP on ``port`` and run the reactor."""
    # Imported inside the function rather than at module level; presumably
    # so the reactor is the one installed by qt4reactor -- confirm.
    from twisted.internet import reactor
    xmlrpc_root = TestXMLRPCServer()
    site = server.Site(xmlrpc_root)
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()
Now, as far as I can tell, the problem is in how the instances of the HeadlessBrowser class are stored. I use a dict to store them; in the real implementation I reuse them via a custom session_id, but here I faked an instance_counter just to show how it works. Even though I store them in the dict, it looks like the instances are garbage collected, or at least I'm not sure why else the problem happens. In any case, if I drop the dict and store just one instance on the XML-RPC class as a class attribute, the problem does not happen.
This is a sample client for it:
# -*- coding: utf-8 -*-
import xmlrpclib
def test_server(port=8297):
    """Ask the local XML-RPC server to open a page and print the response."""
    endpoint = 'http://localhost:{0}/'.format(port)
    proxy = xmlrpclib.Server(endpoint)
    print(proxy.open({'url': 'http://www.microsoft.com'}))


if __name__ == '__main__':
    test_server()
Any suggestions?
UPDATE: Added backtrace:
(gdb) run
Starting program ...
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7fffe8f60700 (LWP 13393)]
Xlib: extension "RANDR" missing on display ":1851".
[New Thread 0x7fffe3fff700 (LWP 13394)]
[New Thread 0x7fffe37fe700 (LWP 13395)]
requested: {'url': 'http://www.microsoft.com'}
[New Thread 0x7fffd9a2f700 (LWP 13422)]
[New Thread 0x7fffd9116700 (LWP 13423)]
[New Thread 0x7fffcfdd2700 (LWP 13425)]
getting: http://www.microsoft.com
[New Thread 0x7fffcf5d1700 (LWP 13426)]
[New Thread 0x7fffc5f28700 (LWP 13427)]
[Thread 0x7fffe37fe700 (LWP 13395) exited]
load finished: True
killing instance: 1
Program received signal SIGSEGV, Segmentation fault.
QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
3456 kernel/qobject.cpp: No such file or directory.
(gdb) bt
#0 QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
#1 0x00007fffdad358b2 in QWebFrame::loadFinished (this=<optimized out>, _t1=true) at ./moc_qwebframe.cpp:239
#2 0x00007fffdad74e08 in WebCore::FrameLoaderClientQt::dispatchDidFinishLoad (this=0x1428290) at WebCoreSupport/FrameLoaderClientQt.cpp:527
#3 0x00007fffdb0cfcbb in WebCore::FrameLoader::recursiveCheckLoadComplete (this=0x7fffd9141478) at loader/FrameLoader.cpp:2641
#4 0x00007fffdb100754 in WebCore::SubresourceLoader::didFinishLoading (this=0x7fffc5f3d300, finishTime=0) at loader/SubresourceLoader.cpp:202
#5 0x00007fffdb2f033b in WebCore::QNetworkReplyHandler::finish (this=0x14adcb0) at platform/network/qt/QNetworkReplyHandler.cpp:454
#6 0x00007fffdb2f01ea in flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:195
#7 WebCore::QNetworkReplyHandlerCallQueue::flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:187
#8 0x00007fffdb2f0255 in WebCore::QNetworkReplyHandlerCallQueue::push (this=0x14adce8, method=
(void (WebCore::QNetworkReplyHandler::*)(WebCore::QNetworkReplyHandler * const)) 0x7fffdb2f0260 <WebCore::QNetworkReplyHandler::finish()>)
at platform/network/qt/QNetworkReplyHandler.cpp:164
#9 0x00007fffdb2f0c8c in WebCore::QNetworkReplyWrapper::didReceiveFinished (this=0x14af650) at platform/network/qt/QNetworkReplyHandler.cpp:349
#10 0x00007ffff482f281 in QMetaObject::activate (sender=0x14ae120, m=<optimized out>, local_signal_index=<optimized out>, argv=0x0) at kernel/qobject.cpp:3547
#11 0x00007fffe0ea5fe6 in QNetworkReplyImplPrivate::finished (this=0x14ae210) at access/qnetworkreplyimpl.cpp:795
#12 0x00007fffe0f1c655 in QNetworkAccessHttpBackend::qt_static_metacall (_o=0x14ae5c0, _c=<optimized out>, _id=<optimized out>, _a=<optimized out>)
at .moc/release-shared/moc_qnetworkaccesshttpbackend_p.cpp:90
#13 0x00007ffff4834446 in QObject::event (this=0x14ae5c0, e=<optimized out>) at kernel/qobject.cpp:1195
#14 0x00007ffff4d3d894 in notify_helper (e=0x7fffc8019be0, receiver=0x14ae5c0, this=0x9d0e30) at kernel/qapplication.cpp:4559
#15 QApplicationPrivate::notify_helper (this=0x9d0e30, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4531
#16 0x00007ffff4d42713 in QApplication::notify (this=0x966ab0, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4420
#17 0x00007ffff5d4c016 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtGui.so
#18 0x00007ffff481ae9c in QCoreApplication::notifyInternal (this=0x966ab0, receiver=0x14ae5c0, event=0x7fffc8019be0) at kernel/qcoreapplication.cpp:876
#19 0x00007ffff481ec6a in sendEvent (event=0x7fffc8019be0, receiver=0x14ae5c0) at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:231
#20 QCoreApplicationPrivate::sendPostedEvents (receiver=0x0, event_type=0, data=0x9d1000) at kernel/qcoreapplication.cpp:1500
#21 0x00007ffff4849f93 in sendPostedEvents () at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:236
#22 postEventSourceDispatch (s=<optimized out>) at kernel/qeventdispatcher_glib.cpp:279
#23 0x00007ffff3a8ec9a in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
---Type <return> to continue, or q <return> to quit---
#24 0x00007ffff3a8f060 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#25 0x00007ffff3a8f124 in g_main_context_iteration () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#26 0x00007ffff484a3bf in QEventDispatcherGlib::processEvents (this=0xb7dfb0, flags=...) at kernel/qeventdispatcher_glib.cpp:424
#27 0x00007ffff4de5d5e in QGuiEventDispatcherGlib::processEvents (this=<optimized out>, flags=...) at kernel/qguieventdispatcher_glib.cpp:204
#28 0x00007ffff4819c82 in QEventLoop::processEvents (this=<optimized out>, flags=...) at kernel/qeventloop.cpp:149
#29 0x00007ffff4819ed7 in QEventLoop::exec (this=0xea6cd0, flags=...) at kernel/qeventloop.cpp:204
#30 0x00007ffff0d627e2 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtCore.so
#31 0x000000000049a15d in PyEval_EvalFrameEx ()
#32 0x000000000049be0f in PyEval_EvalCodeEx ()
#33 0x000000000049a57a in PyEval_EvalFrameEx ()
#34 0x000000000049be0f in PyEval_EvalCodeEx ()
#35 0x000000000049a57a in PyEval_EvalFrameEx ()
#36 0x000000000049be0f in PyEval_EvalCodeEx ()
#37 0x000000000049bef2 in PyEval_EvalCode ()
#38 0x00000000004be6e0 in PyRun_FileExFlags ()
#39 0x00000000004bf3d7 in PyRun_SimpleFileExFlags ()
#40 0x0000000000418850 in Py_Main ()
#41 0x00007ffff68e576d in __libc_start_main () from /lib/x86_64-linux-gnu/libc.so.6
#42 0x0000000000417ab1 in _start ()
(gdb)
I apologize for the late response; I finally found the time to post the solution to my problem. Basically, the segfault happened because the Qt objects were not deleted before the last reference to the browser instance was dropped. Here is the fixed code:
# -*- coding: utf-8 -*-
# Start a non-visible virtual display; the QApplication below is created
# only after the display has been started.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()
from PyQt4.QtGui import QApplication
# Create the Qt application object, then install the Qt-based reactor.
# Both happen before any twisted module is imported below.
app = QApplication([])
import qt4reactor
qt4reactor.install()
from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, Qt
class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores SSL errors and frees replies."""

    def __init__(self, *args, **kwargs):
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        self.sslErrors.connect(self._ssl_errors)
        # Each finished reply is scheduled for deletion by _finished.
        self.finished.connect(self._finished)

    def _ssl_errors(self, reply, errors):
        """Tell ``reply`` to ignore its SSL errors, then print each one."""
        reply.ignoreSslErrors()
        template = 'Ignored SSL Error: {0} - {1}'
        for ssl_error in errors:
            print(template.format(ssl_error.error(), ssl_error.errorString()))

    def _finished(self, reply):
        """Schedule the finished ``reply`` for deletion."""
        reply.deleteLater()
class CustomQWebPage(QWebPage):
    """QWebPage that reports one fixed user agent string for every URL."""

    def __init__(self, *args, **kwargs):
        super(CustomQWebPage, self).__init__(*args, **kwargs)

    def userAgentForUrl(self, url):
        """Return the same Firefox 5 user agent regardless of ``url``."""
        user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'
        return user_agent
class WebkitWrapper(object):
    """Owns a QWebView, its page and its network manager for one request.

    The Qt objects are stored as attributes so that ``shutdown`` can
    schedule each of them for deletion explicitly.
    """

    def __init__(self, *args, **kwargs):
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        # No request is pending until perform() is called.
        self._deferred_request = None
        self.web_view = QWebView()
        self.network_manager = CustomQNetworkAccessManager()
        self.web_page = CustomQWebPage()
        self.web_page.setNetworkAccessManager(self.network_manager)
        self.web_view.setPage(self.web_page)
        self.web_view.setAttribute(Qt.WA_DeleteOnClose, True)
        settings = self.web_view.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages, False)
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        self.web_view.loadFinished.connect(self._load_finished)

    def perform(self, request_data, timeout=15):
        """Start loading ``request_data['url']`` and return a Deferred.

        The Deferred is fired by ``_load_finished`` with the page HTML
        encoded as UTF-8. ``timeout`` is accepted but not used here.
        """
        self._deferred_request = defer.Deferred()
        url = request_data.get('url', '')
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        self.web_view.load(request)
        print('getting: {0}'.format(url))
        return self._deferred_request

    def shutdown(self):
        """Close the view and schedule all three Qt objects for deletion."""
        print('webview shutdown')
        self.web_view.close()
        self.network_manager.deleteLater()
        self.web_page.deleteLater()
        self.web_view.deleteLater()
        print('deletelater scheduled')

    def _load_finished(self, ok):
        """Fire the pending Deferred, at most once, with the page HTML.

        Does nothing beyond printing when no request is pending, i.e. when
        the signal arrives before ``perform`` or a second time for the
        same request.
        """
        print('load finished: {0}'.format(ok))
        # Claim the Deferred before firing it so that a repeated signal
        # cannot call callback() twice (Twisted raises AlreadyCalledError).
        deferred_request, self._deferred_request = self._deferred_request, None
        if deferred_request is None:
            return
        frame = self.web_view.page().mainFrame()
        result = unicode(frame.toHtml()).encode('utf-8')
        deferred_request.callback(result)
class HeadlessBrowser(object):
    """Pairs one WebkitWrapper with the id it is tracked under."""

    def __init__(self, instance_id):
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Return ``(instance_id, result)`` so later callbacks know the owner."""
        tagged = (self.instance_id, result)
        return tagged

    def _request_failed(self, failure):
        """Print the failure in detail and return its message as the result."""
        failure.trap(Exception)
        message = failure.getErrorMessage()
        print(message)
        failure.printDetailedTraceback()
        return message

    def shutdown(self):
        """Delegate to the wrapper's own shutdown."""
        self.webkit_wrapper.shutdown()

    def _run_perform(self, _r, request_data):
        """Callback-shaped form of ``perform``; the first argument is ignored."""
        pending = self.webkit_wrapper.perform(request_data)
        return pending

    def get_request(self, request_data):
        """Start the request; the Deferred yields ``(instance_id, result)``."""
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        # addBoth: the id is attached whether the chain is in a success or
        # a failure state at this point.
        pending.addBoth(self._return_intance_id)
        return pending
class TestXMLRPCServer(XMLRPC):
    """XML-RPC resource that creates one HeadlessBrowser per ``open`` call."""

    def __init__(self):
        XMLRPC.__init__(self, allowNone=True)
        # Browsers currently in use, keyed by their string instance id.
        self.browser_instances = dict()
        self.instance_counter = 0

    def _request_completed(self, result):
        """Shut down the browser that produced ``result`` and return its data.

        ``result`` is the ``(instance_id, data)`` pair produced by
        ``HeadlessBrowser.get_request``.
        """
        instance_id, dest_result = result
        print('{0} {1}'.format('instances: ', self.browser_instances))
        print('killing instance: {0}'.format(instance_id))
        browser_instance = self.browser_instances.pop(instance_id, None)
        # pop() falls back to None for an unknown id; calling shutdown()
        # on that would raise AttributeError and turn a good result into
        # a failure.
        if browser_instance is not None:
            browser_instance.shutdown()
        print('{0} {1}'.format('instances: ', self.browser_instances))
        return dest_result

    def xmlrpc_open(self, request_data):
        """Load the requested URL in a new browser; return a Deferred."""
        print('requested: {0}'.format(request_data))
        self.instance_counter += 1
        instance_id = str(self.instance_counter)
        browser = HeadlessBrowser(instance_id)
        # Register the browser before the completion callback is attached:
        # addCallback runs the callback immediately on an already-fired
        # Deferred, and _request_completed must find the instance then.
        self.browser_instances[instance_id] = browser
        def_request = browser.get_request(request_data)
        def_request.addCallback(self._request_completed)
        return def_request
def start_server(port=8297):
    """Serve TestXMLRPCServer over TCP on ``port`` and run the reactor."""
    # Imported inside the function rather than at module level; presumably
    # so the reactor is the one installed by qt4reactor -- confirm.
    from twisted.internet import reactor
    xmlrpc_root = TestXMLRPCServer()
    site = server.Site(xmlrpc_root)
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()
Everything works with just this line added:
self.web_page.deleteLater()
But it's better to be sure. I hope this will spare somebody some debugging time.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With