I'm using a proxy service (ProxyMesh) that puts useful information into the headers it sends in response to a CONNECT request. For whatever reason, Python's httplib doesn't parse them:
> CONNECT example.com:443 HTTP/1.1
> Host: example.com:443
>
< HTTP/1.1 200 Connection established
< X-Useful-Header: value # completely ignored
<
The requests module uses httplib internally, so it ignores them as well. How do I extract the headers from the proxy's response to a CONNECT request?
Python's httplib reads these header lines while creating the tunnel but throws them away. It's hacky, but you can intercept them and merge the proxy's header lines with the actual HTTP response's headers:
import socket
import httplib
import requests
from requests.packages.urllib3.connection import HTTPSConnection
from requests.packages.urllib3.connectionpool import HTTPSConnectionPool
from requests.packages.urllib3.poolmanager import ProxyManager
from requests.adapters import HTTPAdapter
class ProxyHeaderHTTPSConnection(HTTPSConnection):
    """HTTPS connection that keeps the headers sent in a proxy's CONNECT reply.

    ``_tunnel`` is overridden to store the raw header lines that follow the
    proxy's status line in ``self._proxy_headers``; ``getresponse`` then
    appends those lines to the header list of each response it returns.
    """

    def __init__(self, *args, **kwargs):
        super(ProxyHeaderHTTPSConnection, self).__init__(*args, **kwargs)
        # Raw "Name: value\r\n" lines captured from the proxy's CONNECT reply.
        self._proxy_headers = []

    def _tunnel(self):
        """Send the CONNECT request and record the proxy's reply headers.

        Raises:
            socket.error: if the proxy answers with HTTP/0.9 or with a
                status code other than 200.
            httplib.LineTooLong: if a reply header line exceeds
                ``httplib._MAXLINE`` bytes.
        """
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, self._tunnel_port))
        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))
        self.send("\r\n")

        response = self.response_class(self.sock, strict=self.strict, method=self._method)
        version, code, message = response._read_status()

        if version == "HTTP/0.9":
            # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
            # concluded HTTP/0.9 is being used something has gone wrong.
            self.close()
            raise socket.error("Invalid response from tunnel request")
        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code, message.strip()))

        # Start from a clean list so a re-established tunnel does not
        # accumulate header lines from an earlier CONNECT reply.
        self._proxy_headers = []
        while True:
            line = response.fp.readline(httplib._MAXLINE + 1)
            if len(line) > httplib._MAXLINE:
                # LineTooLong lives in httplib; only the module is imported,
                # so the name must be qualified.
                raise httplib.LineTooLong("header line")
            # An empty read means EOF; a blank line ends the header section.
            # A bare "\n" is accepted too so a lenient proxy cannot make this
            # loop read past the headers.
            if not line or line in ('\r\n', '\n'):
                break
            # The line is a header, save it
            if ':' in line:
                self._proxy_headers.append(line)

    def getresponse(self, buffering=False):
        """Return the response with the proxy's header lines appended.

        The lines captured by ``_tunnel`` are added to
        ``response.msg.headers`` before the response is returned.
        """
        response = super(ProxyHeaderHTTPSConnection, self).getresponse(buffering)
        response.msg.headers.extend(self._proxy_headers)
        return response
class ProxyHeaderHTTPSConnectionPool(HTTPSConnectionPool):
    """HTTPS connection pool configured with ProxyHeaderHTTPSConnection."""

    # Override the pool's connection class with the header-capturing one.
    ConnectionCls = ProxyHeaderHTTPSConnection
class ProxyHeaderProxyManager(ProxyManager):
    """ProxyManager that creates ProxyHeaderHTTPSConnectionPool pools for HTTPS."""

    def _new_pool(self, scheme, host, port):
        """Create the connection pool for ``scheme``/``host``/``port``.

        HTTPS pools are built from ProxyHeaderHTTPSConnectionPool using
        ``self.connection_pool_kw``; any other scheme is handed to the
        parent class unchanged.
        """
        if scheme != 'https':
            # An ``assert`` here would vanish under ``python -O`` and then
            # build an HTTPS pool for the wrong scheme; defer to the stock
            # implementation instead of failing.
            return super(ProxyHeaderProxyManager, self)._new_pool(scheme, host, port)
        return ProxyHeaderHTTPSConnectionPool(host, port, **self.connection_pool_kw)
class ProxyHeaderHTTPAdapter(HTTPAdapter):
    """HTTPAdapter whose proxy managers are ProxyHeaderProxyManager instances."""

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return the manager stored for ``proxy``, creating it on first use.

        A new ProxyHeaderProxyManager is built from this adapter's pool
        settings plus ``proxy_kwargs`` and kept in ``self.proxy_manager``
        under the ``proxy`` key.
        """
        managers = self.proxy_manager
        if proxy not in managers:
            managers[proxy] = ProxyHeaderProxyManager(
                proxy_url=proxy,
                proxy_headers=self.proxy_headers(proxy),
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs)
        return managers[proxy]
You can then install the adapter onto a session:
# Create a session and mount the custom adapter for the 'https://' prefix.
session = requests.Session()
session.mount('https://', ProxyHeaderHTTPAdapter())
# ``proxies={...}`` is a placeholder: pass your own proxy mapping here.
response = session.get('https://example.com', proxies={...})
The proxy's headers will be merged in with the response headers, so it should behave as if the proxy modified the response headers directly.
If you find this site useful, you can donate via PayPal or buy us a coffee so we can maintain and grow it. Thank you!
Donate Us With