Coverage for creepo/httpproxy.py: 83%
86 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 18:52 -0500
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 18:52 -0500
1"""
2The httpproxy module exposes :py:class:`HttpProxy`, whose :py:meth:`HttpProxy.rest_proxy`
3method handles each request.
5By default the httpproxy module expects to support secure transmission protocols.
6"""
7import os
8from http.client import responses
10from diskcache import Cache
11import mime
12import urllib3
13from urllib3 import ProxyManager, make_headers
class HttpProxy:
    """
    The http proxy

    Forwards each request to the registry configured under ``config[key]``
    and, when persistence is enabled, stores successful responses in a
    :class:`diskcache.Cache` located at :py:attr:`base`.

    By default this class does not persist anything.

    Enable persistence by setting the global configuration option `no_cache` = **False**

    :param config: The global Creepo config

    :param key: The storage key **AND** path prefix

    """

    def __init__(self, config, key):
        self.key = key
        self.config = config
        self.logger = config['logger']

        # Persistence is opt-in: only a `no_cache` value whose string form is
        # exactly 'False' enables it; a missing or any other value keeps the
        # proxy in no-cache mode.
        self._no_cache = f"{config.get('no_cache')}" != 'False'

        # An explicit CREEPO_BASE always wins.  HOME is only consulted for the
        # fallback location, so a missing HOME no longer breaks construction.
        creepo_base = os.environ.get('CREEPO_BASE')
        if creepo_base is None:
            home = os.environ.get('HOME')
            if home is None:
                home = os.path.expanduser('~')
            creepo_base = os.path.join(home, '.CREEPO_BASE')
        self.creepo = creepo_base

    @property
    def base(self):
        """The base path to storage"""
        return f"{self.creepo}/{self.key}"

    @property
    def kind(self):
        """The kind of proxy"""
        return self.key

    @property
    def no_cache(self):
        """The no_cache property for this :class:`Proxy`"""
        return self._no_cache

    def mimetype(self, path, default):
        """
        Return the content type registered for `path`.

        :param path: The request path whose type is looked up

        :param default: The value returned when no type matches `path`
        """
        matches = mime.Types.of(path)
        if len(matches) > 0:
            return matches[0].content_type
        return default

    def persist(self, request):
        """
        Persist the (possibly changed) data

        Stores ``request['response']`` under ``request['output_filename']``;
        does nothing while the proxy is in no-cache mode.
        """
        if not self.no_cache:
            with Cache(self.base) as cache:
                cache.set(request['output_filename'], request['response'])

    def gethttp(self):
        """
        convenience method to configure the http request engine

        :return: A :class:`urllib3.ProxyManager` when the `proxy` option is
            configured, otherwise a :class:`urllib3.PoolManager`
        """
        proxy_url = self.config.get('proxy')
        if proxy_url is None:
            # `cacert` is optional; None leaves certificate lookup to urllib3.
            return urllib3.PoolManager(
                ca_certs=self.config.get('cacert'), num_pools=10000)

        if self.config.get('proxy_user') is not None:
            proxy_headers = make_headers(
                proxy_basic_auth=self.config['proxy_user'] +
                ':' + self.config['proxy_password'])
        else:
            proxy_headers = make_headers()
        # NOTE(review): `cacert` is not applied on the proxy path (same as
        # before this rewrite) - confirm whether that is intended.
        return ProxyManager(
            proxy_url, proxy_headers=proxy_headers, num_pools=10000)

    def getheaders(self, environ):
        """
        convenience method to get the proper headers for the request

        Sets ``content-type`` on ``environ['headers']`` in place, and merges in
        a basic-auth header when `credentials` is configured.
        """
        headers = environ['headers']

        headers['content-type'] = self.mimetype(
            environ['path'], environ['content_type'])
        credentials = self.config.get('credentials')
        if credentials is not None:
            headers = headers | urllib3.make_headers(
                basic_auth=credentials.get('username') + ':' +
                credentials.get('password')
            )
        return headers

    def dynamic_config(self, new_host):
        """convenience method to generate a new config for a dynamic proxy"""
        return {
            'no_cache': self.no_cache,
            'logger': self.config['logger'],
            f"{self.key}": {
                'registry': new_host,
            }
        }

    def rest_proxy(self, environ, start_response):
        """
        The rest_proxy method is the work engine for everything

        :param environ: The request Dictionary

        :param start_response: The CherryPy callback


        When environ contains a callback function that callback will be called
        after the initial request.

        The callback might change the content. For this reason the upstream
        Content-Length header is removed after the callback.

        The (potentially modified) response is returned to the caller as a
        byte array at request['response']
        """
        environ['output_filename'] = environ['path']

        callback = environ.get('callback')

        # Look the entry up once, and close the cache handle straight away.
        cached = None
        if not self.no_cache:
            with Cache(self.base) as cache:
                cached = cache.get(environ['output_filename'])

        if cached is not None:
            start_response('200 OK', [
                ('Content-Type',
                 self.mimetype(environ['path'], environ['content_type']))])
            yield cached
            return

        http = self.gethttp()
        headers = self.getheaders(environ)

        source_url = f"{self.config[self.key]['registry']}{environ['path']}"

        # NOTE(review): directory listings are stored under '<path>.index' but
        # the lookup above uses '<path>', so they are never served from the
        # cache - confirm whether that is deliberate (keeps listings fresh).
        if environ['path'].endswith('/'):
            environ['output_filename'] = environ['output_filename'] + '.index'

        r = http.request(
            method='GET',
            url=source_url,
            decode_content=False,
            preload_content=False,
            headers=headers,
        )
        try:
            # Upstream may answer with a code http.client has no phrase for.
            reason = responses.get(r.status, r.reason or 'Unknown')
            status_line = f"{r.status} {reason}"

            if r.status < 400:
                if callback is not None:
                    # The callback must set request['response']
                    callback(r.data, environ)
                    r.headers.discard('Content-Length')
                else:
                    environ['response'] = r.data
                start_response(status_line, list(r.headers.items()))
                yield environ['response']
            else:
                self.logger.warning(
                    '%s.%s ***WARNING***: Unexpected status %d for %s',
                    self.kind, __name__, r.status, source_url)
                start_response(status_line, list(r.headers.items()))
                yield r.data
        finally:
            # Runs on errors and on early generator close as well, so the
            # connection is always handed back to its pool.
            r.release_conn()

        if not self.no_cache and environ.get('response') is not None:
            self.persist(environ)