Coverage for creepo/httpproxy.py: 83%

86 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 18:52 -0500

1""" 

2The httpproxy module exposes :py:class:`HttpProxy`, whose :py:meth:`HttpProxy.rest_proxy`

3method handles each request. 

4 

5By default the httpproxy module expects to support secure transmission protocols. 

6""" 

7import os 

8from http.client import responses 

9 

10from diskcache import Cache 

11import mime 

12import urllib3 

13from urllib3 import ProxyManager, make_headers 

14 

15 

class HttpProxy:
    """
    The http proxy

    By default this class does not persist anything.

    Enable persistence by setting the global configuration option `no_cache` = **False**

    :param config: The global Creepo config

    :param key: The storage key **AND** path prefix

    """

    def __init__(self, config, key):
        self.key = key
        self.config = config
        self.logger = config['logger']

        # Compare the string form so that both the boolean ``False`` and the
        # string ``'False'`` enable persistence; anything else (including a
        # missing option) leaves caching disabled.
        self._no_cache = f"{config.get('no_cache')}" != 'False'

        creepo_base = os.environ.get('CREEPO_BASE')
        if creepo_base is None:
            # expanduser() honours HOME when it is set and does not raise when
            # it is missing (joining onto os.environ.get('HOME') raised
            # TypeError in that case).
            creepo_base = os.path.join(os.path.expanduser('~'), '.CREEPO_BASE')
        self.creepo = creepo_base

    @property
    def base(self):
        """The base path to storage"""
        return f"{self.creepo}/{self.key}"

    @property
    def kind(self):
        """The kind of proxy"""
        return self.key

    @property
    def no_cache(self):
        """The no_cache property for this :class:`HttpProxy`"""
        return self._no_cache

    @staticmethod
    def _status_line(status):
        """Return the ``"<code> <reason>"`` status line for *status*.

        Status codes unknown to :mod:`http.client` get the reason ``Unknown``
        instead of raising :class:`KeyError`.
        """
        return f"{status} {responses.get(status, 'Unknown')}"

    def mimetype(self, path, default):
        """Return the mimetype registered for *path*, or *default* if none is."""
        # Look the path up once and reuse the result.
        matches = mime.Types.of(path)
        if len(matches) > 0:
            return matches[0].content_type
        return default

    def persist(self, request):
        """Persist the (possibly changed) data

        Stores ``request['response']`` under ``request['output_filename']``;
        does nothing while caching is disabled.
        """
        if not self.no_cache:
            with Cache(self.base) as cache:
                cache.set(request['output_filename'], request['response'])

    def gethttp(self):
        """convenience method to configure the http request engine

        Returns a :class:`urllib3.ProxyManager` when the ``proxy`` option is
        configured, otherwise a :class:`urllib3.PoolManager` using the
        optional ``cacert`` option.
        """
        proxy_url = self.config.get('proxy')
        if proxy_url is not None:
            proxy_headers = make_headers()
            if self.config.get('proxy_user') is not None:
                proxy_headers = make_headers(
                    proxy_basic_auth=self.config['proxy_user'] +
                    ':' + self.config['proxy_password'])
            return ProxyManager(
                proxy_url, proxy_headers=proxy_headers, num_pools=10000)

        # ``None`` (no 'cacert' configured) is what urllib3 documents for
        # "use the default trust store".
        return urllib3.PoolManager(
            ca_certs=self.config.get('cacert'), num_pools=10000)

    def getheaders(self, environ):
        """convenience method to get the proper headers for the request

        Sets ``content-type`` on ``environ['headers']`` and, when the
        ``credentials`` option is configured, merges in basic-auth headers.
        """
        headers = environ['headers']

        headers['content-type'] = self.mimetype(
            environ['path'], environ['content_type'])

        credentials = self.config.get('credentials')
        if credentials is not None:
            headers = headers | urllib3.make_headers(
                basic_auth=credentials.get('username') + ':' +
                credentials.get('password')
            )
        return headers

    def dynamic_config(self, new_host):
        """convenience method to generate a new config for a dynamic proxy"""
        return {
            'no_cache': self.no_cache,
            'logger': self.config['logger'],
            f"{self.key}": {
                'registry': new_host,
            }
        }

    def rest_proxy(self, environ, start_response):
        """
        The rest_proxy method is the work engine for everything

        :param environ: The request Dictionary

        :param start_response: The CherryPy callback


        When environ contains a callback function that callback will be called
        after the initial request.

        The callback might change the content. For this reason the upstream
        Content-Length header is dropped after the callback.

        The (potentially modified) response is returned to the caller as a
        byte array at environ['response']
        """
        environ['output_filename'] = environ['path']

        cached = None
        if not self.no_cache:
            # A single lookup inside a context manager: the cache handle is
            # always closed, and the value that was tested is the value that
            # is served (no second read that could come back empty).
            with Cache(self.base) as cache:
                cached = cache.get(environ['output_filename'])

        if cached is not None:
            start_response('200 OK', [
                ('Content-Type',
                 self.mimetype(environ['path'], environ['content_type']))])
            yield cached
            return

        callback = environ.get('callback')
        http = self.gethttp()
        headers = self.getheaders(environ)

        source_url = f"{self.config[self.key]['registry']}{environ['path']}"

        if environ['path'].endswith('/'):
            # NOTE(review): directory paths are persisted under '<path>.index'
            # but looked up above under '<path>', so they are re-fetched on
            # every request. Kept as-is; confirm whether that is intended.
            environ['output_filename'] = environ['output_filename'] + '.index'

        r = http.request(
            method='GET',
            url=source_url,
            decode_content=False,
            preload_content=False,
            headers=headers,
        )

        try:
            status_line = self._status_line(r.status)

            if r.status < 400:
                if callback is not None:
                    # The callback must set request['response']
                    callback(r.data, environ)

                    r.headers.discard('Content-Length')

                    start_response(status_line, list(r.headers.items()))
                    yield environ['response']
                else:
                    body = r.data
                    environ['response'] = body
                    start_response(status_line, list(r.headers.items()))
                    yield body
            else:
                self.logger.warning(
                    '%s.%s ***WARNING***: Unexpected status %d for %s',
                    self.kind, __name__, r.status, source_url)
                start_response(status_line, list(r.headers.items()))
                yield r.data
        finally:
            # Runs even when the callback raises or the consumer closes the
            # generator early, so the connection always returns to the pool.
            r.release_conn()

        if not self.no_cache and environ.get('response') is not None:
            self.persist(environ)