Coverage for portality/lib/httputil.py: 17%
135 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
1from portality.core import app
2import requests, time, urllib.request, urllib.parse, urllib.error, json
3from io import StringIO
class SizeExceededException(Exception):
    """Error signalling that a resource is larger than the permitted size."""
def quote(s, **kwargs):
    """
    URL-encode ``s``, returning ``None`` if it cannot be encoded.

    The value is first passed to ``urllib.parse.quote_plus``.  If that raises,
    the value is encoded to UTF-8 bytes and passed to ``urllib.parse.quote``.

    :param s: the value to encode
    :param kwargs: keyword arguments forwarded to the urllib quoting function
    :return: the quoted string, or ``None`` if both attempts fail
    """
    try:
        return urllib.parse.quote_plus(s, **kwargs)
    except Exception:
        # best effort: fall through to the explicit UTF-8 route below.
        # ``Exception`` (not a bare except) so that KeyboardInterrupt and
        # SystemExit are never swallowed.
        pass

    try:
        return urllib.parse.quote(s.encode("utf-8"), **kwargs)
    except Exception:
        return None
20def _backoff(attempt_number, back_off_factor, max_back_off):
21 seconds = 2**attempt_number * back_off_factor
22 seconds = seconds if seconds < max_back_off else max_back_off
23 return seconds
def _make_request(method, url,
                  retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
                  retry_on_timeout=None, retry_codes=None,
                  **kwargs):
    """
    Make an HTTP request with ``requests``, retrying with exponential back-off.

    Any option left as ``None`` is read from the application configuration.

    :param method: one of "GET", "POST", "PUT" or "DELETE"
    :param url: the URL to request
    :param retries: number of retries allowed after the first attempt (config HTTP_MAX_RETRIES, default 0)
    :param back_off_factor: multiplier for the exponential back-off (config HTTP_BACK_OFF_FACTOR, default 1)
    :param max_back_off: upper bound on a single back-off, in seconds (config HTTP_MAX_BACK_OFF, default 30)
    :param timeout: timeout passed to ``requests`` (config HTTP_TIMEOUT, default 30)
    :param response_encoding: if not None, set as the ``encoding`` of the returned response
        (config HTTP_RESPONSE_ENCODING)
    :param retry_on_timeout: if False, a timeout ends the retry loop (config HTTP_RETRY_ON_TIMEOUT, default True)
    :param retry_codes: status codes which cause the request to be retried (config HTTP_RETRY_CODES, default [])
    :param kwargs: any other keyword arguments, passed through to ``requests``
    :return: the most recent response received (which may carry one of ``retry_codes`` if the retries
        ran out), or None if the method is not supported or no response was ever received
    """
    # fill out all the default arguments
    if retries is None:
        retries = app.config.get("HTTP_MAX_RETRIES", 0)

    if back_off_factor is None:
        back_off_factor = app.config.get("HTTP_BACK_OFF_FACTOR", 1)

    if max_back_off is None:
        max_back_off = app.config.get("HTTP_MAX_BACK_OFF", 30)

    if timeout is None:
        timeout = app.config.get("HTTP_TIMEOUT", 30)

    if retry_on_timeout is None:
        retry_on_timeout = app.config.get("HTTP_RETRY_ON_TIMEOUT", True)

    if retry_codes is None:
        retry_codes = app.config.get("HTTP_RETRY_CODES", [])

    if response_encoding is None:
        response_encoding = app.config.get("HTTP_RESPONSE_ENCODING")

    # built on every call so that the current attributes of ``requests`` are used
    senders = {
        "GET": requests.get,
        "POST": requests.post,
        "PUT": requests.put,
        "DELETE": requests.delete,
    }
    send = senders.get(method)
    if send is None:
        # FIXME: is this right? Maybe raising an exception would be better
        app.logger.debug("Method {method} not allowed".format(method=method))
        return None

    attempt = 0
    r = None

    while attempt <= retries:
        try:
            r = send(url, timeout=timeout, **kwargs)

            if r.status_code not in retry_codes:
                break

            attempt += 1
            app.logger.debug("Request to {url} resulted in status {status}, attempt {attempt}".format(status=r.status_code, url=url, attempt=attempt))
        except requests.exceptions.Timeout:
            attempt += 1
            app.logger.debug('Request to {url} timeout, attempt {attempt}'.format(url=url, attempt=attempt))
            if not retry_on_timeout:
                break
        except requests.exceptions.ConnectionError:
            attempt += 1
            app.logger.debug('Request to {url} connection error, attempt {attempt}'.format(url=url, attempt=attempt))

        # reset any file pointers to the beginning, so a retry re-sends the whole payload
        payload = kwargs.get("data")
        if hasattr(payload, "read") and hasattr(payload, "seek"):
            payload.seek(0)

        # only wait if another attempt is actually going to be made
        if attempt <= retries:
            bo = _backoff(attempt, back_off_factor, max_back_off)
            app.logger.debug('Request to {url} backing off for {bo} seconds'.format(url=url, bo=bo))
            time.sleep(bo)

    # apply the requested encoding (previously 'utf-8' was applied regardless of the value given)
    if response_encoding is not None and r is not None:
        r.encoding = response_encoding

    return r
def put(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
        retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send a PUT request to ``url`` by delegating to ``_make_request``.

    All named options and any extra keyword arguments are handed on unchanged.

    :return: whatever ``_make_request`` returns
    """
    options = dict(
        retries=retries,
        back_off_factor=back_off_factor,
        max_back_off=max_back_off,
        timeout=timeout,
        response_encoding=response_encoding,
        retry_on_timeout=retry_on_timeout,
        retry_codes=retry_codes,
    )
    options.update(kwargs)
    return _make_request("PUT", url, **options)
def delete(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
           retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send a DELETE request to ``url`` by delegating to ``_make_request``.

    All named options and any extra keyword arguments are handed on unchanged.

    :return: whatever ``_make_request`` returns
    """
    options = dict(
        retries=retries,
        back_off_factor=back_off_factor,
        max_back_off=max_back_off,
        timeout=timeout,
        response_encoding=response_encoding,
        retry_on_timeout=retry_on_timeout,
        retry_codes=retry_codes,
    )
    options.update(kwargs)
    return _make_request("DELETE", url, **options)
def post(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
         retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send a POST request to ``url`` by delegating to ``_make_request``.

    All named options and any extra keyword arguments are handed on unchanged.

    :return: whatever ``_make_request`` returns
    """
    options = dict(
        retries=retries,
        back_off_factor=back_off_factor,
        max_back_off=max_back_off,
        timeout=timeout,
        response_encoding=response_encoding,
        retry_on_timeout=retry_on_timeout,
        retry_codes=retry_codes,
    )
    options.update(kwargs)
    return _make_request("POST", url, **options)
def get(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
        retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send a GET request to ``url`` by delegating to ``_make_request``.

    All named options and any extra keyword arguments are handed on unchanged.

    :return: whatever ``_make_request`` returns
    """
    options = dict(
        retries=retries,
        back_off_factor=back_off_factor,
        max_back_off=max_back_off,
        timeout=timeout,
        response_encoding=response_encoding,
        retry_on_timeout=retry_on_timeout,
        retry_codes=retry_codes,
    )
    options.update(kwargs)
    return _make_request("GET", url, **options)
def get_stream(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
               retry_on_timeout=None, retry_codes=None, size_limit=None, chunk_size=None, cut_off=None, read_stream=True, **kwargs):
    """
    GET ``url`` with ``stream=True`` and, optionally, read the body in chunks subject to size limits.

    The retry-related arguments and any extra keyword arguments are passed to ``_make_request``.

    :param url: the URL to request
    :param size_limit: maximum permitted size in bytes; 0 means no limit (config HTTP_STREAM_MAX_SIZE, default 0)
    :param chunk_size: chunk size passed to ``iter_content`` (config HTTP_STREAM_CHUNK_SIZE, default 262144)
    :param cut_off: stop reading once at least this many bytes have been downloaded; 0 means no cut off
        (config HTTP_STREAM_CUT_OFF, default 0)
    :param read_stream: if False, the body is not read by this function
    :return: tuple of (response, content, downloaded_bytes).  ``content`` has the same type as the chunks
        produced by the response (bytes or str), and is ``""`` if nothing was read.  If no response was
        obtained the result is ``(None, "", 0)``
    :raises SizeExceededException: if the Content-Length header, or the amount actually downloaded,
        is greater than ``size_limit``
    """
    # set the defaults where necessary from configuration
    if size_limit is None:
        size_limit = app.config.get("HTTP_STREAM_MAX_SIZE", 0)  # size of 0 means no limit

    if cut_off is None:
        cut_off = app.config.get("HTTP_STREAM_CUT_OFF", 0)  # size of 0 means no limit

    if chunk_size is None:
        chunk_size = app.config.get("HTTP_STREAM_CHUNK_SIZE", 262144)  # 250Kb

    # actually make the request (note that we pass stream=True)
    resp = _make_request("GET", url,
                         retries=retries, back_off_factor=back_off_factor,
                         max_back_off=max_back_off,
                         timeout=timeout,
                         response_encoding=response_encoding,
                         retry_on_timeout=retry_on_timeout,
                         retry_codes=retry_codes,
                         stream=True,
                         **kwargs)

    if resp is None:
        return None, "", 0

    # check the content length header for an early view on whether the resource
    # is too large
    if size_limit > 0:
        header_reported_size = resp.headers.get("content-length")
        try:
            header_reported_size = int(header_reported_size)
        except (TypeError, ValueError):
            # header missing or not a number: rely on the check made during download instead
            header_reported_size = 0

        if header_reported_size > size_limit:
            resp.connection.close()
            raise SizeExceededException("Size as announced by Content-Length header is larger than maximum allowed size")

    downloaded_bytes = 0
    content = ""

    if read_stream:
        chunks = []
        try:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue

                # chunks are normally bytes; text chunks are counted by their UTF-8 encoded length
                if isinstance(chunk, str):
                    downloaded_bytes += len(chunk.encode("utf-8"))
                else:
                    downloaded_bytes += len(chunk)

                # check the size limit again
                if size_limit > 0 and downloaded_bytes > size_limit:
                    raise SizeExceededException("Size limit exceeded during download")

                chunks.append(chunk)

                # now check to see if we have exceeded the cut off point
                if cut_off > 0 and downloaded_bytes >= cut_off:
                    break
        finally:
            # close whether the read finished, was cut off, exceeded the limit or failed
            resp.connection.close()

        if chunks:
            # join once at the end, using a separator of the same type as the chunks
            separator = "" if isinstance(chunks[0], str) else b""
            content = separator.join(chunks)

    return resp, content, downloaded_bytes
206######################################################
207# Mock requests Response object - useful for testing
class MockResponse(object):
    """
    Minimal stand-in for a ``requests`` response object, for use in tests.

    :param status: value exposed as ``status_code``
    :param body: the response body as a string, or None for no body
    :param headers: dictionary of response headers, or None
    """

    def __init__(self, status, body=None, headers=None):
        self.status_code = status
        self._body = body
        self._headers = headers
        # StringIO(None) gives an empty stream, so a missing body reads as ""
        self._stream = StringIO(body)

    def json(self):
        """Parse the body as JSON and return the result."""
        return json.loads(self._body)

    @property
    def data(self):
        """The body exactly as supplied to the constructor."""
        return self._body

    @property
    def raw(self):
        """The in-memory stream wrapping the body."""
        return self._stream

    @property
    def headers(self):
        """The supplied headers, or an empty dict if none were given."""
        return self._headers if self._headers is not None else {}

    def iter_content(self, num_bytes=None, chunk_size=None):
        """
        Yield the body in pieces, followed by a final empty string.

        :param num_bytes: maximum number of characters to read per piece
        :param chunk_size: alternative keyword for ``num_bytes``, so that callers written against
            ``requests`` (``iter_content(chunk_size=...)``) also work; takes precedence if given
        :return: generator of strings; if neither size is given the remaining body is one piece
        """
        size = num_bytes if chunk_size is None else chunk_size

        # read() returns "" at the end of the stream, which is the sentinel that stops iteration
        yield from iter(lambda: self._stream.read(size), "")
        yield ""