Coverage for portality/lib/httputil.py: 17%

135 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-19 18:38 +0100

1from portality.core import app 

2import requests, time, urllib.request, urllib.parse, urllib.error, json 

3from io import StringIO 

4 

class SizeExceededException(Exception):
    """Raised when a streamed download is larger than the permitted size limit."""

7 

def quote(s, **kwargs):
    """
    URL-encode ``s``, returning None if it cannot be encoded.

    First tries ``urllib.parse.quote_plus``; if that fails for any reason, falls
    back to UTF-8 encoding ``s`` and passing the bytes to ``urllib.parse.quote``.

    :param s: the value to encode
    :param kwargs: passed straight through to the urllib.parse function used
    :return: the encoded string, or None if both attempts fail
    """
    # best-effort by design, but only swallow ordinary errors: a bare except
    # would also trap KeyboardInterrupt and SystemExit
    try:
        return urllib.parse.quote_plus(s, **kwargs)
    except Exception:
        pass

    try:
        utf = s.encode("utf-8")
        return urllib.parse.quote(utf, **kwargs)
    except Exception:
        return None

19 

20def _backoff(attempt_number, back_off_factor, max_back_off): 

21 seconds = 2**attempt_number * back_off_factor 

22 seconds = seconds if seconds < max_back_off else max_back_off 

23 return seconds 

24 

def _make_request(method, url,
                  retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
                  retry_on_timeout=None, retry_codes=None,
                  **kwargs):
    """
    Issue an HTTP request through the requests library, retrying with an
    exponential back-off on retryable status codes, timeouts and connection errors.

    Any of the tuning arguments left as None is read from the application
    configuration (HTTP_MAX_RETRIES, HTTP_BACK_OFF_FACTOR, HTTP_MAX_BACK_OFF,
    HTTP_TIMEOUT, HTTP_RESPONSE_ENCODING, HTTP_RETRY_ON_TIMEOUT, HTTP_RETRY_CODES).

    :param method: one of "GET", "POST", "PUT" or "DELETE"
    :param url: the URL to request
    :param retries: number of retries allowed after the first attempt
    :param back_off_factor: multiplier for the exponential back-off
    :param max_back_off: upper bound on the back-off delay, in seconds
    :param timeout: timeout passed to requests
    :param response_encoding: if not None, the encoding to set on the response
    :param retry_on_timeout: if False, give up as soon as a request times out
    :param retry_codes: HTTP status codes which should trigger a retry
    :param kwargs: any other arguments to pass to the requests function
    :return: the response of the last request that returned one, or None if no
        response was ever received or the method is not supported
    """

    # fill out all the default arguments
    if retries is None:
        retries = app.config.get("HTTP_MAX_RETRIES", 0)

    if back_off_factor is None:
        back_off_factor = app.config.get("HTTP_BACK_OFF_FACTOR", 1)

    if max_back_off is None:
        max_back_off = app.config.get("HTTP_MAX_BACK_OFF", 30)

    if timeout is None:
        timeout = app.config.get("HTTP_TIMEOUT", 30)

    if retry_on_timeout is None:
        retry_on_timeout = app.config.get("HTTP_RETRY_ON_TIMEOUT", True)

    if retry_codes is None:
        retry_codes = app.config.get("HTTP_RETRY_CODES", [])

    if response_encoding is None:
        response_encoding = app.config.get("HTTP_RESPONSE_ENCODING")

    # looked up at call time, so that patched requests functions are honoured
    senders = {
        "GET": requests.get,
        "POST": requests.post,
        "PUT": requests.put,
        "DELETE": requests.delete
    }
    send = senders.get(method)
    if send is None:
        # FIXME: is this right? Maybe raising an exception would be better
        app.logger.debug("Method {method} not allowed".format(method=method))
        return None

    attempt = 0
    r = None

    while attempt <= retries:
        try:
            r = send(url, timeout=timeout, **kwargs)

            if r.status_code not in retry_codes:
                break

            attempt += 1
            app.logger.debug("Request to {url} resulted in status {status}, attempt {attempt}".format(status=r.status_code, url=url, attempt=attempt))
        except requests.exceptions.Timeout:
            attempt += 1
            app.logger.debug('Request to {url} timeout, attempt {attempt}'.format(url=url, attempt=attempt))
            if not retry_on_timeout:
                break
        except requests.exceptions.ConnectionError:
            attempt += 1
            app.logger.debug('Request to {url} connection error, attempt {attempt}'.format(url=url, attempt=attempt))

        # if there are no attempts left there is nothing to wait for, so don't
        # make the caller sit through a final back-off
        if attempt > retries:
            break

        bo = _backoff(attempt, back_off_factor, max_back_off)
        app.logger.debug('Request to {url} backing off for {bo} seconds'.format(url=url, bo=bo))
        time.sleep(bo)

        # reset any file pointers to the beginning, so the retry re-sends the whole payload
        if "data" in kwargs and hasattr(kwargs["data"], "read") and hasattr(kwargs["data"], "seek"):
            kwargs["data"].seek(0)

    # apply the requested encoding (previously this was hard-coded to utf-8
    # regardless of the value supplied)
    if response_encoding is not None and r is not None:
        r.encoding = response_encoding

    return r

96 

def put(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
        retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send an HTTP PUT to ``url`` via _make_request.

    The named arguments control retry, back-off, timeout and response encoding,
    and are handed to _make_request unchanged; everything in kwargs is forwarded
    as well.

    :return: whatever _make_request returns
    """
    request_options = {
        "retries": retries,
        "back_off_factor": back_off_factor,
        "max_back_off": max_back_off,
        "timeout": timeout,
        "response_encoding": response_encoding,
        "retry_on_timeout": retry_on_timeout,
        "retry_codes": retry_codes
    }
    return _make_request("PUT", url, **request_options, **kwargs)

107 

def delete(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
           retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send an HTTP DELETE to ``url`` via _make_request.

    The named arguments control retry, back-off, timeout and response encoding,
    and are handed to _make_request unchanged; everything in kwargs is forwarded
    as well.

    :return: whatever _make_request returns
    """
    request_options = {
        "retries": retries,
        "back_off_factor": back_off_factor,
        "max_back_off": max_back_off,
        "timeout": timeout,
        "response_encoding": response_encoding,
        "retry_on_timeout": retry_on_timeout,
        "retry_codes": retry_codes
    }
    return _make_request("DELETE", url, **request_options, **kwargs)

118 

def post(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
         retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send an HTTP POST to ``url`` via _make_request.

    The named arguments control retry, back-off, timeout and response encoding,
    and are handed to _make_request unchanged; everything in kwargs is forwarded
    as well.

    :return: whatever _make_request returns
    """
    request_options = {
        "retries": retries,
        "back_off_factor": back_off_factor,
        "max_back_off": max_back_off,
        "timeout": timeout,
        "response_encoding": response_encoding,
        "retry_on_timeout": retry_on_timeout,
        "retry_codes": retry_codes
    }
    return _make_request("POST", url, **request_options, **kwargs)

129 

def get(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
        retry_on_timeout=None, retry_codes=None, **kwargs):
    """
    Send an HTTP GET to ``url`` via _make_request.

    The named arguments control retry, back-off, timeout and response encoding,
    and are handed to _make_request unchanged; everything in kwargs is forwarded
    as well.

    :return: whatever _make_request returns
    """
    request_options = {
        "retries": retries,
        "back_off_factor": back_off_factor,
        "max_back_off": max_back_off,
        "timeout": timeout,
        "response_encoding": response_encoding,
        "retry_on_timeout": retry_on_timeout,
        "retry_codes": retry_codes
    }
    return _make_request("GET", url, **request_options, **kwargs)

140 

def get_stream(url, retries=None, back_off_factor=None, max_back_off=None, timeout=None, response_encoding=None,
               retry_on_timeout=None, retry_codes=None, size_limit=None, chunk_size=None, cut_off=None, read_stream=True, **kwargs):
    """
    GET ``url`` as a stream, optionally reading the body in chunks while
    enforcing a maximum size and/or stopping early at a cut-off point.

    size_limit, cut_off and chunk_size default from the application configuration
    (HTTP_STREAM_MAX_SIZE, HTTP_STREAM_CUT_OFF, HTTP_STREAM_CHUNK_SIZE) when None.
    The remaining named arguments and kwargs are passed on to _make_request.

    :param size_limit: maximum permitted size in bytes; 0 means no limit
    :param chunk_size: the chunk size requested from the response's iter_content
    :param cut_off: stop reading once at least this many bytes have been read; 0 means no cut-off
    :param read_stream: if False the body is not read here, and the response is
        returned still open so the caller can consume (and close) it
    :return: tuple of (response, content, downloaded_bytes).  content is the
        concatenation of the chunks read (bytes when the response yields bytes),
        or '' if nothing was read.  If no response was obtained the tuple is
        (None, "", 0)
    :raises SizeExceededException: if the content-length header, or the amount
        of data actually downloaded, is larger than size_limit
    """

    # set the defaults where necessary from configuration
    if size_limit is None:
        size_limit = app.config.get("HTTP_STREAM_MAX_SIZE", 0)  # size of 0 means no limit

    if cut_off is None:
        cut_off = app.config.get("HTTP_STREAM_CUT_OFF", 0)  # size of 0 means no limit

    if chunk_size is None:
        chunk_size = app.config.get("HTTP_STREAM_CHUNK_SIZE", 262144)  # 256KiB

    # actually make the request (note that we pass stream=True)
    resp = _make_request("GET", url,
                         retries=retries, back_off_factor=back_off_factor,
                         max_back_off=max_back_off,
                         timeout=timeout,
                         response_encoding=response_encoding,
                         retry_on_timeout=retry_on_timeout,
                         retry_codes=retry_codes,
                         stream=True,
                         **kwargs)

    if resp is None:
        return None, "", 0

    # check the content length header for an early view on whether the resource
    # is too large
    if size_limit > 0:
        try:
            header_reported_size = int(resp.headers.get("content-length"))
        except (TypeError, ValueError):
            # header missing or not a number, so we can't tell the size up-front
            header_reported_size = 0

        if header_reported_size > size_limit:
            resp.close()
            raise SizeExceededException("Size as announced by Content-Length header is larger than maximum allowed size")

    downloaded_bytes = 0
    content = ''

    if read_stream:
        # gather the chunks and join once at the end.  The chunks are normally
        # bytes, which cannot be appended to a str
        chunks = []
        try:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue

                if isinstance(chunk, bytes):
                    downloaded_bytes += len(chunk)
                else:
                    # text chunk (e.g. from a mock response): count its size as UTF-8 bytes
                    downloaded_bytes += len(chunk.encode("utf-8"))

                # check the size limit again
                if size_limit > 0 and downloaded_bytes > size_limit:
                    raise SizeExceededException("Size limit exceeded during download")

                chunks.append(chunk)

                # now check to see if we have exceeded the cut off point
                if cut_off > 0 and downloaded_bytes >= cut_off:
                    break
        finally:
            # release the connection however the read finished, including on error
            resp.close()

        if chunks:
            joiner = b"" if isinstance(chunks[0], bytes) else ""
            content = joiner.join(chunks)

    return resp, content, downloaded_bytes

205 

206###################################################### 

207# Mock requests Response object - useful for testing 

208 

class MockResponse(object):
    """
    Minimal stand-in for a requests Response object, for use in tests.

    Holds a status code, an optional text body and optional headers, and exposes
    them through a small part of the Response interface.
    """

    def __init__(self, status, body=None, headers=None):
        """
        :param status: value to expose as status_code
        :param body: the response body as a string (or None for an empty body)
        :param headers: dict of response headers (or None for no headers)
        """
        self.status_code = status
        self._body = body
        self._headers = headers
        self._stream = StringIO(body)

    def json(self):
        """Parse the body as JSON and return the result."""
        return json.loads(self._body)

    @property
    def data(self):
        """The body exactly as supplied to the constructor."""
        return self._body

    @property
    def raw(self):
        """File-like object over the body; shares its read position with iter_content."""
        return self._stream

    @property
    def headers(self):
        """The headers supplied to the constructor, or an empty dict if there were none."""
        return self._headers if self._headers is not None else {}

    def iter_content(self, num_bytes=None, chunk_size=None):
        """
        Yield the body in pieces, followed by one final empty string.

        :param num_bytes: maximum number of characters per piece
        :param chunk_size: alias for num_bytes, matching the keyword used when
            calling iter_content on a real response; takes precedence if both are given
        If neither is supplied the remaining body is yielded as a single piece.
        """
        size = num_bytes if chunk_size is None else chunk_size
        while True:
            b = self._stream.read(size)
            if b == "":
                # we have reached the end of the file
                break
            yield b
        yield ""

    def close(self):
        """Do nothing; present so code which closes its responses can be given a mock."""
        pass

238 yield ""