Coverage for portality/lib/query_filters.py: 13%

176 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-20 16:12 +0100

1from flask_login import current_user 

2from portality.core import app 

3from portality import models, constants 

4from copy import deepcopy 

5 

6# General utilities 

7################### 

8 

9 

def remove_fields(query: dict, fields_to_remove: list):
    """Return a deep copy of ``query`` without the listed top-level keys.

    The input mapping is never modified; keys named in ``fields_to_remove``
    that are not present in the query are ignored.

    :param query: the query dictionary to copy and prune
    :param fields_to_remove: top-level keys to drop from the copy
    :return: the pruned deep copy
    """
    pruned = deepcopy(query)
    for field in fields_to_remove:
        # pop with a default silently skips keys that are absent
        pruned.pop(field, None)
    return pruned

16 

17 

18# query sanitisers 

19################## 

20 

def public_query_validator(q):
    """Accept a query only if it stays within the paging limits.

    A query is rejected when ``q.from_result()`` is greater than 10000
    (no deep paging) or when ``q.size()`` is greater than 200.

    :param q: query object exposing ``from_result()`` and ``size()``
    :return: True if the query is within both limits, False otherwise
    """
    # `or` short-circuits, so size() is only consulted when the offset is acceptable
    return not (q.from_result() > 10000 or q.size() > 200)

30 

31 

32# query filters 

33############### 

34 

def remove_search_limits(query: dict):
    """Return a copy of ``query`` with the ``size`` and ``from`` keys removed.

    :param query: the query dictionary
    :return: the result of ``remove_fields`` for those two keys
    """
    paging_keys = ['size', 'from']
    return remove_fields(query, paging_keys)

37 

def only_in_doaj(q):
    """Add a term filter requiring ``admin.in_doaj`` to be True.

    Calls ``q.clear_match_all()`` first, then registers the filter through
    ``q.add_must_filter``.

    :param q: the query object to modify in place
    :return: the same query object
    """
    in_doaj_filter = {"term": {"admin.in_doaj": True}}
    q.clear_match_all()
    q.add_must_filter(in_doaj_filter)
    return q

42 

43 

def owner(q):
    """Add a term filter matching ``admin.owner.exact`` to the current user's id.

    :param q: the query object to modify in place
    :return: the same query object
    """
    q.clear_match_all()
    # the user id is read after the match_all has been cleared
    owner_filter = {"term": {"admin.owner.exact": current_user.id}}
    q.add_must_filter(owner_filter)
    return q

48 

49 

def update_request(q):
    """Add the two filters that select update requests.

    The first is a range filter on ``created_date`` whose lower bound is the
    ``UPDATE_REQUESTS_SHOW_OLDEST`` config value; the second is a term filter
    on ``admin.application_type.exact`` for the update-request type.

    :param q: the query object to modify in place
    :return: the same query object
    """
    q.clear_match_all()

    oldest_shown = app.config.get("UPDATE_REQUESTS_SHOW_OLDEST")
    q.add_must_filter({"range": {"created_date": {"gte": oldest_shown}}})

    wanted_type = constants.APPLICATION_TYPE_UPDATE_REQUEST
    q.add_must_filter({"term": {"admin.application_type.exact": wanted_type}})
    return q

55 

56 

def not_update_request(q):
    """Add a term filter selecting the new-application type.

    The filter matches ``admin.application_type.exact`` against
    ``constants.APPLICATION_TYPE_NEW_APPLICATION``.

    :param q: the query object to modify in place
    :return: the same query object
    """
    q.clear_match_all()
    wanted_type = constants.APPLICATION_TYPE_NEW_APPLICATION
    q.add_must_filter({"term": {"admin.application_type.exact": wanted_type}})
    return q

61 

62 

def associate(q):
    """Add a term filter matching ``admin.editor.exact`` to the current user's id.

    :param q: the query object to modify in place
    :return: the same query object
    """
    q.clear_match_all()
    # the user id is read after the match_all has been cleared
    editor_filter = {"term": {"admin.editor.exact": current_user.id}}
    q.add_must_filter(editor_filter)
    return q

67 

68 

def editor(q):
    """Add a terms clause on ``admin.editor_group.exact`` for the user's groups.

    The group names come from
    ``models.EditorGroup.groups_by_editor(current_user.id)``.

    :param q: the query object to modify in place
    :return: the same query object
    """
    # The groups are looked up before the query is touched.
    group_names = [
        group.name
        for group in models.EditorGroup.groups_by_editor(current_user.id)
    ]

    q.clear_match_all()
    # NOTE(review): this uses add_must, while the other filters in this file
    # use add_must_filter -- confirm whether the difference is intentional.
    q.add_must({"terms": {"admin.editor_group.exact": group_names}})
    return q

77 

78 

def private_source(q):
    """Pass the private set of source fields to ``q.add_include``.

    This is the same list as ``public_source`` plus
    ``admin.application_status``.

    :param q: the query object to modify in place
    :return: the same query object
    """
    included_fields = [
        "admin.application_status",
        "admin.ticked",
        "admin.seal",
        "last_updated",
        "created_date",
        "id",
        "bibjson",
    ]
    q.add_include(included_fields)
    return q

83 

84 

def public_source(q):
    """Pass the public set of source fields to ``q.add_include``.

    :param q: the query object to modify in place
    :return: the same query object
    """
    included_fields = [
        "admin.ticked",
        "admin.seal",
        "last_updated",
        "created_date",
        "id",
        "bibjson",
    ]
    q.add_include(included_fields)
    return q

89 

90 

def strip_facets(q):
    """Call ``q.clear_facets()`` and hand the query back.

    :param q: the query object to modify in place
    :return: the same query object
    """
    q.clear_facets()
    return q

94 

95 

def es_type_fix(q):
    """Rewrite ``_type`` term clauses in the filtered-bool-must list to ``es_type``.

    Walks ``query -> filtered -> filter -> bool -> must`` in the structure
    returned by ``q.as_dict()``. If any step of that path is missing the query
    is returned untouched. Otherwise every clause whose ``term`` contains a
    ``_type`` key has its whole ``term`` replaced by ``{"es_type": <value>}``
    (any other keys in that ``term`` are dropped).

    :param q: the query object
    :return: the same query object
    """
    # FIXME: document this and will need attention for ES 7 upgrade
    # NOTE(review): the rewrite only reaches the query if as_dict() returns
    # the live underlying structure rather than a copy -- confirm.
    node = q.as_dict()
    for step in ("query", "filtered", "filter", "bool", "must"):
        if step not in node:
            return q
        node = node[step]

    for clause in node:
        if "term" in clause and "_type" in clause["term"]:
            clause["term"] = {"es_type": clause["term"]["_type"]}
    return q

118 

119 

def last_update_fallback(q):
    """Append a ``created_date`` sort when the query sorts by ``last_manual_update``.

    The added sort uses the same order as the ``last_manual_update`` entry, or
    no explicit order if that entry does not specify one. Queries with no sort
    are returned untouched; otherwise the (possibly extended) sort list is
    written back with ``q.set_sort``.

    Besides the long form ``{"last_manual_update": {"order": "desc"}}``, the
    Elasticsearch shorthand forms ``{"last_manual_update": "desc"}`` and the
    bare field name ``"last_manual_update"`` are recognised, so they no longer
    raise.

    :param q: query object exposing ``sort()`` and ``set_sort()``
    :return: the same query object
    """
    s = q.sort()
    if s is None or len(s) == 0:
        return q

    add_created_sort = False
    sort_order = None
    for sortby in s:
        if isinstance(sortby, str):
            # bare field name: only an exact match counts, and it has no order
            if sortby == "last_manual_update":
                add_created_sort = True
                break
            continue

        if "last_manual_update" in sortby:
            spec = sortby["last_manual_update"]
            if isinstance(spec, dict):
                sort_order = spec.get("order")
            elif isinstance(spec, str):
                # shorthand form: the value is the order itself
                sort_order = spec
            add_created_sort = True
            break

    if add_created_sort:
        params = {}
        if sort_order is not None:
            params["order"] = sort_order
        s.append({"created_date": params})

    q.set_sort(s)
    return q

141 

142 

143# results filters 

144################# 

145 

def public_result_filter(results, unpacked=False):
    """Strip every ``admin`` key except ``ticked`` and ``seal`` from results.

    Records are modified in place.

    :param results: a single record when ``unpacked`` is True, otherwise a
        search response with records under ``hits.hits[*]._source``
    :param unpacked: whether ``results`` is one already-unpacked record
    :return: the same ``results`` object
    """
    public_admin_keys = ["ticked", "seal"]

    if unpacked:
        # Dealing with single unpacked result
        records = [results]
    elif "hits" in results and "hits" in results["hits"]:
        # Dealing with a list of es results; hits without a _source are skipped
        records = (hit["_source"] for hit in results["hits"]["hits"] if "_source" in hit)
    else:
        return results

    for record in records:
        if "admin" not in record:
            continue
        admin = record["admin"]
        # iterate over a snapshot of the keys, since entries are deleted in the loop
        for key in list(admin):
            if key not in public_admin_keys:
                del admin[key]

    return results

169 

170 

def prune_author_emails(results, unpacked=False):
    """Delete the ``email`` key from every entry in ``bibjson.author``.

    Records are modified in place.

    :param results: a single record when ``unpacked`` is True, otherwise a
        search response with records under ``hits.hits[*]._source``
    :param unpacked: whether ``results`` is one already-unpacked record
    :return: the same ``results`` object
    """
    if unpacked:
        # Dealing with single unpacked ES result
        records = [results]
    else:
        # Dealing with a list of ES results
        if "hits" not in results:
            return results
        if "hits" not in results["hits"]:
            return results
        records = (hit["_source"] for hit in results["hits"]["hits"] if "_source" in hit)

    for record in records:
        if "bibjson" not in record or "author" not in record["bibjson"]:
            continue
        for author in record["bibjson"]["author"]:
            if "email" in author:
                del author["email"]

    return results

196 

197 

def publisher_result_filter(results, unpacked=False):
    """Strip every ``admin`` key that is not on the publisher allow-list.

    The keys kept are ``ticked``, ``seal``, ``in_doaj``,
    ``related_applications``, ``current_application``, ``current_journal`` and
    ``application_status``. Records are modified in place.

    :param results: a single record when ``unpacked`` is True, otherwise a
        search response with records under ``hits.hits[*]._source``
    :param unpacked: whether ``results`` is one already-unpacked record
    :return: the same ``results`` object
    """
    allowed_admin = ["ticked", "seal", "in_doaj", "related_applications", "current_application", "current_journal", "application_status"]

    if unpacked:
        # Dealing with single unpacked ES result
        sources = [results]
    else:
        # Dealing with a list of ES results
        if "hits" not in results or "hits" not in results["hits"]:
            return results
        sources = (hit["_source"] for hit in results["hits"]["hits"] if "_source" in hit)

    for source in sources:
        if "admin" in source:
            admin = source["admin"]
            # collect the disallowed keys first so the dict is not resized mid-iteration
            for unwanted in [key for key in admin if key not in allowed_admin]:
                del admin[unwanted]

    return results

222 

223 

def add_fqw_facets(results, unpacked=False):
    """Set ``results["aggregations"]`` to a fixed set of empty facet entries.

    Unpacked (single-record) results are returned untouched. Otherwise any
    existing ``aggregations`` value is overwritten. Each entry is either a
    ``date_histogram`` with an empty ``entries`` list, or a ``terms``/``match``
    entry with zeroed counters and an empty ``terms`` list.

    :param results: the search response to modify in place
    :param unpacked: whether ``results`` is one already-unpacked record
    :return: the same ``results`` object
    """
    if unpacked:
        return results

    # (field, facet type) pairs; the order here is the key order of the output
    layout = (
        ("index.license.exact", "terms"),
        ("bibjson.journal.title.exact", "terms"),
        ("bibjson.archiving_policy.policy.exact", "terms"),
        ("created_date", "date_histogram"),
        ("index.country.exact", "terms"),
        ("index.date", "date_histogram"),
        ("bibjson.journal.volume.exact", "terms"),
        ("index.publisher.exact", "terms"),
        ("index.has_seal.exact", "terms"),
        ("index.has_apc.exact", "terms"),
        ("bibjson.editorial_review.process.exact", "match"),
        ("index.language.exact", "match"),
        ("bibjson.journal.number.exact", "terms"),
        ("index.date_toc_fv_month", "date_histogram"),
        ("index.classification.exact", "terms"),
        ("_type", "terms"),
        ("index.issn.exact", "terms"),
    )

    facets = {}
    for field, kind in layout:
        # a fresh dict and list per field, so entries never share state
        if kind == "date_histogram":
            facets[field] = {"_type": kind, "entries": []}
        else:
            facets[field] = {
                "_type": kind,
                "missing": 0,
                "total": 0,
                "other": 0,
                "terms": [],
            }

    results["aggregations"] = facets
    return results

343 

344 

def fqw_back_compat(results, unpacked=False):
    """Add a ``bibjson.identifier`` list to every journal hit.

    For each hit whose ``_source.es_type`` is ``"journal"``, the list is built
    from the record's ``pissn`` and ``eissn`` values (whichever are present
    and truthy) as ``{"type": ..., "id": ...}`` entries and stored under
    ``bibjson["identifier"]``. Unpacked (single-record) results and responses
    without ``hits.hits`` are returned untouched.

    Hits with no ``_source`` are skipped instead of raising AttributeError.

    :param results: the search response to modify in place
    :param unpacked: whether ``results`` is one already-unpacked record
    :return: the same ``results`` object
    """
    if unpacked:
        return results

    # Dealing with a list of ES results
    if "hits" not in results:
        return results
    if "hits" not in results["hits"]:
        return results

    for hit in results["hits"]["hits"]:
        # tolerate hits without a _source rather than calling .get on None
        source = hit.get("_source") or {}
        if source.get("es_type") != "journal":
            continue

        # If the record has no bibjson, this default dict is not attached to
        # the hit, so the identifiers are dropped.
        bj = source.get("bibjson", {})
        identifiers = [
            {"type": kind, "id": bj.get(kind)}
            for kind in ("pissn", "eissn")
            if bj.get(kind)
        ]
        bj["identifier"] = identifiers

    return results