Coverage for portality / models / openurl.py: 89%

267 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1import re, json 

2from flask import url_for 

3from portality.models import Journal, Article 

4from portality.core import app 

5from copy import deepcopy 

6from portality.lib import dates 

7 

# Every key this module recognises from the OpenURL journal schema. Each one
# becomes an attribute on OpenURLRequest.
# NOTE: 'genre' is deliberately the final entry; code that walks the search
# fields may depend on that position, so keep it last.
JOURNAL_SCHEMA_KEYS = [
    "doi",
    "aulast",
    "aufirst",
    "auinit",
    "auinit1",
    "auinitm",
    "ausuffix",
    "au",
    "aucorp",
    "atitle",
    "jtitle",
    "stitle",
    "date",
    "chron",
    "ssn",
    "quarter",
    "volume",
    "part",
    "issue",
    "spage",
    "epage",
    "pages",
    "artnum",
    "issn",
    "eissn",
    "isbn",
    "coden",
    "sici",
    "genre",
]

# The genres from the OpenURL schema we support. The position of a genre in
# this list selects the matching column of each OPENURL_TO_ES tuple.
SUPPORTED_GENRES = ["journal", "article"]

# Mapping from OpenURL schema keys to index fields, one tuple per key:
#   (field used when searching journals, field used when searching articles)
# None means the key cannot be searched for that genre.
OPENURL_TO_ES = {
    "aulast": (None, "bibjson.author.name.exact"),
    "aucorp": (None, "bibjson.author.affiliation.exact"),
    "atitle": (None, "bibjson.title.exact"),
    # Journals use index.title.exact so that continuations are matched too.
    "jtitle": ("index.title.exact", "bibjson.journal.title.exact"),
    "stitle": ("bibjson.alternative_title.exact", None),
    "date": (None, "bibjson.year.exact"),
    "volume": (None, "bibjson.journal.volume.exact"),
    "issue": (None, "bibjson.journal.number.exact"),
    "spage": (None, "bibjson.start_page.exact"),
    "epage": (None, "bibjson.end_page.exact"),
    # Alternative field for identifiers: bibjson.identifier.id.exact
    "issn": ("index.issn.exact", "index.issn.exact"),
    "eissn": ("index.issn.exact", "index.issn.exact"),
    # NOTE(review): isbn is looked up in the ISSN field - confirm this is intended.
    "isbn": ("index.issn.exact", "index.issn.exact"),
    "doi": (None, "index.doi.exact"),
}

# Terms search template. Every OpenURL query starts from this so that only
# publicly visible records (in_doaj : true) can be returned.
IN_DOAJ_TERM = {"term": {"admin.in_doaj": True}}
TERMS_SEARCH = {
    "query": {
        "bool": {
            "must": [IN_DOAJ_TERM],
        },
    },
}

36 

37 

class OpenURLRequest(object):
    """
    Based on the fields from ofi/fmt:kev:mtx:journal schema for Journals in OpenURL 1.0
    This is the only schema the DOAJ supports.

    Every key in JOURNAL_SCHEMA_KEYS is exposed as a property backed by a
    ``_<key>`` attribute. Most properties store the value unchanged; ``date``
    is reduced to a year and ``issn`` / ``eissn`` get a dash inserted if one
    is missing.
    """

    # ~~API:Feature~~

    def __init__(self, **kwargs):
        """
        Create a request, optionally populated from keyword arguments.

        :param kwargs: attribute names and values to set on the new object.
            Values go through the property setters, so normalisation (date,
            issn, eissn) is applied here as well.
        """
        # Initialise the OpenURLRequest object with empty attributes
        for key in JOURNAL_SCHEMA_KEYS:
            setattr(self, key, None)

        # Save any attributes specified at creation time
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        populated = ["%s : %s" % (x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS if getattr(self, x)]
        return "OpenURLRequest{" + ", ".join(populated) + "}"

    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.

        Side effect: if no genre is set, ``self.genre`` is set to the first
        supported genre before querying.

        :return: The results of a query through the dao, a JSON object, or
            None if the request holds no attribute that is searchable for
            its genre.
        :raises ValueError: if the genre is not one of SUPPORTED_GENRES.
        """
        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]  # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles.
        # list.index raises ValueError for an unsupported genre.
        i = SUPPORTED_GENRES.index(self.genre.lower())

        # Copy the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)
        must = populated_query["query"]["bool"]["must"]

        # The template already carries the in_doaj filter, so "no search
        # terms" means "nothing added beyond this baseline".
        baseline = len(must)

        # Add the attributes to the query
        for k in JOURNAL_SCHEMA_KEYS:
            # genre selects the mapping column; it is not itself a search term
            if k == 'genre':
                continue

            v = getattr(self, k)
            if not v:
                continue

            # Not every schema key has an index mapping (e.g. aufirst, pages);
            # those are ignored rather than raising KeyError.
            mapping = OPENURL_TO_ES.get(k)
            if mapping is None:
                continue

            es_term = mapping[i]
            if es_term is None:
                continue

            must.append({"term": {es_term: v}})

        # avoid doing an empty query
        if len(must) == baseline:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)

    @staticmethod
    def _total_hits(results):
        """Return the hit count reported in a result set, or 0 if it is absent."""
        return results.get('hits', {}).get('total', {}).get('value', 0)

    def get_result_url(self):
        """
        Get the URL for this OpenURLRequest's referent.

        Runs the query, retries once via fallthrough_retry() if nothing was
        found, and builds a URL from the first hit.

        :return: The url as a string, or None if not found.
        """
        try:
            results = self.query_es()
        except ValueError:
            # Unsupported genre
            return None

        if results is None:
            return None

        if self._total_hits(results) == 0:
            # No results found for query, retry
            results = self.fallthrough_retry()
            if results is None or self._total_hits(results) == 0:
                # This time we've definitely failed
                return None

        # A positive total with no returned documents gives us nothing to link to
        hits = results.get('hits', {}).get('hits') or []
        if not hits:
            return None

        top = hits[0]
        es_type = top.get('_source', {}).get('es_type')

        if es_type == 'journal':
            # construct a journal object around the result
            journal = Journal(**top)

            # the continuation is a first-class journal object, so if we have a journal we have the right continuation
            # (assuming that the user gave us specific enough information)
            # construct the toc url using the ident only
            return url_for("doaj.toc", identifier=journal.id)

        #~~->Article:Page~~
        if es_type == 'article':
            return url_for("doaj.article_page", identifier=top['_id'])

        return None

    def query_for_vol(self, journalobj):
        """
        Count-style article query for this request's volume (and issue, if
        set) restricted to the ISSNs of the given journal.

        :param journalobj: object whose ``bibjson().issns()`` supplies the ISSNs.
        :return: the result of Article.query (size 0, so totals only), or
            None if the journal reports no ISSNs (``issns()`` returns None).
        """
        # The journal object will already be the correct continuation, if the user provided sufficient detail.
        issns = journalobj.bibjson().issns()

        # If there's no way to get the wanted issns, give up
        if issns is None:
            return None

        volume_query = deepcopy(TERMS_SEARCH)
        volume_query["size"] = 0
        must = volume_query["query"]["bool"]["must"]

        must.append({"terms": {"index.issn.exact": issns}})
        must.append({"term": {"bibjson.journal.volume.exact": self.volume}})

        # And if there's an issue, query that too. Note, issue does not make sense on its own.
        if self.issue:
            must.append({"term": {"bibjson.journal.number.exact": self.issue}})

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(volume_query))
        return Article.query(q=volume_query)

    def fallthrough_retry(self):
        """
        Some things to try differently if we get no results on first attempt.

        Side effect: when a retry is made, ``jtitle`` is moved into ``stitle``
        on this object and ``jtitle`` is cleared.

        :return: a new result set, or None
        """
        results = None

        # Search again for the title against alternative_title (may catch translations of titles)
        if self.jtitle and not self.stitle:
            self.stitle = self.jtitle
            self.jtitle = None
            results = self.query_es()

        return results

    def validate_issn(self, issn_str):
        """
        If the ISSN is missing a dash, add it so it matches that in the index.

        :param issn_str: The ISSN, or if None (or empty), this will skip.
        :return: The ISSN with a dash inserted after the fourth character if
            it had none; otherwise the value unchanged.
        """
        if issn_str and '-' not in issn_str:
            issn_str = issn_str[:4] + '-' + issn_str[4:]
        return issn_str

    @property
    def doi(self):
        """Digital Object Identifier"""
        return self._doi

    @doi.setter
    def doi(self, val):
        self._doi = val

    @property
    def aulast(self):
        """First author's family name, may be more than one word"""
        return self._aulast

    @aulast.setter
    def aulast(self, val):
        self._aulast = val

    @property
    def aufirst(self):
        """First author's given name or names or initials"""
        return self._aufirst

    @aufirst.setter
    def aufirst(self, val):
        self._aufirst = val

    @property
    def auinit(self):
        """First author's first and middle initials"""
        return self._auinit

    @auinit.setter
    def auinit(self, val):
        self._auinit = val

    @property
    def auinit1(self):
        """First author's first initial"""
        return self._auinit1

    @auinit1.setter
    def auinit1(self, val):
        self._auinit1 = val

    @property
    def auinitm(self):
        """First author's middle initial"""
        return self._auinitm

    @auinitm.setter
    def auinitm(self, val):
        self._auinitm = val

    @property
    def ausuffix(self):
        """First author's name suffix. e.g. 'Jr.', 'III'"""
        return self._ausuffix

    @ausuffix.setter
    def ausuffix(self, val):
        self._ausuffix = val

    @property
    def au(self):
        """full name of a single author"""
        return self._au

    @au.setter
    def au(self, val):
        self._au = val

    @property
    def aucorp(self):
        """Organisation or corporation that is the author or creator of the document"""
        return self._aucorp

    @aucorp.setter
    def aucorp(self, val):
        self._aucorp = val

    @property
    def atitle(self):
        """Article title"""
        return self._atitle

    @atitle.setter
    def atitle(self, val):
        self._atitle = val

    @property
    def jtitle(self):
        """Journal title"""
        return self._jtitle

    @jtitle.setter
    def jtitle(self, val):
        self._jtitle = val

    @property
    def stitle(self):
        """Abbreviated or short journal title"""
        return self._stitle

    @stitle.setter
    def stitle(self, val):
        self._stitle = val

    @property
    def date(self):
        """Date of publication"""
        return self._date

    @date.setter
    def date(self, val):
        # Only the year is kept; a value that cannot be parsed is discarded.
        if val:
            try:
                parsed_date = dates.parse(val)
                val = parsed_date.year
            except ValueError:
                val = None
        self._date = val

    @property
    def chron(self):
        """Non-normalised enumeration / chronology, e.g. '1st quarter'"""
        return self._chron

    @chron.setter
    def chron(self, val):
        self._chron = val

    @property
    def ssn(self):
        """Season (chronology). spring|summer|fall|autumn|winter"""
        return self._ssn

    @ssn.setter
    def ssn(self, val):
        self._ssn = val

    @property
    def quarter(self):
        """Quarter (chronology). 1|2|3|4"""
        return self._quarter

    @quarter.setter
    def quarter(self, val):
        self._quarter = val

    @property
    def volume(self):
        """Volume designation. e.g. '124', or 'VI'"""
        return self._volume

    @volume.setter
    def volume(self, val):
        self._volume = val

    @property
    def part(self):
        """Subdivision of a volume or highest level division of the journal. e.g. 'B', 'Supplement'"""
        return self._part

    @part.setter
    def part(self, val):
        self._part = val

    @property
    def issue(self):
        """Journal issue"""
        return self._issue

    @issue.setter
    def issue(self, val):
        self._issue = val

    @property
    def spage(self):
        """Starting page"""
        return self._spage

    @spage.setter
    def spage(self, val):
        self._spage = val

    @property
    def epage(self):
        """Ending page"""
        return self._epage

    @epage.setter
    def epage(self, val):
        self._epage = val

    @property
    def pages(self):
        """Page range e.g. '53-58', 'C4-9'"""
        return self._pages

    @pages.setter
    def pages(self, val):
        self._pages = val

    @property
    def artnum(self):
        """Article number"""
        return self._artnum

    @artnum.setter
    def artnum(self, val):
        self._artnum = val

    @property
    def issn(self):
        """Journal ISSN"""
        return self._issn

    @issn.setter
    def issn(self, val):
        self._issn = self.validate_issn(val)

    @property
    def eissn(self):
        """ISSN for electronic version of the journal"""
        return self._eissn

    @eissn.setter
    def eissn(self, val):
        self._eissn = self.validate_issn(val)

    @property
    def isbn(self):
        """Journal ISBN"""
        return self._isbn

    @isbn.setter
    def isbn(self, val):
        self._isbn = val

    @property
    def coden(self):
        """CODEN"""
        return self._coden

    @coden.setter
    def coden(self, val):
        self._coden = val

    @property
    def sici(self):
        """Serial Item and Contribution Identifier (SICI)"""
        return self._sici

    @sici.setter
    def sici(self, val):
        self._sici = val

    @property
    def genre(self):
        """journal|issue|article|proceeding|conference|preprint|unknown"""
        return self._genre

    @genre.setter
    def genre(self, val):
        self._genre = val