Coverage for portality/models/openurl.py: 93%

274 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-30 11:09 +0100

1import re, json 

2from flask import url_for 

3from portality.models import Journal, Article 

4from portality.core import app 

5from copy import deepcopy 

6from portality.lib import dates 

7 

# Every key of the OpenURL 1.0 journal schema that a request object carries.
# NOTE: 'genre' is the final entry; it selects the target model rather than
# contributing a search term.
JOURNAL_SCHEMA_KEYS = [
    "doi",
    "aulast", "aufirst", "auinit", "auinit1", "auinitm", "ausuffix", "au", "aucorp",
    "atitle", "jtitle", "stitle",
    "date", "chron", "ssn", "quarter",
    "volume", "part", "issue",
    "spage", "epage", "pages", "artnum",
    "issn", "eissn", "isbn", "coden", "sici",
    "genre",
]

# The genres from the OpenURL schema we support. The position of a genre in
# this list is the index into the tuples of OPENURL_TO_ES below.
SUPPORTED_GENRES = ["journal", "article"]

# Mapping from OpenURL schema keys to index fields, as (Journal field, Article field).
# None means the key is not searchable for that model.
OPENURL_TO_ES = {
    # author / title details
    "aulast": (None, "bibjson.author.name.exact"),
    "aucorp": (None, "bibjson.author.affiliation.exact"),
    "atitle": (None, "bibjson.title.exact"),
    # Note we use index.title.exact for journals, to support continuations
    "jtitle": ("index.title.exact", "bibjson.journal.title.exact"),
    "stitle": ("bibjson.alternative_title.exact", None),
    # position within the journal
    "date": (None, "bibjson.year.exact"),
    "volume": (None, "bibjson.journal.volume.exact"),
    "issue": (None, "bibjson.journal.number.exact"),
    "spage": (None, "bibjson.start_page.exact"),
    "epage": (None, "bibjson.end_page.exact"),
    # identifiers
    "issn": ("index.issn.exact", "index.issn.exact"),  # bibjson.identifier.id.exact
    "eissn": ("index.issn.exact", "index.issn.exact"),
    "isbn": ("index.issn.exact", "index.issn.exact"),
    "doi": (None, "index.doi.exact"),
}

# Terms search template. Ensure all queries from OpenURL return publicly
# visible results with in_doaj : true. Users take a deepcopy before adding terms.
IN_DOAJ_TERM = {"term": {"admin.in_doaj": True}}
TERMS_SEARCH = {
    "query": {
        "bool": {
            "must": [IN_DOAJ_TERM],
        },
    },
}

36 

37 

def _passthrough_property(name, doc):
    """
    Build a read/write property that stores its value, unmodified, on ``_<name>``.

    Used for the OpenURL keys that need no normalisation, so that the class
    does not have to spell out an identical getter/setter pair for each one.

    :param name: the public attribute name (an OpenURL schema key, e.g. 'jtitle')
    :param doc: the docstring to attach to the property
    :return: a property object suitable for assignment in a class body
    """
    attr = "_" + name

    def getter(self):
        return getattr(self, attr)

    def setter(self, val):
        setattr(self, attr, val)

    return property(getter, setter, doc=doc)


class OpenURLRequest(object):
    """
    Based on the fields from ofi/fmt:kev:mtx:journal schema for Journals in OpenURL 1.0
    This is the only schema the DOAJ supports.

    Every key in JOURNAL_SCHEMA_KEYS is exposed as a property backed by a
    ``_<key>`` attribute. Most store the value as given; ``date`` is reduced
    to a year and ``issn`` / ``eissn`` have a missing dash inserted.
    """

    # ~~API:Feature~~

    def __init__(self, **kwargs):
        """
        :param kwargs: initial attribute values, normally keyed by OpenURL
            schema key. Each is applied with setattr, so the property setters
            (and their normalisation) run for schema keys.
        """
        # Initialise the OpenURLRequest object with empty attributes
        for key in JOURNAL_SCHEMA_KEYS:
            setattr(self, key, None)

        # Save any attributes specified at creation time
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        populated = ["%s : %s" % (x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS if getattr(self, x)]
        return "OpenURLRequest{" + ", ".join(populated) + "}"

    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.

        Side effect: if no genre is set, self.genre is set to the first
        supported genre ('journal') before querying.

        :return: The results of a query through the dao, a JSON object, or
            None if the request holds no attribute that is searchable for its genre.
        :raises ValueError: if the genre is not one of SUPPORTED_GENRES
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)
        must = populated_query["query"]["bool"]["must"]

        # The template already carries the in_doaj restriction, so remember how
        # many terms it starts with in order to detect whether we added any.
        template_term_count = len(must)

        # Get all of the attributes with values set. genre is excluded because
        # it chooses the model to search rather than being a search term.
        set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS
                          if x != 'genre' and getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]    # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles
        i = SUPPORTED_GENRES.index(self.genre.lower())

        # Add the attributes to the query. Schema keys with no entry in the
        # mapping at all (e.g. aufirst, pages) are skipped in the same way as
        # keys that are mapped to None for this genre.
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES.get(k, (None, None))[i]
            if es_term is not None:
                must.append({"term": {es_term: v}})

        # avoid doing an empty query: with nothing but the template's own terms
        # the search would match every public record
        if len(must) == template_term_count:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)

    @staticmethod
    def _total_hits(results):
        """Read hits.total.value from a result set, treating any missing level as 0."""
        return results.get('hits', {}).get('total', {}).get('value', 0)

    def get_result_url(self):
        """
        Get the URL for this OpenURLRequest's referent.

        Uses the first hit of the search. For a journal hit the URL is the
        table of contents (narrowed to volume/issue when a volume was
        requested and matching articles exist); for an article hit it is the
        article page.

        :return: The url as a string, or None if not found.
        """
        try:
            results = self.query_es()
        except ValueError:
            # query_es raises ValueError for a genre we do not support
            return None

        if results is None:
            return None

        if self._total_hits(results) == 0:
            # No results found for query, retry
            results = self.fallthrough_retry()
            if results is None or self._total_hits(results) == 0:
                # This time we've definitely failed
                return None

        # Look at the first hit only; an absent or empty hit list yields no type and so no URL
        hits = results.get('hits', {}).get('hits') or [{}]
        first_hit = hits[0]
        es_type = first_hit.get('_source', {}).get('es_type')

        if es_type == 'journal':
            # construct a journal object around the result
            journal = Journal(**first_hit)

            # the continuation is a first-class journal object, so if we have a journal we have the right
            # continuation (assuming that the user gave us specific enough information)
            ident = journal.id

            # if no volume parameter, construct the toc url using the ident only
            if not self.volume:
                return url_for("doaj.toc", identifier=ident)

            # If the request has a volume parameter, query for presence of an article with that volume
            vol_iss_results = self.query_for_vol(journal)

            if vol_iss_results is None:
                # we were asked for a vol/issue, but weren't given the correct information to get it.
                return None
            if vol_iss_results['hits']['total']['value'] > 0:
                # construct the toc url using the ident, plus volume and issue
                return url_for("doaj.toc", identifier=ident, volume=self.volume, issue=self.issue)

            # If no results, the DOAJ does not contain the vol/issue being searched. (Show openurl 404)
            return None

        # ~~->Article:Page~~
        if es_type == 'article':
            return url_for("doaj.article_page", identifier=first_hit['_id'])

        return None

    def query_for_vol(self, journalobj):
        """
        Check whether there are public articles for this request's volume
        (and issue, if set) under the ISSNs of the given journal.

        :param journalobj: the journal whose ISSNs restrict the article search
        :return: the result of a zero-size article query (only the hit count
            is of interest), or None if the journal yields no ISSNs
        """
        # The journal object will already be the correct continuation, if the user provided sufficient detail.
        issns = journalobj.bibjson().issns()

        # If there's no way to get the wanted issns, give up, else run the query
        if issns is None:
            return None

        volume_query = deepcopy(TERMS_SEARCH)
        volume_query["size"] = 0
        must = volume_query["query"]["bool"]["must"]

        must.append({"terms": {"index.issn.exact": issns}})
        must.append({"term": {"bibjson.journal.volume.exact": self.volume}})

        # And if there's an issue, query that too. Note, issue does not make sense on its own.
        if self.issue:
            must.append({"term": {"bibjson.journal.number.exact": self.issue}})

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(volume_query))
        return Article.query(q=volume_query)

    def fallthrough_retry(self):
        """
        Some things to try differently if we get no results on first attempt.

        Side effect: when a retry is made, the journal title is moved from
        jtitle to stitle on this object.

        :return: a new result set, or None
        """
        results = None

        # Search again for the title against alternative_title (may catch translations of titles)
        if self.jtitle and not self.stitle:
            self.stitle = self.jtitle
            self.jtitle = None
            results = self.query_es()

        return results

    def validate_issn(self, issn_str):
        """
        If the ISSN is missing a dash, add it so it matches that in the index.

        :param issn_str: The ISSN, or if None (or empty), this will skip.
        :return: The ISSN with the dash added after the fourth character
        """
        if issn_str and '-' not in issn_str:
            issn_str = issn_str[:4] + '-' + issn_str[4:]
        return issn_str

    # Keys stored exactly as supplied

    doi = _passthrough_property("doi", "Digital Object Identifier")
    aulast = _passthrough_property("aulast", "First author's family name, may be more than one word")
    aufirst = _passthrough_property("aufirst", "First author's given name or names or initials")
    auinit = _passthrough_property("auinit", "First author's first and middle initials")
    auinit1 = _passthrough_property("auinit1", "First author's first initial")
    auinitm = _passthrough_property("auinitm", "First author's middle initial")
    ausuffix = _passthrough_property("ausuffix", "First author's name suffix. e.g. 'Jr.', 'III'")
    au = _passthrough_property("au", "full name of a single author")
    aucorp = _passthrough_property(
        "aucorp", "Organisation or corporation that is the author or creator of the document")
    atitle = _passthrough_property("atitle", "Article title")
    jtitle = _passthrough_property("jtitle", "Journal title")
    stitle = _passthrough_property("stitle", "Abbreviated or short journal title")
    chron = _passthrough_property("chron", "Non-normalised enumeration / chronology, e.g. '1st quarter'")
    ssn = _passthrough_property("ssn", "Season (chronology). spring|summer|fall|autumn|winter")
    quarter = _passthrough_property("quarter", "Quarter (chronology). 1|2|3|4")
    volume = _passthrough_property("volume", "Volume designation. e.g. '124', or 'VI'")
    part = _passthrough_property(
        "part", "Subdivision of a volume or highest level division of the journal. e.g. 'B', 'Supplement'")
    issue = _passthrough_property("issue", "Journal issue")
    spage = _passthrough_property("spage", "Starting page")
    epage = _passthrough_property("epage", "Ending page")
    pages = _passthrough_property("pages", "Page range e.g. '53-58', 'C4-9'")
    artnum = _passthrough_property("artnum", "Article number")
    isbn = _passthrough_property("isbn", "Journal ISBN")
    coden = _passthrough_property("coden", "CODEN")
    sici = _passthrough_property("sici", "Serial Item and Contribution Identifier (SICI)")
    genre = _passthrough_property("genre", "journal|issue|article|proceeding|conference|preprint|unknown")

    # Keys normalised on assignment

    @property
    def date(self):
        """Date of publication"""
        return self._date

    @date.setter
    def date(self, val):
        # Only the year is kept; a value that cannot be parsed is discarded.
        if val:
            try:
                parsed_date = dates.parse(val)
                val = parsed_date.year
            except ValueError:
                val = None
        self._date = val

    @property
    def issn(self):
        """Journal ISSN"""
        return self._issn

    @issn.setter
    def issn(self, val):
        self._issn = self.validate_issn(val)

    @property
    def eissn(self):
        """ISSN for electronic version of the journal"""
        return self._eissn

    @eissn.setter
    def eissn(self, val):
        self._eissn = self.validate_issn(val)

467 @genre.setter 

468 def genre(self, val): 

469 self._genre = val