Coverage for portality/models/openurl.py: 93%
274 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-09-13 22:06 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-09-13 22:06 +0100
1import re, json
2from flask import url_for
3from portality.models import Journal, Article
4from portality.core import app
5from copy import deepcopy
6from portality.lib import dates
# Field names that an OpenURLRequest exposes as attributes (one property each).
# Keep 'genre' as the final entry: it selects the target model rather than
# acting as a search term, and other code may rely on its position.
JOURNAL_SCHEMA_KEYS = [
    # identifiers and authorship
    'doi', 'aulast', 'aufirst', 'auinit', 'auinit1', 'auinitm', 'ausuffix', 'au', 'aucorp',
    # titles
    'atitle', 'jtitle', 'stitle',
    # chronology and enumeration
    'date', 'chron', 'ssn', 'quarter', 'volume', 'part', 'issue',
    # pagination
    'spage', 'epage', 'pages', 'artnum',
    # serial identifiers
    'issn', 'eissn', 'isbn', 'coden', 'sici',
    # record type
    'genre',
]
# Genres from the OpenURL schema that can be resolved. The position of a genre
# in this list is the position of its field inside each OPENURL_TO_ES tuple.
SUPPORTED_GENRES = [
    'journal',
    'article',
]

# OpenURL key -> (journal index field, article index field).
# None means the key is not searchable for that model.
OPENURL_TO_ES = {
    # authorship and article title: articles only
    'aulast': (None, 'bibjson.author.name.exact'),
    'aucorp': (None, 'bibjson.author.affiliation.exact'),
    'atitle': (None, 'bibjson.title.exact'),

    # journal titles; journals use index.title.exact to support continuations
    'jtitle': ('index.title.exact', 'bibjson.journal.title.exact'),
    'stitle': ('bibjson.alternative_title.exact', None),

    # chronology, enumeration and pagination: articles only
    'date': (None, 'bibjson.year.exact'),
    'volume': (None, 'bibjson.journal.volume.exact'),
    'issue': (None, 'bibjson.journal.number.exact'),
    'spage': (None, 'bibjson.start_page.exact'),
    'epage': (None, 'bibjson.end_page.exact'),

    # serial identifiers all resolve against the shared ISSN index field
    # (an alternative would be bibjson.identifier.id.exact)
    'issn': ('index.issn.exact', 'index.issn.exact'),
    'eissn': ('index.issn.exact', 'index.issn.exact'),
    # NOTE(review): isbn is also matched against the ISSN field - confirm this is intended
    'isbn': ('index.issn.exact', 'index.issn.exact'),

    'doi': (None, 'index.doi.exact'),
}
# Filter applied to every OpenURL query so that only publicly visible records
# (in_doaj : true) can be returned.
IN_DOAJ_TERM = {
    "term": {"admin.in_doaj": True},
}

# Query template: a bool/must list seeded with the in_doaj filter. Take a
# deepcopy before appending search terms so the template itself is not mutated.
TERMS_SEARCH = {
    "query": {
        "bool": {
            "must": [IN_DOAJ_TERM],
        },
    },
}
class OpenURLRequest(object):
    """
    Based on the fields from ofi/fmt:kev:mtx:journal schema for Journals in OpenURL 1.0
    This is the only schema the DOAJ supports.

    Every key in JOURNAL_SCHEMA_KEYS is exposed as a property backed by an
    underscore-prefixed attribute. Most properties store the value unchanged;
    ``date`` is reduced to a year and ``issn`` / ``eissn`` get a dash inserted
    when it is missing.
    """

    # ~~API:Feature~~

    def __init__(self, **kwargs):
        """
        Create a request with every schema field set to None, then apply kwargs.

        :param kwargs: field name -> value pairs, assigned with setattr so the
            property setters run.
        """
        # Initialise the OpenURLRequest object with empty attributes
        for key in JOURNAL_SCHEMA_KEYS:
            setattr(self, key, None)

        # Save any attributes specified at creation time.
        # NOTE(review): keys are not restricted to JOURNAL_SCHEMA_KEYS, so any
        # attribute name can be set from here - confirm callers pass trusted keys.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        """Render the fields that currently hold a truthy value."""
        populated = ["%s : %s" % (x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS if getattr(self, x)]
        return "OpenURLRequest{" + ", ".join(populated) + "}"

    @staticmethod
    def _total_hits(results):
        """Read hits.total.value from a result dict, treating missing keys as 0."""
        return results.get('hits', {}).get('total', {}).get('value', 0)

    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.

        Side effect: if no genre is set, ``self.genre`` is set to the first
        supported genre before querying.

        :return: The results of a query through the dao, a JSON object, or None
            when the request yields no search terms for the selected genre.
        :raises ValueError: if the genre is not one of SUPPORTED_GENRES.
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)
        must = populated_query["query"]["bool"]["must"]

        # The template already carries its own filter term(s); remember how many
        # so we can tell afterwards whether this request contributed anything.
        template_term_count = len(must)

        # Get all of the attributes with values set. genre is excluded by name:
        # it selects the model to query rather than being a search term.
        set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS
                          if x != 'genre' and getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]  # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles.
        # list.index raises ValueError for an unsupported genre.
        i = SUPPORTED_GENRES.index(self.genre.lower())

        # Add the attributes to the query. Schema keys without any mapping are
        # skipped, the same as keys mapped to None for this genre.
        unmapped = (None,) * len(SUPPORTED_GENRES)
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES.get(k, unmapped)[i]
            if es_term is not None:
                must.append({"term": {es_term: v}})

        # avoid doing an empty query: with only the template's filter left,
        # the query would match arbitrary records.
        if len(must) == template_term_count:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)
        return None

    def get_result_url(self):
        """
        Get the URL for this OpenURLRequest's referent.

        Runs query_es, retries once through fallthrough_retry when nothing is
        found, then builds a URL from the first hit according to its es_type.

        :return: The url as a string, or None if not found.
        """
        try:
            results = self.query_es()
        except ValueError:
            # query_es raises ValueError for an unsupported genre
            return None

        if results is None:
            return None

        if self._total_hits(results) == 0:
            # No results found for query, retry
            results = self.fallthrough_retry()
            if results is None or self._total_hits(results) == 0:
                # This time we've definitely failed
                return None

        # Fall back to an empty hit if the list is missing or empty, so the
        # lookups below yield None instead of raising.
        hits = results.get('hits', {}).get('hits') or [{}]
        first_hit = hits[0]
        es_type = first_hit.get('_source', {}).get('es_type')

        if es_type == 'journal':
            # construct a journal object around the result
            journal = Journal(**first_hit)

            # the continuation is a first-class journal object, so if we have a journal we have the right continuation
            # (assuming that the user gave us specific enough information)
            ident = journal.id

            # if no volume parameter, construct the toc url using the ident only
            if not self.volume:
                return url_for("doaj.toc", identifier=ident)

            # If the request has a volume parameter, query for presence of an article with that volume
            vol_iss_results = self.query_for_vol(journal)
            if vol_iss_results is None:
                # we were asked for a vol/issue, but weren't given the correct information to get it.
                return None
            if vol_iss_results['hits']['total']['value'] > 0:
                # construct the toc url using the ident, plus volume and issue
                return url_for("doaj.toc", identifier=ident, volume=self.volume, issue=self.issue)

            # If no results, the DOAJ does not contain the vol/issue being searched. (Show openurl 404)
            return None

        # ~~->Article:Page~~
        if es_type == 'article':
            return url_for("doaj.article_page", identifier=first_hit['_id'])

        return None

    def query_for_vol(self, journalobj):
        """
        Count articles matching this request's volume (and issue) for a journal.

        :param journalobj: journal whose ``bibjson().issns()`` supplies the ISSNs to match.
        :return: the result of Article.query for a size-0 query, or None when
            the journal provides no ISSNs.
        """
        # The journal object will already be the correct continuation, if the user provided sufficient detail.
        issns = journalobj.bibjson().issns()

        # If there's no way to get the wanted issns, give up, else run the query
        if not issns:
            return None

        volume_query = deepcopy(TERMS_SEARCH)
        volume_query["size"] = 0  # only the hit count is needed
        must = volume_query["query"]["bool"]["must"]

        must.append({"terms": {"index.issn.exact": issns}})
        must.append({"term": {"bibjson.journal.volume.exact": self.volume}})

        # And if there's an issue, query that too. Note, issue does not make sense on its own.
        if self.issue:
            must.append({"term": {"bibjson.journal.number.exact": self.issue}})

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(volume_query))
        return Article.query(q=volume_query)

    def fallthrough_retry(self):
        """
        Some things to try differently if we get no results on first attempt.

        Side effect: when a retry is made, jtitle is moved into stitle and
        jtitle is cleared on this object.

        :return: a new result set, or None
        """
        results = None

        # Search again for the title against alternative_title (may catch translations of titles)
        if self.jtitle and not self.stitle:
            self.stitle = self.jtitle
            self.jtitle = None
            results = self.query_es()

        return results

    def validate_issn(self, issn_str):
        """
        If the ISSN is missing a dash, add it so it matches that in the index.

        :param issn_str: The ISSN, or if None, this will skip.
        :return: The ISSN with the dash added after the fourth character;
            falsy input is returned unchanged.
        """
        if issn_str and '-' not in issn_str:
            issn_str = issn_str[:4] + '-' + issn_str[4:]
        return issn_str

    @property
    def doi(self):
        """Digital Object Identifier"""
        return self._doi

    @doi.setter
    def doi(self, val):
        self._doi = val

    @property
    def aulast(self):
        """First author's family name, may be more than one word"""
        return self._aulast

    @aulast.setter
    def aulast(self, val):
        self._aulast = val

    @property
    def aufirst(self):
        """First author's given name or names or initials"""
        return self._aufirst

    @aufirst.setter
    def aufirst(self, val):
        self._aufirst = val

    @property
    def auinit(self):
        """First author's first and middle initials"""
        return self._auinit

    @auinit.setter
    def auinit(self, val):
        self._auinit = val

    @property
    def auinit1(self):
        """First author's first initial"""
        return self._auinit1

    @auinit1.setter
    def auinit1(self, val):
        self._auinit1 = val

    @property
    def auinitm(self):
        """First author's middle initial"""
        return self._auinitm

    @auinitm.setter
    def auinitm(self, val):
        self._auinitm = val

    @property
    def ausuffix(self):
        """First author's name suffix. e.g. 'Jr.', 'III'"""
        return self._ausuffix

    @ausuffix.setter
    def ausuffix(self, val):
        self._ausuffix = val

    @property
    def au(self):
        """full name of a single author"""
        return self._au

    @au.setter
    def au(self, val):
        self._au = val

    @property
    def aucorp(self):
        """Organisation or corporation that is the author or creator of the document"""
        return self._aucorp

    @aucorp.setter
    def aucorp(self, val):
        self._aucorp = val

    @property
    def atitle(self):
        """Article title"""
        return self._atitle

    @atitle.setter
    def atitle(self, val):
        self._atitle = val

    @property
    def jtitle(self):
        """Journal title"""
        return self._jtitle

    @jtitle.setter
    def jtitle(self, val):
        self._jtitle = val

    @property
    def stitle(self):
        """Abbreviated or short journal title"""
        return self._stitle

    @stitle.setter
    def stitle(self, val):
        self._stitle = val

    @property
    def date(self):
        """Date of publication"""
        return self._date

    @date.setter
    def date(self, val):
        # A truthy value is parsed and only its year is kept; a value that
        # dates.parse rejects with ValueError is stored as None.
        if val:
            try:
                parsed_date = dates.parse(val)
                val = parsed_date.year
            except ValueError:
                val = None
        self._date = val

    @property
    def chron(self):
        """Non-normalised enumeration / chronology, e.g. '1st quarter'"""
        return self._chron

    @chron.setter
    def chron(self, val):
        self._chron = val

    @property
    def ssn(self):
        """Season (chronology). spring|summer|fall|autumn|winter"""
        return self._ssn

    @ssn.setter
    def ssn(self, val):
        self._ssn = val

    @property
    def quarter(self):
        """Quarter (chronology). 1|2|3|4"""
        return self._quarter

    @quarter.setter
    def quarter(self, val):
        self._quarter = val

    @property
    def volume(self):
        """Volume designation. e.g. '124', or 'VI'"""
        return self._volume

    @volume.setter
    def volume(self, val):
        self._volume = val

    @property
    def part(self):
        """Subdivision of a volume or highest level division of the journal. e.g. 'B', 'Supplement'"""
        return self._part

    @part.setter
    def part(self, val):
        self._part = val

    @property
    def issue(self):
        """Journal issue"""
        return self._issue

    @issue.setter
    def issue(self, val):
        self._issue = val

    @property
    def spage(self):
        """Starting page"""
        return self._spage

    @spage.setter
    def spage(self, val):
        self._spage = val

    @property
    def epage(self):
        """Ending page"""
        return self._epage

    @epage.setter
    def epage(self, val):
        self._epage = val

    @property
    def pages(self):
        """Page range e.g. '53-58', 'C4-9'"""
        return self._pages

    @pages.setter
    def pages(self, val):
        self._pages = val

    @property
    def artnum(self):
        """Article number"""
        return self._artnum

    @artnum.setter
    def artnum(self, val):
        self._artnum = val

    @property
    def issn(self):
        """Journal ISSN"""
        return self._issn

    @issn.setter
    def issn(self, val):
        # Normalised through validate_issn so the stored value carries a dash
        self._issn = self.validate_issn(val)

    @property
    def eissn(self):
        """ISSN for electronic version of the journal"""
        return self._eissn

    @eissn.setter
    def eissn(self, val):
        # Normalised through validate_issn so the stored value carries a dash
        self._eissn = self.validate_issn(val)

    @property
    def isbn(self):
        """Journal ISBN"""
        return self._isbn

    @isbn.setter
    def isbn(self, val):
        self._isbn = val

    @property
    def coden(self):
        """CODEN"""
        return self._coden

    @coden.setter
    def coden(self, val):
        self._coden = val

    @property
    def sici(self):
        """Serial Item and Contribution Identifier (SICI)"""
        return self._sici

    @sici.setter
    def sici(self, val):
        self._sici = val

    @property
    def genre(self):
        """journal|issue|article|proceeding|conference|preprint|unknown"""
        return self._genre

    @genre.setter
    def genre(self, val):
        self._genre = val