Coverage for portality / models / openurl.py: 89%
267 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1import re, json
2from flask import url_for
3from portality.models import Journal, Article
4from portality.core import app
5from copy import deepcopy
6from portality.lib import dates
# Attribute names accepted on an OpenURL request, in the order they are reported.
JOURNAL_SCHEMA_KEYS = [
    # identifier of the referent
    'doi',
    # author details
    'aulast', 'aufirst', 'auinit', 'auinit1', 'auinitm', 'ausuffix', 'au', 'aucorp',
    # titles
    'atitle', 'jtitle', 'stitle',
    # date and chronology
    'date', 'chron', 'ssn', 'quarter',
    # enumeration
    'volume', 'part', 'issue',
    # pagination
    'spage', 'epage', 'pages', 'artnum',
    # serial / book identifiers
    'issn', 'eissn', 'isbn', 'coden', 'sici',
    # kind of referent being requested
    'genre',
]

# The genres from the OpenURL schema we support. The position of a genre in this
# list is the position of its field name in each OPENURL_TO_ES tuple.
SUPPORTED_GENRES = ['journal', 'article']

# Mapping from OpenURL schema to both supported models: (Journal field, Article field).
# None means the key has no searchable field for that model.
OPENURL_TO_ES = {
    'aulast': (None, 'bibjson.author.name.exact'),
    'aucorp': (None, 'bibjson.author.affiliation.exact'),
    'atitle': (None, 'bibjson.title.exact'),
    # Note we use index.title.exact for journals, to support continuations
    'jtitle': ('index.title.exact', 'bibjson.journal.title.exact'),
    'stitle': ('bibjson.alternative_title.exact', None),
    'date': (None, 'bibjson.year.exact'),
    'volume': (None, 'bibjson.journal.volume.exact'),
    'issue': (None, 'bibjson.journal.number.exact'),
    'spage': (None, 'bibjson.start_page.exact'),
    'epage': (None, 'bibjson.end_page.exact'),
    # bibjson.identifier.id.exact
    'issn': ('index.issn.exact', 'index.issn.exact'),
    'eissn': ('index.issn.exact', 'index.issn.exact'),
    'isbn': ('index.issn.exact', 'index.issn.exact'),
    'doi': (None, 'index.doi.exact'),
}

# Terms search template. Ensure all queries from OpenURL return publicly visible results with in_doaj : true
IN_DOAJ_TERM = {"term": {"admin.in_doaj": True}}
TERMS_SEARCH = {
    "query": {
        "bool": {
            "must": [IN_DOAJ_TERM],
        },
    },
}
class OpenURLRequest(object):
    """
    Based on the fields from ofi/fmt:kev:mtx:journal schema for Journals in OpenURL 1.0
    This is the only schema the DOAJ supports.

    Every key in JOURNAL_SCHEMA_KEYS is exposed as a property on the object. Most simply
    store the value they are given; ``date`` is reduced to a year and ``issn`` / ``eissn``
    have a missing dash inserted.
    """

    # ~~API:Feature~~

    def __init__(self, **kwargs):
        """
        Create a request, optionally pre-populated.

        :param kwargs: attribute values to set at creation time, keyed by attribute name.
        """
        # Initialise the OpenURLRequest object with empty attributes
        for key in JOURNAL_SCHEMA_KEYS:
            setattr(self, key, None)

        # Save any attributes specified at creation time (goes through the property setters)
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        populated = ["%s : %s" % (x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS if getattr(self, x)]
        return "OpenURLRequest{" + ", ".join(populated) + "}"

    @staticmethod
    def _total_hits(results):
        """Read hits.total.value from a result dict, treating any missing level as 0."""
        return results.get('hits', {}).get('total', {}).get('value', 0)

    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.

        If no genre is set, it is defaulted to the first supported genre and stored on the object.

        :return: The results of a query through the dao, a JSON object, or None if none of the
            populated attributes maps to a search field for the requested genre.
        :raises ValueError: if the genre is not one of SUPPORTED_GENRES.
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)
        must = populated_query["query"]["bool"]["must"]

        # The template already carries its own clause(s); remember how many so we can tell
        # whether the request contributed anything.
        baseline_terms = len(must)

        # Get all of the attributes with values set. Genre selects the model, it is not a search term.
        set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS
                          if x != 'genre' and getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]  # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles
        i = SUPPORTED_GENRES.index(self.genre.lower())

        # Add the attributes to the query, skipping keys with no mapping at all
        # and keys with no field for this genre
        no_mapping = (None,) * len(SUPPORTED_GENRES)
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES.get(k, no_mapping)[i]
            if es_term is not None:
                must.append({"term": {es_term: v}})

        # avoid doing an empty query: with only the template clause left, the query would
        # match every public record and the caller would resolve to an arbitrary one
        if len(must) == baseline_terms:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)
        return None

    def get_result_url(self):
        """
        Get the URL for this OpenURLRequest's referent.

        :return: The url as a string, or None if not found.
        """
        try:
            results = self.query_es()
        except ValueError:
            # unsupported genre
            return None

        if results is None:
            return None

        if self._total_hits(results) == 0:
            # No results found for query, retry
            results = self.fallthrough_retry()
            if results is None or self._total_hits(results) == 0:
                # This time we've definitely failed
                return None

        # A non-zero total does not guarantee that any hit documents were returned
        hits = results.get('hits', {}).get('hits', [])
        if not hits:
            return None

        first_hit = hits[0]
        es_type = first_hit.get('_source', {}).get('es_type')

        if es_type == 'journal':
            # construct a journal object around the result
            journal = Journal(**first_hit)

            # the continuation is a first-class journal object, so if we have a journal we have the
            # right continuation (assuming that the user gave us specific enough information)
            # construct the toc url using the ident only
            return url_for("doaj.toc", identifier=journal.id)

        #~~->Article:Page~~
        elif es_type == 'article':
            return url_for("doaj.article_page", identifier=first_hit['_id'])

        return None

    def query_for_vol(self, journalobj):
        """
        Query articles for this request's volume (and issue, if set) within the given journal.

        :param journalobj: the journal whose ISSNs restrict the article search.
        :return: The results of the article query, or None if the journal yields no ISSNs.
        """
        # The journal object will already be the correct continuation, if the user provided sufficient detail.
        issns = journalobj.bibjson().issns()

        # If there's no way to get the wanted issns, give up, else run the query
        if issns is None:
            return None

        volume_query = deepcopy(TERMS_SEARCH)
        volume_query["size"] = 0
        must = volume_query["query"]["bool"]["must"]

        must.append({"terms": {"index.issn.exact": issns}})
        must.append({"term": {"bibjson.journal.volume.exact": self.volume}})

        # And if there's an issue, query that too. Note, issue does not make sense on its own.
        if self.issue:
            must.append({"term": {"bibjson.journal.number.exact": self.issue}})

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(volume_query))
        return Article.query(q=volume_query)

    def fallthrough_retry(self):
        """
        Some things to try differently if we get no results on first attempt.

        Note that this moves jtitle into stitle on the object itself.

        :return: a new result set, or None
        """
        results = None

        # Search again for the title against alternative_title (may catch translations of titles)
        if self.jtitle and not self.stitle:
            self.stitle = self.jtitle
            self.jtitle = None
            results = self.query_es()

        return results

    def validate_issn(self, issn_str):
        """
        If the ISSN is missing a dash, add it so it matches that in the index.

        :param issn_str: The ISSN, or if None, this will skip.
        :return: The ISSN with the dash added after the fourth character; falsy input is returned as-is.
        """
        if issn_str and '-' not in issn_str:
            issn_str = issn_str[:4] + '-' + issn_str[4:]
        return issn_str

    @property
    def doi(self):
        """Digital Object Identifier"""
        return self._doi

    @doi.setter
    def doi(self, val):
        self._doi = val

    @property
    def aulast(self):
        """First author's family name, may be more than one word"""
        return self._aulast

    @aulast.setter
    def aulast(self, val):
        self._aulast = val

    @property
    def aufirst(self):
        """First author's given name or names or initials"""
        return self._aufirst

    @aufirst.setter
    def aufirst(self, val):
        self._aufirst = val

    @property
    def auinit(self):
        """First author's first and middle initials"""
        return self._auinit

    @auinit.setter
    def auinit(self, val):
        self._auinit = val

    @property
    def auinit1(self):
        """First author's first initial"""
        return self._auinit1

    @auinit1.setter
    def auinit1(self, val):
        self._auinit1 = val

    @property
    def auinitm(self):
        """First author's middle initial"""
        return self._auinitm

    @auinitm.setter
    def auinitm(self, val):
        self._auinitm = val

    @property
    def ausuffix(self):
        """First author's name suffix. e.g. 'Jr.', 'III'"""
        return self._ausuffix

    @ausuffix.setter
    def ausuffix(self, val):
        self._ausuffix = val

    @property
    def au(self):
        """full name of a single author"""
        return self._au

    @au.setter
    def au(self, val):
        self._au = val

    @property
    def aucorp(self):
        """Organisation or corporation that is the author or creator of the document"""
        return self._aucorp

    @aucorp.setter
    def aucorp(self, val):
        self._aucorp = val

    @property
    def atitle(self):
        """Article title"""
        return self._atitle

    @atitle.setter
    def atitle(self, val):
        self._atitle = val

    @property
    def jtitle(self):
        """Journal title"""
        return self._jtitle

    @jtitle.setter
    def jtitle(self, val):
        self._jtitle = val

    @property
    def stitle(self):
        """Abbreviated or short journal title"""
        return self._stitle

    @stitle.setter
    def stitle(self, val):
        self._stitle = val

    @property
    def date(self):
        """Date of publication"""
        return self._date

    @date.setter
    def date(self, val):
        # Only the year is kept; a value that cannot be parsed is discarded
        if val:
            try:
                val = dates.parse(val).year
            except ValueError:
                val = None
        self._date = val

    @property
    def chron(self):
        """Non-normalised enumeration / chronology, e.g. '1st quarter'"""
        return self._chron

    @chron.setter
    def chron(self, val):
        self._chron = val

    @property
    def ssn(self):
        """Season (chronology). spring|summer|fall|autumn|winter"""
        return self._ssn

    @ssn.setter
    def ssn(self, val):
        self._ssn = val

    @property
    def quarter(self):
        """Quarter (chronology). 1|2|3|4"""
        return self._quarter

    @quarter.setter
    def quarter(self, val):
        self._quarter = val

    @property
    def volume(self):
        """Volume designation. e.g. '124', or 'VI'"""
        return self._volume

    @volume.setter
    def volume(self, val):
        self._volume = val

    @property
    def part(self):
        """Subdivision of a volume or highest level division of the journal. e.g. 'B', 'Supplement'"""
        return self._part

    @part.setter
    def part(self, val):
        self._part = val

    @property
    def issue(self):
        """Journal issue"""
        return self._issue

    @issue.setter
    def issue(self, val):
        self._issue = val

    @property
    def spage(self):
        """Starting page"""
        return self._spage

    @spage.setter
    def spage(self, val):
        self._spage = val

    @property
    def epage(self):
        """Ending page"""
        return self._epage

    @epage.setter
    def epage(self, val):
        self._epage = val

    @property
    def pages(self):
        """Page range e.g. '53-58', 'C4-9'"""
        return self._pages

    @pages.setter
    def pages(self, val):
        self._pages = val

    @property
    def artnum(self):
        """Article number"""
        return self._artnum

    @artnum.setter
    def artnum(self, val):
        self._artnum = val

    @property
    def issn(self):
        """Journal ISSN"""
        return self._issn

    @issn.setter
    def issn(self, val):
        self._issn = self.validate_issn(val)

    @property
    def eissn(self):
        """ISSN for electronic version of the journal"""
        return self._eissn

    @eissn.setter
    def eissn(self, val):
        self._eissn = self.validate_issn(val)

    @property
    def isbn(self):
        """Journal ISBN"""
        return self._isbn

    @isbn.setter
    def isbn(self, val):
        self._isbn = val

    @property
    def coden(self):
        """CODEN"""
        return self._coden

    @coden.setter
    def coden(self, val):
        self._coden = val

    @property
    def sici(self):
        """Serial Item and Contribution Identifier (SICI)"""
        return self._sici

    @sici.setter
    def sici(self, val):
        self._sici = val

    @property
    def genre(self):
        """journal|issue|article|proceeding|conference|preprint|unknown"""
        return self._genre

    @genre.setter
    def genre(self, val):
        self._genre = val