Coverage for portality / autocheck / resources / issn_org.py: 89%
44 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1from datetime import datetime
3from portality.autocheck.resource_bundle import Resource
4from portality.core import app
6import requests
7import json
8import time
9from bs4 import BeautifulSoup
class ISSNOrg(Resource):
    """Resource that retrieves an ISSN's record page from portal.issn.org.

    The page is fetched over HTTP, the first embedded
    ``application/ld+json`` script is parsed as JSON, and the result is
    wrapped in an ``ISSNOrgData`` object.
    """

    __identity__ = "issn_org"

    def __init__(self, resource_bundle):
        """Set up the resource and read its settings from the app config.

        :param resource_bundle: handed unchanged to ``Resource.__init__``
        """
        super(ISSNOrg, self).__init__(resource_bundle)
        # Timeout (seconds) passed to ``requests.get``.
        self._timeout = app.config.get("AUTOCHECK_RESOURCE_ISSN_ORG_TIMEOUT", 10)
        # Minimum gap (seconds) enforced between consecutive requests; 0 disables it.
        self._throttle = app.config.get("AUTOCHECK_RESOURCE_ISSN_ORG_THROTTLE", 0)
        # Naive UTC datetime of the last completed request, or None if none yet.
        self._last_request = None

    def make_resource_id(self, issn):
        """Return the identifier for this ISSN: ``<self.name()>_<issn>``."""
        return self.name() + "_" + issn

    def reference_url(self, issn):
        """Return the portal.issn.org record URL for ``issn``."""
        return "https://portal.issn.org/resource/ISSN/" + issn

    def fetch_fresh(self, issn):
        """Fetch and parse the JSON-LD record for ``issn`` from the portal.

        :param issn: the ISSN to look up
        :return: an ``ISSNOrgData`` wrapping the parsed JSON-LD, or ``None``
            when the page has no ``application/ld+json`` script or that
            script has no usable text content
        :raises json.JSONDecodeError: if the script content is not valid JSON
        :raises requests.RequestException: if the HTTP request fails
        """
        self._wait_for_throttle()

        resp = requests.get(self.reference_url(issn), timeout=self._timeout)
        # Recorded only after a completed request, so a request that raises
        # does not reset the throttle window.
        self._last_request = datetime.utcnow()

        page = BeautifulSoup(resp.text, features="lxml")
        scripts = page.find_all("script", type="application/ld+json")
        if not scripts:
            return None

        # ``.string`` is None when the tag is empty or has several children;
        # treat that (and a blank payload) as "no structured data" instead of
        # letting json.loads fail on it.
        raw = scripts[0].string
        if raw is None or not raw.strip():
            return None

        data = json.loads(raw)
        return ISSNOrgData(data)

    def _wait_for_throttle(self):
        """Sleep for whatever remains of the throttle interval, if any."""
        if self._last_request is None:
            return
        since_last = (datetime.utcnow() - self._last_request).total_seconds()
        if since_last < self._throttle:
            time.sleep(self._throttle - since_last)
class ISSNOrgData(object):
    """Read-only accessors over a parsed JSON-LD record from portal.issn.org.

    The accessors tolerate missing or unexpectedly shaped fields: they return
    ``None`` or an empty list rather than raising.
    """

    def __init__(self, raw):
        """Store the parsed JSON-LD payload.

        :param raw: the decoded JSON value (expected to be a dict)
        """
        self.data = raw

    def _field(self, key):
        """Return the top-level value for ``key``, or None if absent or if the payload is not a dict."""
        if isinstance(self.data, dict):
            return self.data.get(key)
        return None

    @property
    def version(self):
        """The ``version`` of ``mainEntityOfPage``, or None when unavailable."""
        main_entity = self._field("mainEntityOfPage")
        # JSON-LD allows this property to be a bare URL string or null, in
        # which case there is no version to read.
        if not isinstance(main_entity, dict):
            return None
        return main_entity.get("version")

    def is_registered(self):
        """Return True when the record's version is exactly ``"Register"``."""
        return self.version == "Register"

    @property
    def archive_components(self):
        """List of ``subjectOf`` entries whose ``@type`` is ``"ArchiveComponent"``."""
        subject_of = self._field("subjectOf")
        # A single-valued property may be serialised as one object rather
        # than a one-element list; normalise so it is not iterated by key.
        if isinstance(subject_of, dict):
            subject_of = [subject_of]
        elif not isinstance(subject_of, list):
            return []
        return [
            ac for ac in subject_of
            if isinstance(ac, dict) and ac.get("@type") == "ArchiveComponent"
        ]