Coverage for portality / autocheck / resources / issn_org.py: 89%

44 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1from datetime import datetime 

2 

3from portality.autocheck.resource_bundle import Resource 

4from portality.core import app 

5 

6import requests 

7import json 

8import time 

9from bs4 import BeautifulSoup 

10 

11 

class ISSNOrg(Resource):
    """Resource that looks up an ISSN on portal.issn.org.

    The portal page for the ISSN is downloaded and the first
    ``application/ld+json`` script embedded in it is decoded and wrapped in an
    ``ISSNOrgData`` object.
    """
    __identity__ = "issn_org"

    def __init__(self, resource_bundle):
        """Set up the resource and read its timeout/throttle settings.

        :param resource_bundle: passed straight through to ``Resource.__init__``
        """
        super(ISSNOrg, self).__init__(resource_bundle)
        # Passed to requests.get() as its ``timeout`` argument.
        self._timeout = app.config.get("AUTOCHECK_RESOURCE_ISSN_ORG_TIMEOUT", 10)
        # Minimum gap, in seconds, enforced between two consecutive requests.
        self._throttle = app.config.get("AUTOCHECK_RESOURCE_ISSN_ORG_THROTTLE", 0)
        # Naive UTC datetime of the most recent completed request, or None
        # when no request has been made yet by this instance.
        self._last_request = None

    def make_resource_id(self, issn):
        """Return the identifier for this ISSN: ``<self.name()>_<issn>``."""
        return self.name() + "_" + issn

    def reference_url(self, issn):
        """Return the portal.issn.org URL for the given ISSN."""
        return "https://portal.issn.org/resource/ISSN/" + issn

    def fetch_fresh(self, issn):
        """Download the portal page for ``issn`` and parse its JSON-LD record.

        If a previous request was made less than ``self._throttle`` seconds
        ago, this call sleeps for the remainder of that interval first.

        :param issn: the ISSN to look up
        :return: an ``ISSNOrgData`` wrapping the decoded JSON-LD, or ``None``
            when the page has no ``application/ld+json`` script or that script
            is empty
        :raises: whatever ``requests.get`` raises on network failure/timeout,
            and ``json.JSONDecodeError`` if the script content is not valid JSON
        """
        if self._last_request is not None:
            since_last = (datetime.utcnow() - self._last_request).total_seconds()
            if since_last < self._throttle:
                time.sleep(self._throttle - since_last)

        resp = requests.get(self.reference_url(issn), timeout=self._timeout)
        # Only stamped after the request returns, so a request that raises
        # does not reset the throttle window.
        self._last_request = datetime.utcnow()

        page = BeautifulSoup(resp.text, features="lxml")

        scripts = page.find_all("script", type="application/ld+json")
        if not scripts:
            return None

        # ``.string`` is None when the tag is empty or has more than one child
        # node; json.loads(None) would raise TypeError, so treat an empty or
        # blank script the same as a missing one.
        raw = scripts[0].string
        if raw is None or not raw.strip():
            return None

        return ISSNOrgData(json.loads(raw))

46 

47 

class ISSNOrgData(object):
    """Convenience accessors over a decoded JSON-LD record (a dict)."""

    def __init__(self, raw):
        """
        :param raw: the decoded JSON-LD object; stored unchanged as ``self.data``
        """
        self.data = raw

    @property
    def version(self):
        """The ``version`` field of ``mainEntityOfPage``, or ``None``.

        ``None`` is returned when ``mainEntityOfPage`` is absent, is ``null``,
        or is not an object, or when it has no ``version`` key.
        """
        # dict.get(key, {}) only falls back when the key is absent; an explicit
        # null (or a plain string value) would otherwise break the chained .get().
        main_entity = self.data.get("mainEntityOfPage")
        if not isinstance(main_entity, dict):
            return None
        return main_entity.get("version")

    def is_registered(self):
        """Return True when ``version`` is exactly ``"Register"``."""
        return self.version == "Register"

    @property
    def archive_components(self):
        """List of ``subjectOf`` entries whose ``@type`` is ``"ArchiveComponent"``.

        Returns an empty list when ``subjectOf`` is absent, ``null``, or not a
        list/object. A single object is treated as a one-element list.
        """
        subject_of = self.data.get("subjectOf")
        if isinstance(subject_of, dict):
            # A lone object instead of an array: iterating it directly would
            # yield its keys, so wrap it.
            subject_of = [subject_of]
        elif not isinstance(subject_of, (list, tuple)):
            return []
        return [
            ac for ac in subject_of
            if isinstance(ac, dict) and ac.get("@type") == "ArchiveComponent"
        ]