Coverage for portality / autocheck / checkers / keepers_registry.py: 93%
75 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
from datetime import datetime, timezone
from typing import Callable

from portality.models import JournalLikeObject, Autocheck
from portality.autocheck.resource_bundle import ResourceBundle
from portality.autocheck.checkers.issn_active import ISSNChecker
class KeepersRegistry(ISSNChecker):
    """Autocheck that compares the preservation services on a record with
    the archive components found in the record's ISSN data.

    For every archive found in the ISSN data, and for every service selected
    on the form, an advice entry is added to the ``Autocheck`` object under
    the ``preservation_service`` field, using one of the advice codes
    defined on this class.
    """
    __identity__ = "keepers_registry"

    # Preservation service name (as it appears in the form's
    # ``preservation_service`` list) -> archive identifier matched against
    # ``holdingArchive["@id"]`` in the archive components.
    ID_MAP = {
        "CLOCKSS": "http://issn.org/organization/keepers#clockss",
        "LOCKSS": "http://issn.org/organization/keepers#lockss",
        "Internet Archive": "http://issn.org/organization/keepers#internetarchive",
        "PKP PN": "http://issn.org/organization/keepers#pkppln",
        "Portico": "http://issn.org/organization/keepers#portico"
    }

    # Archive identifier -> preservation service name.
    REVERSE_ID_MAP = {v: k for k, v in ID_MAP.items()}

    # Advice codes passed to ``autochecks.add_check``.
    MISSING = "missing"              # selected on the form, no usable archive data for it
    PRESENT = "present"              # selected on the form, archive coverage is current
    OUTDATED = "outdated"            # selected on the form, archive coverage is too old
    NOT_RECORDED = "not_recorded"    # selected on the form, but has no entry in ID_MAP
    SHOULD_SELECT = "should_select"  # not selected on the form, archive coverage is current

    def _get_archive_components(self, eissn_data, pissn_data):
        """Return one new list holding the archive components of both ISSNs.

        :param eissn_data: object exposing ``archive_components``, or None
        :param pissn_data: object exposing ``archive_components``, or None
        :return: the E-ISSN components followed by the P-ISSN components
        """
        components = []
        for issn_data in (eissn_data, pissn_data):
            if issn_data is not None:
                components += issn_data.archive_components
        return components

    def _extract_archive_data(self, acs):
        """Reduce archive components to the latest end year per archive.

        ``temporalCoverage`` is split on "/" and must yield exactly two
        parts; the second part is used as the end year, falling back to the
        first part when the second is blank.  Components whose coverage
        cannot be read as an integer year are ignored.

        :param acs: iterable of dicts, as returned by ``_get_archive_components``
        :return: dict of archive id -> highest end year (int) seen for it
        """
        latest_years = {}
        for component in acs:
            # ``or`` rather than a ``get`` default: a key that is present but
            # null would otherwise raise AttributeError on the next call.
            holding_archive = component.get("holdingArchive") or {}
            archive_id = holding_archive.get("@id")
            coverage = component.get("temporalCoverage") or ""

            parts = coverage.split("/")
            if len(parts) != 2:
                continue

            # open-ended coverage ("2010/") falls back to the start value
            end_str = parts[1].strip() or parts[0].strip()
            if not end_str:
                continue

            try:
                end_year = int(end_str)
            except ValueError:
                continue

            known = latest_years.get(archive_id)
            if known is None or end_year > known:
                latest_years[archive_id] = end_year

        return latest_years

    def _add_preservation_check(self, autochecks, advice, url, service):
        """Add one ``preservation_service`` advice entry to ``autochecks``."""
        autochecks.add_check(
            field="preservation_service",
            advice=advice,
            reference_url=url,
            context={"service": service},
            checked_by=self.__identity__
        )

    def check(self, form: dict,
              jla: JournalLikeObject,
              autochecks: Autocheck,
              resources: ResourceBundle,
              logger: Callable):
        """Run the check and add the resulting advice to ``autochecks``.

        An archive counts as "current" when its latest end year is no older
        than the previous calendar year (UTC).

        :param form: form data; ``preservation_service`` is read from it
        :param jla: not used by this checker
        :param autochecks: receives the advice via ``add_check``
        :param resources: passed through to ``retrieve_from_source``
        :param logger: callable taking a single message string
        """
        # ``retrieve_from_source`` is not defined in this class; presumably
        # it is inherited from ISSNChecker.
        (_eissn, eissn_url, eissn_data, _eissn_fail,
         _pissn, pissn_url, pissn_data, _pissn_fail) = self.retrieve_from_source(form, resources, autochecks, logger)

        url = eissn_url or pissn_url

        components = self._get_archive_components(eissn_data, pissn_data)
        latest_years = self._extract_archive_data(components)

        # tolerate an explicit null value for the field
        services = form.get("preservation_service") or []
        selected_ids = {self.ID_MAP[s] for s in services if s in self.ID_MAP}

        logger("There are {x} preservation services on the record: {y}".format(x=len(services), y=",".join(services)))

        # Computed once so that every archive is judged against the same year.
        cutoff_year = datetime.now(timezone.utc).year - 1

        # Pass 1: walk the archives found in the ISSN data.
        for archive_id, end_year in latest_years.items():
            # NOTE(review): for an archive id that is not in ID_MAP this is
            # None, so a current-but-unknown archive yields a SHOULD_SELECT
            # check with service None - confirm that is intended.
            service_name = self.REVERSE_ID_MAP.get(archive_id)
            is_current = end_year >= cutoff_year

            if archive_id not in selected_ids:
                if is_current:
                    logger("Service '{x}' has not been selected, but is registered and current in Keepers".format(x=service_name))
                    self._add_preservation_check(autochecks, self.SHOULD_SELECT, url, service_name)
                continue

            if is_current:
                logger("Service '{x}' is registered and current in Keepers".format(x=service_name))
                self._add_preservation_check(autochecks, self.PRESENT, url, service_name)
            else:
                # the temporal coverage is too old
                logger("Service {x} is registered at issn.org for this record, but the archive is not recent enough".format(x=service_name))
                self._add_preservation_check(autochecks, self.OUTDATED, url, service_name)

        # Pass 2: walk the services selected on the form.
        for service in services:
            if service in ("none", "national_library"):
                continue

            archive_id = self.ID_MAP.get(service)

            if not archive_id:
                logger("Service {x} is not recorded by Keepers Registry".format(x=service))
                self._add_preservation_check(autochecks, self.NOT_RECORDED, url, service)
                continue

            if archive_id not in latest_years:
                # the archive is not mentioned in issn.org
                logger("Service {x} is not registered at issn.org for this record".format(x=service))
                self._add_preservation_check(autochecks, self.MISSING, url, service)