Coverage for portality / autocheck / checkers / keepers_registry.py: 93%

75 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1from portality.models import JournalLikeObject, Autocheck 

2from portality.autocheck.resource_bundle import ResourceBundle 

3from typing import Callable 

4from portality.autocheck.checkers.issn_active import ISSNChecker 

5from datetime import datetime 

6 

7 

class KeepersRegistry(ISSNChecker):
    """Autocheck that compares the preservation services selected on a record
    with the archive components retrieved for the record's ISSNs.

    For each relevant archive/service an autocheck is recorded against the
    ``preservation_service`` field with one of the advice values defined on
    this class (``MISSING``, ``PRESENT``, ``OUTDATED``, ``NOT_RECORDED``,
    ``SHOULD_SELECT``).
    """

    __identity__ = "keepers_registry"

    # Preservation service value (as it appears in the form) -> archive
    # identifier used in the retrieved archive components.
    ID_MAP = {
        "CLOCKSS": "http://issn.org/organization/keepers#clockss",
        "LOCKSS": "http://issn.org/organization/keepers#lockss",
        "Internet Archive": "http://issn.org/organization/keepers#internetarchive",
        "PKP PN": "http://issn.org/organization/keepers#pkppln",
        "Portico": "http://issn.org/organization/keepers#portico"
    }

    # Archive identifier -> preservation service value
    REVERSE_ID_MAP = {v: k for k, v in ID_MAP.items()}

    # Advice values attached to the autochecks created by check()
    MISSING = "missing"                # selected service has no usable archive entry
    PRESENT = "present"                # selected service is archived and current
    OUTDATED = "outdated"              # selected service is archived but coverage is too old
    NOT_RECORDED = "not_recorded"      # selected service has no entry in ID_MAP
    SHOULD_SELECT = "should_select"    # archived and current, but not selected on the record

    def _get_archive_components(self, eissn_data, pissn_data):
        """Return the archive components of both ISSN records as a single list.

        :param eissn_data: data object for the electronic ISSN, or None
        :param pissn_data: data object for the print ISSN, or None
        :return: new list with the ``archive_components`` of every non-None
            argument, electronic ISSN first
        """
        acs = []
        for data in (eissn_data, pissn_data):
            if data is not None:
                acs.extend(data.archive_components)
        return acs

    def _extract_archive_data(self, acs):
        """Reduce archive components to the latest covered year per archive.

        Each component is expected to be a dict with a ``holdingArchive``
        dict carrying an ``@id`` and a ``temporalCoverage`` string of the
        form ``"<start>/<end>"``.  When the end part is blank the start part
        is used instead.  Components that have no archive id, whose coverage
        does not have exactly two ``/``-separated parts, or whose year is
        not an integer are ignored.

        :param acs: iterable of archive component dicts
        :return: dict mapping archive id -> highest end year found
        """
        latest = {}
        for ac in acs:
            # ``or {}`` also covers a key that is present but null
            archive_id = (ac.get("holdingArchive") or {}).get("@id")
            if not archive_id:
                # without an identifier the component cannot be matched to
                # any service, so recording it (under None) is meaningless
                continue

            coverage = ac.get("temporalCoverage") or ""
            bits = coverage.split("/")
            if len(bits) != 2:
                continue

            # prefer the end of the range, fall back to its start
            end_str = bits[1].strip() or bits[0].strip()
            if not end_str:
                continue

            try:
                end_year = int(end_str)
            except ValueError:
                continue

            if archive_id not in latest or end_year > latest[archive_id]:
                latest[archive_id] = end_year

        return latest

    def _add_preservation_check(self, autochecks, advice, url, service):
        """Record one autocheck against the ``preservation_service`` field."""
        autochecks.add_check(
            field="preservation_service",
            advice=advice,
            reference_url=url,
            context={"service": service},
            checked_by=self.__identity__
        )

    def check(self, form: dict,
              jla: JournalLikeObject,
              autochecks: Autocheck,
              resources: ResourceBundle,
              logger: Callable):
        """Compare the form's preservation services with the archive data.

        An archive counts as "current" when its latest covered year is no
        older than the previous calendar year (UTC).

        * archive current, known in ``REVERSE_ID_MAP``, not selected -> ``SHOULD_SELECT``
        * archive selected and current                                -> ``PRESENT``
        * archive selected but not current                            -> ``OUTDATED``
        * selected service not in ``ID_MAP``                          -> ``NOT_RECORDED``
        * selected service in ``ID_MAP`` but no archive data for it   -> ``MISSING``

        The form values ``"none"`` and ``"national_library"`` are skipped.

        :param form: form data; ``preservation_service`` is read from it
        :param jla: the journal-like object under check (not used here)
        :param autochecks: collector that receives the checks via ``add_check``
        :param resources: passed through to ``retrieve_from_source``
        :param logger: callable taking a single message string
        """
        # function-scope import: ``timezone`` is not imported at module level
        from datetime import timezone

        eissn, eissn_url, eissn_data, eissn_fail, pissn, pissn_url, pissn_data, pissn_fail = \
            self.retrieve_from_source(form, resources, autochecks, logger)

        url = eissn_url or pissn_url

        acs = self._get_archive_components(eissn_data, pissn_data)
        ad = self._extract_archive_data(acs)

        # ``or []`` also covers a key that is present but null
        services = form.get("preservation_service") or []
        service_ids = {self.ID_MAP[s] for s in services if s in self.ID_MAP}

        logger("There are {x} preservation services on the record: {y}".format(x=len(services), y=",".join(services)))

        # evaluated once so every archive is judged against the same year;
        # timezone-aware replacement for the deprecated datetime.utcnow()
        oldest_current_year = datetime.now(timezone.utc).year - 1

        for archive_id, end_year in ad.items():
            service_name = self.REVERSE_ID_MAP.get(archive_id)
            is_current = end_year >= oldest_current_year

            if archive_id not in service_ids:
                # Only suggest archives that map onto a selectable service;
                # for anything else there is no service name to recommend.
                if is_current and service_name is not None:
                    logger("Service '{x}' has not been selected, but is registered and current in Keepers".format(x=service_name))
                    self._add_preservation_check(autochecks, self.SHOULD_SELECT, url, service_name)
                continue

            if is_current:
                logger("Service '{x}' is registered and current in Keepers".format(x=service_name))
                self._add_preservation_check(autochecks, self.PRESENT, url, service_name)
            else:
                # the temporal coverage is too old
                logger(
                    "Service {x} is registered at issn.org for this record, but the archive is not recent enough".format(x=service_name))
                self._add_preservation_check(autochecks, self.OUTDATED, url, service_name)

        for service in services:
            if service in ("none", "national_library"):
                continue

            archive_id = self.ID_MAP.get(service)

            if not archive_id:
                logger("Service {x} is not recorded by Keepers Registry".format(x=service))
                self._add_preservation_check(autochecks, self.NOT_RECORDED, url, service)
                continue

            if archive_id not in ad:
                # the archive is not mentioned in issn.org
                logger("Service {x} is not registered at issn.org for this record".format(x=service))
                self._add_preservation_check(autochecks, self.MISSING, url, service)

143 )