Coverage for portality / models / oaipmh.py: 95%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1from copy import deepcopy 

2 

3from portality.models import Journal, Article, ArticleTombstone 

4from portality import constants 

5 

6 

class OAIPMHRecord(object):
    """Query templates and listing helpers shared by the OAI-PMH record classes.

    The methods here call ``self.query`` and ``self.pull``, neither of which is
    defined in this class; they are expected to be supplied by the class this
    one is combined with.
    """

    # Single oldest record (by ``last_updated``) among those with
    # ``admin.in_doaj`` set to True.
    earliest = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 1,
        "sort": [
            {"last_updated": {"order": "asc"}}
        ]
    }

    # Terms aggregation over ``index.schema_subject.exact`` for records with
    # ``admin.in_doaj`` True; ``size: 0`` means no documents are returned,
    # only the aggregation buckets.
    sets = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 0,
        "aggs": {
            "sets": {
                "terms": {
                    "field": "index.schema_subject.exact",
                    "order": {"_key": "asc"},
                    "size": 100000
                }
            }
        }
    }

    # Base query for list_records; filters are appended to a deep copy of it.
    records = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": []
            }
        },
        "from": 0,
        "size": 25
    }

    # Filter templates; the "<...>" placeholders are overwritten (or removed)
    # on a deep copy before use.
    set_limit = {"term": {"index.classification.exact": "<set name>"}}
    range_limit = {"range": {"last_updated": {"gte": "<from date>", "lte": "<until date>"}}}
    created_sort = [{"last_updated": {"order": "desc"}}, {"id.exact": "desc"}]

    def earliest_datestamp(self):
        """Return ``last_updated`` of the first hit of the ``earliest`` query.

        Returns None when the response contains no hits (either the key is
        absent or the hit list is empty) or the hit carries no such field.
        """
        result = self.query(q=self.earliest)
        hits = result.get("hits", {}).get("hits", [])
        if not hits:
            # An empty hit list would otherwise raise IndexError on hits[0].
            return None
        return hits[0].get("_source", {}).get("last_updated")

    def identifier_exists(self, identifier):
        """Return True if ``self.pull(identifier)`` yields something other than None."""
        obj = self.pull(identifier)
        return obj is not None

    def list_sets(self):
        """Return the bucket keys of the ``sets`` aggregation, in response order."""
        result = self.query(q=self.sets)
        buckets = result.get("aggregations", {}).get("sets", {}).get("buckets", [])
        return [bucket.get("key") for bucket in buckets]

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Build and run a record listing query.

        :param from_date: lower bound (``gte``) on ``last_updated``, or None for no lower bound
        :param until_date: upper bound (``lte``) on ``last_updated``, or None for no upper bound
        :param oai_set: set name; any occurrence of ``constants.SUBJECTS_SCHEMA``
            is stripped from it before matching on ``index.classification.exact``
        :param list_size: page size; the template's ``size`` is kept when None
        :param start_after: indexable pair; element 0 is used as the ``lte``
            bound (taking precedence over ``until_date``) and element 1 as the
            ``from`` offset
        :return: tuple of (total hit count, list of ``_source`` dicts)
        """
        q = deepcopy(self.records)
        must = q["query"]["bool"]["must"]

        if oai_set is not None:
            set_filter = deepcopy(self.set_limit)
            set_filter["term"]["index.classification.exact"] = oai_set.replace(constants.SUBJECTS_SCHEMA, "")
            must.append(set_filter)

        if until_date is not None or from_date is not None or start_after is not None:
            date_filter = deepcopy(self.range_limit)
            bounds = date_filter["range"]["last_updated"]

            # Upper bound: start_after wins over until_date; with neither,
            # the placeholder is dropped so the range is open-ended.
            if start_after is not None:
                bounds["lte"] = start_after[0]
            elif until_date is not None:
                bounds["lte"] = until_date
            else:
                del bounds["lte"]

            if from_date is not None:
                bounds["gte"] = from_date
            else:
                del bounds["gte"]

            must.append(date_filter)

        if list_size is not None:
            q["size"] = list_size

        q["from"] = start_after[1] if start_after is not None else 0
        q["sort"] = deepcopy(self.created_sort)

        results = self.query(q=q)

        total = results.get("hits", {}).get("total", {}).get('value', 0)
        return total, [hit.get("_source") for hit in results.get("hits", {}).get("hits", [])]

114 

115 

class OAIPMHArticle(OAIPMHRecord, Article):
    """OAI-PMH view over articles and article tombstones."""

    # Names both types, comma-separated; presumably consumed by the inherited
    # query machinery -- confirm against the base model.
    __type__ = "article,article_tombstone"

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Run the shared listing query and wrap each raw source in a model.

        A source whose ``es_type`` equals "article" becomes an ``Article``;
        every other source becomes an ``ArticleTombstone``.

        :return: tuple of (total hit count, list of model objects)
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )

        wrapped = []
        for source in sources:
            model = Article if source.get("es_type") == "article" else ArticleTombstone
            wrapped.append(model(**source))
        return total, wrapped

    def pull(self, identifier):
        """Look the identifier up as an Article, then as an ArticleTombstone.

        Overrides the default pull because the tombstone record must be
        checked too. Returns whatever the tombstone lookup gives when the
        article lookup returns None.
        """
        found = Article.pull(identifier)
        if found is not None:
            return found
        return ArticleTombstone.pull(identifier)

130 

131 

class OAIPMHJournal(OAIPMHRecord, Journal):
    """OAI-PMH view over journals."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Run the shared listing query and wrap each raw source in a ``Journal``.

        :return: tuple of (total hit count, list of ``Journal`` objects)
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )

        journals = []
        for source in sources:
            journals.append(Journal(**source))
        return total, journals