Coverage for portality/models/oaipmh.py: 95%
62 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1from copy import deepcopy
3from portality.models import Journal, Article, ArticleTombstone
4from portality import constants
class OAIPMHRecord(object):
    """Shared query logic for the OAI-PMH record models.

    This class only defines query templates and the methods that run them.
    It calls ``self.query(q=...)`` and ``self.pull(identifier)`` but does not
    define either, so it must be combined with a class that supplies them.
    """

    # Fetches a single record with admin.in_doaj set, oldest last_updated first.
    earliest = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 1,
        "sort": [
            {"last_updated": {"order": "asc"}}
        ]
    }

    # Returns no documents, only a terms aggregation named "sets" over the
    # subject field, ordered by key.
    sets = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 0,
        "aggs": {
            "sets": {
                "terms": {
                    "field": "index.schema_subject.exact",
                    "order": {"_key": "asc"},
                    "size": 100000
                }
            }
        }
    }

    # Base query for list_records(); filters are appended to "must" on a copy.
    records = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": []
            }
        },
        "from": 0,
        "size": 25
    }

    # Filter templates. The "<...>" values are placeholders that list_records()
    # overwrites or deletes on a deep copy; they are never sent as-is.
    set_limit = {"term": {"index.classification.exact": "<set name>"}}
    range_limit = {"range": {"last_updated": {"gte": "<from date>", "lte": "<until date>"}}}

    # NOTE: despite the name, this orders by last_updated (newest first), with
    # id.exact as the tie-breaker.
    created_sort = [{"last_updated": {"order": "desc"}}, {"id.exact": "desc"}]

    def earliest_datestamp(self):
        """Return the ``last_updated`` value of the oldest matching record.

        :return: the ``last_updated`` value from the first hit's ``_source``,
            or None when the query returns no hits or the field is absent.
        """
        result = self.query(q=self.earliest)
        hits = result.get("hits", {}).get("hits", [])
        if not hits:
            # An empty hit list is a normal response when nothing matches;
            # indexing it directly would raise IndexError.
            return None
        return hits[0].get("_source", {}).get("last_updated")

    def identifier_exists(self, identifier):
        """Return True if ``self.pull(identifier)`` finds a record."""
        return self.pull(identifier) is not None

    def list_sets(self):
        """Return the bucket keys of the "sets" aggregation, in response order."""
        result = self.query(q=self.sets)
        buckets = result.get("aggregations", {}).get("sets", {}).get("buckets", [])
        return [bucket.get("key") for bucket in buckets]

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Run the record listing query with the requested filters and paging.

        :param from_date: lower bound (``gte``) on ``last_updated``, or None.
        :param until_date: upper bound (``lte``) on ``last_updated``, or None.
            Ignored when ``start_after`` is supplied.
        :param oai_set: set name to filter on; every occurrence of
            ``constants.SUBJECTS_SCHEMA`` is removed from it before it is used
            as the term value.
        :param list_size: number of results to request; the template's size
            is used when None.
        :param start_after: indexable pair used for paging. Element 0 becomes
            the ``lte`` bound on ``last_updated`` and element 1 becomes the
            ``from`` offset.
        :return: tuple of (total hit count, list of ``_source`` dicts).
        """
        q = deepcopy(self.records)
        must = q["query"]["bool"]["must"]

        if oai_set is not None:
            set_filter = deepcopy(self.set_limit)
            set_filter["term"]["index.classification.exact"] = oai_set.replace(constants.SUBJECTS_SCHEMA, "")
            must.append(set_filter)

        if from_date is not None or until_date is not None or start_after is not None:
            date_filter = deepcopy(self.range_limit)
            bounds = date_filter["range"]["last_updated"]

            # The paging marker takes precedence over the caller's until date.
            if start_after is not None:
                bounds["lte"] = start_after[0]
            elif until_date is not None:
                bounds["lte"] = until_date
            else:
                del bounds["lte"]

            if from_date is not None:
                bounds["gte"] = from_date
            else:
                del bounds["gte"]

            must.append(date_filter)

        if list_size is not None:
            q["size"] = list_size

        q["from"] = start_after[1] if start_after is not None else 0
        q["sort"] = deepcopy(self.created_sort)

        results = self.query(q=q)

        hits = results.get("hits", {})
        total = hits.get("total", {}).get('value', 0)
        return total, [hit.get("_source") for hit in hits.get("hits", [])]
class OAIPMHArticle(OAIPMHRecord, Article):
    """OAI-PMH record model covering articles and article tombstones."""

    # Comma-separated pair of type names; presumably this makes queries span
    # both the article and tombstone types -- confirm against the base model.
    __type__ = "article,article_tombstone"

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """List matching records, wrapping each raw result in a model object.

        Takes the same arguments as ``OAIPMHRecord.list_records``. A result
        whose ``es_type`` is "article" becomes an ``Article``; every other
        result becomes an ``ArticleTombstone``.

        :return: tuple of (total hit count, list of model objects).
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        wrapped = []
        for source in sources:
            model = Article if source.get("es_type") == "article" else ArticleTombstone
            wrapped.append(model(**source))
        return total, wrapped

    def pull(self, identifier):
        """Look the identifier up as an article, then as a tombstone.

        :return: the result of ``Article.pull`` when it is not None, otherwise
            whatever ``ArticleTombstone.pull`` returns.
        """
        # The default pull is overridden so that tombstone records also count.
        found = Article.pull(identifier)
        return found if found is not None else ArticleTombstone.pull(identifier)
class OAIPMHJournal(OAIPMHRecord, Journal):
    """OAI-PMH record model for journals."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """List matching records, wrapping each raw result in a ``Journal``.

        Takes the same arguments as ``OAIPMHRecord.list_records``.

        :return: tuple of (total hit count, list of ``Journal`` objects).
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        journals = []
        for source in sources:
            journals.append(Journal(**source))
        return total, journals