Coverage for portality/models/oaipmh.py: 94%
64 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-11-09 15:10 +0000
« prev ^ index » next coverage.py v6.4.2, created at 2022-11-09 15:10 +0000
1from copy import deepcopy
2from portality.models import Journal, Article
class OAIPMHRecord(object):
    """Query helpers shared by the OAI-PMH record classes.

    This class does not define ``query`` or ``pull`` itself; it calls
    ``self.query(q=...)`` and ``self.pull(identifier)`` and therefore has to
    be combined with a class that provides them.

    The class attributes below are query templates.  They are never mutated
    in place by the methods here: ``list_records`` works on deep copies.
    """

    # Oldest record (by ``last_updated``) that is flagged ``admin.in_doaj``.
    earliest = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 1,
        "sort": [
            {"last_updated": {"order": "asc"}}
        ]
    }

    # Terms aggregation over ``index.schema_subject.exact``; no hits returned.
    sets = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 0,
        "aggs": {
            "sets": {
                "terms": {
                    "field": "index.schema_subject.exact",
                    "order": {"_key": "asc"},
                    "size": 100000
                }
            }
        }
    }

    # Base query for listing records; filters are appended to ``must``.
    records = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "from": 0,
        "size": 25
    }

    # Filter templates; the "<...>" placeholders are overwritten (or removed)
    # on a deep copy before the clause is added to a query.
    set_limit = {"term": {"index.schema_subject.exact": "<set name>"}}
    range_limit = {"range": {"last_updated": {"gte": "<from date>", "lte": "<until date>"}}}
    created_sort = [{"last_updated": {"order": "desc"}}, {"id.exact": "desc"}]

    def earliest_datestamp(self):
        """Return ``last_updated`` of the oldest matching record.

        :return: the ``last_updated`` value of the first hit, or ``None``
            when the response contains no hits (or the hit has no such field).
        """
        result = self.query(q=self.earliest)
        hits = result.get("hits", {}).get("hits", [])
        if not hits:
            # An empty result set used to raise IndexError on ``[0]``;
            # treat it the same as a response without a "hits" key.
            return None
        return hits[0].get("_source", {}).get("last_updated")

    def identifier_exists(self, identifier):
        """Return ``True`` when ``self.pull(identifier)`` yields an object."""
        return self.pull(identifier) is not None

    def list_sets(self):
        """Return the ``key`` of every bucket in the ``sets`` aggregation.

        :return: list of bucket keys, empty when the response has no
            aggregation data.
        """
        result = self.query(q=self.sets)
        buckets = result.get("aggregations", {}).get("sets", {}).get("buckets", [])
        return [bucket.get("key") for bucket in buckets]

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Run the record-listing query with the requested filters.

        :param from_date: lower bound (``gte``) on ``last_updated``, or ``None``.
        :param until_date: upper bound (``lte``) on ``last_updated``, or
            ``None``.  Ignored when ``start_after`` is given.
        :param oai_set: value to match on ``index.schema_subject.exact``,
            or ``None`` for no set restriction.
        :param list_size: page size; the template default is used when ``None``.
        :param start_after: indexable pair; element 0 is used as the ``lte``
            bound on ``last_updated`` and element 1 as the ``from`` offset.
        :return: ``(total, sources)`` where ``total`` is ``hits.total.value``
            from the response (0 if absent) and ``sources`` is the list of
            ``_source`` values of the returned hits.
        """
        q = deepcopy(self.records)
        must = q["query"]["bool"]["must"]

        if oai_set is not None:
            s = deepcopy(self.set_limit)
            s["term"]["index.schema_subject.exact"] = oai_set
            must.append(s)

        if until_date is not None or from_date is not None or start_after is not None:
            d = deepcopy(self.range_limit)
            bounds = d["range"]["last_updated"]

            # The resumption point takes precedence over the caller's
            # until_date as the upper bound.
            if start_after is not None:
                bounds["lte"] = start_after[0]
            elif until_date is not None:
                bounds["lte"] = until_date
            else:
                del bounds["lte"]

            if from_date is not None:
                bounds["gte"] = from_date
            else:
                del bounds["gte"]

            must.append(d)

        if list_size is not None:
            q["size"] = list_size

        q["from"] = start_after[1] if start_after is not None else 0
        q["sort"] = deepcopy(self.created_sort)

        results = self.query(q=q)

        hits = results.get("hits", {})
        total = hits.get("total", {}).get("value", 0)
        return total, [hit.get("_source") for hit in hits.get("hits", [])]
class OAIPMHArticle(OAIPMHRecord, Article):
    """Article flavour of the OAI-PMH record queries."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """List matching records, wrapping each raw result in ``Article``.

        Arguments are forwarded unchanged to the parent ``list_records``.

        :return: ``(total, articles)``
        """
        count, sources = super(OAIPMHArticle, self).list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        articles = []
        for source in sources:
            articles.append(Article(**source))
        return count, articles

    def pull(self, identifier):
        """Fetch a record via the inherited ``pull``, hiding it unless in DOAJ.

        :return: the record when it exists and ``is_in_doaj()`` is truthy,
            otherwise ``None``.
        """
        # The inherited pull does not check in_doaj status, so filter here.
        found = super(OAIPMHArticle, self).pull(identifier)
        if found is None or not found.is_in_doaj():
            return None
        return found
class OAIPMHJournal(OAIPMHRecord, Journal):
    """Journal flavour of the OAI-PMH record queries."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """List matching records, wrapping each raw result in ``Journal``.

        Arguments are forwarded unchanged to the parent ``list_records``.

        :return: ``(total, journals)``
        """
        count, sources = super(OAIPMHJournal, self).list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        journals = []
        for source in sources:
            journals.append(Journal(**source))
        return count, journals

    def pull(self, identifier):
        """Fetch a record via the inherited ``pull``, hiding it unless in DOAJ.

        :return: the record when it exists and ``is_in_doaj()`` is truthy,
            otherwise ``None``.
        """
        # The inherited pull does not check in_doaj status, so filter here.
        found = super(OAIPMHJournal, self).pull(identifier)
        if found is None or not found.is_in_doaj():
            return None
        return found