Coverage for portality/models/oaipmh.py: 94%

64 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-11-09 16:22 +0000

1from copy import deepcopy 

2from portality.models import Journal, Article 

3 

class OAIPMHRecord(object):
    """Query helpers for serving records over OAI-PMH.

    This class does not define ``query`` or ``pull`` itself; it expects them
    to be supplied by the model class it is combined with (see
    ``OAIPMHArticle`` and ``OAIPMHJournal`` in this file).
    """

    # Query for the single in_doaj record with the oldest ``last_updated``.
    earliest = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 1,
        "sort": [
            {"last_updated": {"order": "asc"}}
        ]
    }

    # Terms aggregation over the subject field of in_doaj records; no hits
    # are requested ("size": 0), only the buckets, ordered by key.
    sets = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 0,
        "aggs": {
            "sets": {
                "terms": {
                    "field": "index.schema_subject.exact",
                    "order": {"_key": "asc"},
                    "size": 100000
                }
            }
        }
    }

    # Base query for list_records; copied and extended on every call.
    records = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "from": 0,
        "size": 25
    }

    # Templates for the optional filters and the sort order used by
    # list_records.  The "<...>" values are placeholders that get overwritten
    # (or removed) on a copy before the query is sent.
    set_limit = {"term": {"index.schema_subject.exact": "<set name>"}}
    range_limit = {"range": {"last_updated": {"gte": "<from date>", "lte": "<until date>"}}}
    created_sort = [{"last_updated": {"order": "desc"}}, {"id.exact": "desc"}]

    def earliest_datestamp(self):
        """Return ``last_updated`` of the oldest in_doaj record.

        Returns ``None`` when the query yields no hits or the first hit
        carries no ``last_updated`` value.
        """
        result = self.query(q=self.earliest)
        hits = result.get("hits", {}).get("hits", [])
        # An empty hit list must not be indexed: a `[{}]` default on .get()
        # would only cover a missing key, not a present-but-empty list.
        if not hits:
            return None
        return hits[0].get("_source", {}).get("last_updated")

    def identifier_exists(self, identifier):
        """Return True if ``self.pull(identifier)`` yields an object."""
        return self.pull(identifier) is not None

    def list_sets(self):
        """Return the ``key`` of every bucket in the ``sets`` aggregation."""
        result = self.query(q=self.sets)
        buckets = result.get("aggregations", {}).get("sets", {}).get("buckets", [])
        return [bucket.get("key") for bucket in buckets]

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Run the records query with optional filters and paging.

        :param from_date: lower bound (``gte``) on ``last_updated``, or None
        :param until_date: upper bound (``lte``) on ``last_updated``, or None;
            ignored when ``start_after`` is given
        :param oai_set: value to match on ``index.schema_subject.exact``, or None
        :param list_size: number of hits to request; None keeps the default
            from ``records``
        :param start_after: indexable pair, or None; element 0 is used as the
            ``lte`` bound on ``last_updated`` and element 1 as the ``from``
            offset
        :return: ``(total, sources)`` where ``total`` is ``hits.total.value``
            (0 if absent) and ``sources`` is the ``_source`` of each hit
        """
        # Work on a copy so the class-level template is never mutated.
        q = deepcopy(self.records)
        must = q["query"]["bool"]["must"]

        if oai_set is not None:
            set_filter = deepcopy(self.set_limit)
            set_filter["term"]["index.schema_subject.exact"] = oai_set
            must.append(set_filter)

        if from_date is not None or until_date is not None or start_after is not None:
            date_filter = deepcopy(self.range_limit)
            bounds = date_filter["range"]["last_updated"]

            # The resumption point takes precedence over until_date for the
            # upper bound.
            if start_after is not None:
                bounds["lte"] = start_after[0]
            elif until_date is not None:
                bounds["lte"] = until_date
            else:
                del bounds["lte"]

            if from_date is not None:
                bounds["gte"] = from_date
            else:
                del bounds["gte"]

            must.append(date_filter)

        if list_size is not None:
            q["size"] = list_size

        # The template already has "from": 0, so only a resumption point
        # needs to change it.
        if start_after is not None:
            q["from"] = start_after[1]

        q["sort"] = deepcopy(self.created_sort)

        results = self.query(q=q)

        hits = results.get("hits", {})
        total = hits.get("total", {}).get("value", 0)
        return total, [hit.get("_source") for hit in hits.get("hits", [])]

112 

113 

class OAIPMHArticle(OAIPMHRecord, Article):
    """OAI-PMH record access that hands results back as ``Article`` objects."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Delegate to ``OAIPMHRecord.list_records`` and wrap each result.

        Returns ``(total, articles)``, where every raw result dict has been
        passed to ``Article(**result)``.
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        articles = [Article(**source) for source in sources]
        return total, articles

    def pull(self, identifier):
        """Return the pulled record only if it reports ``is_in_doaj()``, else None."""
        # The inherited pull is overridden because only in_doaj items count here.
        found = super().pull(identifier)
        if found is None or not found.is_in_doaj():
            return None
        return found

126 

class OAIPMHJournal(OAIPMHRecord, Journal):
    """OAI-PMH record access that hands results back as ``Journal`` objects."""

    def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
        """Delegate to ``OAIPMHRecord.list_records`` and wrap each result.

        Returns ``(total, journals)``, where every raw result dict has been
        passed to ``Journal(**result)``.
        """
        total, sources = super().list_records(
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            list_size=list_size,
            start_after=start_after,
        )
        journals = [Journal(**source) for source in sources]
        return total, journals

    def pull(self, identifier):
        """Return the pulled record only if it reports ``is_in_doaj()``, else None."""
        # The inherited pull is overridden because only in_doaj items count here.
        found = super().pull(identifier)
        if found is None or not found.is_in_doaj():
            return None
        return found

138 return None