Coverage for portality/api/current/data_objects/article.py: 97%
119 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
1from portality.api.current.data_objects.common import _check_for_script
2from portality.lib import dataobj, swagger
3from portality import models, regex
4from portality.ui.messages import Messages
5from portality.util import normalise_issn
6from copy import deepcopy
7from portality.regex import DOI,DOI_COMPILED
def _unicode_fields(*names):
    """Return a ``fields`` mapping that coerces every named field to unicode.

    Each field gets its own freshly created ``{"coerce": "unicode"}`` dict, and
    the fields appear in the order the names are given.
    """
    return {name: {"coerce": "unicode"} for name in names}


# Struct definition shared by the incoming and outgoing article data objects.
BASE_ARTICLE_STRUCT = {
    "fields": {
        # id, created_date and last_updated are left in for ease of use by the
        # caller, but they need to be ignored on the conversion to the real object
        "id": {"coerce": "unicode"},
        "created_date": {"coerce": "utcdatetime"},
        "last_updated": {"coerce": "utcdatetime"},
        "es_type": {"coerce": "unicode"},
    },
    "objects": ["admin", "bibjson"],

    "structs": {

        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool", "get__default": False},
                "seal": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"},
            }
        },

        "bibjson": {
            "fields": _unicode_fields("title", "year", "month", "abstract"),
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {

                "identifier": {"fields": _unicode_fields("type", "id")},
                # The base struct can't coerce url because we have bad data https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
#                "link": {
#                    "fields": {
#                        "type": {"coerce": "link_type"},
#                        "url": {"coerce": "url"},
#                        "content_type": {"coerce": "link_content_type"}
#                    }
#                },
                "author": {
                    "fields": _unicode_fields("name", "affiliation", "orcid_id")
                },
                "journal": {
                    "fields": _unicode_fields(
                        "start_page",
                        "end_page",
                        "volume",
                        "number",
                        "publisher",
                        "title",
                        "country",
                    ),
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"}
                    },
                },
                "subject": {"fields": _unicode_fields("scheme", "term", "code")},
            },
        },
    },
}
def _link_struct(url_coerce):
    """Build the struct for one ``bibjson.link`` entry.

    :param url_coerce: name of the coerce function to apply to the ``url`` field
    :return: a new struct dict requiring ``type`` and ``url``
    """
    return {
        "required": ["type", "url"],
        "fields": {
            "type": {"coerce": "link_type"},
            "url": {"coerce": url_coerce},
            "content_type": {"coerce": "link_content_type"},
        },
    }


# Extra requirements added on top of the base struct by IncomingArticleDO;
# link urls go through the "url" coerce here.
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",  # author no longer required
                "identifier",  # One type of identifier is required
            ],
            "structs": {

                "identifier": {"required": ["type", "id"]},

                "link": _link_struct("url"),

                "author": {"required": ["name"]},
            },
        }
    },
}

# Patch added on top of the base struct by OutgoingArticleDO; link urls are
# only coerced to unicode here rather than through the "url" coerce.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": _link_struct("unicode"),
            }
        }
    }
}
# Coerce map used by both article data objects: the DataObj defaults plus the
# link-specific canonicalisers referenced by the structs in this module.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE.update({
    "link_type": dataobj.string_canonicalise(["fulltext"], allow_fail=False),
    "link_type_optional": dataobj.string_canonicalise(["fulltext"], allow_fail=True),
    "link_content_type": dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True),
})

# Swagger translations for the custom coerce names above; each one is a string
# whose "format" is the coerce name itself.
# TODO extend swagger-ui with support for these formats and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS.update({
    coerce_name: {"type": "string", "format": coerce_name}
    for coerce_name in ("link_type", "link_type_optional", "link_content_type")
})
class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    ~~APIIncomingArticle:Model->DataObj:Library~~

    Data object for an article record being supplied to the system.

    The object is built from BASE_ARTICLE_STRUCT with the additional
    requirements of INCOMING_ARTICLE_REQUIRED applied on top, and can be
    converted to a ``models.Article`` with :meth:`to_article_model`.
    """
    def __init__(self, raw=None):
        """
        :param raw: the raw article data to construct the object from (optional)
        """
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        super(IncomingArticleDO, self).__init__(
            raw,
            construct_silent_prune=True,
            expose_data=True,
            coerce_map=BASE_ARTICLE_COERCE,
            swagger_trans=BASE_ARTICLE_SWAGGER_TRANS
        )

    def _trim_empty_strings(self):
        """
        Strip empty-string values out of ``self.data["bibjson"]``, in place.

        The scalar fields title, year, month and abstract are deleted when
        their value is "".  Entries of the author, subject, identifier and link
        lists are dropped when their name / term / id / url (respectively) is
        "", and "" entries are dropped from the keywords list.

        List entries which do not carry the inspected key at all are left
        untouched (previously they caused a KeyError).
        """
        bibjson = self.data["bibjson"]

        for field in ("title", "year", "month", "abstract"):
            if bibjson.get(field) == "":
                del bibjson[field]

        def _is_empty(entry, key):
            # plain values (keywords) are compared directly
            if key is None:
                return entry == ""
            # .get() so that an entry without the key is kept rather than raising
            return entry.get(key) == ""

        list_checks = (
            ("author", "name"),
            ("subject", "term"),
            ("identifier", "id"),
            ("link", "url"),
            ("keywords", None),
        )
        for list_name, key in list_checks:
            if list_name in bibjson:
                entries = bibjson[list_name]
                # slice-assign so the existing list object is modified in place
                entries[:] = [e for e in entries if not _is_empty(e, key)]

    def custom_validate(self):
        """
        Apply the article-specific validation rules to this object.

        Does nothing for a blank object.  Otherwise empty strings are trimmed
        from the bibjson, and then the data is checked.

        :raises dataobj.ScriptTagFoundException: if ``_check_for_script`` flags the data
        :raises dataobj.DataStructureException: if no P-ISSN or E-ISSN identifier
            is present, if the two ISSNs are the same once normalised, if an
            author's ORCID iD does not match ``regex.ORCID_COMPILED``, or if the
            DOI identifier does not match ``DOI_COMPILED``
        """
        # only attempt to validate if this is not a blank object
        if not self.data:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        missing_issn_msg = "You must specify at least one of P-ISSN or E-ISSN in bibjson.identifier"

        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if identifiers is None or len(identifiers) == 0:
            raise dataobj.DataStructureException(missing_issn_msg)

        # extract the p/e-issn identifier objects (if a type is repeated, the last one is used)
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException(missing_issn_msg)

        # normalise the ids
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None and pissn.id == eissn.id:
            raise dataobj.DataStructureException("P-ISSN and E-ISSN should be different")

        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid
        for author in self.bibjson.author:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD format. Please use url format, eg: https://orcid.org/0001-1111-1111-1111")

        # check the DOI format; only the first identifier of type "doi" is validated
        for ident in self.bibjson.identifier:
            if ident.type == "doi":
                if not DOI_COMPILED.match(ident.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """
        Convert this data object to a ``models.Article``.

        Works on a deep copy of ``self.data``: start_page and end_page are moved
        from bibjson.journal up to bibjson, and admin.in_doaj, admin.seal,
        admin.upload_id and es_type are removed.

        :param existing: an existing article whose ``data`` the incoming data
            should be merged with (optional)
        :return: a new ``models.Article``, built from the merged data when
            ``existing`` is given
        """
        dat = deepcopy(self.data)

        # the model holds the page range directly on the bibjson, not under journal
        bibjson = dat["bibjson"]
        journal = bibjson.get("journal", {})
        for page_field in ("start_page", "end_page"):
            if page_field in journal:
                bibjson[page_field] = journal.pop(page_field)

        # clear out fields that we don't accept via the API
        admin = dat.get("admin", {})
        for admin_field in ("in_doaj", "seal", "upload_id"):
            admin.pop(admin_field, None)
        dat.pop("es_type", None)

        if existing is None:
            return models.Article(**dat)

        merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
        return models.Article(**merged) #~~->Article:Model~~
class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    ~~APIOutgoingArticle:Model->DataObj:Library~~

    Data object for an article record being sent out of the system.

    The object is built from BASE_ARTICLE_STRUCT with OUTGOING_ARTICLE_PATCH
    applied on top, and is normally created from a ``models.Article`` via
    :meth:`from_model` or :meth:`from_model_by_id`.
    """
    def __init__(self, raw=None):
        """
        :param raw: the raw article data to construct the object from (optional)
        """
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        super(OutgoingArticleDO, self).__init__(
            raw,
            construct_silent_prune=True,
            expose_data=True,
            coerce_map=BASE_ARTICLE_COERCE,
            swagger_trans=BASE_ARTICLE_SWAGGER_TRANS
        )

    @classmethod
    def from_model(cls, am):
        """
        Build an outgoing data object from an article model.

        Works on a deep copy of ``am.data``; start_page and end_page are moved
        from bibjson into bibjson.journal, creating the journal object if the
        article does not have one so that the page values are not lost.

        :param am: the ``models.Article`` to convert
        :return: a new instance of this class
        """
        assert isinstance(am, models.Article) #~~->Article:Model~~
        dat = deepcopy(am.data)

        # Fix some inconsistencies with the model - start and end pages should be in bibjson.journal
        bibjson = dat["bibjson"]
        for page_field in ("start_page", "end_page"):
            if page_field in bibjson:
                page_value = bibjson.pop(page_field)
                # setdefault (not get) so a newly created journal dict is actually stored
                bibjson.setdefault("journal", {})[page_field] = page_value
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """
        Pull the article with the given id and build an outgoing data object from it.

        :param id_: the id passed to ``models.Article.pull``
        :return: a new instance of this class
        """
        a = models.Article.pull(id_)
        return cls.from_model(a)