# Coverage for portality/api/current/data_objects/article.py: 97% (119 statements)
# Report generated by coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1from portality.api.current.data_objects.common import _check_for_script
2from portality.lib import dataobj, swagger
3from portality import models, regex, constants
4from portality.ui.messages import Messages
5from portality.util import normalise_issn
6from copy import deepcopy
7from portality.regex import DOI,DOI_COMPILED
# Shared dataobj validation/coercion struct for articles, used by both the
# incoming (write) and outgoing (read) API data objects defined below.
BASE_ARTICLE_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},                   # Note that we'll leave these in for ease of use by the
        "created_date": {"coerce": "utcdatetime"},     # caller, but we'll need to ignore them on the conversion
        "last_updated": {"coerce": "utcdatetime"},     # to the real object
        "es_type": {"coerce": "unicode"}
    },

    "objects": ["admin", "bibjson"],

    "structs": {
        # admin: administrative/ingest metadata
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"}
            }
        },

        # bibjson: the bibliographic payload of the article
        "bibjson": {
            "fields": {
                "title": {"coerce": "unicode"},
                "year": {"coerce": "unicode"},
                "month": {"coerce": "unicode"},
                "abstract": {"coerce": "unicode"}
            },
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {
                "identifier": {
                    "fields": {
                        "type": {"coerce": "unicode_lower", "allowed_values": constants.ALLOWED_ARTICLE_IDENT_TYPES},
                        "id": {"coerce": "unicode"}
                    }
                },
                # The base struct can't coerce url because we have bad data https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
#                "link": {
#                    "fields": {
#                        "type": {"coerce": "link_type"},
#                        "url": {"coerce": "url"},
#                        "content_type": {"coerce": "link_content_type"}
#                    }
#                },
                "author": {
                    "fields": {
                        "name": {"coerce": "unicode"},
                        "affiliation": {"coerce": "unicode"},
                        "orcid_id": {"coerce": "unicode"}
                    }
                },
                # journal: details of the journal the article belongs to
                "journal": {
                    "fields": {
                        "start_page": {"coerce": "unicode"},
                        "end_page": {"coerce": "unicode"},
                        "volume": {"coerce": "unicode"},
                        "number": {"coerce": "unicode"},
                        "publisher": {"coerce": "unicode"},
                        "title": {"coerce": "unicode"},
                        "country": {"coerce": "unicode"}
                    },
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"}
                    }
                },
                "subject": {
                    "fields": {
                        "scheme": {"coerce": "unicode"},
                        "term": {"coerce": "unicode"},
                        "code": {"coerce": "unicode"}
                    }
                },
            }
        }
    }
}
# Additional struct layered on top of BASE_ARTICLE_STRUCT for incoming data:
# declares required fields, and coerces link type/url (which the base struct
# cannot do because of legacy bad data).
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",             # author no longer required
                "identifier"            # One type of identifier is required
            ],
            "structs": {
                "identifier": {
                    "required": ["type", "id"]
                },

                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "url"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                },

                "author": {
                    "required": ["name"]
                }
            }
        }
    }
}
# Additional struct layered on top of BASE_ARTICLE_STRUCT for outgoing data.
# Note the url is coerced as plain unicode here (not "url") so that legacy
# records with malformed urls can still be serialised out.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "unicode"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                }
            }
        }
    }
}
# Coercion map for article fields, extending the DataObj defaults.
# "fulltext" is the only recognised link type; link_type rejects other values,
# NOTE(review): allow_fail=True presumably lets unrecognised values pass through
# unchanged — confirm against dataobj.string_canonicalise.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE["link_type"] = dataobj.string_canonicalise(["fulltext"], allow_fail=False)
BASE_ARTICLE_COERCE["link_type_optional"] = dataobj.string_canonicalise(["fulltext"], allow_fail=True)
BASE_ARTICLE_COERCE["link_content_type"] = dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True)
# Swagger type translations for the custom coercions above, extending the
# SwaggerSupport defaults so the API docs can describe these fields.
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS["link_type"] = {"type": "string", "format": "link_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_type_optional"] = {"type": "string", "format": "link_type_optional"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_content_type"] = {"type": "string", "format": "link_content_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for articles submitted through the API: prunes unknown fields,
    validates the payload and converts it to the core Article model.

    ~~APIIncomingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        super(IncomingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True,
                                                coerce_map=BASE_ARTICLE_COERCE,
                                                swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    def _trim_empty_strings(self):
        """Strip bibjson fields and list entries whose (key) value is the empty string."""

        def _remove_element_if_empty_data(field):
            # drop a simple field that is present but empty
            if field in bibjson and bibjson[field] == "":
                del bibjson[field]

        def _remove_from_the_list_if_empty_data(bibjson_element, field=None):
            # iterate backwards so deletions don't shift the indices still to visit
            if bibjson_element in bibjson:
                for i in range(len(bibjson[bibjson_element]) - 1, -1, -1):
                    ide = bibjson[bibjson_element][i]
                    if field is not None:
                        if ide.get(field, "") == "":
                            # Bug fix: previously used list.remove(ide), which deletes the
                            # FIRST equal element rather than the one at this index; with
                            # duplicate entries that removed the wrong position and
                            # reordered the survivors. Delete by index instead.
                            del bibjson[bibjson_element][i]
                    else:
                        if ide == "":
                            del bibjson[bibjson_element][i]

        bibjson = self.data["bibjson"]

        _remove_element_if_empty_data("title")
        _remove_element_if_empty_data("year")
        _remove_element_if_empty_data("month")
        _remove_element_if_empty_data("abstract")
        _remove_from_the_list_if_empty_data("author", "name")
        _remove_from_the_list_if_empty_data("subject", "term")
        _remove_from_the_list_if_empty_data("identifier", "id")
        _remove_from_the_list_if_empty_data("link", "url")
        _remove_from_the_list_if_empty_data("keywords")

    def custom_validate(self):
        """Validate beyond the struct rules: ISSNs, script tags, ORCID and DOI formats.

        :raises dataobj.ScriptTagFoundException: if any value contains a script tag
        :raises dataobj.DataStructureException: on missing/duplicate ISSNs, bad ORCID or bad DOI
        """
        # only attempt to validate if this is not a blank object
        if len(list(self.data.keys())) == 0:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        #
        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if identifiers is None or len(identifiers) == 0:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # extract the p/e-issn identifier objects
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # normalise the ids
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None:
            if pissn.id == eissn.id:
                raise dataobj.DataStructureException("Print ISSN and online ISSN should be different")

        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid
        for author in self.bibjson.author:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD. Please enter your ORCID iD structured as: https://orcid.org/0000-0000-0000-0000. URLs must start with https.")

        # only the first DOI identifier found is format-checked (as before)
        for x in self.bibjson.identifier:
            if x.type == "doi":
                if not DOI_COMPILED.match(x.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """Convert this API data object to a core Article model.

        :param existing: an existing Article to merge this data into (API-managed
            fields below are stripped first); if None a fresh Article is built
        :return: a models.Article
        """
        dat = deepcopy(self.data)

        # start/end pages arrive under bibjson.journal but live at bibjson level
        # on the core model; journal is an actual dict here when present, so the
        # pops mutate the real object (an absent journal yields an empty dict and
        # both conditions are simply false)
        journal = dat["bibjson"].get("journal", {})
        if "start_page" in journal:
            dat["bibjson"]["start_page"] = journal.pop("start_page")
        if "end_page" in journal:
            dat["bibjson"]["end_page"] = journal.pop("end_page")

        # clear out fields that we don't accept via the API
        if "admin" in dat and "in_doaj" in dat["admin"]:
            del dat["admin"]["in_doaj"]
        if "admin" in dat and "upload_id" in dat["admin"]:
            del dat["admin"]["upload_id"]
        if "es_type" in dat:
            del dat["es_type"]

        # the seal has been removed, but in case external users are still providing it, keeping
        # this data cleanup
        if "admin" in dat and "seal" in dat["admin"]:
            del dat["admin"]["seal"]

        if existing is None:
            return models.Article(**dat)
        else:
            merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
            return models.Article(**merged)  #~~->Article:Model~~
class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for articles served out of the API, built from the core model.

    ~~APIOutgoingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        super(OutgoingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True,
                                                coerce_map=BASE_ARTICLE_COERCE,
                                                swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    @classmethod
    def from_model(cls, am):
        """Build an outgoing data object from an Article model instance.

        Start/end pages live at bibjson level on the model but belong under
        bibjson.journal in the API representation.

        :param am: a models.Article
        :return: an OutgoingArticleDO
        """
        assert isinstance(am, models.Article)  #~~->Article:Model~~
        dat = deepcopy(am.data)
        # Bug fix: dat["bibjson"].get("journal", {})["start_page"] = ... wrote into a
        # throwaway dict whenever "journal" was absent, and the following delete then
        # lost the page number entirely. setdefault writes into the real (possibly
        # newly created) journal object instead.
        if "start_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["start_page"] = dat["bibjson"]["start_page"]
            del dat["bibjson"]["start_page"]
        if "end_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["end_page"] = dat["bibjson"]["end_page"]
            del dat["bibjson"]["end_page"]
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """Look up an Article by id and build an outgoing data object from it.

        :param id_: the article id to pull
        :return: an OutgoingArticleDO
        """
        a = models.Article.pull(id_)
        # NOTE(review): if the pull finds nothing, from_model's isinstance assert
        # fires — confirm callers handle that (or never pass unknown ids)
        return cls.from_model(a)