Coverage for portality / api / current / data_objects / article.py: 97%

119 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1from portality.api.current.data_objects.common import _check_for_script 

2from portality.lib import dataobj, swagger 

3from portality import models, regex, constants 

4from portality.ui.messages import Messages 

5from portality.util import normalise_issn 

6from copy import deepcopy 

7from portality.regex import DOI,DOI_COMPILED 

8 

# Base validation/coercion struct shared by both the incoming and outgoing
# article data objects; overlaid with INCOMING_ARTICLE_REQUIRED or
# OUTGOING_ARTICLE_PATCH respectively (see the DO classes below).
BASE_ARTICLE_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},                    # Note that we'll leave these in for ease of use by the
        "created_date": {"coerce": "utcdatetime"},      # caller, but we'll need to ignore them on the conversion
        "last_updated": {"coerce": "utcdatetime"},      # to the real object
        "es_type": {"coerce": "unicode"}
    },
    "objects": ["admin", "bibjson"],

    "structs": {

        "admin": {
            "fields": {
                # in_doaj defaults to False for records that don't carry it
                "in_doaj": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"}
            }
        },

        "bibjson": {
            "fields": {
                "title": {"coerce": "unicode"},
                "year": {"coerce": "unicode"},
                "month": {"coerce": "unicode"},
                "abstract": {"coerce": "unicode"}
            },
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {

                "identifier": {
                    "fields": {
                        # identifier type is lower-cased and restricted to the allowed set
                        # (see constants.ALLOWED_ARTICLE_IDENT_TYPES)
                        "type": {"coerce": "unicode_lower", "allowed_values": constants.ALLOWED_ARTICLE_IDENT_TYPES},
                        "id": {"coerce": "unicode"}
                    }
                },
                # The base struct can't coerce url because we have bad data https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
#                "link": {
#                    "fields": {
#                        "type": {"coerce": "link_type"},
#                        "url": {"coerce": "url"},
#                        "content_type": {"coerce": "link_content_type"}
#                    }
#                },
                "author": {
                    "fields": {
                        "name": {"coerce": "unicode"},
                        "affiliation": {"coerce": "unicode"},
                        "orcid_id": {"coerce": "unicode"}
                    }
                },
                "journal": {
                    "fields": {
                        "start_page": {"coerce": "unicode"},
                        "end_page": {"coerce": "unicode"},
                        "volume": {"coerce": "unicode"},
                        "number": {"coerce": "unicode"},
                        "publisher": {"coerce": "unicode"},
                        "title": {"coerce": "unicode"},
                        "country": {"coerce": "unicode"}
                    },
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"}
                    }
                },
                "subject": {
                    "fields": {
                        "scheme": {"coerce": "unicode"},
                        "term": {"coerce": "unicode"},
                        "code": {"coerce": "unicode"}
                    }
                },
            }
        }
    }
}

94 

# Overlay applied on top of BASE_ARTICLE_STRUCT for articles submitted to the
# API: declares which fields are mandatory and enforces strict link coercion
# (valid "url" values) on new data, even though the base struct cannot.
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",             # author no longer required
                "identifier"            # One type of identifier is required
            ],
            "structs": {

                "identifier": {
                    "required": ["type", "id"]
                },

                # incoming links must have a valid type and url; contrast with
                # OUTGOING_ARTICLE_PATCH, which relaxes url for legacy data
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "url"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                },

                "author": {
                    "required": ["name"]
                }
            }
        }
    }
}

127 

# Overlay applied on top of BASE_ARTICLE_STRUCT for articles served by the
# API.  Note url is coerced as plain "unicode" (not "url") so that legacy
# records with malformed urls can still be emitted.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "unicode"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                }
            }
        }
    }
}

144 

# Coercion map for article data objects: the dataobj defaults plus the
# link-specific canonicalisers.  "fulltext" is the only accepted link type;
# allow_fail controls whether an unrecognised value raises or passes through.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE["link_type"] = dataobj.string_canonicalise(["fulltext"], allow_fail=False)
BASE_ARTICLE_COERCE["link_type_optional"] = dataobj.string_canonicalise(["fulltext"], allow_fail=True)
BASE_ARTICLE_COERCE["link_content_type"] = dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True)

# Swagger translation table for the custom coercions above, so the API docs
# can describe these fields.
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS["link_type"] = {"type": "string", "format": "link_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_type_optional"] = {"type": "string", "format": "link_type_optional"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_content_type"] = {"type": "string", "format": "link_content_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end

154 

155 

class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object wrapping an article as submitted through the API.

    Validates/coerces the raw payload against BASE_ARTICLE_STRUCT +
    INCOMING_ARTICLE_REQUIRED and converts it to the core Article model.

    ~~APIIncomingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        # base struct first, then the incoming-specific required overlay
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        # expose_data=True allows attribute-style access (e.g. self.bibjson.identifier below);
        # silent prune drops unknown fields rather than raising
        super(IncomingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True,
                                                coerce_map=BASE_ARTICLE_COERCE,
                                                swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    def _trim_empty_strings(self):
        """Remove bibjson fields and list entries whose value is the empty string "".

        Operates in place on self.data["bibjson"]; called before validation so
        that empty strings behave the same as absent fields.
        """

        def _remove_element_if_empty_data(field):
            # drop a simple bibjson field when present but empty
            if field in bibjson and bibjson[field] == "":
                del bibjson[field]

        def _remove_from_the_list_if_empty_data(bibjson_element, field=None):
            # iterate backwards so removals don't disturb the indices still to visit
            if bibjson_element in bibjson:
                for i in range(len(bibjson[bibjson_element]) - 1, -1, -1):
                    ide = bibjson[bibjson_element][i]
                    if field is not None:
                        # list of objects: remove the entry when the named sub-field is empty/missing
                        if ide.get(field,"") == "":
                            bibjson[bibjson_element].remove(ide)
                    else:
                        # list of plain strings (e.g. keywords): remove empty entries
                        if ide == "":
                            bibjson[bibjson_element].remove(ide)

        # NOTE: assumes "bibjson" is present - callers only reach here for non-blank
        # objects where the struct requires bibjson
        bibjson = self.data["bibjson"]

        _remove_element_if_empty_data("title")
        _remove_element_if_empty_data("year")
        _remove_element_if_empty_data("month")
        _remove_element_if_empty_data("abstract")
        _remove_from_the_list_if_empty_data("author", "name")
        _remove_from_the_list_if_empty_data("subject", "term")
        _remove_from_the_list_if_empty_data("identifier", "id")
        _remove_from_the_list_if_empty_data("link", "url")
        _remove_from_the_list_if_empty_data("keywords")

    def custom_validate(self):
        """Run the article-specific validation rules beyond the struct checks.

        Checks, in order: no script tags anywhere in the data; at least one
        print or online ISSN (and, if both given, they must differ after
        normalisation); ORCID iDs match the expected URL form; the first DOI
        identifier matches the DOI pattern.

        :raises dataobj.ScriptTagFoundException: if a script tag is found
        :raises dataobj.DataStructureException: on any other validation failure
        """
        # only attempt to validate if this is not a blank object
        if len(list(self.data.keys())) == 0:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        #
        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if identifiers is None or len(identifiers) == 0:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # extract the p/e-issn identifier objects (last occurrence of each type wins)
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # normalise the ids (in place, so the stored data is normalised too)
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None:
            if pissn.id == eissn.id:
                raise dataobj.DataStructureException("Print ISSN and online ISSN should be different")


        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid
        for author in self.bibjson.author:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD. Please enter your ORCID iD structured as: https://orcid.org/0000-0000-0000-0000. URLs must start with https.")

        # validate the DOI format; the break means only the first doi-typed
        # identifier is checked
        for x in self.bibjson.identifier:
            if x.type == "doi":
                if not DOI_COMPILED.match(x.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """Convert this API data object to a core Article model.

        Moves start_page/end_page out of the journal sub-object to the top
        level of bibjson (the inverse of OutgoingArticleDO.from_model), and
        strips fields the API does not accept (admin.in_doaj, admin.upload_id,
        admin.seal, es_type).

        :param existing: an existing Article model to merge this data into, or
            None to build a fresh one
        :return: a models.Article
        """
        dat = deepcopy(self.data)
        # the model keeps pagination at the top level of bibjson, not inside journal
        if "journal" in dat["bibjson"] and "start_page" in dat["bibjson"].get("journal", {}):
            dat["bibjson"]["start_page"] = dat["bibjson"]["journal"]["start_page"]
            del dat["bibjson"]["journal"]["start_page"]
        if "journal" in dat["bibjson"] and "end_page" in dat["bibjson"].get("journal", {}):
            dat["bibjson"]["end_page"] = dat["bibjson"]["journal"]["end_page"]
            del dat["bibjson"]["journal"]["end_page"]

        # clear out fields that we don't accept via the API
        if "admin" in dat and "in_doaj" in dat["admin"]:
            del dat["admin"]["in_doaj"]
        if "admin" in dat and "upload_id" in dat["admin"]:
            del dat["admin"]["upload_id"]
        if "es_type" in dat:
            del dat["es_type"]

        # the seal has been removed, but in case external users are still providing it, keeping
        # this data cleanup
        if "admin" in dat and "seal" in dat["admin"]:
            del dat["admin"]["seal"]

        if existing is None:
            return models.Article(**dat)
        else:
            # overlay the incoming data onto the existing record's data
            merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
            return models.Article(**merged)     #~~->Article:Model~~

282 

283 

class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object wrapping an article as served by the API.

    Validates/coerces against BASE_ARTICLE_STRUCT + OUTGOING_ARTICLE_PATCH
    (which relaxes url coercion for legacy data).

    ~~APIOutgoingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        # base struct first, then the outgoing-specific overlay
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        super(OutgoingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True, coerce_map=BASE_ARTICLE_COERCE, swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    @classmethod
    def from_model(cls, am):
        """Build an outgoing article data object from a core Article model.

        Fixes an inconsistency with the model: start_page/end_page live at the
        top level of bibjson on the model, but the API struct expects them
        inside the journal sub-object (inverse of
        IncomingArticleDO.to_article_model).

        :param am: a models.Article instance
        :return: an OutgoingArticleDO built from the model's data
        """
        assert isinstance(am, models.Article)  #~~->Article:Model~~
        dat = deepcopy(am.data)
        # Use setdefault so a journal sub-object is created when absent.
        # The previous .get("journal", {}) wrote into a throwaway dict when
        # "journal" was missing, silently losing the page numbers before they
        # were deleted from bibjson below.
        if "start_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["start_page"] = dat["bibjson"]["start_page"]
            del dat["bibjson"]["start_page"]
        if "end_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["end_page"] = dat["bibjson"]["end_page"]
            del dat["bibjson"]["end_page"]
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """Fetch the article with the given id and convert it to the outgoing form.

        :param id_: the article id to pull
        :return: an OutgoingArticleDO for the pulled article
        """
        a = models.Article.pull(id_)
        return cls.from_model(a)