Coverage for portality/api/current/data_objects/article.py: 97%

119 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-22 15:59 +0100

1from portality.api.current.data_objects.common import _check_for_script 

2from portality.lib import dataobj, swagger 

3from portality import models, regex 

4from portality.ui.messages import Messages 

5from portality.util import normalise_issn 

6from copy import deepcopy 

7from portality.regex import DOI,DOI_COMPILED 

8 

# Structure shared by the incoming and outgoing article data objects.
# The more specific structs defined below are layered on top of this one.
BASE_ARTICLE_STRUCT = {
    # These top-level fields are left in for ease of use by the caller, but
    # they need to be ignored on the conversion to the real object.
    "fields": {
        "id": {"coerce": "unicode"},
        "created_date": {"coerce": "utcdatetime"},
        "last_updated": {"coerce": "utcdatetime"},
        "es_type": {"coerce": "unicode"},
    },
    "objects": ["admin", "bibjson"],

    "structs": {

        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool", "get__default": False},
                "seal": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"},
            },
        },

        "bibjson": {
            "fields": {
                "title": {"coerce": "unicode"},
                "year": {"coerce": "unicode"},
                "month": {"coerce": "unicode"},
                "abstract": {"coerce": "unicode"},
            },
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {

                "identifier": {
                    "fields": {
                        "type": {"coerce": "unicode"},
                        "id": {"coerce": "unicode"},
                    },
                },
                # The base struct can't coerce url because we have bad data
                # https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
                # "link": {
                #     "fields": {
                #         "type": {"coerce": "link_type"},
                #         "url": {"coerce": "url"},
                #         "content_type": {"coerce": "link_content_type"}
                #     }
                # },
                "author": {
                    "fields": {
                        "name": {"coerce": "unicode"},
                        "affiliation": {"coerce": "unicode"},
                        "orcid_id": {"coerce": "unicode"},
                    },
                },
                "journal": {
                    "fields": {
                        "start_page": {"coerce": "unicode"},
                        "end_page": {"coerce": "unicode"},
                        "volume": {"coerce": "unicode"},
                        "number": {"coerce": "unicode"},
                        "publisher": {"coerce": "unicode"},
                        "title": {"coerce": "unicode"},
                        "country": {"coerce": "unicode"},
                    },
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"},
                    },
                },
                "subject": {
                    "fields": {
                        "scheme": {"coerce": "unicode"},
                        "term": {"coerce": "unicode"},
                        "code": {"coerce": "unicode"},
                    },
                },
            },
        },
    },
}

95 

# Extra constraints applied to articles arriving through the API: which
# fields are required, plus a coercing struct for the entries of bibjson.link.
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",  # author no longer required
                "identifier",  # One type of identifier is required
            ],
            "structs": {

                "identifier": {
                    "required": ["type", "id"],
                },

                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "url"},
                        "content_type": {"coerce": "link_content_type"},
                    },
                },

                "author": {
                    "required": ["name"],
                },
            },
        },
    },
}

128 

# Patch applied on top of BASE_ARTICLE_STRUCT for articles being sent out.
# It adds a struct for bibjson.link in which the url is coerced as plain
# "unicode", where the incoming struct uses the "url" coerce.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "unicode"},
                        "content_type": {"coerce": "link_content_type"},
                    },
                },
            },
        },
    },
}

145 

# Coerce map handed to both article data objects: a private copy of the
# DataObj defaults, extended with the link-related coerce names.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE.update({
    "link_type": dataobj.string_canonicalise(["fulltext"], allow_fail=False),
    "link_type_optional": dataobj.string_canonicalise(["fulltext"], allow_fail=True),
    "link_content_type": dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True),
})

150 

# Swagger translations handed to both article data objects: a private copy of
# the SwaggerSupport defaults, extended with an entry for each of the
# link-related coerce names.
# TODO extend swagger-ui with support for these formats and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS.update({
    "link_type": {"type": "string", "format": "link_type"},
    "link_type_optional": {"type": "string", "format": "link_type_optional"},
    "link_content_type": {"type": "string", "format": "link_content_type"},
})

155 

156 

class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    ~~APIIncomingArticle:Model->DataObj:Library~~

    Data object for an article record supplied by an API caller.

    Its structure is BASE_ARTICLE_STRUCT with INCOMING_ARTICLE_REQUIRED added
    on top. ``custom_validate`` applies the checks that the structs cannot
    express, and ``to_article_model`` converts the record to a models.Article.
    """
    def __init__(self, raw=None):
        """
        :param raw: the incoming article data, or None for a blank object
        """
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        super(IncomingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True, coerce_map=BASE_ARTICLE_COERCE, swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    def _trim_empty_strings(self):
        """
        Remove empty-string values from ``self.data["bibjson"]`` in place.

        Scalar fields whose value is "" are deleted. List entries are dropped
        when the entry itself (keywords) or its key field (author.name,
        subject.term, identifier.id, link.url) is "". An entry that does not
        carry the key field at all is kept.
        """
        bibjson = self.data.get("bibjson")
        if not bibjson:
            # nothing to trim
            return

        for field in ("title", "year", "month", "abstract"):
            if bibjson.get(field) == "":
                del bibjson[field]

        keyed_lists = (
            ("author", "name"),
            ("subject", "term"),
            ("identifier", "id"),
            ("link", "url"),
        )
        for list_name, key in keyed_lists:
            entries = bibjson.get(list_name)
            if entries:
                # .get() rather than [key]: an entry without the key is not
                # "empty" and must not raise. Slice assignment keeps the same
                # list object, so the trim stays in place.
                entries[:] = [entry for entry in entries if entry.get(key) != ""]

        keywords = bibjson.get("keywords")
        if keywords:
            keywords[:] = [keyword for keyword in keywords if keyword != ""]

    def custom_validate(self):
        """
        Apply the article checks that the structs cannot express.

        A blank object is not validated. Otherwise empty strings are trimmed
        from bibjson first, and then the following are checked:

        - no script tag is present in the data
        - at least one of P-ISSN / E-ISSN is given, and they differ once
          normalised (the normalised values are written back to the record)
        - every author ORCID iD present matches regex.ORCID_COMPILED
        - the first identifier of type "doi", if any, matches DOI_COMPILED

        :raises dataobj.ScriptTagFoundException: if a script tag is found
        :raises dataobj.DataStructureException: if any other check fails
        """
        # only attempt to validate if this is not a blank object
        if not self.data:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        #
        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if not identifiers:
            raise dataobj.DataStructureException("You must specify at least one of P-ISSN or E-ISSN in bibjson.identifier")

        # extract the p/e-issn identifier objects (the last of each type wins)
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException("You must specify at least one of P-ISSN or E-ISSN in bibjson.identifier")

        # normalise the ids
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None and pissn.id == eissn.id:
            raise dataobj.DataStructureException("P-ISSN and E-ISSN should be different")

        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid; author is optional, so guard against
        # the accessor handing back None
        for author in self.bibjson.author or []:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD format. Please use url format, eg: https://orcid.org/0001-1111-1111-1111")

        # only the first DOI identifier is checked
        for ident in identifiers:
            if ident.type == "doi":
                if not DOI_COMPILED.match(ident.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """
        Convert this incoming record to a models.Article.

        Works on a deep copy of ``self.data``: start_page / end_page are moved
        from bibjson.journal up to bibjson, and the fields that are not
        accepted via the API (admin.in_doaj, admin.seal, admin.upload_id and
        es_type) are removed.

        :param existing: optional existing article; when given, the incoming
            data is combined with ``existing.data`` via
            dataobj.merge_outside_construct before the Article is built
        :return: a models.Article
        """
        dat = deepcopy(self.data)
        bibjson = dat["bibjson"]

        # the model keeps the page range directly in bibjson, not in journal
        journal = bibjson.get("journal", {})
        for page_field in ("start_page", "end_page"):
            if page_field in journal:
                bibjson[page_field] = journal.pop(page_field)

        # clear out fields that we don't accept via the API
        admin = dat.get("admin", {})
        for field in ("in_doaj", "seal", "upload_id"):
            admin.pop(field, None)
        dat.pop("es_type", None)

        if existing is None:
            return models.Article(**dat)

        merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
        return models.Article(**merged) #~~->Article:Model~~

278 

279 

class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    ~~APIOutgoingArticle:Model->DataObj:Library~~

    Data object for an article record being returned to an API caller.

    Its structure is BASE_ARTICLE_STRUCT with OUTGOING_ARTICLE_PATCH added on
    top.
    """
    def __init__(self, raw=None):
        """
        :param raw: the article data, or None for a blank object
        """
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        super(OutgoingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True, coerce_map=BASE_ARTICLE_COERCE, swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    @classmethod
    def from_model(cls, am):
        """
        Build an outgoing data object from an article model.

        Works on a deep copy of ``am.data``, so the model is not modified.
        start_page / end_page are moved from bibjson into bibjson.journal,
        which is where the API struct defines them.

        :param am: a models.Article instance
        :return: a new instance of this class
        """
        assert isinstance(am, models.Article) #~~->Article:Model~~
        dat = deepcopy(am.data)
        bibjson = dat["bibjson"]

        # Fix some inconsistencies with the model - start and end pages should be in bibjson.journal
        for page_field in ("start_page", "end_page"):
            if page_field in bibjson:
                # setdefault creates the journal object when the model has
                # none, so the page value is not lost with a temporary dict
                bibjson.setdefault("journal", {})[page_field] = bibjson.pop(page_field)
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """
        Pull the article with the given id and convert it with from_model.

        :param id_: id passed to models.Article.pull
        :return: a new instance of this class
        """
        a = models.Article.pull(id_)
        return cls.from_model(a)