# Coverage for portality/api/current/data_objects/article.py: 97% (119 statements)
# Report generated by coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1from portality.api.current.data_objects.common import _check_for_script
2from portality.lib import dataobj, swagger
3from portality import models, regex, constants
4from portality.ui.messages import Messages
5from portality.util import normalise_issn
6from copy import deepcopy
7from portality.regex import DOI,DOI_COMPILED
# Shared dataobj validation/coercion struct for articles, used by both the
# incoming (write) and outgoing (read) API data objects defined below.
BASE_ARTICLE_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},                   # Note that we'll leave these in for ease of use by the
        "created_date": {"coerce": "utcdatetime"},     # caller, but we'll need to ignore them on the conversion
        "last_updated": {"coerce": "utcdatetime"},     # to the real object
        "es_type": {"coerce": "unicode"}
    },

    "objects": ["admin", "bibjson"],

    "structs": {
        # admin: administrative/ingest metadata
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"}
            }
        },

        # bibjson: the bibliographic payload of the article
        "bibjson": {
            "fields": {
                "title": {"coerce": "unicode"},
                "year": {"coerce": "unicode"},
                "month": {"coerce": "unicode"},
                "abstract": {"coerce": "unicode"}
            },
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {
                "identifier": {
                    "fields": {
                        "type": {"coerce": "unicode_lower", "allowed_values": constants.ALLOWED_ARTICLE_IDENT_TYPES},
                        "id": {"coerce": "unicode"}
                    }
                },
                # The base struct can't coerce url because we have bad data https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
#                "link": {
#                    "fields": {
#                        "type": {"coerce": "link_type"},
#                        "url": {"coerce": "url"},
#                        "content_type": {"coerce": "link_content_type"}
#                    }
#                },
                "author": {
                    "fields": {
                        "name": {"coerce": "unicode"},
                        "affiliation": {"coerce": "unicode"},
                        "orcid_id": {"coerce": "unicode"}
                    }
                },
                # journal: details of the journal the article belongs to
                "journal": {
                    "fields": {
                        "start_page": {"coerce": "unicode"},
                        "end_page": {"coerce": "unicode"},
                        "volume": {"coerce": "unicode"},
                        "number": {"coerce": "unicode"},
                        "publisher": {"coerce": "unicode"},
                        "title": {"coerce": "unicode"},
                        "country": {"coerce": "unicode"}
                    },
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"}
                    }
                },
                "subject": {
                    "fields": {
                        "scheme": {"coerce": "unicode"},
                        "term": {"coerce": "unicode"},
                        "code": {"coerce": "unicode"}
                    }
                },
            }
        }
    }
}
# Additional struct layered on top of BASE_ARTICLE_STRUCT for incoming data:
# declares required fields, and coerces link type/url (which the base struct
# cannot do because of legacy bad data).
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",             # author no longer required
                "identifier"            # One type of identifier is required
            ],
            "structs": {
                "identifier": {
                    "required": ["type", "id"]
                },

                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "url"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                },

                "author": {
                    "required": ["name"]
                }
            }
        }
    }
}
# Additional struct layered on top of BASE_ARTICLE_STRUCT for outgoing data.
# Note the url is coerced as plain unicode here (not "url") so that legacy
# records with malformed urls can still be serialised out.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "unicode"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                }
            }
        }
    }
}
# Coercion map for article fields, extending the DataObj defaults.
# "fulltext" is the only recognised link type; link_type rejects other values,
# NOTE(review): allow_fail=True presumably lets unrecognised values pass through
# unchanged — confirm against dataobj.string_canonicalise.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE["link_type"] = dataobj.string_canonicalise(["fulltext"], allow_fail=False)
BASE_ARTICLE_COERCE["link_type_optional"] = dataobj.string_canonicalise(["fulltext"], allow_fail=True)
BASE_ARTICLE_COERCE["link_content_type"] = dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True)
# Swagger type translations for the custom coercions above, extending the
# SwaggerSupport defaults so the API docs can describe these fields.
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS["link_type"] = {"type": "string", "format": "link_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_type_optional"] = {"type": "string", "format": "link_type_optional"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_content_type"] = {"type": "string", "format": "link_content_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for articles submitted through the API: prunes unknown fields,
    validates the payload and converts it to the core Article model.

    ~~APIIncomingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        super(IncomingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True,
                                                coerce_map=BASE_ARTICLE_COERCE,
                                                swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    def _trim_empty_strings(self):
        """Strip bibjson fields and list entries whose (key) value is the empty string."""

        def _remove_element_if_empty_data(field):
            # drop a simple field that is present but empty
            if field in bibjson and bibjson[field] == "":
                del bibjson[field]

        def _remove_from_the_list_if_empty_data(bibjson_element, field=None):
            # iterate backwards so deletions don't shift the indices still to visit
            if bibjson_element in bibjson:
                for i in range(len(bibjson[bibjson_element]) - 1, -1, -1):
                    ide = bibjson[bibjson_element][i]
                    if field is not None:
                        if ide.get(field, "") == "":
                            # Bug fix: previously used list.remove(ide), which deletes the
                            # FIRST equal element rather than the one at this index; with
                            # duplicate entries that removed the wrong position and
                            # reordered the survivors. Delete by index instead.
                            del bibjson[bibjson_element][i]
                    else:
                        if ide == "":
                            del bibjson[bibjson_element][i]

        bibjson = self.data["bibjson"]

        _remove_element_if_empty_data("title")
        _remove_element_if_empty_data("year")
        _remove_element_if_empty_data("month")
        _remove_element_if_empty_data("abstract")
        _remove_from_the_list_if_empty_data("author", "name")
        _remove_from_the_list_if_empty_data("subject", "term")
        _remove_from_the_list_if_empty_data("identifier", "id")
        _remove_from_the_list_if_empty_data("link", "url")
        _remove_from_the_list_if_empty_data("keywords")

    def custom_validate(self):
        """Validate beyond the struct rules: ISSNs, script tags, ORCID and DOI formats.

        :raises dataobj.ScriptTagFoundException: if any value contains a script tag
        :raises dataobj.DataStructureException: on missing/duplicate ISSNs, bad ORCID or bad DOI
        """
        # only attempt to validate if this is not a blank object
        if len(list(self.data.keys())) == 0:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        #
        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if identifiers is None or len(identifiers) == 0:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # extract the p/e-issn identifier objects
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException("You must specify at least one Print ISSN or online ISSN in bibjson.identifier")

        # normalise the ids
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None:
            if pissn.id == eissn.id:
                raise dataobj.DataStructureException("Print ISSN and online ISSN should be different")

        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid
        for author in self.bibjson.author:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD. Please enter your ORCID iD structured as: https://orcid.org/0000-0000-0000-0000. URLs must start with https.")

        # only the first DOI identifier found is format-checked (as before)
        for x in self.bibjson.identifier:
            if x.type == "doi":
                if not DOI_COMPILED.match(x.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """Convert this API data object to a core Article model.

        :param existing: an existing Article to merge this data into (API-managed
            fields below are stripped first); if None a fresh Article is built
        :return: a models.Article
        """
        dat = deepcopy(self.data)

        # start/end pages arrive under bibjson.journal but live at bibjson level
        # on the core model; journal is an actual dict here when present, so the
        # pops mutate the real object (an absent journal yields an empty dict and
        # both conditions are simply false)
        journal = dat["bibjson"].get("journal", {})
        if "start_page" in journal:
            dat["bibjson"]["start_page"] = journal.pop("start_page")
        if "end_page" in journal:
            dat["bibjson"]["end_page"] = journal.pop("end_page")

        # clear out fields that we don't accept via the API
        if "admin" in dat and "in_doaj" in dat["admin"]:
            del dat["admin"]["in_doaj"]
        if "admin" in dat and "upload_id" in dat["admin"]:
            del dat["admin"]["upload_id"]
        if "es_type" in dat:
            del dat["es_type"]

        # the seal has been removed, but in case external users are still providing it, keeping
        # this data cleanup
        if "admin" in dat and "seal" in dat["admin"]:
            del dat["admin"]["seal"]

        if existing is None:
            return models.Article(**dat)
        else:
            merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
            return models.Article(**merged)  #~~->Article:Model~~
class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for articles served out of the API, built from the core model.

    ~~APIOutgoingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        super(OutgoingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True,
                                                coerce_map=BASE_ARTICLE_COERCE,
                                                swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    @classmethod
    def from_model(cls, am):
        """Build an outgoing data object from an Article model instance.

        Start/end pages live at bibjson level on the model but belong under
        bibjson.journal in the API representation.

        :param am: a models.Article
        :return: an OutgoingArticleDO
        """
        assert isinstance(am, models.Article)  #~~->Article:Model~~
        dat = deepcopy(am.data)
        # Bug fix: dat["bibjson"].get("journal", {})["start_page"] = ... wrote into a
        # throwaway dict whenever "journal" was absent, and the following delete then
        # lost the page number entirely. setdefault writes into the real (possibly
        # newly created) journal object instead.
        if "start_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["start_page"] = dat["bibjson"]["start_page"]
            del dat["bibjson"]["start_page"]
        if "end_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["end_page"] = dat["bibjson"]["end_page"]
            del dat["bibjson"]["end_page"]
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """Look up an Article by id and build an outgoing data object from it.

        :param id_: the article id to pull
        :return: an OutgoingArticleDO
        """
        a = models.Article.pull(id_)
        # NOTE(review): if the pull finds nothing, from_model's isinstance assert
        # fires — confirm callers handle that (or never pass unknown ids)
        return cls.from_model(a)