# Coverage for portality/api/current/data_objects/article.py: 29% (119 statements)
# Report generated by coverage.py v6.4.2, created at 2022-07-20 16:12 +0100
1from portality.api.current.data_objects.common import _check_for_script
2from portality.lib import dataobj, swagger
3from portality import models, regex
4from portality.ui.messages import Messages
5from portality.util import normalise_issn
6from copy import deepcopy
7from portality.regex import DOI,DOI_COMPILED
# DataObj struct shared by both the incoming and outgoing article data objects
# (each one layers its own overlay on top via _add_struct).
BASE_ARTICLE_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},                # Note that we'll leave these in for ease of use by the
        "created_date": {"coerce": "utcdatetime"},  # caller, but we'll need to ignore them on the conversion
        "last_updated": {"coerce": "utcdatetime"},  # to the real object
        "es_type": {"coerce": "unicode"}
    },
    "objects": ["admin", "bibjson"],

    "structs": {

        # admin: flags and ids managed by DOAJ rather than the publisher
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool", "get__default": False},
                "seal": {"coerce": "bool", "get__default": False},
                "publisher_record_id": {"coerce": "unicode"},
                "upload_id": {"coerce": "unicode"}
            }
        },

        # bibjson: the bibliographic record itself
        "bibjson": {
            "fields": {
                "title": {"coerce": "unicode"},
                "year": {"coerce": "unicode"},
                "month": {"coerce": "unicode"},
                "abstract": {"coerce": "unicode"}
            },
            "lists": {
                "identifier": {"contains": "object"},
                "link": {"contains": "object"},
                "author": {"contains": "object"},
                "keywords": {"coerce": "unicode", "contains": "field"},
                "subject": {"contains": "object"},
            },
            "objects": [
                "journal",
            ],
            "structs": {

                # identifier types include pissn/eissn/doi (see custom_validate below)
                "identifier": {
                    "fields": {
                        "type": {"coerce": "unicode"},
                        "id": {"coerce": "unicode"}
                    }
                },

                # The base struct can't coerce url because we have bad data https://github.com/DOAJ/doajPM/issues/2038
                # Leaving this here in case we want to reinstate in the future
#                "link": {
#                    "fields": {
#                        "type": {"coerce": "link_type"},
#                        "url": {"coerce": "url"},
#                        "content_type": {"coerce": "link_content_type"}
#                    }
#                },

                "author": {
                    "fields": {
                        "name": {"coerce": "unicode"},
                        "affiliation": {"coerce": "unicode"},
                        "orcid_id": {"coerce": "unicode"}
                    }
                },

                # journal: metadata of the journal the article belongs to; note that
                # start_page/end_page are relocated to/from the top of bibjson when
                # converting between this DO and the Article model
                "journal": {
                    "fields": {
                        "start_page": {"coerce": "unicode"},
                        "end_page": {"coerce": "unicode"},
                        "volume": {"coerce": "unicode"},
                        "number": {"coerce": "unicode"},
                        "publisher": {"coerce": "unicode"},
                        "title": {"coerce": "unicode"},
                        "country": {"coerce": "unicode"}
                    },
                    "lists": {
                        "language": {"coerce": "unicode", "contains": "field"}
                    }
                },

                "subject": {
                    "fields": {
                        "scheme": {"coerce": "unicode"},
                        "term": {"coerce": "unicode"},
                        "code": {"coerce": "unicode"}
                    }
                },
            }
        }
    }
}
# Overlay applied to BASE_ARTICLE_STRUCT for articles arriving via the API:
# declares the mandatory fields and enables stricter link coercion.
INCOMING_ARTICLE_REQUIRED = {
    "required": ["bibjson"],

    "structs": {
        "bibjson": {
            "required": [
                "title",
                # "author",       # author no longer required
                "identifier"      # One type of identifier is required
            ],
            "structs": {

                "identifier": {
                    "required": ["type", "id"]
                },

                # NOTE(review): unlike the base struct, incoming links do coerce
                # "url" — presumably safe because new submissions are not affected
                # by the legacy bad-data issue noted on BASE_ARTICLE_STRUCT; confirm.
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "url"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                },

                "author": {
                    "required": ["name"]
                }
            }
        }
    }
}
# Overlay applied to BASE_ARTICLE_STRUCT for articles leaving via the API:
# link urls are emitted as plain unicode (not coerced as "url") because stored
# records may contain bad urls — see the note on BASE_ARTICLE_STRUCT.
OUTGOING_ARTICLE_PATCH = {
    "structs": {
        "bibjson": {
            "structs": {
                "link": {
                    "required": ["type", "url"],
                    "fields": {
                        "type": {"coerce": "link_type"},
                        "url": {"coerce": "unicode"},
                        "content_type": {"coerce": "link_content_type"}
                    }
                }
            }
        }
    }
}
# Coercion map for the article data objects: the dataobj defaults plus
# canonicalisers for link type and content type.
# NOTE(review): assumed from the argument name that allow_fail=True lets
# unrecognised values pass through while allow_fail=False rejects them —
# confirm against dataobj.string_canonicalise.
BASE_ARTICLE_COERCE = deepcopy(dataobj.DataObj.DEFAULT_COERCE)
BASE_ARTICLE_COERCE["link_type"] = dataobj.string_canonicalise(["fulltext"], allow_fail=False)
BASE_ARTICLE_COERCE["link_type_optional"] = dataobj.string_canonicalise(["fulltext"], allow_fail=True)
BASE_ARTICLE_COERCE["link_content_type"] = dataobj.string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True)
# Swagger type translations for the custom coerce names above, so the API docs
# can render the article schema.
BASE_ARTICLE_SWAGGER_TRANS = deepcopy(swagger.SwaggerSupport.DEFAULT_SWAGGER_TRANS)
BASE_ARTICLE_SWAGGER_TRANS["link_type"] = {"type": "string", "format": "link_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_type_optional"] = {"type": "string", "format": "link_type_optional"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
BASE_ARTICLE_SWAGGER_TRANS["link_content_type"] = {"type": "string", "format": "link_content_type"}  # TODO extend swagger-ui with support for this format and let it produce example values etc. on the front-end
class IncomingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for an article submitted through the API.

    Combines the shared BASE_ARTICLE_STRUCT with the incoming-only
    requirements in INCOMING_ARTICLE_REQUIRED, and adds custom validation
    for ISSNs, ORCID iDs, DOIs and script-tag injection.

    ~~APIIncomingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(INCOMING_ARTICLE_REQUIRED)
        # construct_silent_prune: unrecognised fields in raw are dropped rather than raising
        super(IncomingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True, coerce_map=BASE_ARTICLE_COERCE, swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    def _trim_empty_strings(self):
        """Remove bibjson fields and list entries whose values are empty strings."""

        def _remove_element_if_empty_data(field):
            # drop a scalar bibjson field whose value is ""
            if field in bibjson and bibjson[field] == "":
                del bibjson[field]

        def _remove_from_the_list_if_empty_data(bibjson_element, field=None):
            # With `field`: drop objects whose `field` value is "" (e.g. authors with
            # no name).  Without `field`: drop plain "" entries (e.g. empty keywords).
            # Iterates backwards so removals don't shift unvisited indices.
            # NOTE(review): list.remove() deletes the first *equal* entry rather than
            # index i — equivalent here, since only empty entries are being removed.
            if bibjson_element in bibjson:
                for i in range(len(bibjson[bibjson_element]) - 1, -1, -1):
                    ide = bibjson[bibjson_element][i]
                    if field is not None:
                        if ide[field] == "":
                            bibjson[bibjson_element].remove(ide)
                    else:
                        if ide == "":
                            bibjson[bibjson_element].remove(ide)

        bibjson = self.data["bibjson"]

        _remove_element_if_empty_data("title")
        _remove_element_if_empty_data("year")
        _remove_element_if_empty_data("month")
        _remove_element_if_empty_data("abstract")
        _remove_from_the_list_if_empty_data("author", "name")
        _remove_from_the_list_if_empty_data("subject", "term")
        _remove_from_the_list_if_empty_data("identifier", "id")
        _remove_from_the_list_if_empty_data("link", "url")
        _remove_from_the_list_if_empty_data("keywords")

    def custom_validate(self):
        """
        Validate constraints the struct alone can't express.

        :raises dataobj.ScriptTagFoundException: if a script tag is found in the data
        :raises dataobj.DataStructureException: on missing or duplicate ISSNs,
            bad ORCID iD format, or bad DOI format
        """
        # only attempt to validate if this is not a blank object
        if len(list(self.data.keys())) == 0:
            return

        # remove all fields with empty data ""
        self._trim_empty_strings()

        if _check_for_script(self.data):
            raise dataobj.ScriptTagFoundException(Messages.EXCEPTION_SCRIPT_TAG_FOUND)

        # at least one of print issn / e-issn, and they must be different
        #
        # check that there are identifiers at all
        identifiers = self.bibjson.identifier
        if identifiers is None or len(identifiers) == 0:
            raise dataobj.DataStructureException("You must specify at least one of P-ISSN or E-ISSN in bibjson.identifier")

        # extract the p/e-issn identifier objects
        pissn = None
        eissn = None
        for ident in identifiers:
            if ident.type == "pissn":
                pissn = ident
            elif ident.type == "eissn":
                eissn = ident

        # check that at least one of them appears
        if pissn is None and eissn is None:
            raise dataobj.DataStructureException("You must specify at least one of P-ISSN or E-ISSN in bibjson.identifier")

        # normalise the ids
        if pissn is not None:
            pissn.id = normalise_issn(pissn.id)
        if eissn is not None:
            eissn.id = normalise_issn(eissn.id)

        # check they are not the same
        if pissn is not None and eissn is not None:
            if pissn.id == eissn.id:
                raise dataobj.DataStructureException("P-ISSN and E-ISSN should be different")

        # check removed: https://github.com/DOAJ/doajPM/issues/2950
        # if len(self.bibjson.keywords) > 6:
        #     raise dataobj.DataStructureException("bibjson.keywords may only contain a maximum of 6 keywords")

        # check if orcid id is valid
        for author in self.bibjson.author:
            if author.orcid_id is not None and regex.ORCID_COMPILED.match(author.orcid_id) is None:
                raise dataobj.DataStructureException("Invalid ORCID iD format. Please use url format, eg: https://orcid.org/0001-1111-1111-1111")

        # only the first identifier of type "doi" is checked (loop breaks after it)
        for x in self.bibjson.identifier:
            if x.type == "doi":
                if not DOI_COMPILED.match(x.id):
                    raise dataobj.DataStructureException(
                        "Invalid DOI format.")
                break

    def to_article_model(self, existing=None):
        """
        Convert this data object to a models.Article.

        :param existing: an existing Article to merge this data over; when None
            a brand new Article is constructed from this data alone
        :return: a models.Article instance
        """
        dat = deepcopy(self.data)
        # the model keeps start/end page at the top of bibjson, not under journal
        if "journal" in dat["bibjson"] and "start_page" in dat["bibjson"].get("journal", {}):
            dat["bibjson"]["start_page"] = dat["bibjson"]["journal"]["start_page"]
            del dat["bibjson"]["journal"]["start_page"]
        if "journal" in dat["bibjson"] and "end_page" in dat["bibjson"].get("journal", {}):
            dat["bibjson"]["end_page"] = dat["bibjson"]["journal"]["end_page"]
            del dat["bibjson"]["journal"]["end_page"]

        # clear out fields that we don't accept via the API
        if "admin" in dat and "in_doaj" in dat["admin"]:
            del dat["admin"]["in_doaj"]
        if "admin" in dat and "seal" in dat["admin"]:
            del dat["admin"]["seal"]
        if "admin" in dat and "upload_id" in dat["admin"]:
            del dat["admin"]["upload_id"]
        if "es_type" in dat:
            del dat["es_type"]

        if existing is None:
            return models.Article(**dat)
        else:
            merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
            return models.Article(**merged)  #~~->Article:Model~~
class OutgoingArticleDO(dataobj.DataObj, swagger.SwaggerSupport):
    """
    Data object for an article being returned by the API.

    Combines the shared BASE_ARTICLE_STRUCT with the outgoing-only overlay
    OUTGOING_ARTICLE_PATCH (which relaxes link url coercion for legacy data).

    ~~APIOutgoingArticle:Model->DataObj:Library~~
    """
    def __init__(self, raw=None):
        self._add_struct(BASE_ARTICLE_STRUCT)
        self._add_struct(OUTGOING_ARTICLE_PATCH)
        # construct_silent_prune: fields in raw that the struct doesn't know are dropped
        super(OutgoingArticleDO, self).__init__(raw, construct_silent_prune=True, expose_data=True, coerce_map=BASE_ARTICLE_COERCE, swagger_trans=BASE_ARTICLE_SWAGGER_TRANS)

    @classmethod
    def from_model(cls, am):
        """
        Build an outgoing data object from an Article model.

        :param am: a models.Article instance
        :return: an OutgoingArticleDO wrapping a copy of the article's data
        """
        assert isinstance(am, models.Article)  #~~->Article:Model~~
        dat = deepcopy(am.data)
        # Fix some inconsistencies with the model - start and end pages should be in bibjson.journal.
        # BUGFIX: use setdefault rather than .get(..., {}) — the latter returned a
        # throwaway dict when "journal" was absent, so the page value was written
        # into it and then the source key deleted, silently losing the data.
        if "start_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["start_page"] = dat["bibjson"]["start_page"]
            del dat["bibjson"]["start_page"]
        if "end_page" in dat["bibjson"]:
            dat["bibjson"].setdefault("journal", {})["end_page"] = dat["bibjson"]["end_page"]
            del dat["bibjson"]["end_page"]
        return cls(dat)

    @classmethod
    def from_model_by_id(cls, id_):
        """
        Pull an Article from storage by id and convert it.

        :param id_: the article id to look up via models.Article.pull
        :return: an OutgoingArticleDO for the retrieved article
        """
        a = models.Article.pull(id_)
        return cls.from_model(a)