Coverage for portality / api / current / bulk / articles.py: 84%

99 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1# ~~APIBulkArticles:Feature->APIBulk:Feature~~ 

2import warnings 

3from copy import deepcopy 

4from typing import List, Dict 

5 

6from portality import models 

7from portality.api.common import Api, Api404Error, Api400Error, Api403Error, Api401Error 

8from portality.api.current.crud import ArticlesCrudApi 

9from portality.bll import DOAJ 

10from portality.bll import exceptions 

11from portality.bll.exceptions import DuplicateArticleException 

12from portality.models import BulkArticles 

13from portality.tasks.article_bulk_create import ArticleBulkCreateBackgroundTask 

14 

15 

class ArticlesBulkApi(Api):
    """Bulk operations on articles exposed through the API.

    Provides the handlers for bulk creation (a deprecated synchronous variant
    and an asynchronous one backed by a background task), for looking up the
    status of an asynchronous upload and for bulk deletion, together with the
    builders of the Swagger documentation for each of them.
    """

    # ~~->Swagger:Feature~~
    # ~~->API:Documentation~~
    SWAG_TAG = 'Bulk API'

    @classmethod
    def create_swag(cls):
        """Build the Swagger documentation for bulk article creation.

        :return: whatever ``cls._build_swag_response`` produces for the
            populated copy of ``cls.SWAG_TEMPLATE``
        """
        # work on a copy so the shared class-level template is never mutated
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">A list/array of article JSON objects that you would like to create or update. The contents should be a list, and each object in the list should comply with the schema displayed in the 'GET (Retrieve) an article route' below. Partial updates are not allowed; you have to supply the full JSON.</div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_json",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        template['responses']['201'] = cls.R201_BULK
        template['responses']['400'] = cls.R400
        template['responses']['401'] = cls.R401
        template['responses']['403'] = cls.R403
        return cls._build_swag_response(template)

    @classmethod
    def create(cls, articles, account):
        """Create a batch of articles synchronously (deprecated).

        Use :meth:`create_async` instead; this method emits a
        ``DeprecationWarning`` on every call.

        :param articles: iterable of article data objects, each passed to
            ``ArticlesCrudApi.prep_article_for_api``
        :param account: the account on whose behalf the articles are created
        :return: list of the ids of the prepared articles
        :raises Api401Error: if ``account`` is None
        :raises Api403Error: if the article service reports a duplicate article
        :raises Api400Error: if the article service raises an ingest error or
            finds an article not acceptable
        """
        # stacklevel=2 attributes the warning to the caller, which is the code
        # that actually needs to migrate
        warnings.warn("This method is deprecated, use create_async instead", DeprecationWarning, stacklevel=2)

        # as long as authentication (in the layer above) has been successful, and the account exists, then
        # we are good to proceed
        if account is None:
            raise Api401Error()

        # convert the data into suitable article models
        articles = [ArticlesCrudApi.prep_article_for_api(data, account) for data in articles]

        # ~~->Article:Service~~
        article_service = DOAJ.articleService()
        try:
            # ~~->BatchCreateArticles:Feature~~
            article_service.batch_create_articles(articles, account, add_journal_info=True)
            return [a.id for a in articles]
        except DuplicateArticleException as e:
            # must be handled before the generic ingest errors below
            raise Api403Error(str(e)) from e
        except (exceptions.IngestException, exceptions.ArticleNotAcceptable) as e:
            raise Api400Error(str(e)) from e

    @classmethod
    def create_async_swag(cls):
        """Build the Swagger documentation for asynchronous bulk creation.

        :return: whatever ``cls._build_swag_response`` produces for the
            populated copy of ``cls.SWAG_TEMPLATE``
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\"><p>A list/array of article JSON objects that you would like to create or update. The contents should be a list, and each object in the list should comply with the schema displayed in the 'GET (Retrieve) an article route' below. Partial updates are not allowed; you have to supply the full JSON.</p><p>This request is asynchronous; the response will contain an upload_id. You can use this id to query the task status.</p></div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_json",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)

        template['responses']['202'] = {
            "schema": {
                "properties": {
                    "msg": {"type": "string", },
                    "upload_id": {"type": "string",
                                  "description": "The upload id of the task, "
                                                 "User can use this ID to check the bulk upload status."},
                    "status": {"type": "string", "description": "Link to the status URL for the task"}
                },
                "type": "object"
            },
            "description": "Resources are being created asynchronously; response contains the task IDs "
        }
        template['responses']['400'] = cls.R400
        return cls._build_swag_response(template)

    @classmethod
    def create_async(cls, income_articles: List[Dict], account: models.Account):
        """Queue a background task that creates the supplied articles.

        :param income_articles: list of article data dicts to be ingested
        :param account: the account on whose behalf the articles are created
        :return: the upload id stored in the submitted job's parameters
        :raises Api401Error: if ``account`` is None
        """
        # same guard as create(): without it a missing account surfaces as an
        # AttributeError on ``account.id`` rather than as an API error
        if account is None:
            raise Api401Error()

        job = ArticleBulkCreateBackgroundTask.prepare(account.id, incoming_articles=income_articles)
        ArticleBulkCreateBackgroundTask.submit(job)

        # the job parameter keys carry a prefix, so the upload id is located by
        # its suffix; the first matching parameter wins
        upload_id = next(v for k, v in job.params.items() if k.endswith('__upload_id'))
        return upload_id

    @classmethod
    def get_async_status_swag(cls):
        """Build the Swagger documentation for the upload status lookup.

        :return: whatever ``cls._build_swag_response`` produces for the
            populated copy of ``cls.SWAG_TEMPLATE``
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">The upload id of the task, "
                               "User can use this id to check the bulk upload status.</div>",
                "required": True,
                "name": "upload_id",
                "type": "string",
                "in": "path",
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        # NOTE(review): get_async_status also returns "created" and, when set,
        # "error", "error_details" and "failure_reasons"; none of these are
        # described in the schema below.
        template['responses']['200'] = {
            "description": "Return status of upload ids",
            "schema": {
                "type": "object",
                "properties": {
                    "id": {
                        "type": "string",
                        "description": "The upload id of the task",
                    },
                    "status": {
                        "type": "string",
                        "description": "The status of the task",
                        "enum": ["pending", "validated", "failed", "processed", "processed_partial"]
                    },
                    "results": {
                        'type': 'object',
                        'description': 'The result of the upload',
                        "properties": {
                            "imported": {
                                "type": "integer",
                                "description": "The number of articles imported",
                            },
                            "failed": {
                                "type": "integer",
                                "description": "The number of articles failed to import",
                            },
                            "update": {
                                "type": "integer",
                                "description": "The number of articles updated",
                            },
                            "new": {
                                "type": "integer",
                                "description": "The number of articles created",
                            },
                        },
                    }

                },
            },
        }
        template['responses']['400'] = {
            "description": "Fail get status reason",
            "schema": {
                "type": "object",
                "properties": {
                    "msg": {"type": "string", "description": "The error message"},
                },
            },
        }
        return cls._build_swag_response(template)

    @classmethod
    def get_async_status(cls, current_user_id, upload_id=None, ) -> Dict:
        """Report the status of a bulk upload owned by the current user.

        :param current_user_id: id compared against the upload's ``owner``
        :param upload_id: id of the bulk upload record to look up
        :return: dict with ``id``, ``created`` and ``status``; ``results`` is
            added for processed / partially processed uploads, and ``error``,
            ``error_details`` and ``failure_reasons`` are added when set
        :raises Api400Error: if ``upload_id`` is missing, unknown, or belongs
            to a different owner
        """
        if not upload_id:
            raise Api400Error("upload_id is required")

        bulk_article = BulkArticles.pull(upload_id)
        # unknown ids and ids owned by someone else get the same error
        if bulk_article is None or bulk_article.owner != current_user_id:
            raise Api400Error("upload_id is invalid")

        # internal status names that are exposed under a different name;
        # any other status is passed through unchanged
        internal_external_status_map = {
            "incoming": "pending",
            "partial": "processed_partial"
        }

        status = {
            "id": upload_id,
            "created": bulk_article.created_date,
            'status': internal_external_status_map.get(bulk_article.status, bulk_article.status),
        }

        if bulk_article.status in ["processed", "partial"]:
            status['results'] = {
                "imported": bulk_article.imported,
                "failed": bulk_article.failed_imports,
                "update": bulk_article.updates,
                "new": bulk_article.new,
            }

        # optional diagnostics are only included when they carry a value
        for field in ("error", "error_details", "failure_reasons"):
            value = getattr(bulk_article, field)
            if value:
                status[field] = value

        return status

    @classmethod
    def delete_swag(cls):
        """Build the Swagger documentation for bulk article deletion.

        :return: whatever ``cls._build_swag_response`` produces for the
            populated copy of ``cls.SWAG_TEMPLATE``
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">A list/array of DOAJ article IDs. E.g. [\"4cf8b72139a749c88d043129f00e1b07\", \"232b53726fb74cc4a8eb4717e5a43193\"].</div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_ids",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        template['responses']['204'] = cls.R204
        template['responses']['400'] = cls.R400
        template['responses']['401'] = cls.R401
        return cls._build_swag_response(template)

    @classmethod
    def delete(cls, article_ids, account):
        """Delete a batch of articles, all or nothing as far as validation goes.

        :param article_ids: iterable of article ids to delete; it is iterated
            twice, so it must be re-iterable (e.g. a list)
        :param account: the account on whose behalf the articles are deleted
        :raises Api400Error: if the dry run reports that any id is not found
            or is not allowed to be deleted
        """
        # we run through delete twice, once as a dry-run and the second time
        # as the real deal
        # ~~->APICrudArticles:Feature~~
        for article_id in article_ids:
            try:
                ArticlesCrudApi.delete(article_id, account, dry_run=True)
            except Api404Error as e:
                raise Api400Error("Id {x} does not exist or does not belong to this user account".format(x=article_id)) from e
            except Api403Error as e:
                raise Api400Error("Id {x} is not in a state which allows it to be deleted".format(x=article_id)) from e

        for article_id in article_ids:
            ArticlesCrudApi.delete(article_id, account)