Coverage for portality / api / current / bulk / articles.py: 84%
99 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1# ~~APIBulkArticles:Feature->APIBulk:Feature~~
2import warnings
3from copy import deepcopy
4from typing import List, Dict
6from portality import models
7from portality.api.common import Api, Api404Error, Api400Error, Api403Error, Api401Error
8from portality.api.current.crud import ArticlesCrudApi
9from portality.bll import DOAJ
10from portality.bll import exceptions
11from portality.bll.exceptions import DuplicateArticleException
12from portality.models import BulkArticles
13from portality.tasks.article_bulk_create import ArticleBulkCreateBackgroundTask
class ArticlesBulkApi(Api):
    """Bulk article operations: create, asynchronous create, upload status and delete.

    Each operation has a ``*_swag`` companion classmethod that copies
    ``SWAG_TEMPLATE``, adds the operation's parameters and responses, and
    hands the result to ``_build_swag_response``.
    """

    # ~~->Swagger:Feature~~
    # ~~->API:Documentation~~
    SWAG_TAG = 'Bulk API'

    @classmethod
    def create_swag(cls):
        """Build the Swagger description of the synchronous bulk create operation.

        :return: whatever ``cls._build_swag_response`` produces for the filled-in template
        """
        # work on a copy so the shared class-level template is never mutated
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">A list/array of article JSON objects that you would like to create or update. The contents should be a list, and each object in the list should comply with the schema displayed in the 'GET (Retrieve) an article route' below. Partial updates are not allowed; you have to supply the full JSON.</div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_json",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        template['responses']['201'] = cls.R201_BULK
        template['responses']['400'] = cls.R400
        template['responses']['401'] = cls.R401
        template['responses']['403'] = cls.R403
        return cls._build_swag_response(template)

    @classmethod
    def create(cls, articles, account):
        """Synchronously create a batch of articles (deprecated, use ``create_async``).

        :param articles: iterable of raw article data, each item is passed to
            ``ArticlesCrudApi.prep_article_for_api``
        :param account: the account performing the request
        :return: list of the ids of the prepared article models
        :raises Api401Error: if ``account`` is None
        :raises Api403Error: if the article service reports a duplicate article
        :raises Api400Error: if the article service raises ``IngestException``
            or ``ArticleNotAcceptable``
        """
        # stacklevel=2 attributes the warning to the caller of this deprecated
        # method rather than to this line
        warnings.warn("This method is deprecated, use create_async instead", DeprecationWarning,
                      stacklevel=2)
        # We run through the articles once, validating in dry-run mode
        # and deduplicating as we go. Then we .save() everything once
        # we know all incoming articles are valid.

        # as long as authentication (in the layer above) has been successful, and the account exists, then
        # we are good to proceed
        if account is None:
            raise Api401Error()

        # convert the data into a suitable article models
        articles = [ArticlesCrudApi.prep_article_for_api(data, account) for data in articles]

        # ~~->Article:Service~~
        articleService = DOAJ.articleService()
        try:
            # ~~->BatchCreateArticles:Feature~~
            articleService.batch_create_articles(articles, account, add_journal_info=True)
        except DuplicateArticleException as e:
            # must stay ahead of the broader handlers below in case it is a subclass of them
            raise Api403Error(str(e)) from e
        except (exceptions.IngestException, exceptions.ArticleNotAcceptable) as e:
            raise Api400Error(str(e)) from e

        return [a.id for a in articles]

    @classmethod
    def create_async_swag(cls):
        """Build the Swagger description of the asynchronous bulk create operation.

        :return: whatever ``cls._build_swag_response`` produces for the filled-in template
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\"><p>A list/array of article JSON objects that you would like to create or update. The contents should be a list, and each object in the list should comply with the schema displayed in the 'GET (Retrieve) an article route' below. Partial updates are not allowed; you have to supply the full JSON.</p><p>This request is asynchronous; the response will contain an upload_id. You can use this id to query the task status.</p></div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_json",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)

        template['responses']['202'] = {
            "schema": {
                "properties": {
                    "msg": {"type": "string", },
                    "upload_id": {"type": "string",
                                  "description": "The upload id of the task, "
                                                 "User can use this ID to check the bulk upload status."},
                    "status": {"type": "string", "description": "Link to the status URL for the task"}
                },
                "type": "object"
            },
            "description": "Resources are being created asynchronously; response contains the task IDs "
        }
        template['responses']['400'] = cls.R400
        return cls._build_swag_response(template)

    @classmethod
    def create_async(cls, income_articles: List[Dict], account: models.Account):
        """Queue a background task that creates the supplied articles.

        :param income_articles: list of article dicts handed to the background task
        :param account: the account performing the request; its ``id`` is recorded on the job
        :return: the value of the job parameter whose key ends with ``__upload_id``
        :raises Api401Error: if ``account`` is None (same contract as ``create``)
        """
        # guard before dereferencing account.id so a missing account is reported
        # as an API error rather than an AttributeError
        if account is None:
            raise Api401Error()

        job = ArticleBulkCreateBackgroundTask.prepare(account.id, incoming_articles=income_articles)
        ArticleBulkCreateBackgroundTask.submit(job)
        # the exact parameter key is not spelled out here, so it is found by its suffix;
        # next() raises StopIteration if the job has no such parameter
        upload_id = next(v for k, v in job.params.items() if k.endswith('__upload_id'))
        return upload_id

    @classmethod
    def get_async_status_swag(cls):
        """Build the Swagger description of the bulk upload status operation.

        :return: whatever ``cls._build_swag_response`` produces for the filled-in template
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">The upload id of the task, "
                               "User can use this id to check the bulk upload status.</div>",
                "required": True,
                "name": "upload_id",
                "type": "string",
                "in": "path",
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        template['responses']['200'] = {
            "description": "Return status of upload ids",
            "schema": {
                "type": "object",
                "properties": {
                    "id": {
                        "type": "string",
                        # previously a copy of the "status" description
                        "description": "The upload id of the task",
                    },
                    "status": {
                        "type": "string",
                        "description": "The status of the task",
                        "enum": ["pending", "validated", "failed", "processed", "processed_partial"]
                    },
                    "results": {
                        'type': 'object',
                        'description': 'The result of the upload',
                        "properties": {
                            "imported": {
                                "type": "integer",
                                "description": "The number of articles imported",
                            },
                            "failed": {
                                "type": "integer",
                                "description": "The number of articles failed to import",
                            },
                            "update": {
                                "type": "integer",
                                "description": "The number of articles updated",
                            },
                            "new": {
                                "type": "integer",
                                "description": "The number of articles created",
                            },
                        },
                    }

                },
            },
        }
        template['responses']['400'] = {
            "description": "Fail get status reason",
            "schema": {
                "type": "object",
                "properties": {
                    "msg": {"type": "string", "description": "The error message"},
                },
            },
        }
        return cls._build_swag_response(template)

    @classmethod
    def get_async_status(cls, current_user_id, upload_id=None) -> Dict:
        """Report the state of a bulk upload owned by the current user.

        :param current_user_id: id compared against the ``owner`` of the upload record
        :param upload_id: id of the ``BulkArticles`` record to look up
        :return: dict with ``id``, ``created`` and ``status``; ``results`` is added when
            the stored status is "processed" or "partial"; ``error``, ``error_details``
            and ``failure_reasons`` are added only when the record holds a truthy value
        :raises Api400Error: if ``upload_id`` is missing, unknown, or owned by another user
        """
        if not upload_id:
            raise Api400Error("upload_id is required")

        bulk_article = BulkArticles.pull(upload_id)
        # an unknown id and somebody else's id get the same error message
        if bulk_article is None or bulk_article.owner != current_user_id:
            raise Api400Error("upload_id is invalid")

        # stored statuses not listed here are exposed unchanged
        internal_external_status_map = {
            "incoming": "pending",
            "partial": "processed_partial"
        }

        status = {
            "id": upload_id,
            "created": bulk_article.created_date,
            'status': internal_external_status_map.get(bulk_article.status, bulk_article.status),
        }

        if bulk_article.status in ["processed", "partial"]:
            status['results'] = {
                "imported": bulk_article.imported,
                "failed": bulk_article.failed_imports,
                "update": bulk_article.updates,
                "new": bulk_article.new,
            }

        if bulk_article.error:
            status['error'] = bulk_article.error

        if bulk_article.error_details:
            status['error_details'] = bulk_article.error_details

        if bulk_article.failure_reasons:
            status['failure_reasons'] = bulk_article.failure_reasons

        return status

    @classmethod
    def delete_swag(cls):
        """Build the Swagger description of the bulk delete operation.

        :return: whatever ``cls._build_swag_response`` produces for the filled-in template
        """
        template = deepcopy(cls.SWAG_TEMPLATE)
        template['parameters'].append(
            {
                "description": "<div class=\"search-query-docs\">A list/array of DOAJ article IDs. E.g. [\"4cf8b72139a749c88d043129f00e1b07\", \"232b53726fb74cc4a8eb4717e5a43193\"].</div>",
                "required": True,
                "schema": {"type": "string"},
                "name": "article_ids",
                "in": "body"
            }
        )
        template['parameters'].append(cls.SWAG_API_KEY_REQ_PARAM)
        template['responses']['204'] = cls.R204
        template['responses']['400'] = cls.R400
        template['responses']['401'] = cls.R401
        return cls._build_swag_response(template)

    @classmethod
    def delete(cls, article_ids, account):
        """Delete several articles, refusing the whole batch if any one cannot be deleted.

        :param article_ids: ids of the articles to delete; iterated twice, so it
            must be re-iterable (e.g. a list)
        :param account: the account performing the request
        :raises Api400Error: if the dry run reports a 404 or 403 for any id
        """
        # we run through delete twice, once as a dry-run and the second time
        # as the real deal
        # ~~->APICrudArticles:Feature~~
        for article_id in article_ids:
            try:
                ArticlesCrudApi.delete(article_id, account, dry_run=True)
            except Api404Error as e:
                raise Api400Error(
                    "Id {x} does not exist or does not belong to this user account".format(x=article_id)
                ) from e
            except Api403Error as e:
                raise Api400Error(
                    "Id {x} is not in a state which allows it to be deleted".format(x=article_id)
                ) from e

        for article_id in article_ids:
            ArticlesCrudApi.delete(article_id, account)