Coverage for portality/bll/services/export.py: 73%
213 statements
coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1import uuid
2import csv
3import random
4import string
5import os
7from portality.core import app
8from portality.crosswalks.article_ris import ArticleRisXWalk
9from portality.models import RISExport
10from portality.store import StoreFactory, StoreException
11from portality.util import no_op
12from portality import models
13from portality.crosswalks.journal_questions import Journal2QuestionXwalk
14from portality.lib import dates
class RISExportReporter(object):
    """Progress reporter for bulk RIS export runs.

    Keeps two counters: how many articles have been examined ("processed")
    and how many RIS records were written ("loaded").  ``msg`` is silent by
    default; subclasses may override it to surface progress output.
    """

    def __init__(self):
        # both counters start at zero and are replaced (not incremented)
        # via loaded()/processed()
        self._loaded = 0
        self._processed = 0

    def loaded(self, n):
        """Record the running total of RIS records generated."""
        self._loaded = n

    def processed(self, n):
        """Record the running total of articles examined."""
        self._processed = n

    def msg(self, m):
        """Emit a progress message; no-op in this base implementation."""
        pass

    def counts(self):
        """Return the current counters as a (processed, loaded) tuple."""
        return (self._processed, self._loaded)
class ExportService(object):
    """Business-logic service for exporting journal-like records as CSV and
    articles as RIS."""

    def csv(self, model: models.JournalLikeObject, query=None, logger=None, out_file=None,
            biblio_fieldset=True,
            meta_fieldset=True,
            article_fieldset=True,
            doaj_link=True,
            admin_fieldset=False,
            obscure_accounts=True,
            add_sensitive_account_info=False,
            custom_columns=None,
            exclude_no_issn=True):
        """Export every record matched by ``query`` for ``model`` as UTF-8 CSV.

        When ``out_file`` is None a uniquely-named file is created in the
        temporary store; otherwise the CSV is written to ``out_file``.

        :param model: the journal-like model class to scroll over
        :param query: query constraining which records are exported
        :param logger: callable taking a message string; None means silent
        :param out_file: optional explicit output path
        :param exclude_no_issn: skip records with neither a print nor an
            electronic ISSN
        :return: (path, filename) of the written CSV
        :raises StoreException: if the temp-store file cannot be created
        """
        # the logger is called unconditionally below, so replace None
        # with a no-op callable
        if logger is None:
            logger = no_op

        filename = None
        out = out_file
        if out_file is None:
            # create a collision-proof file in the temporary store
            filename = 'csv_' + uuid.uuid4().hex + '_utf8.csv'
            container_id = "csv_export_tmp_container"
            tmpStore = StoreFactory.tmp()
            try:
                out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)
                logger("Temporary CSV will be written to {x}".format(x=out))
            except StoreException as e:
                logger("Could not create temporary CSV file: {x}".format(x=e))
                raise e

        if filename is None:
            filename = os.path.basename(out)

        with open(out, 'w', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            wrote_header = False
            for obj in model.scroll(query, page_size=100, keepalive='1m'):
                logger("Exporting {f} {x}".format(f=model.__type__, x=obj.id))

                if exclude_no_issn:
                    # records carrying no P-ISSN and no E-ISSN are skipped
                    bj = obj.bibjson()
                    issn = bj.get_one_identifier(idtype=bj.P_ISSN)
                    if issn is None:
                        issn = bj.get_one_identifier(idtype=bj.E_ISSN)
                    if issn is None:
                        continue

                row = self.object_as_question_and_answer(obj, biblio_fieldset=biblio_fieldset,
                                                         meta_fieldset=meta_fieldset,
                                                         article_fieldset=article_fieldset,
                                                         doaj_link=doaj_link,
                                                         admin_fieldset=admin_fieldset,
                                                         obscure_accounts=obscure_accounts,
                                                         add_sensitive_account_info=add_sensitive_account_info,
                                                         custom_columns=custom_columns)

                # the header row is taken from the questions of the first
                # record actually exported
                if not wrote_header:
                    csvwriter.writerow([q for q, _ in row])
                    wrote_header = True

                csvwriter.writerow([v for _, v in row])

        logger("All records exported and CSV written to temp store {}".format(out))
        return out, filename
99 def object_as_question_and_answer(self, obj: models.JournalLikeObject,
100 biblio_fieldset=True,
101 meta_fieldset=True,
102 article_fieldset=True,
103 doaj_link=True,
104 admin_fieldset=False,
105 obscure_accounts=True,
106 add_sensitive_account_info=False,
107 custom_columns=None):
108 YES_NO = {True: 'Yes', False: 'No', None: '', '': ''}
109 unmap = {}
111 def _get_doaj_meta_kvs(journal: models.JournalLikeObject):
112 """
113 Get key, value pairs for some meta information we want from the journal object
114 :param journal: a models.Journal
115 :return: a list of (key, value) tuples for our metadata
116 """
117 kvs = [
118 ("Subjects", ' | '.join(journal.bibjson().lcc_paths())),
119 ("Added on Date", journal.created_date if isinstance(journal, models.Journal) else journal.date_applied),
120 ("Last updated Date", journal.last_manual_update),
121 ]
123 if isinstance(journal, models.Journal):
124 kvs.append(("Last Full Review Date", journal.last_full_review))
126 return kvs
128 def _get_doaj_toc_kv(journal):
129 if not isinstance(journal, models.Journal):
130 return "URL in DOAJ", app.config.get("BASE_URL", "https://doaj.org") + "/admin/application/" + journal.id
131 return "URL in DOAJ", app.config.get('JOURNAL_TOC_URL_FRAG', 'https://doaj.org/toc/') + journal.id
133 def _get_article_kvs(journal):
134 if not isinstance(journal, models.Journal):
135 return []
136 stats = journal.article_stats()
137 kvs = [
138 ("Number of Article Records", str(stats.get("total"))),
139 ("Most Recent Article Added", stats.get("latest"))
140 ]
141 return kvs
143 def _usernames(j):
144 o = j.owner
145 if obscure_accounts:
146 if o in unmap:
147 sub = unmap[o]
148 else:
149 sub = "".join(random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(8))
150 unmap[o] = sub
151 return [("Owner", sub)]
152 else:
153 return [("Owner", o)]
155 def _acc_name(j):
156 o = j.owner
157 a = models.Account.pull(o)
158 return [("Account Name", a.name)] if a is not None else [("Account Name", "")]
160 def _acc_email(j):
161 o = j.owner
162 a = models.Account.pull(o)
163 return [("Account Email", a.email)] if a is not None else [("Account Email", "")]
165 def _admin_dates(a: models.JournalLikeObject):
166 if isinstance(a, models.Application):
167 return [("Date Rejected", a.date_rejected)]
169 return [
170 ("Date Applied", a.date_applied),
171 ("Last Withdrawn Date", a.last_withdrawn),
172 ("Last Reinstated Date", a.last_reinstated),
173 ("Last Owner Transfer", a.last_owner_transfer),
174 ]
176 biblio_kvs = []
177 meta_kvs = []
178 article_kvs = []
179 toc_kv = None
180 admin_kvs = []
181 custom_kvs = []
183 if biblio_fieldset:
184 biblio_kvs = Journal2QuestionXwalk.journal2question(obj)
185 if meta_fieldset:
186 meta_kvs = _get_doaj_meta_kvs(obj)
187 if article_fieldset:
188 article_kvs = _get_article_kvs(obj)
189 if doaj_link:
190 toc_kv = _get_doaj_toc_kv(obj)
191 if admin_fieldset:
192 admin_kvs = _usernames(obj)
193 if add_sensitive_account_info:
194 admin_kvs += _acc_name(obj) + _acc_email(obj)
195 admin_kvs += _admin_dates(obj)
196 if custom_columns is not None:
197 for cc in custom_columns:
198 custom_kvs.append(cc(obj))
200 if biblio_fieldset and doaj_link:
201 biblio_kvs.insert(2, toc_kv)
202 else:
203 biblio_kvs.append(toc_kv)
205 row = biblio_kvs + meta_kvs + article_kvs + admin_kvs + custom_kvs
206 return row
208 def delete_tmp_csv(self, filename):
209 tmpStore = StoreFactory.tmp()
210 container_id = "csv_export_tmp_container"
211 tmpStore.delete_file(container_id, filename)
213 def publish(self, source_file, filename, requester=None, request_date=None, name=None, query=None, model=None):
214 mainStore = StoreFactory.get("export")
215 container_id = app.config.get("STORE_EXPORT_CONTAINER")
216 mainStore.store(container_id, filename, source_path=source_file)
218 e = models.Export()
219 e.generated_date = dates.now_str()
220 e.requester = requester
221 e.request_date = request_date
222 e.name = name if name is not None else filename
223 e.filename = filename
224 e.constraints = query
225 e.model = model
226 e.save()
228 return e
230 def retrieve(self, report_id):
231 report = models.Export.pull(report_id)
232 mainStore = StoreFactory.get("export")
233 container_id = app.config.get("STORE_EXPORT_CONTAINER")
234 fh = mainStore.get(container_id, report.filename)
235 return report, fh
237 def ris(self, article, save=True):
238 if isinstance(article, str):
239 article = models.Article.pull(article)
241 if article is None:
242 return None
244 ris = ArticleRisXWalk.article2ris(article)
245 obj = models.RISExport()
246 obj.set_id(article.id)
247 obj.ris_raw = ris
248 if save:
249 obj.save()
250 else:
251 obj.pre_save_prep()
252 return obj
254 def has_stale_ris(self, article, ris=None):
255 if isinstance(article, str):
256 article = models.Article.pull(article)
258 if ris is None:
259 ris = models.RISExport.pull(article.id)
261 if ris is None:
262 return True
264 if ris.last_updated is None:
265 return True
267 return ris.last_updated_timestamp < article.last_updated_timestamp
269 def remove_ris(self, ris):
270 if isinstance(ris, str):
271 ris = RISExport.pull(ris)
273 if ris is None:
274 return
276 ris.delete()
279 def bulk_generate_ris(self, force_update=False, batch_size=1000, reporter:RISExportReporter=None):
281 if reporter is None:
282 reporter = RISExportReporter()
284 def flush_batch(batch, force=False):
285 if len(batch) == 0:
286 return batch
288 if len(batch) < batch_size and not force:
289 return batch
291 models.RISExport.bulk(batch, action="index", req_timeout=120)
292 reporter.msg("Writing {x} RIS exports".format(x=len(batch)))
293 return []
295 batch = []
296 count = 0
297 loaded = 0
298 for article in models.Article.iterall_unstable():
299 count += 1
300 existing = models.RISExport.pull(article.id)
301 if force_update or self.has_stale_ris(article, existing):
302 updated = self.ris(article, save=False)
303 if not force_update:
304 # if we're not forcing an update, then don't update if the content
305 # is the same
306 if existing is not None and updated.ris_raw == existing.ris_raw:
307 reporter.processed(count)
308 continue
310 batch.append(updated.data)
311 batch = flush_batch(batch)
312 loaded += 1
313 reporter.loaded(loaded)
315 reporter.processed(count)
317 flush_batch(batch, True)
319 return reporter