Coverage for portality/bll/services/export.py: 73%

213 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1import uuid 

2import csv 

3import random 

4import string 

5import os 

6 

7from portality.core import app 

8from portality.crosswalks.article_ris import ArticleRisXWalk 

9from portality.models import RISExport 

10from portality.store import StoreFactory, StoreException 

11from portality.util import no_op 

12from portality import models 

13from portality.crosswalks.journal_questions import Journal2QuestionXwalk 

14from portality.lib import dates 

15 

class RISExportReporter(object):
    """Minimal, silent progress reporter for bulk RIS export generation.

    Tracks two running totals — how many articles have been examined
    (``processed``) and how many RIS records have been written (``loaded``).
    Messages passed to :meth:`msg` are discarded; subclasses may override it
    to log progress.
    """

    def __init__(self):
        # running totals, updated via loaded()/processed()
        self._loaded = 0
        self._processed = 0

    def loaded(self, n):
        """Record the number of RIS exports written so far."""
        self._loaded = n

    def processed(self, n):
        """Record the number of articles examined so far."""
        self._processed = n

    def msg(self, m):
        """Discard the progress message ``m`` (hook for subclasses)."""
        return None

    def counts(self):
        """Return the running totals as a ``(processed, loaded)`` tuple."""
        return self._processed, self._loaded

32 

33 

class ExportService(object):
    """Business logic for exporting DOAJ data.

    Provides CSV export of journal-like records (journals and applications),
    publishing/retrieval of generated export files via the store layer, and
    generation/maintenance of per-article RIS export records.
    """

    def csv(self, model: models.JournalLikeObject, query=None, logger=None, out_file=None,
            biblio_fieldset=True,
            meta_fieldset=True,
            article_fieldset=True,
            doaj_link=True,
            admin_fieldset=False,
            obscure_accounts=True,
            add_sensitive_account_info=False,
            custom_columns=None,
            exclude_no_issn=True):
        """Export all records of ``model`` matching ``query`` to a CSV file.

        :param model: the journal-like model class to export (e.g. ``models.Journal``)
        :param query: query restricting the records exported; passed to ``model.scroll``
        :param logger: callable taking a single message string; defaults to a no-op
        :param out_file: path to write the CSV to; when None a temp-store file is created
        :param biblio_fieldset: include the bibliographic question/answer columns
        :param meta_fieldset: include the DOAJ metadata columns
        :param article_fieldset: include the article statistics columns
        :param doaj_link: include the "URL in DOAJ" column
        :param admin_fieldset: include owner/administrative columns
        :param obscure_accounts: replace owner usernames with random substitutes
        :param add_sensitive_account_info: also include account name/email (admin fieldset only)
        :param custom_columns: optional list of callables ``obj -> (column, value)``
        :param exclude_no_issn: skip records which have neither a P-ISSN nor an E-ISSN
        :return: a tuple of ``(path written, filename)``
        :raises StoreException: if the temporary store file cannot be created
        """
        # None isn't executable, so convert logger to NO-OP
        if logger is None:
            logger = no_op

        filename = None
        out = out_file
        if out_file is None:
            # no explicit output location requested, so write into the temp store
            filename = 'csv_' + uuid.uuid4().hex + '_utf8.csv'
            container_id = "csv_export_tmp_container"
            tmpStore = StoreFactory.tmp()
            try:
                out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)
                logger("Temporary CSV will be written to {x}".format(x=out))
            except StoreException as e:
                logger("Could not create temporary CSV file: {x}".format(x=e))
                raise e

        if filename is None:
            filename = os.path.basename(out)

        with open(out, 'w', encoding='utf-8') as csvfile:
            first = True
            csvwriter = csv.writer(csvfile)
            for obj in model.scroll(query, page_size=100, keepalive='1m'):
                logger("Exporting {f} {x}".format(f=model.__type__, x=obj.id))

                if exclude_no_issn:
                    # skip records that carry neither a print nor an electronic ISSN
                    bj = obj.bibjson()
                    issn = bj.get_one_identifier(idtype=bj.P_ISSN)
                    if issn is None:
                        issn = bj.get_one_identifier(idtype=bj.E_ISSN)
                    if issn is None:
                        continue

                row = self.object_as_question_and_answer(obj, biblio_fieldset=biblio_fieldset,
                                                         meta_fieldset=meta_fieldset,
                                                         article_fieldset=article_fieldset,
                                                         doaj_link=doaj_link,
                                                         admin_fieldset=admin_fieldset,
                                                         obscure_accounts=obscure_accounts,
                                                         add_sensitive_account_info=add_sensitive_account_info,
                                                         custom_columns=custom_columns)
                if first is True:
                    # the header row is derived from the first record's questions
                    qs = [q for q, _ in row]
                    csvwriter.writerow(qs)
                    first = False

                vs = [v for _, v in row]
                csvwriter.writerow(vs)

        logger("All records exported and CSV written to temp store {}".format(out))
        return out, filename

    def object_as_question_and_answer(self, obj: models.JournalLikeObject,
                                      biblio_fieldset=True,
                                      meta_fieldset=True,
                                      article_fieldset=True,
                                      doaj_link=True,
                                      admin_fieldset=False,
                                      obscure_accounts=True,
                                      add_sensitive_account_info=False,
                                      custom_columns=None):
        """Render a journal-like object as an ordered list of (question, answer) pairs.

        Fieldsets are included according to the boolean flags; see :meth:`csv`
        for their meanings.

        :param obj: the journal or application to render
        :return: a list of ``(question, answer)`` tuples, one per CSV column
        """
        # map of real usernames to randomly generated substitutes, so the same
        # owner obscures to the same value within one rendering
        unmap = {}

        def _get_doaj_meta_kvs(journal: models.JournalLikeObject):
            """
            Get key, value pairs for some meta information we want from the journal object
            :param journal: a models.Journal
            :return: a list of (key, value) tuples for our metadata
            """
            kvs = [
                ("Subjects", ' | '.join(journal.bibjson().lcc_paths())),
                ("Added on Date", journal.created_date if isinstance(journal, models.Journal) else journal.date_applied),
                ("Last updated Date", journal.last_manual_update),
            ]

            # the full-review date only exists on journals, not applications
            if isinstance(journal, models.Journal):
                kvs.append(("Last Full Review Date", journal.last_full_review))

            return kvs

        def _get_doaj_toc_kv(journal):
            # journals link to their public ToC; applications link to the admin view
            if not isinstance(journal, models.Journal):
                return "URL in DOAJ", app.config.get("BASE_URL", "https://doaj.org") + "/admin/application/" + journal.id
            return "URL in DOAJ", app.config.get('JOURNAL_TOC_URL_FRAG', 'https://doaj.org/toc/') + journal.id

        def _get_article_kvs(journal):
            # article statistics are only meaningful for journals
            if not isinstance(journal, models.Journal):
                return []
            stats = journal.article_stats()
            kvs = [
                ("Number of Article Records", str(stats.get("total"))),
                ("Most Recent Article Added", stats.get("latest"))
            ]
            return kvs

        def _usernames(j):
            # either the real owner, or a stable random substitute when obscuring
            o = j.owner
            if obscure_accounts:
                if o in unmap:
                    sub = unmap[o]
                else:
                    sub = "".join(random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for i in range(8))
                    unmap[o] = sub
                return [("Owner", sub)]
            else:
                return [("Owner", o)]

        def _acc_name(j):
            o = j.owner
            a = models.Account.pull(o)
            return [("Account Name", a.name)] if a is not None else [("Account Name", "")]

        def _acc_email(j):
            o = j.owner
            a = models.Account.pull(o)
            return [("Account Email", a.email)] if a is not None else [("Account Email", "")]

        def _admin_dates(a: models.JournalLikeObject):
            # NOTE(review): applications get only the rejection date; the
            # remaining dates apply to the journal-like lifecycle
            if isinstance(a, models.Application):
                return [("Date Rejected", a.date_rejected)]

            return [
                ("Date Applied", a.date_applied),
                ("Last Withdrawn Date", a.last_withdrawn),
                ("Last Reinstated Date", a.last_reinstated),
                ("Last Owner Transfer", a.last_owner_transfer),
            ]

        biblio_kvs = []
        meta_kvs = []
        article_kvs = []
        toc_kv = None
        admin_kvs = []
        custom_kvs = []

        if biblio_fieldset:
            biblio_kvs = Journal2QuestionXwalk.journal2question(obj)
        if meta_fieldset:
            meta_kvs = _get_doaj_meta_kvs(obj)
        if article_fieldset:
            article_kvs = _get_article_kvs(obj)
        if doaj_link:
            toc_kv = _get_doaj_toc_kv(obj)
        if admin_fieldset:
            admin_kvs = _usernames(obj)
            if add_sensitive_account_info:
                admin_kvs += _acc_name(obj) + _acc_email(obj)
            admin_kvs += _admin_dates(obj)
        if custom_columns is not None:
            for cc in custom_columns:
                custom_kvs.append(cc(obj))

        # fold the DOAJ link into the biblio columns: in its conventional
        # position when the biblio fieldset is present, otherwise appended.
        # (Fix: previously toc_kv was appended even when doaj_link was False,
        # inserting a bare None into the row and breaking tuple unpacking.)
        if doaj_link:
            if biblio_fieldset:
                biblio_kvs.insert(2, toc_kv)
            else:
                biblio_kvs.append(toc_kv)

        row = biblio_kvs + meta_kvs + article_kvs + admin_kvs + custom_kvs
        return row

    def delete_tmp_csv(self, filename):
        """Remove a temporary CSV previously written by :meth:`csv`."""
        tmpStore = StoreFactory.tmp()
        container_id = "csv_export_tmp_container"
        tmpStore.delete_file(container_id, filename)

    def publish(self, source_file, filename, requester=None, request_date=None, name=None, query=None, model=None):
        """Copy an export file into the main export store and record it.

        :param source_file: path of the file to publish
        :param filename: name under which to store the file
        :param requester: who asked for the export (recorded on the Export record)
        :param request_date: when the export was requested
        :param name: display name for the export; defaults to ``filename``
        :param query: the query constraints used to generate the export
        :param model: the model type the export was generated from
        :return: the saved ``models.Export`` record
        """
        mainStore = StoreFactory.get("export")
        container_id = app.config.get("STORE_EXPORT_CONTAINER")
        mainStore.store(container_id, filename, source_path=source_file)

        e = models.Export()
        e.generated_date = dates.now_str()
        e.requester = requester
        e.request_date = request_date
        e.name = name if name is not None else filename
        e.filename = filename
        e.constraints = query
        e.model = model
        e.save()

        return e

    def retrieve(self, report_id):
        """Fetch an export record and a file handle on its stored file.

        :param report_id: id of the ``models.Export`` record
        :return: a tuple of ``(Export record, file handle)``

        NOTE(review): assumes ``report_id`` resolves to an existing record —
        ``Export.pull`` returning None would raise AttributeError below;
        confirm callers pre-validate the id.
        """
        report = models.Export.pull(report_id)
        mainStore = StoreFactory.get("export")
        container_id = app.config.get("STORE_EXPORT_CONTAINER")
        fh = mainStore.get(container_id, report.filename)
        return report, fh

    def ris(self, article, save=True):
        """Generate (and optionally save) the RIS export record for an article.

        :param article: an Article object, or an article id to pull
        :param save: when True, save the RISExport record; when False only
            run ``pre_save_prep`` so the caller can persist it later (e.g. in bulk)
        :return: the RISExport record, or None if the article does not exist
        """
        if isinstance(article, str):
            article = models.Article.pull(article)

        if article is None:
            return None

        ris = ArticleRisXWalk.article2ris(article)
        obj = models.RISExport()
        obj.set_id(article.id)
        obj.ris_raw = ris
        if save:
            obj.save()
        else:
            obj.pre_save_prep()
        return obj

    def has_stale_ris(self, article, ris=None):
        """Determine whether the article's RIS export is missing or out of date.

        :param article: an Article object, or an article id to pull
        :param ris: the existing RISExport record, if the caller already has it;
            pulled by article id otherwise
        :return: True if the RIS record is absent, undated, or older than the article
        """
        if isinstance(article, str):
            article = models.Article.pull(article)

        if ris is None:
            ris = models.RISExport.pull(article.id)

        if ris is None:
            return True

        if ris.last_updated is None:
            return True

        return ris.last_updated_timestamp < article.last_updated_timestamp

    def remove_ris(self, ris):
        """Delete a RIS export record, given the record or its id; no-op if absent."""
        if isinstance(ris, str):
            ris = RISExport.pull(ris)

        if ris is None:
            return

        ris.delete()

    def bulk_generate_ris(self, force_update=False, batch_size=1000, reporter: RISExportReporter = None):
        """Regenerate RIS export records for all articles.

        :param force_update: regenerate even when the existing RIS content is unchanged
        :param batch_size: number of records per bulk index request
        :param reporter: progress reporter; defaults to a silent RISExportReporter
        :return: the reporter, carrying the final processed/loaded counts
        """
        if reporter is None:
            reporter = RISExportReporter()

        def flush_batch(batch, force=False):
            # flush only when the batch is full (or the flush is forced);
            # returns the new pending batch for the caller to carry on with
            if len(batch) == 0:
                return batch

            if len(batch) < batch_size and not force:
                return batch

            models.RISExport.bulk(batch, action="index", req_timeout=120)
            reporter.msg("Writing {x} RIS exports".format(x=len(batch)))
            return []

        batch = []
        count = 0
        loaded = 0
        for article in models.Article.iterall_unstable():
            count += 1
            existing = models.RISExport.pull(article.id)
            if force_update or self.has_stale_ris(article, existing):
                updated = self.ris(article, save=False)
                if not force_update:
                    # if we're not forcing an update, then don't update if the content
                    # is the same
                    if existing is not None and updated.ris_raw == existing.ris_raw:
                        reporter.processed(count)
                        continue

                batch.append(updated.data)
                batch = flush_batch(batch)
                loaded += 1
                reporter.loaded(loaded)

            reporter.processed(count)

        # force out any remaining partial batch
        flush_batch(batch, True)

        return reporter