Coverage for portality / tasks / public_data_dump.py: 85%
60 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1from portality import models
2from portality.background import BackgroundTask, BackgroundApi, BackgroundException
3from portality.core import app
4from portality.lib import dates
5from portality.tasks.helpers import background_helper
6from portality.tasks.redis_huey import scheduled_long_queue as queue
7from portality.bll import DOAJ
8from portality.bll import exceptions
class PublicDataDumpBackgroundTask(BackgroundTask):
    """
    Background task that generates the public data dumps for the system.

    Several configuration options are available, but IMPORTANT: in production
    this task MUST only be run with the following settings:

        types: all
        clean: False
        prune: True

    Running in production with `types` set to `journal` or `article` will leave
    any existing link to the other data type unavailable.

    Running with `clean` set True risks deleting the live data without
    replacing it if an error occurs part-way through; pruning after the new
    data has been generated is the safer option.
    """

    __action__ = "public_data_dump"

    def run(self):
        """
        Execute the task as specified by the background_job

        :return:
        """
        job = self.background_job
        params = job.params

        # Options recorded on the job by prepare()
        do_clean = self.get_param(params, 'clean')
        do_prune = self.get_param(params, 'prune')
        requested = self.get_param(params, 'types')

        # Audit-trail logger handed to the service; persists each message
        def audit(msg):
            job.add_audit_message(msg)
            job.save()

        svc = DOAJ.publicDataDumpService(audit)

        if do_clean:
            # Dangerous in production: removes the current dump before the
            # replacement exists (see class docstring)
            svc.remove_pdd_container()
            audit("Deleted existing data dump files")

        # Expand the single keyword into the concrete list of dump types
        dump_types = svc.ALL if requested == 'all' else [requested]

        try:
            svc.dump(dump_types, prune=do_prune)
        except exceptions.SaveException as e:
            raise BackgroundException("Error generating data dump: {0}".format(e))

        job.add_audit_message(dates.now_str() + ": done")

    def cleanup(self):
        """
        Cleanup after a successful OR failed run of the task

        :return:
        """
        # Nothing to tidy up: all state lives on the job and in the store
        pass

    @classmethod
    def prepare(cls, username, **kwargs):
        """
        Take an arbitrary set of keyword arguments and return an instance of a
        BackgroundJob, or fail with a suitable exception

        :param username: user the job will be recorded against
        :param kwargs: arbitrary keyword arguments pertaining to this task type
        :return: a BackgroundJob instance representing this task
        """
        clean = kwargs.get("clean")
        prune = kwargs.get("prune")
        types = kwargs.get("types")

        # Normalise: missing/None booleans become False; types must be one of
        # the recognised keywords, otherwise fall back to "all"
        params = {}
        cls.set_param(params, 'clean', clean if clean is not None else False)
        cls.set_param(params, "prune", prune if prune is not None else False)
        cls.set_param(params, "types", types if types in ["all", "journal", "article"] else "all")

        if app.config.get("STORE_PUBLIC_DATA_DUMP_CONTAINER") is None:
            raise BackgroundException("You must set STORE_PUBLIC_DATA_DUMP_CONTAINER in the config")

        # first prepare a job record
        return background_helper.create_job(username, cls.__action__,
                                            queue_id=huey_helper.queue_id,
                                            params=params)

    @classmethod
    def submit(cls, background_job):
        """
        Submit the specified BackgroundJob to the background queue

        :param background_job: the BackgroundJob instance
        :return:
        """
        background_job.save()
        public_data_dump.schedule(args=(background_job.id,), delay=app.config.get('HUEY_ASYNC_DELAY', 10))
107huey_helper = PublicDataDumpBackgroundTask.create_huey_helper(queue)
@huey_helper.register_schedule
def scheduled_public_data_dump():
    # Scheduled entry point: always runs with the production-safe settings
    # (types=all, clean=False, prune=True) as the system user.
    username = app.config.get("SYSTEM_USERNAME")
    bg_job = PublicDataDumpBackgroundTask.prepare(username, clean=False, prune=True, types="all")
    PublicDataDumpBackgroundTask.submit(bg_job)
@huey_helper.register_execute(is_load_config=False)
def public_data_dump(job_id):
    # Queue consumer: rehydrate the job record and run the task through the
    # standard background execution pipeline.
    bg_job = models.BackgroundJob.pull(job_id)
    BackgroundApi.execute(PublicDataDumpBackgroundTask(bg_job))