Coverage for portality / tasks / public_data_dump.py: 85%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1from portality import models 

2from portality.background import BackgroundTask, BackgroundApi, BackgroundException 

3from portality.core import app 

4from portality.lib import dates 

5from portality.tasks.helpers import background_helper 

6from portality.tasks.redis_huey import scheduled_long_queue as queue 

7from portality.bll import DOAJ 

8from portality.bll import exceptions 

9 

class PublicDataDumpBackgroundTask(BackgroundTask):
    """
    This task allows us to generate the public data dumps for the system.  It provides a number of
    configuration options, and it is IMPORTANT to note that in production it MUST only be run with the
    following settings:

         types: all
         clean: False
         prune: True

    If you run this in production with either `journal` or `article` as type,
    then any existing link to the other data type will be no longer available.

    If you run this with clean set True, there is a chance that in the event of an error the live
    data will be deleted, and not replaced with new data.  Better to prune after the
    new data has been generated instead.
    """

    __action__ = "public_data_dump"

    def run(self):
        """
        Execute the task as specified by the background_job

        Reads the `clean`, `prune` and `types` parameters from the job, optionally removes
        the existing data dump container, and then asks the public data dump service to
        generate the requested dump(s).

        :raises BackgroundException: if the service raises a SaveException while dumping
        :return:
        """
        job = self.background_job
        params = job.params

        clean = self.get_param(params, 'clean')
        prune = self.get_param(params, 'prune')
        types = self.get_param(params, 'types')

        def logger(msg):
            # every message is persisted immediately, so progress is visible while the job runs
            job.add_audit_message(msg)
            job.save()

        svc = DOAJ.publicDataDumpService(logger)
        if clean:
            svc.remove_pdd_container()
            logger("Deleted existing data dump files")

        # 'all' expands to the service's ALL constant; anything else is treated as a single type
        types = svc.ALL if types == 'all' else [types]

        try:
            svc.dump(types, prune=prune)
        except exceptions.SaveException as e:
            # chain the original exception so the underlying cause is kept in the traceback
            raise BackgroundException("Error generating data dump: {0}".format(e)) from e

        # NOTE(review): this final message is not saved here - presumably the caller
        # persists the job after run() returns; confirm against BackgroundApi
        job.add_audit_message(dates.now_str() + ": done")

    def cleanup(self):
        """
        Cleanup after a successful OR failed run of the task

        Nothing to clean up for this task.

        :return:
        """
        pass

    @classmethod
    def prepare(cls, username, **kwargs):
        """
        Take an arbitrary set of keyword arguments and return an instance of a BackgroundJob,
        or fail with a suitable exception

        Recognised keyword arguments:

        * clean - stored as given; defaults to False when absent or None
        * prune - stored as given; defaults to False when absent or None
        * types - one of "all", "journal" or "article"; any other value (or absence) becomes "all"

        :param username: the user the job is created for
        :param kwargs: arbitrary keyword arguments pertaining to this task type
        :raises BackgroundException: if STORE_PUBLIC_DATA_DUMP_CONTAINER is not configured
        :return: a BackgroundJob instance representing this task
        """
        clean = kwargs.get("clean")
        if clean is None:
            clean = False

        prune = kwargs.get("prune")
        if prune is None:
            prune = False

        types = kwargs.get("types")
        if types not in ("all", "journal", "article"):
            types = "all"

        params = {}
        cls.set_param(params, 'clean', clean)
        cls.set_param(params, "prune", prune)
        cls.set_param(params, "types", types)

        container = app.config.get("STORE_PUBLIC_DATA_DUMP_CONTAINER")
        if container is None:
            raise BackgroundException("You must set STORE_PUBLIC_DATA_DUMP_CONTAINER in the config")

        # first prepare a job record
        job = background_helper.create_job(username, cls.__action__,
                                           queue_id=huey_helper.queue_id,
                                           params=params)
        return job

    @classmethod
    def submit(cls, background_job):
        """
        Submit the specified BackgroundJob to the background queue

        The job is saved first, then the `public_data_dump` task is scheduled with the job id,
        using the delay configured in HUEY_ASYNC_DELAY (10 if not set).

        :param background_job: the BackgroundJob instance
        :return:
        """
        background_job.save()
        public_data_dump.schedule(args=(background_job.id,), delay=app.config.get('HUEY_ASYNC_DELAY', 10))

105 

106 

# Module-level helper built from the task class and the imported queue; it supplies the
# queue id used in prepare() and the decorators that register the functions below.
huey_helper = PublicDataDumpBackgroundTask.create_huey_helper(queue)

108 

109 

@huey_helper.register_schedule
def scheduled_public_data_dump():
    """
    Scheduled entry point: prepare and submit a public data dump job for the system user.

    Always uses types="all", clean=False and prune=True, which are the settings the task's
    class docstring requires for production runs.
    """
    system_user = app.config.get("SYSTEM_USERNAME")
    task_cls = PublicDataDumpBackgroundTask
    prepared_job = task_cls.prepare(
        system_user,
        clean=False,
        prune=True,
        types="all",
    )
    task_cls.submit(prepared_job)

115 

116 

@huey_helper.register_execute(is_load_config=False)
def public_data_dump(job_id):
    """
    Execution entry point: load the background job with the given id and run the
    public data dump task for it via BackgroundApi.

    :param job_id: id of the BackgroundJob record to execute
    """
    background_job = models.BackgroundJob.pull(job_id)
    dump_task = PublicDataDumpBackgroundTask(background_job)
    BackgroundApi.execute(dump_task)