Coverage for portality/tasks/read_news.py: 0%

63 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-19 16:52 +0100

1import feedparser 

2 

3from portality import models 

4from portality.core import app 

5 

6from portality.tasks.redis_huey import main_queue, schedule 

7from portality.decorators import write_required 

8 

9from portality.background import BackgroundTask, BackgroundApi 

10 

class FeedError(Exception):
    """Raised when the blog feed cannot be read, parsed, or mapped to news records."""
    pass

13 

14 

class ReadNewsBackgroundTask(BackgroundTask):
    """Background task which reads the configured blog feed into the news index."""

    __action__ = "read_news"

    def run(self):
        """
        Execute the task as specified by the background_job

        Delegates the actual work to the module-level read_feed() function.
        :return:
        """
        read_feed()

    def cleanup(self):
        """
        Cleanup after a successful OR failed run of the task

        No cleanup is required for this task.
        :return:
        """
        pass

    @classmethod
    def prepare(cls, username, **kwargs):
        """
        Take an arbitrary set of keyword arguments and return an instance of a BackgroundJob,
        or fail with a suitable exception

        :param username: the user this job will run as
        :param kwargs: arbitrary keyword arguments pertaining to this task type (unused here)
        :return: a BackgroundJob instance representing this task
        """
        bg_job = models.BackgroundJob()
        bg_job.user = username
        bg_job.action = cls.__action__
        return bg_job

    @classmethod
    def submit(cls, background_job):
        """
        Submit the specified BackgroundJob to the background queue

        :param background_job: the BackgroundJob instance
        :return:
        """
        background_job.save()
        # hand off to the huey task defined at module level, with a short delay
        read_news.schedule(args=(background_job.id,), delay=10)
        # fixme: schedule() could raise a huey.exceptions.HueyException and not reach redis- would that be logged?

59 # fixme: schedule() could raise a huey.exceptions.HueyException and not reach redis- would that be logged? 

60 

61 

62# TODO factor this into the object above, rather than sitting out here as a function (it was migrated 

63# here from another file) 

64def read_feed(): 

65 """~~NewsReader:Feature->News:ExternalService""" 

66 feed_url = app.config.get("BLOG_FEED_URL") 

67 if feed_url is None: 

68 raise FeedError("No BLOG_FEED_URL defined in settings") 

69 

70 f = feedparser.parse(feed_url) 

71 if f.bozo > 0: 

72 raise FeedError(f.bozo_exception) 

73 

74 for e in f.entries: 

75 save_entry(e) 

76 

def save_entry(entry):
    """Create or update a News object in the index from a single feed entry.

    An existing record is matched by the entry's remote id; otherwise a new
    News object is created.

    :param entry: a single parsed feed entry
    :raises FeedError: if the remote id matches more than one existing record,
        or the entry carries no link with rel="alternate"
    """
    matches = models.News.by_remote_id(entry.id)
    if len(matches) > 1:
        raise FeedError("There is more than one object with this id in the index: " + entry.id)
    news = matches[0] if len(matches) == 1 else models.News()

    # the post's canonical URL is the alternate link
    alternates = [link.get("href") for link in entry.links if link.get("rel") == "alternate"]
    if not alternates:
        raise FeedError("Unable to get url of post from link@rel=alternate")

    news.remote_id = entry.id
    news.url = alternates[0]
    news.title = entry.title
    news.updated = entry.updated
    news.summary = entry.summary
    news.published = entry.published

    news.save()

99 

100 

@main_queue.periodic_task(schedule("read_news"))
@write_required(script=True)
def scheduled_read_news():
    """Periodic huey entry point: prepare and submit a read_news job as the system user."""
    system_user = app.config.get("SYSTEM_USERNAME")
    bg_job = ReadNewsBackgroundTask.prepare(system_user)
    ReadNewsBackgroundTask.submit(bg_job)

107 

@main_queue.task()
@write_required(script=True)
def read_news(job_id):
    """Huey queue entry point: pull the BackgroundJob by id and execute the task.

    :param job_id: id of the BackgroundJob record to run
    """
    bg_job = models.BackgroundJob.pull(job_id)
    BackgroundApi.execute(ReadNewsBackgroundTask(bg_job))