Coverage for portality/bll/services/site.py: 100%

70 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-11-09 15:10 +0000

1import re 

2from datetime import datetime 

3 

4from lxml import etree 

5 

6from portality import models 

7from portality.bll import exceptions 

8from portality.core import app 

9from portality.lib import nav 

10from portality.lib.argvalidate import argvalidate 

11from portality.store import StoreFactory, prune_container 

12from portality.util import get_full_url_safe 

13 

# Sitemap namespace URI wrapped in braces; prepended to tag names
# (NS + "urlset", NS + "url", ...) when building elements with lxml.
NS = "{http://www.sitemaps.org/schemas/sitemap/0.9}"

15 

16 

def create_simple_sub_element(parent, element_name, text=None):
    """Append a namespaced child element to *parent* and return it.

    The child's tag is ``NS + element_name``. The child's text is assigned
    only when *text* is not None; otherwise the element is left empty.
    """
    child = etree.SubElement(parent, NS + element_name)
    if text is None:
        return child
    child.text = text
    return child

24 

25 

def create_url_element(parent, loc, change_freq, lastmod=None):
    """Append a ``url`` element to *parent* and return it.

    The new element receives its children in this order: ``loc``, then
    ``lastmod`` (only when *lastmod* is not None), then ``changefreq``.
    """
    url_node = etree.SubElement(parent, NS + "url")

    # Collect (tag, text) pairs first so the emission order is explicit.
    children = [("loc", loc)]
    if lastmod is not None:
        children.append(("lastmod", lastmod))
    children.append(("changefreq", change_freq))

    for tag, value in children:
        create_simple_sub_element(url_node, tag, value)

    return url_node

37 

38 

class SiteService(object):
    """Service object exposing site-level operations; here, sitemap generation."""

    def sitemap(self, prune: bool = True):
        """
        Generate the sitemap
        ~~Sitemap:Feature~~

        Builds a ``urlset`` document containing one ``url`` entry per static
        navigation route and one per journal returned by
        ``models.Journal.all_in_doaj()``, writes it to the temp store, copies
        it to the "cache" store, and records the resulting url via
        ``models.Cache.cache_sitemap``.

        :param prune: when truthy, old ``sitemap__*`` files in the cache
            container are pruned so that only the two newest are kept.
            May not be None.
        :return: tuple ``(url, action_register)`` where ``url`` is the main
            store's url for the new sitemap file and ``action_register`` is a
            list of human-readable messages describing what was done.
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        # (first argument is presumably the caller name used in error messages)
        argvalidate("sitemap", [
            {"arg": prune, "allow_none": False, "arg_name": "prune"}
        ], exceptions.ArgumentException)

        action_register = []

        # journal ToC urls are built by concatenation, so guarantee a trailing slash
        base_url = app.config.get("BASE_URL")
        if not base_url.endswith("/"):
            base_url += "/"

        # ~~-> FileStoreTemp:Feature~~
        filename = 'sitemap__doaj_' + datetime.strftime(datetime.utcnow(), '%Y%m%d_%H%M') + '_utf8.xml'
        container_id = app.config.get("STORE_CACHE_CONTAINER")
        tmpStore = StoreFactory.tmp()
        out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)

        toc_changefreq = app.config.get("TOC_CHANGEFREQ", "monthly")

        # default (un-prefixed) namespace for the document
        NSMAP = {None: "http://www.sitemaps.org/schemas/sitemap/0.9"}
        urlset = etree.Element(NS + "urlset", nsmap=NSMAP)

        counter = 0

        # do the static pages: resolve every nav route to a full url, drop
        # falsy results, de-duplicate, and emit in sorted order
        _entries = nav.get_nav_entries()
        _routes = nav.yield_all_route(_entries)
        _urls = (get_full_url_safe(r) for r in _routes)
        _urls = filter(None, _urls)
        _urls = set(_urls)
        _urls = sorted(_urls)
        for u in _urls:
            create_url_element(urlset, u, toc_changefreq)
            counter += 1

        # do all the journal ToCs
        for j in models.Journal.all_in_doaj():
            # first create an entry purely for the journal
            toc_loc = base_url + "toc/" + j.toc_id
            create_url_element(urlset, toc_loc, toc_changefreq, lastmod=j.last_updated)
            counter += 1

        # record how many urls were written
        action_register.append("{x} urls written to sitemap".format(x=counter))

        # save it into the temp store
        tree = etree.ElementTree(urlset)
        with open(out, "wb") as f:
            tree.write(f, encoding="UTF-8", xml_declaration=True, pretty_print=True)

        # ~~->FileStore:Feature~~
        mainStore = StoreFactory.get("cache")
        try:
            mainStore.store(container_id, filename, source_path=out)
            url = mainStore.url(container_id, filename)
        finally:
            # always clean up the temp copy, even if the main store fails;
            # don't delete the container, just in case someone else is writing to it
            tmpStore.delete_file(container_id, filename)

        action_register.append("Sitemap written to store with url {x}".format(x=url))

        # remove all but the two latest sitemaps
        if prune:
            # captures the timestamp portion of a sitemap filename; compiled
            # once rather than on every sort-key evaluation
            rx = re.compile(r"sitemap__doaj_(.+?)_utf8\.xml")

            def sort(filelist):
                # newest first, ordered by the timestamp embedded in the filename
                return sorted(filelist,
                              key=lambda x: datetime.strptime(rx.match(x).group(1), '%Y%m%d_%H%M'),
                              reverse=True)

            def _filter(filename):
                return filename.startswith("sitemap__")

            # prune_container's result is appended to the register, so it is
            # presumably a list of action messages
            action_register += prune_container(mainStore, container_id, sort, filter=_filter, keep=2)

        # update the ES record to point to the new file
        # ~~->Cache:Feature~~
        models.Cache.cache_sitemap(url)
        return url, action_register