Coverage for portality/bll/services/site.py: 100%
70 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-11-09 16:22 +0000
« prev ^ index » next coverage.py v6.4.2, created at 2022-11-09 16:22 +0000
1import re
2from datetime import datetime
4from lxml import etree
6from portality import models
7from portality.bll import exceptions
8from portality.core import app
9from portality.lib import nav
10from portality.lib.argvalidate import argvalidate
11from portality.store import StoreFactory, prune_container
12from portality.util import get_full_url_safe
14NS = "{http://www.sitemaps.org/schemas/sitemap/0.9}"
def create_simple_sub_element(parent, element_name, text=None):
    """ Append a sitemap-namespaced child called *element_name* to *parent*.

    The child's text is set only when *text* is not None; the new child
    element is returned either way.
    """
    child = etree.SubElement(parent, NS + element_name)
    if text is None:
        # nothing to write into the element, hand it back empty
        return child
    child.text = text
    return child
def create_url_element(parent, loc, change_freq, lastmod=None):
    """ Append a sitemap ``url`` entry to *parent* and return it.

    The entry always receives a ``loc`` child followed by a ``changefreq``
    child; when *lastmod* is not None a ``lastmod`` child is placed between
    the two.
    """
    entry = etree.SubElement(parent, NS + "url")

    # collect the children in the order they must appear under the entry
    children = [("loc", loc)]
    if lastmod is not None:
        children.append(("lastmod", lastmod))
    children.append(("changefreq", change_freq))

    for tag, value in children:
        create_simple_sub_element(entry, tag, value)

    return entry
class SiteService(object):
    """ Service that builds the XML sitemap, publishes it to the cache store and prunes old copies
    """

    # Timestamp layout embedded in sitemap file names; shared by the code that
    # names a new sitemap and the code that parses existing names when pruning
    _TIMESTAMP_FORMAT = '%Y%m%d_%H%M'

    # Matches the file names written by sitemap() and captures the timestamp part
    _SITEMAP_FILENAME_RX = re.compile(r"sitemap__doaj_(.+?)_utf8\.xml")

    @classmethod
    def _sitemap_timestamp(cls, filename):
        """
        Extract the timestamp embedded in a sitemap file name

        :param filename: name of a file in the cache container
        :return: the parsed datetime, or None when the name does not follow the
            sitemap naming pattern or its timestamp part cannot be parsed
        """
        found = cls._SITEMAP_FILENAME_RX.match(filename)
        if found is None:
            return None
        try:
            return datetime.strptime(found.group(1), cls._TIMESTAMP_FORMAT)
        except ValueError:
            # name looks like a sitemap but the timestamp part is malformed
            return None

    def sitemap(self, prune: bool = True):
        """
        Generate the sitemap, store it in the cache store and record its url
        ~~Sitemap:Feature~~

        :param prune: when true, remove older sitemap files from the cache container,
            keeping the two latest
        :return: tuple of (url of the stored sitemap, list of action messages)
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        # NOTE(review): the "csv" label looks copied from another service method and
        # probably should read "sitemap" - confirm against argvalidate before changing
        argvalidate("csv", [
            {"arg": prune, "allow_none": False, "arg_name": "prune"}
        ], exceptions.ArgumentException)

        action_register = []

        base_url = app.config.get("BASE_URL")
        if not base_url.endswith("/"):
            base_url += "/"

        # ~~-> FileStoreTemp:Feature~~
        filename = 'sitemap__doaj_' + datetime.utcnow().strftime(self._TIMESTAMP_FORMAT) + '_utf8.xml'
        container_id = app.config.get("STORE_CACHE_CONTAINER")
        tmpStore = StoreFactory.tmp()
        out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)

        toc_changefreq = app.config.get("TOC_CHANGEFREQ", "monthly")

        NSMAP = {None: "http://www.sitemaps.org/schemas/sitemap/0.9"}
        urlset = etree.Element(NS + "urlset", nsmap=NSMAP)

        counter = 0

        # do the static pages: resolve every nav route to a full url, drop the
        # ones that could not be resolved, de-duplicate and emit in sorted order
        _entries = nav.get_nav_entries()
        _routes = nav.yield_all_route(_entries)
        _urls = (get_full_url_safe(r) for r in _routes)
        _urls = sorted(set(filter(None, _urls)))
        for u in _urls:
            create_url_element(urlset, u, toc_changefreq)
            counter += 1

        # do all the journal ToCs
        for j in models.Journal.all_in_doaj():
            # first create an entry purely for the journal
            toc_loc = base_url + "toc/" + j.toc_id
            create_url_element(urlset, toc_loc, toc_changefreq, lastmod=j.last_updated)
            counter += 1

        # record how many urls went into the sitemap
        action_register.append("{x} urls written to sitemap".format(x=counter))

        # save it into the temp store
        tree = etree.ElementTree(urlset)
        with open(out, "wb") as f:
            tree.write(f, encoding="UTF-8", xml_declaration=True, pretty_print=True)

        # ~~->FileStore:Feature~~
        mainStore = StoreFactory.get("cache")
        try:
            mainStore.store(container_id, filename, source_path=out)
            url = mainStore.url(container_id, filename)
        finally:
            # don't delete the container, just in case someone else is writing to it
            tmpStore.delete_file(container_id, filename)

        action_register.append("Sitemap written to store with url {x}".format(x=url))

        # remove all but the two latest sitemaps
        if prune:
            def sort(filelist):
                # newest first, ordered by the timestamp embedded in the file name;
                # relies on the list having been narrowed by _filter beforehand
                # (presumably prune_container filters before sorting - verify)
                return sorted(filelist, key=self._sitemap_timestamp, reverse=True)

            def _filter(filename):
                # only consider files whose timestamp can actually be parsed, so
                # that sort() never meets a name it cannot order
                return self._sitemap_timestamp(filename) is not None

            action_register += prune_container(mainStore, container_id, sort, filter=_filter, keep=2)

        # update the ES record to point to the new file
        # ~~->Cache:Feature~~
        models.Cache.cache_sitemap(url)
        return url, action_register