Coverage for portality / core.py: 84%
169 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1import os
2import threading
3import yaml
4import json
6from flask import Flask
7from flask_login import LoginManager
8from flask_cors import CORS
9from jinja2 import FileSystemLoader
10from lxml import etree
12from portality import settings, constants, datasets
13from portality.bll import exceptions, DOAJ
14from portality.error_handler import setup_error_logging
15from portality.lib import es_data_mapping, dates, paths
16from portality.ui.debug_toolbar import DoajDebugToolbar
17from portality.ui import templates
19import elasticsearch
21login_manager = LoginManager()
@login_manager.user_loader
def load_account_for_login_manager(userid):
    """
    ~~LoginManager:Feature->Account:Model~~

    User-loader callback registered on the module's LoginManager: look up the
    account for the given id via ``models.Account.pull``.

    :param userid: identifier passed in by flask-login
    :return: whatever ``models.Account.pull(userid)`` returns
    """
    # imported here rather than at module level, as in the original
    # (presumably to avoid a circular import - confirm before moving it)
    from portality import models
    return models.Account.pull(userid)
def create_app():
    """
    ~~CreateApp:Framework->Flask:Technology~~

    Build the Flask application and run every start-up step against it, in a
    fixed order: configuration, error logging, Jinja, Crossref schema loading
    (on a background thread), login manager, CORS, APM, debug toolbar,
    proxy fix and finally the static content build.

    :return: the configured Flask application
    """
    flask_app = Flask(__name__)

    # ~~->AppSettings:Config~~
    configure_app(flask_app)

    #~~->ErrorHandler:Feature~~
    setup_error_logging(flask_app)

    #~~->Jinja2:Environment~~
    setup_jinja(flask_app)

    #~~->CrossrefXML:Feature~~
    # The schemas are parsed on a daemon thread; the thread object is stored in
    # the config under LOAD_CROSSREF_THREAD before it is started.
    crossref_loader = threading.Thread(target=load_crossref_schema, args=(flask_app, ), daemon=True)
    flask_app.config["LOAD_CROSSREF_THREAD"] = crossref_loader
    crossref_loader.start()

    #~~->LoginManager:Feature~~
    login_manager.init_app(flask_app)

    #~~->CORS:Framework~~
    CORS(flask_app)

    #~~->APM:Feature~~
    initialise_apm(flask_app)

    #~~->DebugToolbar:Framework~~
    DoajDebugToolbar(flask_app)

    #~~->ProxyFix:Framework~~
    proxyfix(flask_app)

    #~~->CMS:Build~~
    build_statics(flask_app)

    return flask_app
66##################################################
67# Configure the App
def configure_app(app):
    """
    Configure the DOAJ from:
     a) the settings.py file
     b) the <env>.cfg file
     c) the local secrets config in app.cfg
     d) the ansible-generated instance config

    instance.cfg is generated by ansible script (server_initial_setup.yml from sysadmin repo)

    Later imports have precedence, so e.g. instance.cfg overrides app.cfg, and app.cfg overrides the same
    setting in production.cfg and settings.py.

    :param app: the Flask application whose config is populated in place
    :return: None
    """

    # layer (a): settings.py
    app.config.from_object(settings)

    proj_root = paths.get_project_root().as_posix()
    env_name = get_app_env(app)
    app.config['DOAJENV'] = env_name

    # layers (b), (c), (d), in increasing order of precedence; each entry is
    # (path of the .cfg file, message prefix printed once it has been loaded)
    optional_layers = (
        (os.path.join(proj_root, env_name + '.cfg'), 'Loaded environment config from '),
        (os.path.join(proj_root, 'app.cfg'), 'Loaded secrets config from '),
        (os.path.join(proj_root, 'instance.cfg'), 'Loaded instance specific config from '),
    )

    print('Running in ' + env_name)  # the app.logger is not set up yet (?)

    for cfg_path, loaded_prefix in optional_layers:
        # a missing file is simply skipped
        if os.path.exists(cfg_path):
            app.config.from_pyfile(cfg_path)
            print(loaded_prefix + cfg_path)
def get_app_env(app):
    """
    Work out which environment the app is running in.

    The DOAJENV environment variable is consulted first; if it is unset or
    empty, the first line of a ``.env`` file (located via
    ``paths.rel2abs(__file__, '../.env')``) is used when that file exists.

    :param app: application whose config provides VALID_ENVIRONMENTS
    :return: the environment name
    :raises Exception: if VALID_ENVIRONMENTS is not configured, or if no
        environment could be determined, or it is not one of the valid values
    """
    valid_envs = app.config.get('VALID_ENVIRONMENTS')
    if not valid_envs:
        raise Exception("VALID_ENVIRONMENTS must be set in the config. There shouldn't be a reason to change it in different set ups, or not have it.")

    env = os.environ.get('DOAJENV')
    if not env:
        # fall back to the .env file
        envpath = paths.rel2abs(__file__, '../.env')
        if os.path.exists(envpath):
            with open(envpath, 'r') as env_file:
                env = env_file.readline().strip()

    if env and env in valid_envs:
        return env

    guidance = """
Set the DOAJENV environment variable when running the app, guessing is futile and fraught with peril.
DOAJENV=test python portality/app.py
to run the app will do.
Or use the supervisord options - put this in the config: environment= DOAJENV="test" .

Finally, you can create a file called .env with the text e.g. 'dev' in the root of the repo.
Recommended only for dev environments so you don't have to bother specifying it each time you run a script or test.

Valid values are: {valid_doajenv_vals}

You can put environment-specific secret settings in <environment>.cfg , e.g. dev.cfg .

The environment specified in the DOAJENV environment variable will override that specified in the
application configuration (settings.py or app.cfg).
""".format(valid_doajenv_vals=', '.join(valid_envs))
    raise Exception(guidance)
141################################################
142# Crossref setup
def load_crossref_schema(app):
    """
    ~~CrossrefXML:Feature->CrossrefXML:Schema~~

    Parse the Crossref 4.4.2 and 5.3.1 XML schemas named in
    ``app.config["SCHEMAS"]`` and store the compiled schema objects in the
    config as CROSSREF442_SCHEMA and CROSSREF531_SCHEMA. A schema whose config
    entry is already set (truthy) is left alone.

    :param app: the Flask application whose config is read and updated
    :return: None
    :raises exceptions.IngestException: if a schema cannot be parsed/compiled;
        the underlying error is passed as ``inner``
    """
    schema_paths = app.config["SCHEMAS"]

    # (config key that receives the compiled schema, path of the schema file)
    targets = (
        ("CROSSREF442_SCHEMA", schema_paths.get("crossref442")),
        ("CROSSREF531_SCHEMA", schema_paths.get("crossref531")),
    )

    for config_key, path in targets:
        if app.config.get(config_key):
            continue
        try:
            schema_doc = etree.parse(path)
            app.config[config_key] = etree.XMLSchema(schema_doc)
        except Exception as e:
            # str(path): an unconfigured path is None, and plain concatenation
            # would raise TypeError here, hiding the IngestException
            raise exceptions.IngestException(
                message="There was an error attempting to load schema from " + str(path), inner=e) from e
174############################################
175# Elasticsearch initialisation
def create_es_connection(app):
    """
    ~~ElasticConnection:Framework->Elasticsearch:Technology~~

    Build an Elasticsearch client from the application config.

    :param app: application providing ELASTICSEARCH_HOSTS (required),
        ELASTIC_SEARCH_VERIFY_CERTS (default True) and
        ELASTICSEARCH_REQ_TIMEOUT (default 15)
    :return: the ``elasticsearch.Elasticsearch`` client
    """
    hosts = app.config['ELASTICSEARCH_HOSTS']
    verify_certs = app.config.get("ELASTIC_SEARCH_VERIFY_CERTS", True)
    req_timeout = app.config.get('ELASTICSEARCH_REQ_TIMEOUT', 15)
    return elasticsearch.Elasticsearch(hosts, verify_certs=verify_certs, timeout=req_timeout)
def prepare_type(es_type):
    """ Ensure a type has an index correctly prepared - e.g. LCC on app startup

    Uses the module-level ``app`` and ``es_connection``. Nothing is done if
    the prefixed alias for the type already exists.

    :param es_type: name of a single index type, without the configured prefix
    :return: None
    """
    expected_alias = app.config['ELASTIC_SEARCH_DB_PREFIX'] + es_type

    if not es_connection.indices.exists(expected_alias):
        # Wrap the name in a list: initialise_index filters with
        # `key in only_mappings`, which on a bare string is a substring test
        # and would also select any type whose name is contained in es_type.
        initialise_index(app, es_connection, only_mappings=[es_type])
def put_mappings(conn, mappings, force_mappings=False):
    """
    Make sure an index and alias exist for every type in ``mappings``.

    For each type the alias name is the configured ELASTIC_SEARCH_DB_PREFIX
    plus the type name. If that alias/index already exists it is left alone,
    unless ``force_mappings`` is set, in which case the type's "mappings"
    section is re-sent to it. Otherwise a new timestamped index is created
    from the full mapping body and the alias is pointed at it.

    All Elasticsearch calls go through ``conn``; configuration is read from
    the module-level ``app``.

    :param conn: Elasticsearch client to operate on
    :param mappings: dict of type name -> index body (as passed to indices.create)
    :param force_mappings: put the mapping to an index that already exists
    :return: None
    """
    for key, mapping in mappings.items():
        altered_key = app.config['ELASTIC_SEARCH_DB_PREFIX'] + key

        # If the alias exists, we don't automatically create any new indices (app already initialised)
        if conn.indices.exists(altered_key):
            if force_mappings:
                resp = conn.indices.put_mapping(index=altered_key,
                                                body=mapping.get("mappings"),
                                                request_timeout=app.config.get("ES_SOCKET_TIMEOUT", None))
                print("Updating mapping via alias {0} for {1}; status: {2}".format(altered_key, key, resp))
            else:
                print("Alias {0} already exists for type {1}".format(altered_key, key))
            continue

        print("Preparing new index / alias for " + key)
        # Set up a new index and corresponding alias
        idx_name = altered_key + '-{}'.format(dates.now_str(dates.FMT_DATETIME_LONG))

        try:
            # use the connection we were given, not the module-level one
            resp = conn.indices.create(index=idx_name,
                                       body=mapping,
                                       request_timeout=app.config.get("ES_SOCKET_TIMEOUT", None))
            print("Initialised index: {}".format(resp['index']))
        except elasticsearch.exceptions.RequestError as e:
            # best effort: report the failure and still attempt the alias below
            print('Could not create index: ' + str(e))

        # This can be used to throttle the alias creation until the index is definitely there
        # but seems to be unnecessary most of the time
        #
        # import time
        # for _ in range(300):
        #     if conn.indices.exists(index=idx_name):
        #         break
        #     time.sleep(0.1)
        # else:
        #     raise RuntimeError(f"Index {idx_name} was not found after waiting.")

        resp2 = conn.indices.put_alias(index=idx_name, name=altered_key)
        print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2))
def initialise_index(app, conn, only_mappings=None, force_mappings=False):
    """
    ~~InitialiseIndex:Framework->Elasticsearch:Technology~~

    Fetch the application's index mappings and send them to Elasticsearch via
    ``put_mappings``. Does nothing (other than log a warning) when the
    INITIALISE_INDEX config value is falsy, or when both READ_ONLY_MODE and
    SCRIPTS_READ_ONLY_MODE are set.

    :param app: application providing the config, logger and mappings
    :param conn: Elasticsearch client handed on to ``put_mappings``
    :param only_mappings: Init a subset of the index types; a collection of
        type names, or a single type name as a string
    :param force_mappings: Put the mapping to an index that already exists
    :return: None
    """
    if not app.config['INITIALISE_INDEX']:
        app.logger.warning('INITIALISE_INDEX config var is not True, initialise_index command cannot run')
        return

    if app.config.get("READ_ONLY_MODE", False) and app.config.get("SCRIPTS_READ_ONLY_MODE", False):
        app.logger.warning("System is in READ-ONLY mode, initialise_index command cannot run")
        return

    # get the app mappings
    mappings = es_data_mapping.get_mappings(app)

    if only_mappings is not None:
        # A bare string would turn `key in only_mappings` into a substring
        # test (a type name contained in the requested name would match too),
        # so treat a string as a single type name.
        wanted = [only_mappings] if isinstance(only_mappings, str) else only_mappings
        mappings = {key: value for key, value in mappings.items() if key in wanted}

    # Send the mappings to ES
    put_mappings(conn, mappings, force_mappings)
261##################################################
262# APM
def initialise_apm(app):
    """
    ~~APM:Feature->ElasticAPM:Technology~~

    Attach Elastic APM to the application when the ENABLE_APM config value is
    truthy; otherwise do nothing.

    :param app: the Flask application
    :return: None
    """
    if not app.config.get('ENABLE_APM', False):
        return

    # only imported when APM is actually switched on
    from elasticapm.contrib.flask import ElasticAPM
    app.logger.info("Configuring Elastic APM")
    ElasticAPM(app, logging=True)
276##################################################
277# proxyfix
def proxyfix(app):
    """
    ~~ProxyFix:Framework~~

    Wrap the application's WSGI callable in werkzeug's ProxyFix (with
    ``x_proto=1, x_host=1``) when the PROXIED config value is truthy;
    otherwise leave the application untouched.

    :param app: the Flask application
    :return: None
    """
    if not app.config.get('PROXIED', False):
        return

    from werkzeug.middleware.proxy_fix import ProxyFix
    wrapped = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)
    app.wsgi_app = wrapped
290##################################################
291# Jinja2
def setup_jinja(app):
    """
    Jinja2:Environment->Jinja2:Technology

    Add jinja extensions and other init-time config as needed: extensions,
    template globals, the CMS data files, the template search path and two
    custom filters (``debug`` and ``parse_json``).

    :param app: the Flask application whose jinja environment is configured
    :return: None
    """
    env = app.jinja_env

    for extension in ('jinja2.ext.do', 'jinja2.ext.loopcontrols'):
        env.add_extension(extension)

    env.globals.update({
        'getattr': getattr,
        'type': type,
        #~~->Constants:Config~~
        'constants': constants,
        'templates': templates,
        #~~-> Dates:Library~~
        'dates': dates,
        #~~->Datasets:Data~~
        'datasets': datasets,
        # ~~->DOAJ:Service~~
        'services': DOAJ,
    })

    _load_data(app)

    #~~->CMS:DataStore~~
    base_path = app.config['BASE_FILE_PATH']
    env.loader = FileSystemLoader([
        base_path + '/templates-v2',
        base_path + '/templates',
        os.path.dirname(base_path) + '/cms/fragments',
    ])

    # a jinja filter that prints its input (via print) and renders as nothing
    def jinja_debug(text):
        print(text)
        return ''

    # a jinja filter that parses a string to json; non-strings pass through unchanged
    def parse_json(value):
        if isinstance(value, str):
            return json.loads(value)
        return value

    env.filters['debug'] = jinja_debug
    env.filters['parse_json'] = parse_json
def _load_data(app):
    """
    Load every file in ``<BASE_FILE_PATH>/../cms/data`` as YAML and expose the
    results to templates under the ``data`` jinja global.

    Each file is stored under a key derived from its name: the part before
    the first ".", with "-" replaced by "_".

    :param app: the Flask application (its config and jinja globals are used)
    :return: None
    """
    # create the "data" global only if it is not already there
    store = app.jinja_env.globals.setdefault("data", {})

    datadir = os.path.join(app.config["BASE_FILE_PATH"], "..", "cms", "data")
    for filename in os.listdir(datadir):
        with open(os.path.join(datadir, filename)) as handle:
            parsed = yaml.load(handle, Loader=yaml.FullLoader)
        key = filename.partition(".")[0].replace("-", "_")
        store[key] = parsed
345##################################################
346# Static Content
def build_statics(app):
    """
    ~~CMS:Build->CMSFragments:Build~~
    ~~->CMSSASS:Build~~

    Compile the CMS fragments and the main SASS when the
    CMS_BUILD_ASSETS_ON_STARTUP config value is truthy; otherwise do nothing.

    :param app: the Flask application
    :return: None
    """
    if app.config.get("CMS_BUILD_ASSETS_ON_STARTUP", False):
        # the build modules are only imported when a build is requested
        from portality.cms import build_fragments, build_sass

        project_root = paths.get_project_root().as_posix()

        print("Compiling static content")
        build_fragments.build(project_root)

        print("Compiling main SASS")
        build_sass.build(build_sass.MAIN_SETTINGS, base_path=project_root)
# Module-level singletons, built as a side effect of importing this module.
# prepare_type() and put_mappings() in this file read these globals directly.
app = create_app()
# the connection is configured from the app's settings, so it must come second
es_connection = create_es_connection(app)