Coverage for portality / core.py: 84%

169 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1import os 

2import threading 

3import yaml 

4import json 

5 

6from flask import Flask 

7from flask_login import LoginManager 

8from flask_cors import CORS 

9from jinja2 import FileSystemLoader 

10from lxml import etree 

11 

12from portality import settings, constants, datasets 

13from portality.bll import exceptions, DOAJ 

14from portality.error_handler import setup_error_logging 

15from portality.lib import es_data_mapping, dates, paths 

16from portality.ui.debug_toolbar import DoajDebugToolbar 

17from portality.ui import templates 

18 

19import elasticsearch 

20 

# Shared flask_login manager; it is attached to the Flask app inside create_app()
login_manager = LoginManager()


@login_manager.user_loader
def load_account_for_login_manager(userid):
    """
    ~~LoginManager:Feature->Account:Model~~

    User-loader callback registered on the module-level ``login_manager``.

    :param userid: identifier handed straight to ``models.Account.pull``
    :return: whatever ``models.Account.pull(userid)`` returns
    """
    # NOTE(review): imported at call time rather than at module level - presumably to
    # avoid a circular import with portality.models; confirm before hoisting.
    from portality import models
    return models.Account.pull(userid)

34 

35 

def create_app():
    """
    ~~CreateApp:Framework->Flask:Technology~~

    Build the Flask application and run each start-up step against it, in a fixed order.

    :return: the configured Flask application
    """
    flask_app = Flask(__name__)

    # ~~->AppSettings:Config~~
    configure_app(flask_app)
    # ~~->ErrorHandler:Feature~~
    setup_error_logging(flask_app)
    # ~~->Jinja2:Environment~~
    setup_jinja(flask_app)

    # ~~->CrossrefXML:Feature~~
    # The schemas are loaded on a daemon thread; the thread object is kept in the
    # config under LOAD_CROSSREF_THREAD so other code can get hold of it.
    crossref_loader = threading.Thread(target=load_crossref_schema, args=(flask_app, ), daemon=True)
    flask_app.config["LOAD_CROSSREF_THREAD"] = crossref_loader
    crossref_loader.start()

    # Remaining single-argument start-up hooks, executed strictly in this order
    startup_hooks = (
        login_manager.init_app,  # ~~->LoginManager:Feature~~
        CORS,                    # ~~->CORS:Framework~~
        initialise_apm,          # ~~->APM:Feature~~
        DoajDebugToolbar,        # ~~->DebugToolbar:Framework~~
        proxyfix,                # ~~->ProxyFix:Framework~~
        build_statics,           # ~~->CMS:Build~~
    )
    for hook in startup_hooks:
        hook(flask_app)

    return flask_app

64 

65 

66################################################## 

67# Configure the App 

68 

def configure_app(app):
    """
    Populate ``app.config`` for the DOAJ from, in this order:
      a) the settings.py module
      b) the <env>.cfg file
      c) the local secrets config in app.cfg
      d) the ansible-generated instance config, instance.cfg
         (generated by server_initial_setup.yml from the sysadmin repo)

    Each source is applied on top of the previous one, so later sources win:
    instance.cfg overrides app.cfg, which overrides <env>.cfg, which overrides settings.py.
    The three .cfg files are optional; a file that does not exist is skipped silently.

    :param app: the Flask application to configure
    :return:
    """
    # a) defaults from settings.py
    app.config.from_object(settings)

    project_root = paths.get_project_root().as_posix()

    # the environment name decides which <env>.cfg is looked for
    environment = get_app_env(app)
    app.config['DOAJENV'] = environment
    print('Running in ' + environment)  # the app.logger is not set up yet (?)

    # b), c), d) - (file name, message printed once the file has been loaded)
    config_layers = (
        (environment + '.cfg', 'Loaded environment config from '),
        ('app.cfg', 'Loaded secrets config from '),
        ('instance.cfg', 'Loaded instance specific config from '),
    )
    for filename, loaded_message in config_layers:
        layer_path = os.path.join(project_root, filename)
        if not os.path.exists(layer_path):
            continue
        app.config.from_pyfile(layer_path)
        print(loaded_message + layer_path)

106 

107 

def get_app_env(app):
    """
    Work out which environment the app is running in.

    The DOAJENV environment variable is consulted first. If it is unset or empty, the
    first line of the ``.env`` file located via ``paths.rel2abs(__file__, '../.env')``
    is used instead, when that file exists.

    :param app: application whose config supplies VALID_ENVIRONMENTS
    :return: the environment name, guaranteed to be one of VALID_ENVIRONMENTS
    :raises Exception: if VALID_ENVIRONMENTS is missing or empty, or if no valid
        environment name could be determined
    """
    valid_envs = app.config.get('VALID_ENVIRONMENTS')
    if not valid_envs:
        raise Exception('VALID_ENVIRONMENTS must be set in the config. There shouldn\'t be a reason to change it in different set ups, or not have it.')

    chosen = os.getenv('DOAJENV')
    if not chosen:
        # fall back to the .env file
        dotenv_path = paths.rel2abs(__file__, '../.env')
        if os.path.exists(dotenv_path):
            with open(dotenv_path, 'r') as handle:
                chosen = handle.readline().strip()

    if chosen and chosen in valid_envs:
        return chosen

    # nothing usable found: explain every way the environment can be supplied
    raise Exception(
"""
Set the DOAJENV environment variable when running the app, guessing is futile and fraught with peril.
DOAJENV=test python portality/app.py
to run the app will do.
Or use the supervisord options - put this in the config: environment= DOAJENV="test" .

Finally, you can create a file called .env with the text e.g. 'dev' in the root of the repo.
Recommended only for dev environments so you don't have to bother specifying it each time you run a script or test.

Valid values are: {valid_doajenv_vals}

You can put environment-specific secret settings in <environment>.cfg , e.g. dev.cfg .

The environment specified in the DOAJENV environment variable will override that specified in the
application configuration (settings.py or app.cfg).
""".format(valid_doajenv_vals=', '.join(valid_envs))
    )

139 

140 

141################################################ 

142# Crossref setup 

143 

def load_crossref_schema(app):
    """
    ~~CrossrefXML:Feature->CrossrefXML:Schema~~

    Parse the "crossref442" and "crossref531" schema files named in ``app.config["SCHEMAS"]``
    and store the compiled ``etree.XMLSchema`` objects in the config under
    CROSSREF442_SCHEMA and CROSSREF531_SCHEMA respectively. A schema whose config entry
    is already set (truthy) is not loaded again.

    :param app: application whose config holds the SCHEMAS mapping and receives the results
    :return:
    :raises exceptions.IngestException: if a schema file cannot be parsed or compiled;
        the underlying error is passed along as ``inner``
    """
    schema_paths = app.config["SCHEMAS"]

    # (config key the compiled schema is stored under, key of its path in SCHEMAS)
    targets = (
        ("CROSSREF442_SCHEMA", "crossref442"),
        ("CROSSREF531_SCHEMA", "crossref531"),
    )

    for config_key, schema_name in targets:
        if app.config.get(config_key):
            continue

        path = schema_paths.get(schema_name)
        try:
            schema_doc = etree.parse(path)
            app.config[config_key] = etree.XMLSchema(schema_doc)
        except Exception as e:
            # Format rather than concatenate: path is None when the schema is not
            # configured, and "..." + None would raise a TypeError here that hides
            # the real failure.
            raise exceptions.IngestException(
                message="There was an error attempting to load schema from {0}".format(path), inner=e) from e

172 

173 

174############################################ 

175# Elasticsearch initialisation 

176 

def create_es_connection(app):
    """
    Create an Elasticsearch client from the application config.

    :param app: application whose config supplies ELASTICSEARCH_HOSTS (required),
        ELASTIC_SEARCH_VERIFY_CERTS (default True) and ELASTICSEARCH_REQ_TIMEOUT (default 15)
    :return: the new ``elasticsearch.Elasticsearch`` instance
    """
    # ~~ElasticConnection:Framework->Elasticsearch:Technology~~
    cfg = app.config
    return elasticsearch.Elasticsearch(
        cfg['ELASTICSEARCH_HOSTS'],
        verify_certs=cfg.get("ELASTIC_SEARCH_VERIFY_CERTS", True),
        timeout=cfg.get('ELASTICSEARCH_REQ_TIMEOUT', 15),
    )

184 

185 

def prepare_type(es_type):
    """
    Ensure a type has an index correctly prepared - e.g. LCC on app startup

    Uses the module-level ``app`` and ``es_connection``. Nothing is done when the
    prefixed alias for the type already exists.

    :param es_type: name of a single index type (a string; it is appended to
        ELASTIC_SEARCH_DB_PREFIX to form the alias name)
    :return:
    """
    expected_alias = app.config['ELASTIC_SEARCH_DB_PREFIX'] + es_type

    if es_connection.indices.exists(expected_alias):
        return

    # Wrap the name in a list: initialise_index filters with `key in only_mappings`,
    # which on a bare string is a substring test and would also select any mapping
    # whose name happens to be contained in es_type.
    initialise_index(app, es_connection, only_mappings=[es_type])

192 

193 

def put_mappings(conn, mappings, force_mappings=False):
    """
    Send index mappings to Elasticsearch, creating a new index and alias for each type
    that does not have one yet.

    For every ``key`` in ``mappings`` the alias name is ELASTIC_SEARCH_DB_PREFIX + key
    (read from the module-level ``app``):
      * alias/index exists and ``force_mappings`` is true: the ``"mappings"`` part of the
        mapping is put to it
      * alias/index exists otherwise: nothing is changed, a message is printed
      * it does not exist: a timestamped index is created from the full mapping and the
        alias is pointed at it

    All Elasticsearch calls go through ``conn``.

    :param conn: Elasticsearch connection to operate on
    :param mappings: dict of type name -> index definition (must offer ``.get("mappings")``)
    :param force_mappings: Put the mapping to an index that already exists
    :return:
    """
    request_timeout = app.config.get("ES_SOCKET_TIMEOUT", None)

    for key, mapping in mappings.items():
        altered_key = app.config['ELASTIC_SEARCH_DB_PREFIX'] + key

        # If the alias exists, we don't automatically create any new indices (app already initialised)
        if conn.indices.exists(altered_key):
            if force_mappings:
                r = conn.indices.put_mapping(index=altered_key, body=mapping.get("mappings"), request_timeout=request_timeout)
                print("Updating mapping via alias {0} for {1}; status: {2}".format(altered_key, key, r))
            else:
                print("Alias {0} already exists for type {1}".format(altered_key, key))
            continue

        print("Preparing new index / alias for " + key)
        # Set up a new index and corresponding alias
        idx_name = altered_key + '-{}'.format(dates.now_str(dates.FMT_DATETIME_LONG))

        try:
            # use the connection we were given, not the module-level es_connection,
            # so the existence check above and the creation below hit the same cluster
            resp = conn.indices.create(index=idx_name,
                                       body=mapping,
                                       request_timeout=request_timeout)
            print("Initialised index: {}".format(resp['index']))
        except elasticsearch.exceptions.RequestError as e:
            # best effort: report the failure and still attempt the alias below
            print('Could not create index: ' + str(e))

        # If alias creation ever races the index creation, it can be throttled by polling
        # conn.indices.exists(index=idx_name) for a few seconds before this call and raising
        # if the index never appears. This seems to be unnecessary most of the time.
        resp2 = conn.indices.put_alias(index=idx_name, name=altered_key)
        print("Created alias: {:<25} -> {}, status {}".format(idx_name, altered_key, resp2))

232 

233 

def initialise_index(app, conn, only_mappings=None, force_mappings=False):
    """
    ~~InitialiseIndex:Framework->Elasticsearch:Technology~~

    Fetch the application's mappings and hand them to ``put_mappings``. Does nothing
    except log a warning when INITIALISE_INDEX is falsy, or when both READ_ONLY_MODE
    and SCRIPTS_READ_ONLY_MODE are set.

    :param app: application supplying the config, the logger and the mappings
    :param conn: Elasticsearch connection passed on to ``put_mappings``
    :param only_mappings: Init a subset of the index types. Types are selected with
        ``key in only_mappings``, so pass a collection of names - on a plain string
        that test is a substring match.
    :param force_mappings: Put the mapping to an index that already exists
    :return:
    """
    config = app.config

    if not config['INITIALISE_INDEX']:
        app.logger.warning('INITIALISE_INDEX config var is not True, initialise_index command cannot run')
        return

    read_only = config.get("READ_ONLY_MODE", False)
    scripts_read_only = config.get("SCRIPTS_READ_ONLY_MODE", False)
    if read_only and scripts_read_only:
        app.logger.warning("System is in READ-ONLY mode, initialise_index command cannot run")
        return

    # get the app mappings, narrowed to the requested types if a subset was asked for
    selected = es_data_mapping.get_mappings(app)
    if only_mappings is not None:
        selected = {name: definition for name, definition in selected.items() if name in only_mappings}

    # Send the mappings to ES
    put_mappings(conn, selected, force_mappings)

259 

260 

261################################################## 

262# APM 

263 

def initialise_apm(app):
    """
    ~~APM:Feature->ElasticAPM:Technology~~

    Attach Elastic APM to the application when ENABLE_APM is set in the config;
    otherwise do nothing.

    :param app: the Flask application
    :return:
    """
    if not app.config.get('ENABLE_APM', False):
        return

    # only imported when APM is actually enabled
    from elasticapm.contrib.flask import ElasticAPM
    app.logger.info("Configuring Elastic APM")
    ElasticAPM(app, logging=True)

274 

275 

276################################################## 

277# proxyfix 

278 

def proxyfix(app):
    """
    ~~ProxyFix:Framework~~

    Wrap the application's WSGI callable in werkzeug's ``ProxyFix`` (x_proto=1, x_host=1)
    when PROXIED is set in the config; otherwise leave the app untouched.

    :param app: the Flask application
    :return:
    """
    if not app.config.get('PROXIED', False):
        return

    # only imported when the app really is proxied
    from werkzeug.middleware.proxy_fix import ProxyFix
    wrapped = ProxyFix(app.wsgi_app, x_proto=1, x_host=1)
    app.wsgi_app = wrapped

288 

289 

290################################################## 

291# Jinja2 

292 

def setup_jinja(app):
    """
    Jinja2:Environment->Jinja2:Technology

    Add jinja extensions and other init-time config as needed: template globals,
    the CMS data files, the template search path and two custom filters.

    :param app: the Flask application whose ``jinja_env`` is configured
    :return:
    """
    env = app.jinja_env

    for extension in ('jinja2.ext.do', 'jinja2.ext.loopcontrols'):
        env.add_extension(extension)

    # names made available to every template
    env.globals.update({
        'getattr': getattr,
        'type': type,
        'constants': constants,  # ~~->Constants:Config~~
        'templates': templates,
        'dates': dates,          # ~~-> Dates:Library~~
        'datasets': datasets,    # ~~->Datasets:Data~~
        'services': DOAJ,        # ~~->DOAJ:Service~~
    })
    _load_data(app)

    # ~~->CMS:DataStore~~
    # templates are searched for in these directories, in this order
    base_path = app.config['BASE_FILE_PATH']
    env.loader = FileSystemLoader([
        base_path + '/templates-v2',
        base_path + '/templates',
        os.path.dirname(base_path) + '/cms/fragments',
    ])

    # a jinja filter that prints its input to stdout and renders as nothing
    def jinja_debug(text):
        print(text)
        return ''

    # a jinja filter that parses a string to json; non-strings pass through unchanged
    def parse_json(value):
        if isinstance(value, str):
            return json.loads(value)
        return value

    env.filters.update({'debug': jinja_debug, 'parse_json': parse_json})

331 

332 

333def _load_data(app): 

334 if not "data" in app.jinja_env.globals: 

335 app.jinja_env.globals["data"] = {} 

336 datadir = os.path.join(app.config["BASE_FILE_PATH"], "..", "cms", "data") 

337 for datafile in os.listdir(datadir): 

338 with open(os.path.join(datadir, datafile)) as f: 

339 data = yaml.load(f, Loader=yaml.FullLoader) 

340 dataname = datafile.split(".")[0] 

341 dataname = dataname.replace("-", "_") 

342 app.jinja_env.globals["data"][dataname] = data 

343 

344 

345################################################## 

346# Static Content 

347 

def build_statics(app):
    """
    ~~CMS:Build->CMSFragments:Build~~
    ~~->CMSSASS:Build~~

    Build the CMS fragments and the main SASS, but only when
    CMS_BUILD_ASSETS_ON_STARTUP is set in the config.

    :param app: the Flask application
    :return:
    """
    if app.config.get("CMS_BUILD_ASSETS_ON_STARTUP", False):
        # the build modules are only imported when a build is actually requested
        from portality.cms import build_fragments, build_sass

        project_root = paths.get_project_root().as_posix()

        print("Compiling static content")
        build_fragments.build(project_root)

        print("Compiling main SASS")
        build_sass.build(build_sass.MAIN_SETTINGS, base_path=project_root)

365 

366 

# Module-level singletons, created as a side effect of importing this module.
# prepare_type() and put_mappings() above read these globals at call time.
app = create_app()
# the connection is built from the config of the app created on the line above
es_connection = create_es_connection(app)