Coverage for portality / view / status.py: 14%
118 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1import json
2import math
3import os
4import time
6import requests
7from flask import Blueprint, make_response, url_for
9from portality import util
10from portality.bll import DOAJ
11from portality.core import app
# Flask blueprint collecting the status/health-check endpoints defined below.
blueprint = Blueprint('status', __name__)
@blueprint.route('/stats')
@util.jsonp
def stats():
    """Report this machine's health: inode usage, disk writability, hostname.

    Every probe is best-effort: a failed probe simply omits its key (or sets
    'writable' to False) rather than failing the whole endpoint.

    :return: a JSON response containing any of 'inode_used_pc', 'writable'
        and 'host' that could be determined on this machine.
    """
    res = {}

    # Get inode use (percentage of inodes consumed on the root filesystem).
    # `except Exception` (not bare `except:`) so Ctrl-C / SystemExit still
    # propagate; also covers a zero f_files division on exotic filesystems.
    try:
        st = os.statvfs('/')
        res['inode_used_pc'] = int((float(st.f_files - st.f_ffree) / st.f_files) * 100)
        # could complete this by installing and using psutil but as disk and memory can currently
        # be monitored directly by DO, no current need - can change if we move from DO
        #res['disk_used_pc'] = int((float(st.f_blocks-st.f_bavail)/st.f_blocks)*100)
        #res['memory_used_pc'] = 0
    except Exception:
        pass

    # Test writing to filesystem; 'with' guarantees the handle is closed
    # even if the write itself raises (the old code leaked it in that case).
    ts = int(time.time())
    fn = '/tmp/status_test_write_' + str(ts) + '.txt'
    try:
        with open(fn, "w") as f:
            f.write("I am a test at " + str(ts))
        res['writable'] = True
    except Exception:
        res['writable'] = False
    try:
        os.remove(fn)
    except OSError:
        # best-effort cleanup; the file may never have been created
        pass

    # Retrieve the hostname (os.uname is unavailable on some platforms,
    # e.g. Windows, where it raises AttributeError).
    try:
        hn = os.uname()[1]
        res['host'] = hn
    except Exception:
        pass

    # Return a JSON response
    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp
@blueprint.route('/')
@util.jsonp
def status():
    """Aggregate health check across the whole deployment.

    Pings each configured app machine's /stats endpoint, pings every
    configured Elasticsearch host, queries ES index/cluster stats, and
    reports background-job health.

    :return: a JSON response. 'stable' is True only if every check passed;
        external monitoring should assert on the 'stable': True string and
        the note strings assembled below.
    """
    res = {'stable': True, 'ping': {'apps': {}, 'indices': {}}, 'background': {'status': 'Background jobs are stable', 'info': []}, 'notes': []}

    # to get monitoring on this, use uptime robot or similar to check that the status page
    # contains the 'stable': True string and the following note strings
    app_note = 'apps reachable'
    app_unreachable = 0
    inodes_note = 'inode use on app machines below 95%'
    inodes_high = 0
    writable_note = 'app machines can write to disk'
    not_writable = 0
    #disk_note = 'disk use on app machines below 95%'
    #disk_high = 0
    #memory_note = 'memory use on app machines below 95%'
    #memory_high = 0
    es_note = 'indexes stable'
    es_unreachable = 0
    indexable_note = 'index accepts index/delete operations'  # NOTE(review): currently unused - confirm whether an index/delete probe was intended
    cluster_note = 'cluster stable'

    for addr in app.config.get('APP_MACHINES_INTERNAL_IPS', []):
        if not addr.startswith('http'): addr = 'http://' + addr
        addr += url_for('.stats')
        try:
            r = requests.get(addr)
        # FIX: requests raises requests.exceptions.ConnectionError, which is
        # NOT a subclass of the builtin ConnectionError the old code caught,
        # so an unreachable app used to crash this view. RequestException
        # also covers timeouts and other transport failures.
        except requests.exceptions.RequestException:
            app_note = "UNREACHABLE: " + addr
            continue
        res['ping']['apps'][addr] = r.status_code if r.status_code != 200 else r.json()
        if r.status_code == 200:
            # Only a 200 response stored a stats dict above; the old code
            # called .get() on an int for non-200 and relied on a bare
            # except to swallow the AttributeError.
            app_stats = res['ping']['apps'][addr]
            try:
                if app_stats.get('inode_used_pc', 0) >= 95:
                    inodes_high += 1
                    inodes_note = 'INODE GREATER THAN 95% ON ' + str(inodes_high) + ' APP MACHINES'
                if app_stats.get('writable', False) != True:
                    not_writable += 1
                    writable_note = 'WRITE FAILURE ON ' + str(not_writable) + ' APP MACHINES'
                #if app_stats.get('disk_used_pc',0) >= 95:
                #    disk_high += 1
                #    disk_note = 'DISK USE GREATER THAN 95% ON ' + disk_high + ' APP MACHINES'
                #if app_stats.get('memory_used_pc',0) >= 95:
                #    memory_high += 1
                #    memory_note = 'MEMORY USE GREATER THAN 95% ON ' + memory_high + ' APP MACHINES'
            except Exception:
                # malformed stats payload - skip the detail checks, best-effort
                pass
        else:
            res['stable'] = False
            app_unreachable += 1
            app_note = str(app_unreachable) + ' APPS UNREACHABLE'
    res['notes'].append(app_note)
    res['notes'].append(inodes_note)
    res['notes'].append(writable_note)
    #res['notes'].append(disk_note)
    #res['notes'].append(memory_note)

    # check that all necessary ES nodes can actually be pinged from this machine
    es_addr = None
    for eddr in app.config['ELASTICSEARCH_HOSTS']:
        # FIX: the old code assigned 'es_eddr' (a typo) for string hosts,
        # leaving 'es_addr' undefined (or stale from the previous iteration)
        # whenever a host was configured as a plain string.
        if isinstance(eddr, str):
            es_addr = eddr
        else:
            es_addr = f'http://{eddr["host"]}:{eddr["port"]}'
        try:
            r = requests.get(es_addr, timeout=3)
            res['ping']['indices'][es_addr] = r.status_code
            # FIX: only ever downgrade 'stable' - the old unconditional
            # assignment could reset it to True after an earlier failure.
            if r.status_code != 200:
                raise Exception('ES is not OK - status is {}'.format(r.status_code))
        except Exception:
            res['stable'] = False
            es_unreachable += 1
            es_note = str(es_unreachable) + ' INDEXES UNREACHABLE'
    res['notes'].append(es_note)

    # query ES for cluster health and nodes up (uses last ES host in config)
    try:
        es = requests.get(es_addr + '/_stats').json()
        # FIX: ceil the size *in MB*, not the raw byte count - the old
        # parenthesisation applied math.ceil to an integer byte count and
        # then truncated, so sizes were floored instead of ceiled.
        res['index'] = { 'cluster': {}, 'shards': { 'total': es['_shards']['total'], 'successful': es['_shards']['successful'] }, 'indices': {} }
        for k, v in es['indices'].items():
            res['index']['indices'][k] = { 'docs': v['primaries']['docs']['count'], 'size': int(math.ceil(v['primaries']['store']['size_in_bytes'] / 1024 / 1024)) }
        try:
            ces = requests.get(es_addr + '/_cluster/health')
            res['index']['cluster'] = ces.json()
            # FIX: only downgrade 'stable' here too (same reset bug as above)
            if res['index']['cluster']['status'] != 'green':
                res['stable'] = False
                cluster_note = 'CLUSTER UNSTABLE'
        except Exception:
            res['stable'] = False
            cluster_note = 'CLUSTER UNSTABLE'
    except Exception:
        res['stable'] = False
        cluster_note = 'CLUSTER UNSTABLE'
    res['notes'].append(cluster_note)

    # check background jobs
    # ~~BackgroundTask:Monitoring~~
    bgtask_status_service = DOAJ.backgroundTaskStatusService()
    res['background'] = bgtask_status_service.create_background_status()
    if not bgtask_status_service.is_stable(res['background'].get('status')):
        res['stable'] = False

    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp