Coverage for portality / view / status.py: 14%

118 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1import json 

2import math 

3import os 

4import time 

5 

6import requests 

7from flask import Blueprint, make_response, url_for 

8 

9from portality import util 

10from portality.bll import DOAJ 

11from portality.core import app 

12 

# Flask blueprint named 'status'; the route handlers in this module register on it.
13blueprint = Blueprint('status', __name__) 

14 

15 

@blueprint.route('/stats')
@util.jsonp
def stats():
    """Report basic health facts about the machine serving this request.

    The JSON object in the response may contain:

    - ``inode_used_pc``: integer percentage of inodes in use on ``/``
    - ``writable``: ``True`` if a test file could be written under ``/tmp``,
      ``False`` otherwise
    - ``host``: the node name reported by ``os.uname()``

    Every probe is best-effort: a probe that cannot run leaves its key out
    (the write probe reports ``False``) instead of failing the request.

    :return: a response with mimetype ``application/json``
    """
    res = {}

    # Get inode use.
    # Disk and memory percentages are deliberately not reported: they can
    # currently be monitored directly by DO. psutil could be installed and
    # used to add them if we move from DO.
    try:
        st = os.statvfs('/')
        res['inode_used_pc'] = int((float(st.f_files - st.f_ffree) / st.f_files) * 100)
    except (AttributeError, OSError, ZeroDivisionError):
        # statvfs is missing on some platforms, can fail at the OS level, and
        # a filesystem may report zero total inodes
        pass

    # Test writing to filesystem
    ts = int(time.time())
    fn = '/tmp/status_test_write_' + str(ts) + '.txt'
    try:
        # context manager guarantees the handle is closed even if write fails
        with open(fn, "w") as f:
            f.write("I am a test at " + str(ts))
        res['writable'] = True
    except OSError:
        res['writable'] = False
    finally:
        # always try to clean up; the file may not exist if open() failed
        try:
            os.remove(fn)
        except OSError:
            pass

    # Retrieve the hostname
    try:
        res['host'] = os.uname()[1]
    except (AttributeError, OSError):
        # os.uname is not available on every platform
        pass

    # Return a JSON response
    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp

58 

59 

@blueprint.route('/')
@util.jsonp
def status():
    """Aggregate the health of app machines, Elasticsearch and background jobs.

    The JSON object in the response contains:

    - ``stable``: ``True`` only if every check below passed
    - ``ping.apps``: per app-machine URL, the parsed ``stats`` payload, or the
      HTTP status code / ``'UNREACHABLE'`` when the machine did not answer OK
    - ``ping.indices``: per Elasticsearch address, the HTTP status code
    - ``index``: shard totals, per-index doc counts and sizes, and the
      cluster health document (only present when ``/_stats`` could be read)
    - ``background``: the background task status service's report
    - ``notes``: one human-readable note per check; a note is lower-case when
      the check passed and upper-case when it failed

    ``stable`` only ever moves from ``True`` to ``False``: a later passing
    check never hides an earlier failure.

    :return: a response with mimetype ``application/json``
    """
    res = {
        'stable': True,
        'ping': {'apps': {}, 'indices': {}},
        'background': {'status': 'Background jobs are stable', 'info': []},
        'notes': [],
    }

    # to get monitoring on this, use uptime robot or similar to check that the status page
    # contains the 'stable': True string and the following note strings

    app_note = 'apps reachable'
    app_unreachable = 0
    inodes_note = 'inode use on app machines below 95%'
    inodes_high = 0
    writable_note = 'app machines can write to disk'
    not_writable = 0
    # disk and memory notes are not produced because stats() does not report
    # disk_used_pc / memory_used_pc
    es_note = 'indexes stable'
    es_unreachable = 0
    cluster_note = 'cluster stable'

    for addr in app.config.get('APP_MACHINES_INTERNAL_IPS', []):
        if not addr.startswith('http'):
            addr = 'http://' + addr
        addr += url_for('.stats')

        # NOTE(review): no timeout is set here, so a hung app machine will
        # block this endpoint - consider adding one as for the ES ping below
        try:
            r = requests.get(addr)
        except requests.exceptions.RequestException:
            # requests raises its own exception hierarchy; the builtin
            # ConnectionError does not cover it
            res['ping']['apps'][addr] = 'UNREACHABLE'
            res['stable'] = False
            app_unreachable += 1
            app_note = str(app_unreachable) + ' APPS UNREACHABLE'
            continue

        if r.status_code != 200:
            res['ping']['apps'][addr] = r.status_code
            res['stable'] = False
            app_unreachable += 1
            app_note = str(app_unreachable) + ' APPS UNREACHABLE'
            continue

        report = r.json()
        res['ping']['apps'][addr] = report
        try:
            if report.get('inode_used_pc', 0) >= 95:
                inodes_high += 1
                inodes_note = 'INODE GREATER THAN 95% ON ' + str(inodes_high) + ' APP MACHINES'
            if report.get('writable', False) != True:
                not_writable += 1
                writable_note = 'WRITE FAILURE ON ' + str(not_writable) + ' APP MACHINES'
        except (AttributeError, TypeError):
            # payload was not the expected JSON object; skip these checks
            pass

    res['notes'].append(app_note)
    res['notes'].append(inodes_note)
    res['notes'].append(writable_note)

    # check that all necessary ES nodes can actually be pinged from this machine
    es_addr = None
    for eddr in app.config['ELASTICSEARCH_HOSTS']:
        # a host is either a ready-made address string or a host/port mapping
        es_addr = eddr
        if not isinstance(eddr, str):
            es_addr = f'http://{eddr["host"]}:{eddr["port"]}'

        try:
            r = requests.get(es_addr, timeout=3)
        except requests.exceptions.RequestException:
            es_ok = False
        else:
            res['ping']['indices'][es_addr] = r.status_code
            es_ok = r.status_code == 200

        if not es_ok:
            res['stable'] = False
            es_unreachable += 1
            es_note = str(es_unreachable) + ' INDEXES UNREACHABLE'
    res['notes'].append(es_note)

    # query ES for cluster health and nodes up (uses the last ES host in config)
    try:
        es = requests.get(es_addr + '/_stats').json()
        res['index'] = {
            'cluster': {},
            'shards': {
                'total': es['_shards']['total'],
                'successful': es['_shards']['successful'],
            },
            'indices': {},
        }
        for k, v in es['indices'].items():
            # NOTE(review): ceil is applied to the byte count, not the MB
            # value, so the size is effectively truncated - confirm intent
            res['index']['indices'][k] = {
                'docs': v['primaries']['docs']['count'],
                'size': int(math.ceil(v['primaries']['store']['size_in_bytes']) / 1024 / 1024),
            }
        try:
            ces = requests.get(es_addr + '/_cluster/health')
            res['index']['cluster'] = ces.json()
            if res['index']['cluster']['status'] != 'green':
                res['stable'] = False
                cluster_note = 'CLUSTER UNSTABLE'
        except Exception:
            res['stable'] = False
            cluster_note = 'CLUSTER UNSTABLE'
    except Exception:
        # covers no configured host, network failure, non-JSON body and
        # missing keys alike: any of them means the cluster cannot be vouched for
        res['stable'] = False
        cluster_note = 'CLUSTER UNSTABLE'
    res['notes'].append(cluster_note)

    # check background jobs
    # ~~BackgroundTask:Monitoring~~
    bgtask_status_service = DOAJ.backgroundTaskStatusService()
    res['background'] = bgtask_status_service.create_background_status()
    if not bgtask_status_service.is_stable(res['background'].get('status')):
        res['stable'] = False

    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp