Coverage for portality / view / status.py: 14%
118 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1import json
2import math
3import os
4import time
6import requests
7from flask import Blueprint, make_response, url_for
9from portality import util
10from portality.bll import DOAJ
11from portality.core import app
# Flask blueprint collecting the status/health-check endpoints defined below.
blueprint = Blueprint('status', __name__)
@blueprint.route('/stats')
@util.jsonp
def stats():
    """Report this machine's health: inode usage, disk writability, hostname.

    Every probe is best-effort: a failed probe simply omits its key (or sets
    'writable' to False) rather than failing the whole endpoint.

    :return: a JSON response containing any of 'inode_used_pc', 'writable'
        and 'host' that could be determined on this machine.
    """
    res = {}

    # Get inode use (percentage of inodes consumed on the root filesystem).
    # `except Exception` (not bare `except:`) so Ctrl-C / SystemExit still
    # propagate; also covers a zero f_files division on exotic filesystems.
    try:
        st = os.statvfs('/')
        res['inode_used_pc'] = int((float(st.f_files - st.f_ffree) / st.f_files) * 100)
        # could complete this by installing and using psutil but as disk and memory can currently
        # be monitored directly by DO, no current need - can change if we move from DO
        #res['disk_used_pc'] = int((float(st.f_blocks-st.f_bavail)/st.f_blocks)*100)
        #res['memory_used_pc'] = 0
    except Exception:
        pass

    # Test writing to filesystem; 'with' guarantees the handle is closed
    # even if the write itself raises (the old code leaked it in that case).
    ts = int(time.time())
    fn = '/tmp/status_test_write_' + str(ts) + '.txt'
    try:
        with open(fn, "w") as f:
            f.write("I am a test at " + str(ts))
        res['writable'] = True
    except Exception:
        res['writable'] = False
    try:
        os.remove(fn)
    except OSError:
        # best-effort cleanup; the file may never have been created
        pass

    # Retrieve the hostname (os.uname is unavailable on some platforms,
    # e.g. Windows, where it raises AttributeError).
    try:
        hn = os.uname()[1]
        res['host'] = hn
    except Exception:
        pass

    # Return a JSON response
    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp
@blueprint.route('/')
@util.jsonp
def status():
    """Aggregate health check across the whole deployment.

    Pings each configured app machine's /stats endpoint, pings every
    configured Elasticsearch host, queries ES index/cluster stats, and
    reports background-job health.

    :return: a JSON response. 'stable' is True only if every check passed;
        external monitoring should assert on the 'stable': True string and
        the note strings assembled below.
    """
    res = {'stable': True, 'ping': {'apps': {}, 'indices': {}}, 'background': {'status': 'Background jobs are stable', 'info': []}, 'notes': []}

    # to get monitoring on this, use uptime robot or similar to check that the status page
    # contains the 'stable': True string and the following note strings
    app_note = 'apps reachable'
    app_unreachable = 0
    inodes_note = 'inode use on app machines below 95%'
    inodes_high = 0
    writable_note = 'app machines can write to disk'
    not_writable = 0
    #disk_note = 'disk use on app machines below 95%'
    #disk_high = 0
    #memory_note = 'memory use on app machines below 95%'
    #memory_high = 0
    es_note = 'indexes stable'
    es_unreachable = 0
    indexable_note = 'index accepts index/delete operations'  # NOTE(review): currently unused - confirm whether an index/delete probe was intended
    cluster_note = 'cluster stable'

    for addr in app.config.get('APP_MACHINES_INTERNAL_IPS', []):
        if not addr.startswith('http'): addr = 'http://' + addr
        addr += url_for('.stats')
        try:
            r = requests.get(addr)
        # FIX: requests raises requests.exceptions.ConnectionError, which is
        # NOT a subclass of the builtin ConnectionError the old code caught,
        # so an unreachable app used to crash this view. RequestException
        # also covers timeouts and other transport failures.
        except requests.exceptions.RequestException:
            app_note = "UNREACHABLE: " + addr
            continue
        res['ping']['apps'][addr] = r.status_code if r.status_code != 200 else r.json()
        if r.status_code == 200:
            # Only a 200 response stored a stats dict above; the old code
            # called .get() on an int for non-200 and relied on a bare
            # except to swallow the AttributeError.
            app_stats = res['ping']['apps'][addr]
            try:
                if app_stats.get('inode_used_pc', 0) >= 95:
                    inodes_high += 1
                    inodes_note = 'INODE GREATER THAN 95% ON ' + str(inodes_high) + ' APP MACHINES'
                if app_stats.get('writable', False) != True:
                    not_writable += 1
                    writable_note = 'WRITE FAILURE ON ' + str(not_writable) + ' APP MACHINES'
                #if app_stats.get('disk_used_pc',0) >= 95:
                #    disk_high += 1
                #    disk_note = 'DISK USE GREATER THAN 95% ON ' + disk_high + ' APP MACHINES'
                #if app_stats.get('memory_used_pc',0) >= 95:
                #    memory_high += 1
                #    memory_note = 'MEMORY USE GREATER THAN 95% ON ' + memory_high + ' APP MACHINES'
            except Exception:
                # malformed stats payload - skip the detail checks, best-effort
                pass
        else:
            res['stable'] = False
            app_unreachable += 1
            app_note = str(app_unreachable) + ' APPS UNREACHABLE'
    res['notes'].append(app_note)
    res['notes'].append(inodes_note)
    res['notes'].append(writable_note)
    #res['notes'].append(disk_note)
    #res['notes'].append(memory_note)

    # check that all necessary ES nodes can actually be pinged from this machine
    es_addr = None
    for eddr in app.config['ELASTICSEARCH_HOSTS']:
        # FIX: the old code assigned 'es_eddr' (a typo) for string hosts,
        # leaving 'es_addr' undefined (or stale from the previous iteration)
        # whenever a host was configured as a plain string.
        if isinstance(eddr, str):
            es_addr = eddr
        else:
            es_addr = f'http://{eddr["host"]}:{eddr["port"]}'
        try:
            r = requests.get(es_addr, timeout=3)
            res['ping']['indices'][es_addr] = r.status_code
            # FIX: only ever downgrade 'stable' - the old unconditional
            # assignment could reset it to True after an earlier failure.
            if r.status_code != 200:
                raise Exception('ES is not OK - status is {}'.format(r.status_code))
        except Exception:
            res['stable'] = False
            es_unreachable += 1
            es_note = str(es_unreachable) + ' INDEXES UNREACHABLE'
    res['notes'].append(es_note)

    # query ES for cluster health and nodes up (uses last ES host in config)
    try:
        es = requests.get(es_addr + '/_stats').json()
        # FIX: ceil the size *in MB*, not the raw byte count - the old
        # parenthesisation applied math.ceil to an integer byte count and
        # then truncated, so sizes were floored instead of ceiled.
        res['index'] = { 'cluster': {}, 'shards': { 'total': es['_shards']['total'], 'successful': es['_shards']['successful'] }, 'indices': {} }
        for k, v in es['indices'].items():
            res['index']['indices'][k] = { 'docs': v['primaries']['docs']['count'], 'size': int(math.ceil(v['primaries']['store']['size_in_bytes'] / 1024 / 1024)) }
        try:
            ces = requests.get(es_addr + '/_cluster/health')
            res['index']['cluster'] = ces.json()
            # FIX: only downgrade 'stable' here too (same reset bug as above)
            if res['index']['cluster']['status'] != 'green':
                res['stable'] = False
                cluster_note = 'CLUSTER UNSTABLE'
        except Exception:
            res['stable'] = False
            cluster_note = 'CLUSTER UNSTABLE'
    except Exception:
        res['stable'] = False
        cluster_note = 'CLUSTER UNSTABLE'
    res['notes'].append(cluster_note)

    # check background jobs
    # ~~BackgroundTask:Monitoring~~
    bgtask_status_service = DOAJ.backgroundTaskStatusService()
    res['background'] = bgtask_status_service.create_background_status()
    if not bgtask_status_service.is_stable(res['background'].get('status')):
        res['stable'] = False

    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp