check_nagiostats.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. #####################################################################
  4. # (c) 2005-2011 by Sven Velt and team(ix) GmbH, Nuernberg, Germany #
  5. # sv@teamix.net #
  6. # (c) 2016 by Sven Velt, Germany #
  7. # sven-mymonplugins@velt.biz #
  8. # #
  9. # This file is part of "velt.biz - My Monitoring Plugins" #
  10. # a fork of "team(ix) Monitoring Plugins" in 2015 #
  11. # URL: https://gogs.velt.biz/velt.biz/MyMonPlugins/ #
  12. # #
  13. # This file is free software: you can redistribute it and/or modify #
  14. # it under the terms of the GNU General Public License as published #
  15. # by the Free Software Foundation, either version 2 of the License, #
  16. # or (at your option) any later version. #
  17. # #
  18. # This file is distributed in the hope that it will be useful, but #
  19. # WITHOUT ANY WARRANTY; without even the implied warranty of #
  20. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
  21. # GNU General Public License for more details. #
  22. # #
  23. # You should have received a copy of the GNU General Public License #
  24. # along with this file. If not, see <http://www.gnu.org/licenses/>. #
  25. #####################################################################
  26. import os
  27. import shlex
  28. import subprocess
  29. import sys
  30. try:
  31. from monitoringplugin import MonitoringPlugin
  32. except ImportError:
  33. print '=========================='
  34. print 'AIKS! Python import error!'
  35. print '==========================\n'
  36. print 'Could not find "monitoringplugin.py"!\n'
  37. print 'Did you download "%s"' % os.path.basename(sys.argv[0])
  38. print 'without "monitoringplugin.py"?\n'
  39. print 'Please go back to'
  40. print 'https://gogs.velt.biz/velt.biz/MyMonPlugins and download it,'
  41. print 'or even better:'
  42. print 'get a full archive at http://gogs.velt.biz/velt.biz/MyMonPlugins/releases'
  43. print 'or a master snapshot at http://gogs.velt.biz/velt.biz/MyMonPlugins/archive/master.tar.gz\n'
  44. sys.exit(127)
  45. plugin = MonitoringPlugin(pluginname='check_nagiostats', tagforstatusline='NAGIOSTATS', description='Check Nagios statistics', version='0.1')
  46. NAGIOSTATSs = ['/usr/sbin/nagios3stats', '/usr/local/nagios/bin/nagiostats']
  47. VARs = {
  48. 'PROGRUNTIME': { 'type':str, },
  49. 'PROGRUNTIMETT': { 'type':long, 'unit':'', },
  50. 'STATUSFILEAGE': { 'type':str, },
  51. 'STATUSFILEAGETT': { 'type':long, 'unit':'', },
  52. 'NAGIOSVERSION': { 'type':str, },
  53. 'NAGIOSPID': { 'type':str, },
  54. 'NAGIOSVERPID': { 'type':str, },
  55. 'TOTCMDBUF': { 'type':long, 'unit':'', },
  56. 'USEDCMDBUF': { 'type':long, 'unit':'', },
  57. 'HIGHCMDBUF': { 'type':long, 'unit':'', },
  58. 'NUMSERVICES': { 'type':long, 'unit':'', },
  59. 'NUMSVCOK': { 'type':long, 'unit':'', },
  60. 'NUMSVCWARN': { 'type':long, 'unit':'', },
  61. 'NUMSVCUNKN': { 'type':long, 'unit':'', },
  62. 'NUMSVCCRIT': { 'type':long, 'unit':'', },
  63. 'NUMSVCPROB': { 'type':long, 'unit':'', },
  64. 'NUMSVCCHECKED': { 'type':long, 'unit':'', },
  65. 'NUMSVCSCHEDULED': { 'type':long, 'unit':'', },
  66. 'NUMSVCFLAPPING': { 'type':long, 'unit':'', },
  67. 'NUMSVCDOWNTIME': { 'type':long, 'unit':'', },
  68. 'NUMHOSTS': { 'type':long, 'unit':'', },
  69. 'NUMHSTUP': { 'type':long, 'unit':'', },
  70. 'NUMHSTDOWN': { 'type':long, 'unit':'', },
  71. 'NUMHSTUNR': { 'type':long, 'unit':'', },
  72. 'NUMHSTPROB': { 'type':long, 'unit':'', },
  73. 'NUMHSTCHECKED': { 'type':long, 'unit':'', },
  74. 'NUMHSTSCHEDULED': { 'type':long, 'unit':'', },
  75. 'NUMHSTFLAPPING': { 'type':long, 'unit':'', },
  76. 'NUMHSTDOWNTIME': { 'type':long, 'unit':'', },
  77. 'NUMHSTACTCHK1M': { 'type':long, 'unit':'', },
  78. 'NUMHSTPSVCHK1M': { 'type':long, 'unit':'', },
  79. 'NUMSVCACTCHK1M': { 'type':long, 'unit':'', },
  80. 'NUMSVCPSVCHK1M': { 'type':long, 'unit':'', },
  81. 'NUMHSTACTCHK5M': { 'type':long, 'unit':'', },
  82. 'NUMHSTPSVCHK5M': { 'type':long, 'unit':'', },
  83. 'NUMSVCACTCHK5M': { 'type':long, 'unit':'', },
  84. 'NUMSVCPSVCHK5M': { 'type':long, 'unit':'', },
  85. 'NUMHSTACTCHK15M': { 'type':long, 'unit':'', },
  86. 'NUMHSTPSVCHK15M': { 'type':long, 'unit':'', },
  87. 'NUMSVCACTCHK15M': { 'type':long, 'unit':'', },
  88. 'NUMSVCPSVCHK15M': { 'type':long, 'unit':'', },
  89. 'NUMHSTACTCHK60M': { 'type':long, 'unit':'', },
  90. 'NUMHSTPSVCHK60M': { 'type':long, 'unit':'', },
  91. 'NUMSVCACTCHK60M': { 'type':long, 'unit':'', },
  92. 'NUMSVCPSVCHK60M': { 'type':long, 'unit':'', },
  93. 'AVGACTSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average active service check latency', },
  94. 'AVGACTSVCEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average active service check execution time', },
  95. 'AVGACTSVCPSC': { 'type':float, 'unit':'%', },
  96. 'AVGPSVSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average passive service check latency', },
  97. 'AVGPSVSVCPSC': { 'type':float, 'unit':'%', },
  98. 'AVGSVCPSC': { 'type':float, 'unit':'%', },
  99. 'AVGACTHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average active host check latency', },
  100. 'AVGACTHSTEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average active host check execution time', },
  101. 'AVGACTHSTPSC': { 'type':float, 'unit':'%', },
  102. 'AVGPSVHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Average passive host check latency', },
  103. 'AVGPSVHSTPSC': { 'type':float, 'unit':'%', },
  104. 'AVGHSTPSC': { 'type':float, 'unit':'%', },
  105. 'MINACTSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum active service check latency', },
  106. 'MINACTSVCEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum active service check execution time', },
  107. 'MINACTSVCPSC': { 'type':float, 'unit':'%', },
  108. 'MINPSVSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum passive service check latency', },
  109. 'MINPSVSVCPSC': { 'type':float, 'unit':'%', },
  110. 'MINSVCPSC': { 'type':float, 'unit':'%', },
  111. 'MINACTHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum active host check latency', },
  112. 'MINACTHSTEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum active host check execution time', },
  113. 'MINACTHSTPSC': { 'type':float, 'unit':'%', },
  114. 'MINPSVHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Minimum passive host check latency', },
  115. 'MINPSVHSTPSC': { 'type':float, 'unit':'%', },
  116. 'MINHSTPSC': { 'type':float, 'unit':'%', },
  117. 'MAXACTSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum active service check latency', },
  118. 'MAXACTSVCEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum active service check execution time', },
  119. 'MAXACTSVCPSC': { 'type':float, 'unit':'%', },
  120. 'MAXPSVSVCLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum passive service check latency', },
  121. 'MAXPSVSVCPSC': { 'type':float, 'unit':'%', },
  122. 'MAXSVCPSC': { 'type':float, 'unit':'%', },
  123. 'MAXACTHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum active host check latency', },
  124. 'MAXACTHSTEXT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum active host check execution time', },
  125. 'MAXACTHSTPSC': { 'type':float, 'unit':'%', },
  126. 'MAXPSVHSTLAT': { 'type':float, 'unit':'s', 'factor':0.001, 'descr':'Maximum passive host check latency', },
  127. 'MAXPSVHSTPSC': { 'type':float, 'unit':'%', },
  128. 'MAXHSTPSC': { 'type':float, 'unit':'%', },
  129. 'NUMACTHSTCHECKS1M': { 'type':long, 'unit':'', },
  130. 'NUMOACTHSTCHECKS1M': { 'type':long, 'unit':'', },
  131. 'NUMCACHEDHSTCHECKS1M': { 'type':long, 'unit':'', },
  132. 'NUMSACTHSTCHECKS1M': { 'type':long, 'unit':'', },
  133. 'NUMPARHSTCHECKS1M': { 'type':long, 'unit':'', },
  134. 'NUMSERHSTCHECKS1M': { 'type':long, 'unit':'', },
  135. 'NUMPSVHSTCHECKS1M': { 'type':long, 'unit':'', },
  136. 'NUMACTSVCCHECKS1M': { 'type':long, 'unit':'', },
  137. 'NUMOACTSVCCHECKS1M': { 'type':long, 'unit':'', },
  138. 'NUMCACHEDSVCCHECKS1M': { 'type':long, 'unit':'', },
  139. 'NUMSACTSVCCHECKS1M': { 'type':long, 'unit':'', },
  140. 'NUMPSVSVCCHECKS1M': { 'type':long, 'unit':'', },
  141. 'NUMEXTCMDS1M': { 'type':long, 'unit':'', },
  142. 'NUMACTHSTCHECKS5M': { 'type':long, 'unit':'', },
  143. 'NUMOACTHSTCHECKS5M': { 'type':long, 'unit':'', },
  144. 'NUMCACHEDHSTCHECKS5M': { 'type':long, 'unit':'', },
  145. 'NUMSACTHSTCHECKS5M': { 'type':long, 'unit':'', },
  146. 'NUMPARHSTCHECKS5M': { 'type':long, 'unit':'', },
  147. 'NUMSERHSTCHECKS5M': { 'type':long, 'unit':'', },
  148. 'NUMPSVHSTCHECKS5M': { 'type':long, 'unit':'', },
  149. 'NUMACTSVCCHECKS5M': { 'type':long, 'unit':'', },
  150. 'NUMOACTSVCCHECKS5M': { 'type':long, 'unit':'', },
  151. 'NUMCACHEDSVCCHECKS5M': { 'type':long, 'unit':'', },
  152. 'NUMSACTSVCCHECKS5M': { 'type':long, 'unit':'', },
  153. 'NUMPSVSVCCHECKS5M': { 'type':long, 'unit':'', },
  154. 'NUMEXTCMDS5M': { 'type':long, 'unit':'', },
  155. 'NUMACTHSTCHECKS15M': { 'type':long, 'unit':'', },
  156. 'NUMOACTHSTCHECKS15M': { 'type':long, 'unit':'', },
  157. 'NUMCACHEDHSTCHECKS15M': { 'type':long, 'unit':'', },
  158. 'NUMSACTHSTCHECKS15M': { 'type':long, 'unit':'', },
  159. 'NUMPARHSTCHECKS15M': { 'type':long, 'unit':'', },
  160. 'NUMSERHSTCHECKS15M': { 'type':long, 'unit':'', },
  161. 'NUMPSVHSTCHECKS15M': { 'type':long, 'unit':'', },
  162. 'NUMACTSVCCHECKS15M': { 'type':long, 'unit':'', },
  163. 'NUMOACTSVCCHECKS15M': { 'type':long, 'unit':'', },
  164. 'NUMCACHEDSVCCHECKS15M': { 'type':long, 'unit':'', },
  165. 'NUMSACTSVCCHECKS15M': { 'type':long, 'unit':'', },
  166. 'NUMPSVSVCCHECKS15M': { 'type':long, 'unit':'', },
  167. 'NUMEXTCMDS15M': { 'type':long, 'unit':'', },
  168. }
  169. CHECKs = {
  170. 'AVGACTLATENCY': ['AVGACTSVCLAT', 'AVGACTHSTLAT', ],
  171. 'MAXACTLATENCY': ['MAXACTSVCLAT', 'MAXACTHSTLAT', ],
  172. 'MINACTLATENCY': ['MINACTSVCLAT', 'MINACTHSTLAT', ],
  173. 'AVGPSVLATENCY': ['AVGPSVSVCLAT', 'AVGPSVHSTLAT', ],
  174. 'MAXPSVLATENCY': ['MAXPSVSVCLAT', 'MAXPSVHSTLAT', ],
  175. 'MINPSVLATENCY': ['MINPSVSVCLAT', 'MINPSVHSTLAT', ],
  176. 'AVGLATENCY': ['AVGACTSVCLAT', 'AVGACTHSTLAT', 'AVGPSVSVCLAT', 'AVGPSVHSTLAT', ],
  177. 'MAXLATENCY': ['MAXACTSVCLAT', 'MAXACTHSTLAT', 'MAXPSVSVCLAT', 'MAXPSVHSTLAT',],
  178. 'MINLATENCY': ['MINACTSVCLAT', 'MINACTHSTLAT', 'MINPSVSVCLAT', 'MINPSVHSTLAT',],
  179. 'AVGEXECTIME': ['AVGACTSVCEXT', 'AVGACTHSTEXT', ],
  180. 'MAXEXECTIME': ['MAXACTSVCEXT', 'MAXACTHSTEXT', ],
  181. 'MINEXECTIME': ['MINACTSVCEXT', 'MINACTHSTEXT', ],
  182. 'PERFORMANCE': ['AVGACTSVCLAT', 'AVGPSVSVCLAT', 'AVGACTSVCEXT', 'AVGACTHSTLAT', 'AVGPSVHSTLAT', 'AVGACTHSTEXT', ],
  183. 'PEAK': [ 'MAXACTSVCLAT', 'MAXPSVSVCLAT', 'MAXACTSVCEXT', 'MAXACTHSTLAT', 'MAXPSVHSTLAT', 'MAXACTHSTEXT', ],
  184. }
  185. plugin.add_cmdlineoption('-C', '', 'checks', 'Use built-in checks (predefined lists of variables)', default='')
  186. plugin.add_cmdlineoption('-V', '', 'vars', 'List of "nagiostats" variables to check', default='')
  187. plugin.add_cmdlineoption('-n', '', 'nagiostats', 'Full path to nagiostat', default='')
  188. plugin.add_cmdlineoption('-w', '', 'warn', 'warning thresold', default='')
  189. plugin.add_cmdlineoption('-c', '', 'crit', 'warning thresold', default='')
  190. plugin.parse_cmdlineoptions()
  191. if not plugin.options.nagiostats:
  192. plugin.verbose(2, 'Auto-detecting path to "nagiostats"...')
  193. for nagiostats in NAGIOSTATSs:
  194. if os.path.exists(nagiostats):
  195. plugin.options.nagiostats = nagiostats
  196. plugin.verbose(2, 'Found it at "%s"' % nagiostats)
  197. break
  198. if not os.path.exists(plugin.options.nagiostats):
  199. plugin.back2nagios(3, 'Could not find "nagiostats"')
  200. if not plugin.options.checks and not plugin.options.vars:
  201. plugin.back2nagios(3, 'Need either "-C" or "-V"')
  202. # Checks and Variables
  203. varlist = []
  204. varlist_unknown = []
  205. # Built var list out of -C
  206. if plugin.options.checks:
  207. for check in plugin.options.checks.split(','):
  208. if check not in CHECKs:
  209. plugin.back2nagios(3, 'Unknown check "%s"' % check)
  210. varlist.extend(CHECKs[check])
  211. # Check for unknown vars and build list
  212. for var in plugin.options.vars.split(','):
  213. if var:
  214. plugin.verbose(3, 'See if "%s" is a valid variable' % var)
  215. if var in VARs:
  216. varlist.append(var)
  217. else:
  218. varlist_unknown.append(var)
  219. # See if there are unknown vars:
  220. if varlist_unknown:
  221. plugin.back2nagios(3, 'Unknown variable(s): %s' % ', '.join(varlist_unknown))
  222. # Thresholds
  223. if ',' in plugin.options.warn:
  224. plugin.verbose(2, 'Multiple warning thresolds detected')
  225. plugin.options.warn = plugin.options.warn.split(',')
  226. else:
  227. plugin.verbose(2, 'Single warning thresold detected - use for all variables')
  228. plugin.options.warn = [plugin.options.warn, ] * len(varlist)
  229. if ',' in plugin.options.crit:
  230. plugin.verbose(2, 'Multiple critical thresolds detected')
  231. plugin.options.crit = plugin.options.crit.split(',')
  232. else:
  233. plugin.verbose(2, 'Single critical thresold detected - use for all variables')
  234. plugin.options.crit = [plugin.options.crit, ] * len(varlist)
  235. plugin.verbose(3, 'Length of vars: %s' % len(varlist) )
  236. plugin.verbose(3, 'Length of warns: %s' % len(plugin.options.warn) )
  237. plugin.verbose(3, 'Length of crits: %s' % len(plugin.options.crit) )
  238. if not ( len(varlist) == len(plugin.options.warn) == len(plugin.options.crit) ):
  239. plugin.back2nagios(3, 'Different length of -V, -w and -c')
  240. # Go!
  241. cmdline = '%s -m -d %s' % (plugin.options.nagiostats, ','.join(varlist))
  242. plugin.verbose(1, 'Using command line: %s' % cmdline)
  243. cmdline = shlex.split(cmdline)
  244. try:
  245. cmd = subprocess.Popen(cmdline, stdout=subprocess.PIPE)
  246. outputs = cmd.communicate()[0].rstrip().split('\n')
  247. retcode = cmd.returncode
  248. except OSError:
  249. plugin.back2nagios(3, 'Could not execute "%s"' % cmdline)
  250. plugin.verbose(3, 'Returncode of "nagiostats": %s' % retcode)
  251. if retcode == 254:
  252. plugin.back2nagios(2, 'Could not read "status.dat"')
  253. elif retcode != 0:
  254. plugin.back2nagios(3, 'Unknown return code "%s" - please send output of "-vvv" command line to author!' % retcode)
  255. plugin.verbose(1, 'Asked for variable(s): %s' % ' '.join(varlist) )
  256. plugin.verbose(1, 'Got response(s): %s' % ' '.join(outputs) )
  257. plugin.verbose(3, 'Length of vars: %s' % len(outputs) )
  258. plugin.verbose(3, 'Length of output: %s' % len(varlist) )
  259. if len(outputs) != len(varlist):
  260. plugin.back2nagios(3, 'Did not get expected infos')
  261. for idx in xrange(0, len(varlist)):
  262. var = varlist[idx]
  263. warn = plugin.options.warn[idx]
  264. crit = plugin.options.crit[idx]
  265. output = (VARs[var]['type'])(outputs[idx])
  266. if VARs[var]['type'] in [float, long, int]:
  267. factor = VARs[var].get('factor')
  268. if factor != None:
  269. output = output * factor
  270. returncode = plugin.value_wc_to_returncode(output, warn, crit)
  271. else:
  272. returncode = plugin.RETURNCODE['OK']
  273. perfdata = []
  274. unit = VARs[var].get('unit')
  275. if unit != None:
  276. perfdata.append({'label':var, 'value':output, 'unit':VARs[var]['unit'], 'warn':warn, 'crit':crit,})
  277. else:
  278. unit = ''
  279. descr = VARs[var].get('descr')
  280. if descr != None:
  281. longoutput = descr + ': ' + str(output) + unit
  282. else:
  283. longoutput = str(output) + unit
  284. plugin.remember_check(var, returncode, longoutput, perfdata=perfdata)
  285. plugin.brain2output()
  286. plugin.exit()