Implemented alarm command

This commit is contained in:
Daniele Verducci (Slimpenguin) 2022-04-01 10:27:04 +02:00
parent 8c786d6190
commit c0b115559d
5 changed files with 75 additions and 19 deletions

View File

@ -3,20 +3,27 @@ A collection of utilities for self hosters.
## HEALTHCHECK
A simple server health check.
Sends an email in case of alarm.
Meant to be run with a cron (see healthcheck.cron.example)
Tested on Debian 11, but should run on almost any standard linux box
Sends an email and/or executes a command in case of alarm.
As an example, the command may be an ntfy call to obtain a notification on a mobile phone or desktop computer.
Meant to be run with a cron (see healthcheck.cron.example).
Tested on Debian 11, but should run on almost any standard linux box.
![Email](images/healthcheck_email_notification.png) ![Ntfy](images/healthcheck_ntfy_notification.png)
### Alarms
Provided ready-to-use alarms in config file:
- system load
- cpu load
- disk space
- raid status
- battery level / charger status (for laptops used as servers, apparently common among self-hosters)
- memory status
Alarms that need basic configuration to work on your system:
- cpu temperature (needs to be adapted as every system has a different name for the sensor)
- fan speed (needs to be adapted as every system has a different name for the sensor)
... or you can write your own custom alarm!
### How does it work
The config file contains a list of checks. The most common checks are provided in the config file, but it is possible to configure custom checks, if needed.
Every check definition has:
@ -37,7 +44,7 @@ cp healthcheck.py /usr/local/bin/healthcheck.py
cp healthcheck.cfg.example /usr/local/etc/healthcheck.cfg
```
Edit `/usr/local/etc/healthcheck.cfg` enabling the checks you need and configuring email settings.
Run `/usr/local/bin/healthcheck.py /usr/local/etc/healthcheck.cfg` to check it is working. If needed, change the config to make a check fail and see if the notification mail is delivered.
Run `/usr/local/bin/healthcheck.py /usr/local/etc/healthcheck.cfg` to check it is working. If needed, change the config to make a check fail and see if the notification mail is delivered. If you need to do some testing without spamming emails, run with the parameter `--dry-run`.
Now copy the cron file:
```
cp healthcheck.cron.example /etc/cron.d/healthcheck
@ -54,4 +61,7 @@ As stated in the `uptime` command manual:
#### Note on temperature and fan speed checks:
The check to run needs lm-sensors to be installed and configured. Check your distribution install guide.
The sensors have different names on every system, so you WILL need to adapt the configuration.
Some systems have a single temperature sensor for the whole CPU, while others have a sensor for every core. In the latter case, you may want to copy the `[cpu_temperature]` config into N different configs like `[cpu_temperature_0]`, one for every core, and change the REGEXP to match `Core 0`, `Core 1` and so on...
# License
This whole repository is released under GNU General Public License version 3: see http://www.gnu.org/licenses/

View File

@ -1,12 +1,13 @@
[DEFAULT]
#### EMAIL NOTIFICATIONS
#### EMAIL NOTIFICATIONS ####
# Notify these email address(es) in case of alarm, multiple addresses separated by commas
# Comment this out if you don't want emails to be sent (e.g. because you are using ALARM_COMMAND below)
MAILTO=root@localhost, user@localhost
# Sender address
MAILFROM=root@localhost
#MAILFROM=root@localhost
# Use a remote SMTP host (enable by removing comment)
#SMTPHOST=my.smtp.host:465
@ -19,6 +20,24 @@ MAILFROM=root@localhost
#SMTPSSL=True
#### RUN COMMAND IN CASE OF ALARM ####
# You can run a command or script when an alert is issued.
#
# In this example, `curl` is used to send a POST request to Ntfy (https://ntfy.sh/), a service
# that delivers push notifications to smartphones and desktop computers.
# If you want to use ntfy, just change the topic name to something unique (see documentation
# at https://ntfy.sh/docs/ ), uncomment the ALARM_COMMAND entry and you are ready to go.
# If you generate a lot of traffic, please consider hosting your own ntfy server.
#
# Otherwise, you can replace the curl command with anything you want. You can use the following
# placeholders to pass your command/script the details about the event:
# %%CHECKNAME%% The name of the check (the one between square brackets in this config)
# %%HOSTNAME%% The host name
# %%DATETIME%% The date and time of the event, in human readable format
# %%ERROR%% A human readable error description (the same used in the mail alert)
#ALARM_COMMAND=curl -H "Title: %%CHECKNAME%% alarm on %%HOSTNAME%%" -d "%%ERROR%% on %%DATETIME%%" ntfy.sh/my-unique-topic-name
#### HEALTH CHECKS ####
# Every health check is based on a command being executed, its result being parsed with a regexp
@ -91,7 +110,7 @@ ALARM_VALUE_LESS_THAN=90
DISABLED=True
COMMAND=acpi -a
REGEXP=Adapter \d: (.+)
ALARM_STRING_NOT_EQUAL=on-line
ALARM_STRING_EQUAL=off-line
[free_ram]
# Free ram in %

View File

@ -73,6 +73,7 @@ class Main:
self.config = configparser.ConfigParser(interpolation=None) # Disable interpolation because contains regexp
self.config.read(configPath)
self.hostname = os.uname()[1]
def run(self, dryRun):
''' Runs the healtg checks '''
@ -93,7 +94,10 @@ class Main:
# Alarm!
logging.warning('Alarm for {}: {}!'.format(section, error))
if not dryRun:
self.sendMail(s, error)
if s.mailto:
self.sendMail(s, error)
if s.alarmCommand:
self.executeAlarmCommand(s, error)
# Calls the provided command, checks the value parsing it with the provided regexp
# and returns an error string, or null if the value is within its limits
@ -139,17 +143,15 @@ class Main:
if config.alarm_value_less_than and locale.atof(detectedValue) < float(config.alarm_value_less_than):
return 'value is {}, but should be greater than {}'.format(locale.atof(detectedValue), config.alarm_value_less_than)
def sendMail(self, s, error):
if s.smtphost:
logging.info("Sending detailed logs to %s via %s", s.mailto, s.smtphost)
logging.info("Sending alarm email to %s via %s", s.mailto, s.smtphost)
else:
logging.info("Sending detailed logs to %s using local smtp", s.mailto)
logging.info("Sending alarm email to %s using local smtp", s.mailto)
# Create main message
hostname = os.uname()[1]
msg = MIMEMultipart()
msg['Subject'] = EMAIL_SUBJECT_TPL.format(hostname, s.name)
msg['Subject'] = EMAIL_SUBJECT_TPL.format(self.hostname, s.name)
if s.mailfrom:
m_from = s.mailfrom
else:
@ -161,7 +163,7 @@ class Main:
# Add base text
body = EMAIL_MESSAGE_TPL.format(
s.name,
hostname,
self.hostname,
time.strftime("%a, %d %b %Y %H:%M:%S"),
error
)
@ -183,6 +185,25 @@ class Main:
smtp.sendmail(m_from, s.mailto, msg.as_string())
smtp.quit()
def executeAlarmCommand(self, s, error):
    ''' Runs the alarm command configured for a check, filling in the event details.

    s is the check settings object: s.alarmCommand is the command template and
    s.name the check name. error is the human readable error description.
    The command output and a non-zero exit code are only logged, nothing is returned.
    '''
    # Event details, keyed by the placeholder that stands for them in the command template
    placeholders = {
        '%%CHECKNAME%%': s.name,
        '%%HOSTNAME%%': self.hostname,
        '%%DATETIME%%': time.strftime("%a, %d %b %Y %H:%M:%S"),
        '%%ERROR%%': error,
    }
    cmdToRun = s.alarmCommand
    for placeholder, value in placeholders.items():
        cmdToRun = cmdToRun.replace(placeholder, value)

    logging.debug("Executing alarm command %s", cmdToRun)
    # NOTE(review): the values above are pasted unescaped into a command line that is
    # run through the shell. Escaping them here would change what the quoted example
    # in the config file produces, so this is left as is: confirm the error text can
    # never carry shell metacharacters.
    ret = subprocess.run(cmdToRun, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)

    # Logging is done through the logging module with %-style lazy arguments
    # ('{}' placeholders are not expanded by logging and make the record fail to format).
    # Undecodable bytes are replaced so that odd command output cannot raise here.
    if ret.stderr:
        logging.info('%s subprocess stderr:\n%s', cmdToRun, ret.stderr.decode(errors='replace'))
    if ret.stdout:
        logging.debug('%s subprocess stdout:\n%s', cmdToRun, ret.stdout.decode(errors='replace'))
    if ret.returncode != 0:
        logging.error('subprocess %s exited with error code %s', cmdToRun, ret.returncode)
class Settings:
''' Represents settings for a check '''
@ -203,9 +224,14 @@ class Settings:
self.smtpuser = self.getStr(name, 'SMTPUSER', None)
self.smtppass = self.getStr(name, 'SMTPPASS', None)
self.smtpssl = self.getBoolean(name, 'SMTPSSL', False)
## List of email address to notify about backup status (mandatory)
mailtoList = config.get(name, 'MAILTO')
self.mailto = [ x.strip() for x in mailtoList.strip().split(self.EMAIL_LIST_SEP) ]
## List of email address to notify in case of alarms (disabled if missing)
mailtoList = self.getStr(name, 'MAILTO', None)
if mailtoList:
self.mailto = [ x.strip() for x in mailtoList.strip().split(self.EMAIL_LIST_SEP) ]
else:
self.mailto = None
## Command to execute in case of alarms (disabled if missing)
self.alarmCommand = self.getStr(name, 'ALARM_COMMAND', None)
## Sender address for the notification email
self.mailfrom = self.getStr(name, 'MAILFROM', getpass.getuser()+'@'+socket.gethostname())
## Values to compare
@ -233,6 +259,7 @@ class Settings:
return defaultValue
if __name__ == '__main__':
import argparse

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB