diff --git a/README.md b/README.md index c23f58a..f778bee 100644 --- a/README.md +++ b/README.md @@ -3,20 +3,27 @@ A collection of utilities for self hosters. ## HEALTHCHECK A simple server health check. -Sends an email in case of alarm. -Meant to be run with a cron (see healthcheck.cron.example) -Tested on Debian 11, but should run on almost any standard linux box +Sends an email and/or executes a command in case of alarm. +As an example, the command may be a ntfy call to obtain a notification on a mobile phone or desktop computer. +Meant to be run with a cron (see healthcheck.cron.example). +Tested on Debian 11, but should run on almost any standard linux box. + +![Email](images/healthcheck_email_notification.png) ![Ntfy](images/healthcheck_ntfy_notification.png) ### Alarms Provided ready-to-use alarms in config file: -- system load +- cpu load - disk space - raid status - battery level / charger status (for laptops used as servers, apparently common among the self hosters) - memory status + +Alarms that need basic configuration to work on your system: - cpu temperature (needs to be adapted as every system has a different name for the sensor) - fan speed (needs to be adapted as every system has a different name for the sensor) +... or you can write your own custom alarm! + ### How does it work The config file contains a list of checks. The most common checks are provided in the config file, but it is possible to configure custom checks, if needed. Every check definition has: @@ -37,7 +44,7 @@ cp healthcheck.py /usr/local/bin/healthcheck.py cp healthcheck.cfg.example /usr/local/etc/healthcheck.cfg ``` Edit `/usr/local/etc/healthcheck.cfg` enabling the checks you need and configuring email settings. -Run `/usr/local/bin/healthcheck.py /usr/local/etc/healthcheck.cfg` to check it is working. If needed, change the config to make a check fail and see if the notification mail is delivered. 
+Run `/usr/local/bin/healthcheck.py /usr/local/etc/healthcheck.cfg` to check it is working. If needed, change the config to make a check fail and see if the notification mail is delivered. If you need to do some testing without spamming emails, run with the parameter `--dry-run`. Now copy the cron file: ``` cp healthcheck.cron.example /etc/cron.d/healthcheck @@ -54,4 +61,7 @@ As stated in the `uptime` command manual: #### Note on temperature and fan speed checks: The check to run needs lm-sensors to be installed and configured. Check your distribution install guide. The sensors have different name in every system, so you WILL need to adapt the configuration. - \ No newline at end of file +Some systems have a single temperature sensor for the whole CPU, while others have a sensor for every core. In the latter case, you may want to copy the `[cpu_temperature]` config into N different configs like `[cpu_temperature_0]`, one for every core, and change the `REGEXP` to match `Core 0`, `Core 1` and so on... + +# License +This whole repository is released under the GNU General Public License version 3: see http://www.gnu.org/licenses/ diff --git a/healthcheck.cfg.example b/healthcheck.cfg.example index 2e880ab..a360488 100644 --- a/healthcheck.cfg.example +++ b/healthcheck.cfg.example @@ -1,12 +1,13 @@ [DEFAULT] -#### EMAIL NOTIFICATIONS +#### EMAIL NOTIFICATIONS #### # Notify this email address(es) in case of alarm, multiple addresses separated by commas +# Comment this out if you don't want emails to be sent (e.g. because you use ALARM_COMMAND below) MAILTO=root@localhost, user@localhost # Sender address -MAILFROM=root@localhost +#MAILFROM=root@localhost # Use a remote SMTP host (enable by removing comment) #SMTPHOST=my.smtp.host:465 @@ -19,6 +20,24 @@ MAILFROM=root@localhost #SMTPSSL=True +#### RUN COMMAND IN CASE OF ALARM #### +# You can run a command or script when an alert is issued. 
+# +# In this example, `curl` is used to send a POST request to ntfy (https://ntfy.sh/), a service +# that delivers push notifications to smartphones and desktop computers. +# If you want to use ntfy, just replace the topic name with something unique (see documentation +# at https://ntfy.sh/docs/ ), uncomment the ALARM_COMMAND entry and you are ready to go. +# If you generate a lot of traffic, please consider hosting your own ntfy server. +# +# Otherwise, you can replace the curl command with anything you want; you can use the following +# placeholders to pass your command/script the details about the event: +# %%CHECKNAME%% The name of the check (the one between square brackets in this config) +# %%HOSTNAME%% The host name +# %%DATETIME%% The date and time of the event, in human-readable format +# %%ERROR%% A human-readable error description (the same one used in the mail alert) + +#ALARM_COMMAND=curl -H "Title: %%CHECKNAME%% alarm on %%HOSTNAME%%" -d "%%ERROR%% on %%DATETIME%%" ntfy.sh/my-unique-topic-name + #### HEALTH CHECKS #### # Every health check is based on a command being executed, its result being parsed with a regexp @@ -91,7 +110,7 @@ ALARM_VALUE_LESS_THAN=90 DISABLED=True COMMAND=acpi -a REGEXP=Adapter \d: (.+) -ALARM_STRING_NOT_EQUAL=on-line +ALARM_STRING_EQUAL=off-line [free_ram] # Free ram in % diff --git a/healthcheck.py b/healthcheck.py index 50330ce..8054102 100755 --- a/healthcheck.py +++ b/healthcheck.py @@ -73,6 +73,7 @@ class Main: self.config = configparser.ConfigParser(interpolation=None) # Disable interpolation because contains regexp self.config.read(configPath) + self.hostname = os.uname()[1] def run(self, dryRun): ''' Runs the healtg checks ''' @@ -93,7 +94,10 @@ class Main: # Alarm! 
logging.warning('Alarm for {}: {}!'.format(section, error)) if not dryRun: - self.sendMail(s, error) + if s.mailto: + self.sendMail(s, error) + if s.alarmCommand: + self.executeAlarmCommand(s, error) # Calls the provided command, checks the value parsing it with the provided regexp # and returns an error string, or null if the value is within its limits @@ -138,18 +142,16 @@ class Main: return 'value is {}, but should not exceed {}'.format(locale.atof(detectedValue), config.alarm_value_more_than) if config.alarm_value_less_than and locale.atof(detectedValue) < float(config.alarm_value_less_than): return 'value is {}, but should be greater than {}'.format(locale.atof(detectedValue), config.alarm_value_less_than) - def sendMail(self, s, error): if s.smtphost: - logging.info("Sending detailed logs to %s via %s", s.mailto, s.smtphost) + logging.info("Sending alarm email to %s via %s", s.mailto, s.smtphost) else: - logging.info("Sending detailed logs to %s using local smtp", s.mailto) + logging.info("Sending alarm email to %s using local smtp", s.mailto) # Create main message - hostname = os.uname()[1] msg = MIMEMultipart() - msg['Subject'] = EMAIL_SUBJECT_TPL.format(hostname, s.name) + msg['Subject'] = EMAIL_SUBJECT_TPL.format(self.hostname, s.name) if s.mailfrom: m_from = s.mailfrom else: @@ -161,7 +163,7 @@ class Main: # Add base text body = EMAIL_MESSAGE_TPL.format( s.name, - hostname, + self.hostname, time.strftime("%a, %d %b %Y %H:%M:%S"), error ) @@ -183,6 +185,26 @@ class Main: smtp.sendmail(m_from, s.mailto, msg.as_string()) smtp.quit() + def executeAlarmCommand(self, s, error): + ''' Runs the alarm command through the shell, replacing the %%PLACEHOLDERS%% with the event details (substituted values are not shell-escaped) ''' + cmdToRun = s.alarmCommand + cmdToRun = cmdToRun.replace('%%CHECKNAME%%', s.name) + cmdToRun = cmdToRun.replace('%%HOSTNAME%%', self.hostname) + cmdToRun = cmdToRun.replace('%%DATETIME%%', time.strftime("%a, %d %b %Y %H:%M:%S")) + cmdToRun = cmdToRun.replace('%%ERROR%%', error) + + logging.debug("Executing alarm command %s", cmdToRun) + + ret = subprocess.run(cmdToRun, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + if ret.stderr: + logging.info('%s subprocess stderr:\n%s', cmdToRun, ret.stderr.decode()) + if ret.stdout: + stdout = ret.stdout.decode() + logging.debug('%s subprocess stdout:\n%s', cmdToRun, stdout) + if ret.returncode != 0: + logging.error('subprocess %s exited with error code %s', cmdToRun, ret.returncode) + + class Settings: ''' Represents settings for a check ''' @@ -203,9 +225,14 @@ class Settings: self.smtpuser = self.getStr(name, 'SMTPUSER', None) self.smtppass = self.getStr(name, 'SMTPPASS', None) self.smtpssl = self.getBoolean(name, 'SMTPSSL', False) - ## List of email address to notify about backup status (mandatory) - mailtoList = config.get(name, 'MAILTO') - self.mailto = [ x.strip() for x in mailtoList.strip().split(self.EMAIL_LIST_SEP) ] + ## List of email address to notify in case of alarms (disabled if missing) + mailtoList = self.getStr(name, 'MAILTO', None) + if mailtoList: + self.mailto = [ x.strip() for x in mailtoList.strip().split(self.EMAIL_LIST_SEP) ] + else: + self.mailto = None + ## Command to execute in case of alarms (disabled if missing) + self.alarmCommand = self.getStr(name, 'ALARM_COMMAND', None) ## Sender address for the notification email self.mailfrom = self.getStr(name, 'MAILFROM', getpass.getuser()+'@'+socket.gethostname()) ## Values to compare @@ -233,6 +260,7 @@ class Settings: return defaultValue + if __name__ == '__main__': import argparse diff --git a/images/healthcheck_email_notification.png b/images/healthcheck_email_notification.png new file mode 100644 index 0000000..ca7c727 Binary files /dev/null and b/images/healthcheck_email_notification.png differ diff --git a/images/healthcheck_ntfy_notification.png b/images/healthcheck_ntfy_notification.png new file mode 100644 index 0000000..2d4549c Binary files /dev/null and b/images/healthcheck_ntfy_notification.png differ