[DEFAULT]

#### EMAIL NOTIFICATIONS ####

# Notify these email addresses in case of alarm; separate multiple addresses with commas.
# Comment this out if you don't want emails to be sent (e.g. because you use ALARM_COMMAND below).
MAILTO=root@localhost, user@localhost

# Sender address
#MAILFROM=root@localhost

# Use a remote SMTP host (enable by removing the comment)
#SMTPHOST=my.smtp.host:465

# SMTP credentials
#SMTPUSER=mysmtpuser
#SMTPPASS=mysmtppass

# Use SSL for SMTP
#SMTPSSL=True


#### RUN COMMAND IN CASE OF ALARM ####

# You can run a command or script when an alert is issued.
#
# In this example, `curl` is used to send a POST request to ntfy (https://ntfy.sh/), a service
# that delivers push notifications to smartphones and desktop computers.
# If you want to use ntfy, just replace the topic name with something unique (see the documentation
# at https://ntfy.sh/docs/ ), uncomment the ALARM_COMMAND entry and you are ready to go.
# If you generate a lot of traffic, please consider hosting your own ntfy server.
#
# Otherwise, you can replace the curl command with anything you want. You can use the following
# placeholders to pass the details of the event to your command/script:
#   %%CHECKNAME%%   The name of the check (the one between square brackets in this config)
#   %%HOSTNAME%%    The host name
#   %%DATETIME%%    The date and time of the event, in human-readable format
#   %%ERROR%%       A human-readable error description (the same used in the mail alert)
#
# The -H option must carry a full "Name: value" header; ntfy reads the notification title
# from the "Title" header.
#ALARM_COMMAND=curl -H "Title: %%CHECKNAME%% alarm on %%HOSTNAME%%" -d "%%ERROR%% on %%DATETIME%%" ntfy.sh/my-unique-topic-name


#### HEALTH CHECKS ####

# Every health check is based on a command being executed, its output being parsed with a regexp
# to extract (as a single group) a numeric or string value, and that value being compared with
# a configured value. These checks are ready to be used: just enable the ones you need.
# You can add your own custom check by declaring another section like this:
#
# [my_custom_check_name]
# DISABLED=False
# ALARM_STRING_EQUAL=Lorem ipsum
# ALARM_STRING_NOT_EQUAL=The lazy fox
# ALARM_VALUE_EQUAL=99
# ALARM_VALUE_NOT_EQUAL=76.365338
# ALARM_VALUE_MORE_THAN=1.0
# ALARM_VALUE_LESS_THAN=12
# COMMAND=/my/custom/binary --with parameters
# REGEXP=my regex to parse (awesome|disappointing) command output

[system_load_1min]
# The system load average in the last minute
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: (\d+[,.]\d+), \d+[,.]\d+, \d+[,.]\d+

[system_load_5min]
# The system load average in the last 5 minutes
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: \d+[,.]\d+, (\d+[,.]\d+), \d+[,.]\d+

[system_load_15min]
# The system load average in the last 15 minutes
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: \d+[,.]\d+, \d+[,.]\d+, (\d+[,.]\d+)

[used_disk_space]
# Used disk space (in percent, i.e. ALARM_VALUE_MORE_THAN=75 -> alarm if disk is more than 75% full)
# Replace /dev/sda1 with the device or mount point you want to monitor.
DISABLED=True
ALARM_VALUE_MORE_THAN=75
COMMAND=df -h /dev/sda1
REGEXP=(\d{1,3})%

[raid_status]
# Issues an alarm when the raid is corrupted
# Checks this part of the /proc/mdstat file:
#   243553280 blocks super 1.2 [2/2] [UU]
# If the content of the last [ ] contains only U (without _), the raid array is healthy.
# Otherwise, [U_] or [_U] is displayed (it may contain more U or _ if the array has more disks;
# in that case adjust ALARM_STRING_NOT_EQUAL accordingly, e.g. UUU for three disks).
DISABLED=True
ALARM_STRING_NOT_EQUAL=UU
COMMAND=cat /proc/mdstat
REGEXP=.*\] \[([U_]+)\]\n

[battery_level]
# Issues an alarm when battery is discharging below a certain level (long blackout, pulled power cord...)
# For laptops used as servers, apparently common among self-hosters. Requires the acpi package installed.
# Value is in %
DISABLED=True
COMMAND=acpi -b
REGEXP=Battery \d: .*, (\d{1,3})%
ALARM_VALUE_LESS_THAN=90

[laptop_charger_disconnected]
# Issues an alarm when the laptop charger is disconnected
# For laptops used as servers, apparently common among self-hosters. Requires the acpi package installed.
DISABLED=True
COMMAND=acpi -a
REGEXP=Adapter \d: (.+)
ALARM_STRING_EQUAL=off-line

[free_ram]
# Free ram in %
# Shows another approach: does all the computation in the command and picks up
# all the output (by not declaring a regexp).
DISABLED=True
COMMAND=free | grep Mem | awk '{print int($4/$2 * 100.0)}'
ALARM_VALUE_LESS_THAN=20

[available_ram]
# Like free_ram, but uses available memory instead of free memory. You may prefer this one
# if a large share of your RAM is used as cache.
# In the "Mem:" row printed by recent procps-ng versions of `free`, field 6 is buff/cache
# and field 7 is available, so field 7 is the one to read here.
DISABLED=True
COMMAND=free | grep Mem | awk '{print int($7/$2 * 100.0)}'
ALARM_VALUE_LESS_THAN=20

[cpu_temperature]
# CPU temperature alarm: requires lm-sensors installed and configured (check your distribution's guide)
# The regexp must be adapted to your configuration: run `sensors` in the command line
# to find the name of the temperature sensor in your system. In this case it is `Core 0`,
# but it may be called Tdie or a lot of different names, there is no standard.
# Only the integer part of the reading is captured.
DISABLED=True
ALARM_VALUE_MORE_THAN=80
COMMAND=sensors
REGEXP=Core 0: +\+?(-?\d{1,3})[,.]\d°[CF]

[fan_speed]
# Fan speed alarm: requires lm-sensors installed and configured (check your distribution's guide)
# The regexp must be adapted to your configuration: run `sensors` in the command line
# to find the name of the fan speed sensor in your system.
# The group captures the whole RPM number (one or more digits).
DISABLED=True
ALARM_VALUE_LESS_THAN=300
COMMAND=sensors
REGEXP=cpu_fan: +(\d+) RPM