[DEFAULT]

#### EMAIL NOTIFICATIONS ####

# Address(es) to notify in case of alarm; separate multiple addresses with commas
MAILTO=root@localhost, user@localhost

# Sender address
MAILFROM=root@localhost

# Use a remote SMTP host (enable by removing the leading #), written as host:port
#SMTPHOST=my.smtp.host:465

# SMTP credentials (enable by removing the leading #)
#SMTPUSER=mysmtpuser
#SMTPPASS=mysmtppass

# Use SSL for SMTP (enable by removing the leading #)
#SMTPSSL=True

#### HEALTH CHECKS ####
# Every health check is based on a command being executed, its result being parsed with a regexp
# to extract (as a single group) the numeric or string value, and the value being compared with
# a configured value. These checks are ready to be used, just enable the ones you need.
# You can add your own custom check by declaring another section like this:
#
# [my_custom_check_name]
# DISABLED=False
# ALARM_STRING_EQUAL=Lorem ipsum
# ALARM_STRING_NOT_EQUAL=The lazy fox
# ALARM_VALUE_EQUAL=99
# ALARM_VALUE_NOT_EQUAL=76.365338
# ALARM_VALUE_MORE_THAN=1.0
# ALARM_VALUE_LESS_THAN=12
# COMMAND=/my/custom/binary --with parameters
# REGEXP=my regex to parse (awesome|disappointing) command output

[system_load_1min]
# The system load average in the last minute.
# The regexp captures the first of the three values printed after "load average:"
# and accepts either "," or "." as the decimal separator.
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: (\d+[,.]\d+), \d+[,.]\d+, \d+[,.]\d+

[system_load_5min]
# The system load average in the last 5 minutes.
# The regexp captures the second of the three values printed after "load average:"
# and accepts either "," or "." as the decimal separator.
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: \d+[,.]\d+, (\d+[,.]\d+), \d+[,.]\d+

[system_load_15min]
# The system load average in the last 15 minutes.
# The regexp captures the third of the three values printed after "load average:"
# and accepts either "," or "." as the decimal separator.
DISABLED=True
ALARM_VALUE_MORE_THAN=1.0
COMMAND=uptime
REGEXP=.*load average: \d+[,.]\d+, \d+[,.]\d+, (\d+[,.]\d+)

[used_disk_space]
# Used disk space (in percent, i.e. ALARM_VALUE_MORE_THAN=75 -> alarm if disk is more than 75% full).
# Change /dev/sda1 in COMMAND to the device you want to monitor.
# The regexp captures the number immediately preceding the percent sign in the df output.
# NOTE(review): if this file is loaded by Python configparser with its default interpolation,
# a literal percent sign in a value must be doubled - confirm against the loader.
DISABLED=True
ALARM_VALUE_MORE_THAN=75
COMMAND=df -h /dev/sda1
REGEXP=(\d{1,3})%

[raid_status]
# Issues an alarm when the raid is corrupted.
# Checks this part of the /proc/mdstat file:
# 243553280 blocks super 1.2 [2/2] [UU]
# If the content of the last [ ] contains only U (without _), the raid array is healthy.
# Otherwise, [U_] or [_U] is displayed (it contains more U or _ if the array has more disks).
# ALARM_STRING_NOT_EQUAL=UU fits a two-disk array: use one U per disk for larger arrays.
# The regexp captures the U/_ string inside the last pair of brackets on the line.
DISABLED=True
ALARM_STRING_NOT_EQUAL=UU
COMMAND=cat /proc/mdstat
REGEXP=.*\] \[([U_]+)\]\n

[battery_level]
# Issues an alarm when the battery level drops below a certain level (long blackout, pulled power cord...).
# For laptops used as servers, apparently common among self hosters. Requires the acpi package installed.
# Value is in %.
# NOTE(review): if this file is loaded by Python configparser with its default interpolation,
# a literal percent sign in a value must be doubled - confirm against the loader.
DISABLED=True
COMMAND=acpi -b
REGEXP=Battery \d: .*, (\d{1,3})%
ALARM_VALUE_LESS_THAN=90

[laptop_charger_disconnected]
# Issues an alarm when the laptop charger is disconnected.
# For laptops used as servers, apparently common among self hosters. Requires the acpi package installed.
# The regexp captures the adapter state text; any value other than "on-line" raises the alarm.
DISABLED=True
COMMAND=acpi -a
REGEXP=Adapter \d: (.+)
ALARM_STRING_NOT_EQUAL=on-line

[free_ram]
# Free ram in %.
# Shows another approach: does all the computation in the command and picks up
# all the output (by not declaring a regexp).
# The awk expression divides field 4 of the "Mem" line by field 2 and prints the integer percentage.
DISABLED=True
COMMAND=free | grep Mem | awk '{print int($4/$2 * 100.0)}'
ALARM_VALUE_LESS_THAN=20

[available_ram]
# Like free_ram, but uses the "available" column instead of "free".
# You may want to use this if a large part of your memory is used as cache.
# On the "Mem" line printed by procps free, field 2 is "total" and field 7 is "available"
# (field 6 is "buff/cache"). Requires a version of free that prints the "available" column.
DISABLED=True
COMMAND=free | grep Mem | awk '{print int($7/$2 * 100.0)}'
ALARM_VALUE_LESS_THAN=20

[cpu_temperature]
# CPU Temperature alarm: requires lm-sensors installed and configured (check your distribution's guide).
# The regexp must be adapted to your configuration: run `sensors` in the command line
# to find the name of the temperature sensor in your system. In this example it is `Core 0`,
# but it may be called Tdie or a lot of different names, there is no standard.
# The regexp captures the integer part of the reading; the decimal separator may be "," or ".".
DISABLED=True
ALARM_VALUE_MORE_THAN=80
COMMAND=sensors
REGEXP=Core 0: +\+?(-?\d{1,3})[,.]\d°[CF]

[fan_speed]
# Fan speed alarm: requires lm-sensors installed and configured (check your distribution's guide).
# The regexp must be adapted to your configuration: run `sensors` in the command line
# to find the name of the fan speed sensor in your system. In this example it is `cpu_fan`.
# The regexp captures the whole RPM number (one or more digits).
DISABLED=True
ALARM_VALUE_LESS_THAN=300
COMMAND=sensors
REGEXP=cpu_fan: +(\d+) RPM