Skip to content
Snippets Groups Projects
Commit 0dc54e4e authored by Matej Kramny's avatar Matej Kramny
Browse files

- customisable time format

- custom messages
- configure threshold count instead of uptime %
parent b3bc1d44
No related branches found
No related tags found
No related merge requests found
...@@ -37,7 +37,7 @@ func (api CachetAPI) Ping() error { ...@@ -37,7 +37,7 @@ func (api CachetAPI) Ping() error {
// SendMetric adds a data point to a cachet monitor // SendMetric adds a data point to a cachet monitor
func (api CachetAPI) SendMetric(id int, lag int64) { func (api CachetAPI) SendMetric(id int, lag int64) {
logrus.Debugf("Sending lag metric ID:%d %vms", id, lag) logrus.Debugf("Sending lag metric ID:%d RTT %vms", id, lag)
jsonBytes, _ := json.Marshal(map[string]interface{}{ jsonBytes, _ := json.Marshal(map[string]interface{}{
"value": lag, "value": lag,
......
...@@ -10,12 +10,12 @@ import ( ...@@ -10,12 +10,12 @@ import (
) )
type CachetMonitor struct { type CachetMonitor struct {
SystemName string `json:"system_name"` SystemName string `json:"system_name" yaml:"system_name"`
DateFormat string `json:"date_format" yaml:"date_format"`
API CachetAPI `json:"api"` API CachetAPI `json:"api"`
RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"` RawMonitors []map[string]interface{} `json:"monitors" yaml:"monitors"`
Monitors []MonitorInterface `json:"-" yaml:"-"` Monitors []MonitorInterface `json:"-" yaml:"-"`
Immediate bool `json:"-" yaml:"-"` Immediate bool `json:"-" yaml:"-"`
} }
...@@ -28,6 +28,10 @@ func (cfg *CachetMonitor) Validate() bool { ...@@ -28,6 +28,10 @@ func (cfg *CachetMonitor) Validate() bool {
cfg.SystemName = getHostname() cfg.SystemName = getHostname()
} }
if len(cfg.DateFormat) == 0 {
cfg.DateFormat = DefaultTimeFormat
}
if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 { if len(cfg.API.Token) == 0 || len(cfg.API.URL) == 0 {
logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor") logrus.Warnf("API URL or API Token missing.\nGet help at https://github.com/castawaylabs/cachet-monitor")
valid = false valid = false
...@@ -74,3 +78,11 @@ func GetMonitorType(t string) string { ...@@ -74,3 +78,11 @@ func GetMonitorType(t string) string {
return t return t
} }
// getTemplateData assembles the data map passed to message templates for the
// given monitor. The map exposes three keys:
//
//   - "SystemName": the SystemName of the monitor's config
//   - "API":        the API value of the monitor's config
//   - "Monitor":    the monitor itself
//
// monitor must be non-nil; it is dereferenced to reach its config.
func getTemplateData(monitor *AbstractMonitor) map[string]interface{} {
	cfg := monitor.config

	data := make(map[string]interface{}, 3)
	data["SystemName"] = cfg.SystemName
	data["API"] = cfg.API
	data["Monitor"] = monitor

	return data
}
...@@ -8,11 +8,13 @@ import ( ...@@ -8,11 +8,13 @@ import (
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/Sirupsen/logrus"
) )
// Investigating template // Investigating template
var defaultHTTPInvestigatingTpl = MessageTemplate{ var defaultHTTPInvestigatingTpl = MessageTemplate{
Subject: `{{ .Name }} - {{ .config.SystemName }}`, Subject: `{{ .Monitor.Name }} - {{ .SystemName }}`,
Message: `{{ .Name }} check **failed** - {{ .now }} Message: `{{ .Name }} check **failed** - {{ .now }}
{{ .lastFailReason }}`, {{ .lastFailReason }}`,
...@@ -90,6 +92,7 @@ func (mon *HTTPMonitor) Validate() []string { ...@@ -90,6 +92,7 @@ func (mon *HTTPMonitor) Validate() []string {
mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl) mon.Template.Fixed.SetDefault(defaultHTTPFixedTpl)
errs := mon.AbstractMonitor.Validate() errs := mon.AbstractMonitor.Validate()
logrus.Warnf("%#v", mon.Template.Investigating)
if len(mon.ExpectedBody) > 0 { if len(mon.ExpectedBody) > 0 {
exp, err := regexp.Compile(mon.ExpectedBody) exp, err := regexp.Compile(mon.ExpectedBody)
......
...@@ -44,8 +44,9 @@ type AbstractMonitor struct { ...@@ -44,8 +44,9 @@ type AbstractMonitor struct {
Fixed MessageTemplate Fixed MessageTemplate
} }
// Threshold = percentage // Threshold = percentage / number of down incidents
Threshold float32 Threshold float32
ThresholdCount bool `mapstructure:"threshold_count"`
history []bool history []bool
lastFailReason string lastFailReason string
...@@ -83,7 +84,10 @@ func (mon *AbstractMonitor) Validate() []string { ...@@ -83,7 +84,10 @@ func (mon *AbstractMonitor) Validate() []string {
} }
if err := mon.Template.Fixed.Compile(); err != nil { if err := mon.Template.Fixed.Compile(); err != nil {
errs = append(errs, "Could not compile template: "+err.Error()) errs = append(errs, "Could not compile \"fixed\" template: "+err.Error())
}
if err := mon.Template.Investigating.Compile(); err != nil {
errs = append(errs, "Could not compile \"investigating\" template: "+err.Error())
} }
return errs return errs
...@@ -137,11 +141,16 @@ func (mon *AbstractMonitor) tick(iface MonitorInterface) { ...@@ -137,11 +141,16 @@ func (mon *AbstractMonitor) tick(iface MonitorInterface) {
up := iface.test() up := iface.test()
lag := getMs() - reqStart lag := getMs() - reqStart
if len(mon.history) == HistorySize-1 { histSize := HistorySize
if mon.ThresholdCount {
histSize = int(mon.Threshold)
}
if len(mon.history) == histSize-1 {
logrus.Warnf("%v is now saturated\n", mon.Name) logrus.Warnf("%v is now saturated\n", mon.Name)
} }
if len(mon.history) >= HistorySize { if len(mon.history) >= histSize {
mon.history = mon.history[len(mon.history)-(HistorySize-1):] mon.history = mon.history[len(mon.history)-(histSize-1):]
} }
mon.history = append(mon.history, up) mon.history = append(mon.history, up)
mon.AnalyseData() mon.AnalyseData()
...@@ -163,34 +172,53 @@ func (monitor *AbstractMonitor) AnalyseData() { ...@@ -163,34 +172,53 @@ func (monitor *AbstractMonitor) AnalyseData() {
} }
t := (float32(numDown) / float32(len(monitor.history))) * 100 t := (float32(numDown) / float32(len(monitor.history))) * 100
logrus.Printf("%s %.2f%%/%.2f%% down at %v\n", monitor.Name, t, monitor.Threshold, time.Now().UnixNano()/int64(time.Second)) if monitor.ThresholdCount {
logrus.Printf("%s %d/%d down at %v", monitor.Name, numDown, int(monitor.Threshold), time.Now().Format(DefaultTimeFormat))
} else {
logrus.Printf("%s %.2f%%/%.2f%% down at %v", monitor.Name, t, monitor.Threshold, time.Now().Format(DefaultTimeFormat))
}
if len(monitor.history) != HistorySize { histSize := HistorySize
if monitor.ThresholdCount {
histSize = int(monitor.Threshold)
}
if len(monitor.history) != histSize {
// not saturated // not saturated
return return
} }
if t > monitor.Threshold && monitor.incident == nil { triggered := (monitor.ThresholdCount && numDown == int(monitor.Threshold)) || (!monitor.ThresholdCount && t > monitor.Threshold)
if triggered && monitor.incident == nil {
// create incident
subject, message := monitor.Template.Investigating.Exec(getTemplateData(monitor))
monitor.incident = &Incident{ monitor.incident = &Incident{
Name: monitor.Name + " - " + monitor.config.SystemName, Name: subject,
ComponentID: monitor.ComponentID, ComponentID: monitor.ComponentID,
Message: monitor.Name + " check **failed** - " + time.Now().Format(DefaultTimeFormat), Message: message,
Notify: true, Notify: true,
} }
if len(monitor.lastFailReason) > 0 {
monitor.incident.Message += "\n\n `" + monitor.lastFailReason + "`"
}
// is down, create an incident // is down, create an incident
logrus.Printf("%v creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason) logrus.Warnf("%v: creating incident. Monitor is down: %v", monitor.Name, monitor.lastFailReason)
// set investigating status // set investigating status
monitor.incident.SetInvestigating() monitor.incident.SetInvestigating()
// create/update incident // create/update incident
if err := monitor.incident.Send(monitor.config); err != nil { if err := monitor.incident.Send(monitor.config); err != nil {
logrus.Printf("Error sending incident: %v\n", err) logrus.Printf("Error sending incident: %v\n", err)
} }
} else if t < monitor.Threshold && monitor.incident != nil {
return
}
// still triggered or no incident
if triggered || monitor.incident == nil {
return
}
logrus.Warnf("Resolving incident")
// was down, created an incident, its now ok, make it resolved. // was down, created an incident, its now ok, make it resolved.
logrus.Printf("%v resolved downtime incident", monitor.Name) logrus.Printf("%v resolved downtime incident", monitor.Name)
...@@ -202,4 +230,3 @@ func (monitor *AbstractMonitor) AnalyseData() { ...@@ -202,4 +230,3 @@ func (monitor *AbstractMonitor) AnalyseData() {
monitor.lastFailReason = "" monitor.lastFailReason = ""
monitor.incident = nil monitor.incident = nil
} }
}
package cachet package cachet
import "text/template" import (
"bytes"
"text/template"
"github.com/Sirupsen/logrus"
)
type MessageTemplate struct { type MessageTemplate struct {
Subject string `json:"subject"` Subject string `json:"subject"`
...@@ -33,6 +38,16 @@ func (t *MessageTemplate) Compile() error { ...@@ -33,6 +38,16 @@ func (t *MessageTemplate) Compile() error {
return err return err
} }
// Exec renders the subject template against data and returns the resulting
// (subject, message) pair.
//
// Only the subject is rendered here; the second return value is always the
// empty string. If the subject template is missing or fails to execute, the
// problem is logged as a warning and whatever output was produced before the
// failure is returned, so callers never see a panic from an uncompiled
// template.
func (t *MessageTemplate) Exec(data interface{}) (string, string) {
	if t.subjectTpl == nil {
		// Executing a nil template panics. NOTE(review): subjectTpl is
		// presumably populated by Compile — confirm callers always run it first.
		logrus.Warnf("Subject template is not compiled")
		return "", ""
	}

	buf := new(bytes.Buffer)
	if err := t.subjectTpl.Execute(buf, data); err != nil {
		// Execute may have written partial output before failing; surface
		// the error instead of silently dropping it.
		logrus.Warnf("Could not execute subject template: %v", err)
	}

	return buf.String(), ""
}
func compileTemplate(text string) (*template.Template, error) { func compileTemplate(text string) (*template.Template, error) {
return template.New("").Parse(text) return template.New("").Parse(text)
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment