// go-watchdog/watchdog.go
//
// 356 lines
// 7.3 KiB
// Go

package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"net/url"
"os"
"os/exec"
"strings"
"time"
)
// usage prints the one-line command-line synopsis to standard output.
func usage() {
	const synopsis = "Usage: go run watchdog.go -c dog.json"
	fmt.Println(synopsis)
}
// main loads the JSON config named by "-c <file>", starts one goroutine per
// configured watch plus a single logging goroutine, and then blocks forever.
//
// It exits with status 1 (after printing usage or the error) when the
// arguments are malformed, the config cannot be read or parsed, or the config
// contains no watches.
func main() {
	if 3 != len(os.Args) || "-c" != os.Args[1] {
		usage()
		os.Exit(1)
	}

	filename := os.Args[2]
	// ReadFile opens, reads and closes the file in one step, so no file
	// handle is left open for the lifetime of the process.
	configFile, err := ioutil.ReadFile(filename)
	if nil != err {
		log.Fatal(err)
	}

	config := &Config{}
	if err := json.Unmarshal(configFile, config); nil != err {
		log.Fatal(err)
	}
	//fmt.Printf("%#v\n", config)

	// Bail out before starting any goroutines if there is nothing to do.
	if 0 == len(config.Watches) {
		log.Fatal("Nothing to watch")
	}

	// Index the webhooks by name so each watch can look up the ones it uses.
	allWebhooks := make(map[string]ConfigWebhook)
	for i := range config.Webhooks {
		h := config.Webhooks[i]
		allWebhooks[h.Name] = h
	}

	// All watches log through one channel drained by a single goroutine.
	logQueue := make(chan string, 10)
	go logger(logQueue)

	for i := range config.Watches {
		c := config.Watches[i]
		logQueue <- fmt.Sprintf("Watching '%s'", c.Name)
		go func(c ConfigWatch) {
			d := New(&Dog{
				Name:        c.Name,
				CheckURL:    c.URL,
				Keywords:    c.Keywords,
				Recover:     c.RecoverScript,
				Webhooks:    c.Webhooks,
				AllWebhooks: allWebhooks,
				logger:      logQueue,
			})
			d.Watch()
		}(c)
	}

	// Nothing ever sends on done; receiving from it keeps the process alive
	// while the watch goroutines run.
	done := make(chan struct{}, 1)
	<-done
}
// Dog holds the configuration and the running state for one watched URL.
type Dog struct {
	// Name labels this watch in log lines and replaces "{{ .Name }}" in
	// webhook form values.
	Name string
	// CheckURL is the URL fetched by check.
	CheckURL string
	// Keywords is the text the response body must contain to count as up.
	Keywords string
	// Recover is a script fed to bash on failure; empty disables recovery.
	Recover string
	// Webhooks lists the names of the webhooks to call from notify.
	Webhooks []string
	// AllWebhooks maps webhook names to their definitions.
	AllWebhooks map[string]ConfigWebhook

	// logger receives every log line produced for this watch.
	logger chan string
	// error is the most recent failure recorded by check.
	error error

	// failures and passes count check results; check only ever increments them.
	failures, passes int
	// Timestamps of the latest failed check, passed check and notification.
	lastFailed, lastPassed, lastNotified time.Time
}
// New finishes initializing d and returns the same pointer.
//
// It backdates lastPassed to five minutes before the current time, which
// places it after the zero-valued lastNotified.
func New(d *Dog) *Dog {
	const backdate = 5 * time.Minute
	now := time.Now()
	d.lastPassed = now.Add(-backdate)
	return d
}
// Watch runs a check cycle immediately and then again after every five-minute
// pause. It never returns.
func (d *Dog) Watch() {
	const interval = 5 * time.Minute
	for {
		d.watch()
		// TODO set cancellable callback ?
		time.Sleep(interval)
	}
}
// watch performs one check cycle.
//
// If the first check passes it returns immediately. Otherwise it repeatedly
// runs the recover script, waits (10s, then doubling each round) and checks
// again. The cycle ends as soon as a re-check passes, or once d.failures has
// reached 5, at which point control goes back to the caller's 5-minute loop.
//
// NOTE(review): d.failures is never reset in this method, so the ">= 5" limit
// applies to the running total rather than to this cycle alone — confirm that
// is intended.
func (d *Dog) watch() {
	d.logger <- fmt.Sprintf("Check: '%s'", d.Name)

	if err := d.check(); nil == err {
		return
	}

	// failure records whether any re-check in this cycle has failed.
	failure := false
	t := 10
	for {
		d.recover()
		time.Sleep(time.Duration(t) * time.Second)
		// backoff
		t *= 2

		err := d.check()
		if nil != err {
			failure = true
		}

		// We should notify if
		// * We've had success since the last notification
		// * It's been at least 5 minutes since the last notification
		fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
		if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
			d.notify(failure)
		}

		if nil == err {
			// The service is back up: stop running the recover script and
			// stop backing off, otherwise a healthy service would be
			// "recovered" again and again with ever longer gaps between checks.
			return
		}

		if d.failures >= 5 {
			// go back to the main 5-minute loop
			break
		}
	}
}
// check fetches d.CheckURL once and reports whether the response body
// contains d.Keywords.
//
// It returns nil when the keywords are found. It returns a non-nil error when
// the request fails, the body cannot be read, or the keywords are missing; in
// those cases d.error is also set. Every call updates the pass/fail counters
// and the matching lastPassed/lastFailed timestamp.
func (d *Dog) check() error {
	var err error
	// Bookkeeping runs on every exit path and keys off the final value of err.
	defer func() {
		if nil != err {
			d.failures++
			d.lastFailed = time.Now()
			return
		}
		d.lastPassed = time.Now()
		d.passes++
	}()

	client := NewHTTPClient()
	response, err := client.Get(d.CheckURL)
	if nil != err {
		// Pass the error as an argument so a '%' in its text is not treated
		// as a formatting verb.
		d.error = fmt.Errorf("Connection Failure: %s", err)
		return err
	}
	// Always release the connection, whatever the outcome below.
	defer response.Body.Close()

	b, err := ioutil.ReadAll(response.Body)
	if nil != err {
		d.error = fmt.Errorf("Network Failure: %s", err)
		return err
	}

	if !bytes.Contains(b, []byte(d.Keywords)) {
		err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name)
		d.logger <- err.Error()
		d.error = err
		return err
	}

	d.logger <- fmt.Sprintf("Up: '%s'", d.Name)
	return nil
}
// recover feeds the d.Recover script to a bash process on standard input and
// waits for it to finish, for at most 10 seconds. It does nothing when
// d.Recover is empty. Failures are reported on d.logger and are not returned.
func (d *Dog) recover() {
	if "" == d.Recover {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "bash")
	// Let os/exec stream the script into bash once it is running. Writing to
	// a stdin pipe before Start can block when the script is larger than the
	// pipe buffer, and dereferences a nil pipe if the pipe cannot be created.
	cmd.Stdin = strings.NewReader(d.Recover)

	if err := cmd.Start(); nil != err {
		d.logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err)
		// Nothing was started, so there is nothing to wait for.
		return
	}

	if err := cmd.Wait(); nil != err {
		d.logger <- fmt.Sprintf("[Recover] '%s' failed: %s", d.Recover, err)
	}
}
// notify records the notification time and calls every webhook listed in
// d.Webhooks. A name that has no entry in d.AllWebhooks is reported once and
// then blanked in d.Webhooks so later calls skip it.
//
// hardFail is currently unused.
func (d *Dog) notify(hardFail bool) {
	d.logger <- fmt.Sprintf("Notifying the authorities of %s's failure", d.Name)
	d.lastNotified = time.Now()

	for i := range d.Webhooks {
		name := d.Webhooks[i]
		if "" == name {
			continue
		}

		h, ok := d.AllWebhooks[name]
		if !ok {
			// TODO check in main when config is read
			d.Webhooks[i] = ""
			// h is the zero value here, so report the watch's own name.
			d.logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, d.Name)
			continue
		}

		d.sendWebhook(h)
	}
}

// sendWebhook performs the HTTP request described by h and logs the outcome
// on d.logger. Errors are logged, not returned.
func (d *Dog) sendWebhook(h ConfigWebhook) {
	// TODO do this in main on config init
	if "" == h.Method {
		h.Method = "POST"
	}

	form := url.Values{}
	for k, v := range h.Form {
		// TODO real templates
		form.Set(k, strings.Replace(v, "{{ .Name }}", d.Name, -1))
	}
	// Always hand NewRequest a real reader. A nil *strings.Reader stored in
	// the io.Reader parameter is a non-nil interface and makes NewRequest
	// panic; an empty reader simply results in a request with no body.
	body := strings.NewReader(form.Encode())

	req, err := http.NewRequest(h.Method, h.URL, body)
	if nil != err {
		d.logger <- fmt.Sprintf("[Notify] HTTP Client Network Error: %s", err)
		return
	}

	if 0 != len(h.Form) {
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	}

	if 0 != len(h.Auth) {
		// Accept either the short or the long spelling of each key.
		user := h.Auth["user"]
		if "" == user {
			user = h.Auth["username"]
		}
		pass := h.Auth["pass"]
		if "" == pass {
			pass = h.Auth["password"]
		}
		req.SetBasicAuth(user, pass)
	}

	// Defaults first, so that configured headers can override them.
	req.Header.Set("User-Agent", "Watchdog/1.0")
	// TODO json vs xml vs txt
	req.Header.Set("Accept", "application/json")
	for k := range h.Headers {
		req.Header.Set(k, h.Headers[k])
	}

	client := NewHTTPClient()
	resp, err := client.Do(req)
	if nil != err {
		d.logger <- fmt.Sprintf("[Notify] HTTP Client Error: %s", err)
		return
	}
	defer resp.Body.Close()

	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
		d.logger <- fmt.Sprintf("[Notify] Response Error: %s", resp.Status)
		return
	}

	var data map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&data); nil != err {
		d.logger <- fmt.Sprintf("[Notify] Response Body Error: %s", err)
		return
	}

	// TODO some sort of way to determine if data is successful (keywords)
	d.logger <- fmt.Sprintf("[Notify] Success? %#v", data)
}
// Config is the top-level shape of the JSON configuration file.
type Config struct {
	// Watches lists the URLs to monitor; one goroutine is started per entry.
	Watches []ConfigWatch `json:"watches"`
	// Webhooks lists the webhook definitions that watches refer to by name.
	Webhooks []ConfigWebhook `json:"webhooks"`
}
// ConfigWatch is one entry of the "watches" array in the configuration file.
type ConfigWatch struct {
	// Name labels the watch in log output.
	Name string `json:"name"`
	// URL is the address to fetch on each check.
	URL string `json:"url"`
	// Keywords is the text the response body must contain.
	Keywords string `json:"keywords"`
	// Webhooks names the webhooks to call when a notification is sent.
	Webhooks []string `json:"webhooks"`
	// RecoverScript is an optional script run through bash after a failure.
	RecoverScript string `json:"recover_script"`
}
// ConfigWebhook is one entry of the "webhooks" array in the configuration
// file.
type ConfigWebhook struct {
	// Name is the key that watches use to refer to this webhook.
	Name string `json:"name"`
	// Method is the HTTP method; notify falls back to POST when it is empty.
	Method string `json:"method"`
	// URL is the endpoint to call.
	URL string `json:"url"`
	// Auth holds basic-auth credentials, read by notify from the keys
	// "user"/"username" and "pass"/"password".
	Auth map[string]string `json:"auth"`
	// Headers are extra request headers set on the outgoing request.
	Headers map[string]string `json:"headers"`
	// Form holds form fields; "{{ .Name }}" in a value is replaced with the
	// watch name before sending.
	Form map[string]string `json:"form"`
	// Config and Configs are parsed from the file but not read by the code
	// in this file.
	Config  map[string]string   `json:"config"`
	Configs []map[string]string `json:"configs"`
}
// NewHTTPClient returns a new HTTP client with explicit time limits, because
// the default http client uses unsafe defaults.
//
// Each call builds its own transport. The limits are: 10s to establish a
// connection, 5s for the TLS handshake, 5s for a whole request, and 90s
// before an idle keep-alive connection is closed.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}
	transport := &http.Transport{
		// DialContext replaces the deprecated Dial field and lets an
		// in-flight dial be cancelled together with its request.
		DialContext:         dialer.DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
		// Callers create a client per request, so make sure connections left
		// idle in this transport's pool do not stay open indefinitely.
		IdleConnTimeout: 90 * time.Second,
	}
	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}
// logger prints every message received on msgs through the standard logger.
//
// This is so that the log messages don't trample
// over each other when they happen simultaneously.
//
// It returns once msgs has been closed and drained. Receiving in a bare loop
// would instead yield empty strings forever after a close and spin printing
// blank lines.
func logger(msgs chan string) {
	for msg := range msgs {
		log.Println(msg)
	}
}