From 65c739325365914a148afd74d283e91fe4c95e23 Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Fri, 7 Jun 2019 20:37:53 -0600 Subject: [PATCH] initial commit --- README.md | 50 ++++++++++ go.mod | 3 + watchdog.go | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 327 insertions(+) create mode 100644 README.md create mode 100644 go.mod create mode 100644 watchdog.go diff --git a/README.md b/README.md new file mode 100644 index 0000000..f98bef4 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# Watchdog + +Webhooks for all the times that something doesn't go right. + +# Install + +```bash +git clone https://git.coolaj86.com/coolaj86/watchdog.go.git +``` + +# Usage + +```bash +pushd watchdog.go/ +go run ./watchdog.go -c dog.json +``` + +## Sample Config + +```json +{ + "watches": [ + { + "name": "Example Site", + "url": "https://example.com/", + "webhook": "twilio", + "keywords": "My Site", + "recover_script": "systemctl restart example-site" + } + ], + "webhooks": [ + { + "name": "twilio", + "config": { + "sid": "AC00000000000000000000000000000000", + "token": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + }, + "configs": [{ "To": "+1 555 555 5555", "From": "+1 555 234 5678" }], + "url": "https://api.twilio.com/2010-04-01/Accounts/{{ .sid }}/Messages.json", + "auth": { "user": "{{ .sid }}", "pass": "{{ .token }}" }, + "_TODO_Body": "switch template if hardfail", + "form": { + "To": "{{ .To }}", + "From": "{{ .From }}", + "Body": "[{{ .name }}] The system is down. The system is down." 
/*
Remainder of the README.md sample config and the go.mod hunk that share this
span of the original patch (hunk line counts refer to the original commit):

	+      }
	+    }
	+  ]
	+}
	+```
	diff --git a/go.mod b/go.mod
	new file mode 100644
	index 0000000..83beade
	--- /dev/null
	+++ b/go.mod
	@@ -0,0 +1,3 @@
	+module git.coolaj86.com/coolaj86/watchdog.go
	+
	+go 1.12
	diff --git a/watchdog.go b/watchdog.go
	new file mode 100644
	index 0000000..29aa617
	--- /dev/null
	+++ b/watchdog.go
	@@ -0,0 +1,274 @@
*/

// Command watchdog polls the URLs listed in a JSON config file. When a page
// cannot be fetched or does not contain its expected keywords, it runs the
// configured recovery script, re-checks with exponential backoff, and calls
// the notification hook.
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"time"
)

const (
	// checkInterval is the pause between routine checks of a watch.
	checkInterval = 5 * time.Minute
	// notifyInterval is the minimum gap between two notifications.
	notifyInterval = 5 * time.Minute
	// recoverTimeout bounds how long a recovery script may run.
	recoverTimeout = 10 * time.Second
	// initialBackoff is the first delay between recovery and re-check;
	// it doubles after every attempt.
	initialBackoff = 10 * time.Second
	// maxFailures is the number of failed checks within one outage after
	// which watch gives up and returns to the routine schedule.
	maxFailures = 5
)

// usage prints the command-line synopsis to stdout.
func usage() {
	fmt.Println("Usage: go run watchdog.go -c dog.json")
}

// main expects exactly "-c <config.json>", loads the config, starts one
// watcher goroutine per configured watch and then blocks forever.
func main() {
	if len(os.Args) != 3 || os.Args[1] != "-c" {
		usage()
		os.Exit(1)
	}

	config, err := loadConfig(os.Args[2])
	if err != nil {
		log.Fatal(err)
	}
	// Bail out before spawning anything if there is no work to do.
	if len(config.Watches) == 0 {
		log.Fatal("Nothing to watch")
	}

	for _, c := range config.Watches {
		fmt.Printf("Watching '%s'\n", c.Name)
		// w is declared inside the loop body, so every goroutine gets its
		// own Dog even with pre-1.22 loop-variable semantics.
		w := &Dog{
			Name:     c.Name,
			CheckURL: c.URL,
			Keywords: c.Keywords,
			Recover:  c.RecoverScript,
		}
		go w.Watch()
	}

	// Watchers never return; park the main goroutine.
	select {}
}

// loadConfig reads filename and decodes it as a JSON Config.
// It returns an error if the file cannot be read or is not valid JSON.
func loadConfig(filename string) (*Config, error) {
	raw, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	config := &Config{}
	if err := json.Unmarshal(raw, config); err != nil {
		return nil, fmt.Errorf("parsing %s: %v", filename, err)
	}
	return config, nil
}

// Dog watches a single URL. The exported fields are configuration; the
// unexported fields are bookkeeping maintained by check and notify.
// A Dog is meant to be driven by a single goroutine (see Watch).
type Dog struct {
	// Name labels the watch in log output.
	Name string
	// CheckURL is fetched with GET on every check.
	CheckURL string
	// Keywords must appear in the response body for a check to pass.
	Keywords string
	// Recover is a script fed to bash on stdin after a failed check.
	// Empty means no recovery is attempted.
	Recover string

	// client is created lazily by the first check and then reused.
	client *http.Client
	// error is the result of the most recent check (nil after a pass).
	error error
	// failures and passes count check results over the Dog's lifetime.
	failures int
	passes   int

	lastFailed   time.Time
	lastPassed   time.Time
	lastNotified time.Time
}

// execRecover pipes the Recover script into bash and waits for it to finish,
// killing it after recoverTimeout. Failures are reported on stderr only.
func (w *Dog) execRecover() {
	if w.Recover == "" {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), recoverTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "bash")
	// Let os/exec feed stdin after the process has started; this cannot
	// block on a full pipe and needs no manual pipe management.
	cmd.Stdin = strings.NewReader(w.Recover)
	if err := cmd.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "'%s' failed: %s\n", w.Recover, err)
	}
}

// Watch runs a check immediately and then every checkInterval, forever.
func (w *Dog) Watch() {
	for {
		w.watch()
		// TODO set cancellable callback ?
		time.Sleep(checkInterval)
	}
}

// watch performs one check. If it fails, watch alternates between running the
// recovery script and re-checking with a doubling delay until a re-check
// passes or maxFailures checks have failed, notifying when shouldNotify
// allows it.
func (w *Dog) watch() {
	fmt.Println("Running a check")

	if err := w.check(); err == nil {
		return
	}

	hardFail := false // set once any re-check after recovery has failed
	failures := 1     // the check above
	backoff := initialBackoff
	for {
		w.execRecover()
		time.Sleep(backoff)
		backoff *= 2

		err := w.check()
		if err != nil {
			hardFail = true
			failures++
		}

		if w.shouldNotify(time.Now()) {
			if nerr := w.notify(hardFail); nerr != nil {
				fmt.Println("Notify:", nerr)
			}
		}

		if err == nil || failures >= maxFailures {
			// Recovered, or out of retries: go back to the main
			// 5-minute loop.
			return
		}
	}
}

// shouldNotify reports whether a notification may be sent at time now:
//   - there has been a passing check since the last notification, and
//   - the last notification is more than notifyInterval old.
//
// NOTE(review): a target that has never passed a check therefore never
// triggers a notification; confirm that this is intended.
func (w *Dog) shouldNotify(now time.Time) bool {
	return w.lastPassed.After(w.lastNotified) &&
		w.lastNotified.Before(now.Add(-notifyInterval))
}

// NewHTTPClient returns a client with a 5s overall request timeout, a 10s
// dial timeout and a 5s TLS handshake timeout.
func NewHTTPClient() *http.Client {
	transport := &http.Transport{
		DialContext: (&net.Dialer{
			Timeout: 10 * time.Second,
		}).DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
	}
	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}

// check probes CheckURL once and records the outcome: it stores the result in
// w.error and updates the pass/failure counters and timestamps. It returns
// nil when the page was fetched and contains Keywords.
func (w *Dog) check() error {
	err := w.probe()
	w.error = err // cleared on success so a stale failure does not linger
	if err != nil {
		w.failures++
		w.lastFailed = time.Now()
		return err
	}

	w.passes++
	w.lastPassed = time.Now()
	return nil
}

// probe fetches CheckURL and verifies that the body contains Keywords.
// The HTTP status code is not inspected.
func (w *Dog) probe() error {
	if w.client == nil {
		w.client = NewHTTPClient()
	}

	response, err := w.client.Get(w.CheckURL)
	if err != nil {
		return fmt.Errorf("Connection Failure: %v", err)
	}
	// The body must always be closed or the connection is leaked.
	defer response.Body.Close()

	b, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return fmt.Errorf("Network Failure: %v", err)
	}

	if !bytes.Contains(b, []byte(w.Keywords)) {
		// Keywords is data, never a format string.
		err := fmt.Errorf("Keywords Not Found: %s", w.Keywords)
		fmt.Println(err)
		return err
	}

	fmt.Println("Happy day!")
	return nil
}

// notify records the notification time and logs a message. Sending the
// webhook is not implemented yet, so hardFail is currently unused and the
// returned error is always nil.
func (w *Dog) notify(hardFail bool) error {
	w.lastNotified = time.Now()
	fmt.Println("Notifying the authorities of a failure")
	return nil
	/*
		urlStr := "https://api.twilio.com/2010-04-01/Accounts/" + accountSid + "/Messages.json"
		msgData := url.Values{}
		msgData.Set("To", "+1 555 555 5555")
		msgData.Set("From", "+1 555 555 1234")
		if hardFail {
			msgData.Set("Body", fmt.Sprintf("[%s] The system is down. The system is down.", w.Name))
		} else {
			msgData.Set("Body", fmt.Sprintf("[%s] had a hiccup.", w.Name))
		}
		msgDataReader := *strings.NewReader(msgData.Encode())
		client := NewHTTPClient()
		req, err := http.NewRequest("POST", urlStr, &msgDataReader)
		if nil != err {
			fmt.Fprintf(os.Stderr, "Failed to text: %s\n", err)
		}
		req.SetBasicAuth(accountSid, authToken)
		req.Header.Add("Accept", "application/json")
		req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
		resp, _ := client.Do(req)
		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
			var data map[string]interface{}
			decoder := json.NewDecoder(resp.Body)
			err := decoder.Decode(&data)
			if err == nil {
				fmt.Println(data["sid"])
			}
		} else {
			fmt.Println("Error:", resp.Status)
		}

		return nil
	*/
}

// Config is the top-level structure of the JSON config file.
type Config struct {
	Watches []ConfigWatch `json:"watches"`
}

// ConfigWatch describes one entry of the "watches" array.
type ConfigWatch struct {
	Name          string `json:"name"`
	URL           string `json:"url"`
	Keywords      string `json:"keywords"`
	Webhook       string `json:"webhook"`
	RecoverScript string `json:"recover_script"`
}

/*
	request :+ http.NewRequest("POST"
	curl -s --user 'api:YOUR_API_KEY' \
		https://api.mailgun.net/v3/YOUR_DOMAIN_NAME/messages \
		-F from='Excited User ' \
		-F to=YOU@YOUR_DOMAIN_NAME \
		-F to=bar@example.com \
		-F subject='Hello' \
		-F text='Testing some Mailgun awesomeness!'
*/