AJ ONeal
5 years ago
commit
65c7393253
3 changed files with 327 additions and 0 deletions
@ -0,0 +1,50 @@ |
|||
# Watchdog |
|||
|
|||
Webhooks for all the times that something doesn't go right. |
|||
|
|||
# Install |
|||
|
|||
```bash |
|||
git clone https://git.coolaj86.com/coolaj86/watchdog.go.git |
|||
``` |
|||
|
|||
# Usage |
|||
|
|||
```bash |
|||
pushd watchdog.go/ |
|||
go run ./watchdog.go -c dog.json |
|||
``` |
|||
|
|||
## Sample Config |
|||
|
|||
```json |
|||
{ |
|||
"watches": [ |
|||
{ |
|||
"name": "Example Site", |
|||
"url": "https://example.com/", |
|||
"webhook": "twilio", |
|||
"keywords": "My Site", |
|||
"recover_script": "systemctl restart example-site" |
|||
} |
|||
], |
|||
"webhooks": [ |
|||
{ |
|||
"name": "twilio", |
|||
"config": { |
|||
"sid": "AC00000000000000000000000000000000", |
|||
"token": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" |
|||
}, |
|||
"configs": [{ "To": "+1 555 555 5555", "From": "+1 555 234 5678" }], |
|||
"url": "https://api.twilio.com/2010-04-01/Accounts/{{ .sid }}/Messages.json", |
|||
"auth": { "user": "{{ .sid }}", "pass": "{{ .token }}" }, |
|||
"_TODO_Body": "switch template if hardfail", |
|||
"form": { |
|||
"To": "{{ .To }}", |
|||
"From": "{{ .From }}", |
|||
"Body": "[{{ .name }}] The system is down. The system is down." |
|||
} |
|||
} |
|||
] |
|||
} |
|||
``` |
@ -0,0 +1,3 @@ |
|||
module git.coolaj86.com/coolaj86/watchdog.go |
|||
|
|||
go 1.12 |
@ -0,0 +1,274 @@ |
|||
package main |
|||
|
|||
import ( |
|||
"bytes" |
|||
"context" |
|||
"encoding/json" |
|||
"fmt" |
|||
"io/ioutil" |
|||
"log" |
|||
"net" |
|||
"net/http" |
|||
"os" |
|||
"os/exec" |
|||
"time" |
|||
) |
|||
|
|||
// usage prints the expected command-line invocation to standard output.
func usage() {
	const invocation = "go run watchdog.go -c dog.json"
	fmt.Println("Usage: " + invocation)
}
|||
|
|||
func main() { |
|||
if 3 != len(os.Args) { |
|||
usage() |
|||
os.Exit(1) |
|||
return |
|||
} |
|||
if "-c" != os.Args[1] { |
|||
usage() |
|||
os.Exit(1) |
|||
return |
|||
} |
|||
|
|||
filename := os.Args[2] |
|||
f, err := os.Open(filename) |
|||
if nil != err { |
|||
log.Fatal(err) |
|||
return |
|||
} |
|||
|
|||
configFile, err := ioutil.ReadAll(f) |
|||
if nil != err { |
|||
log.Fatal(err) |
|||
return |
|||
} |
|||
|
|||
config := &Config{} |
|||
err = json.Unmarshal(configFile, config) |
|||
if nil != err { |
|||
log.Fatal(err) |
|||
return |
|||
} |
|||
|
|||
//fmt.Printf("%#v\n", config)
|
|||
|
|||
done := make(chan struct{}, 1) |
|||
|
|||
for i := range config.Watches { |
|||
c := config.Watches[i] |
|||
fmt.Printf("Watching '%s'", c.Name) |
|||
go func(c ConfigWatch) { |
|||
w := &Dog{ |
|||
Name: c.Name, |
|||
CheckURL: c.URL, |
|||
Keywords: c.Keywords, |
|||
Recover: c.RecoverScript, |
|||
} |
|||
w.Watch() |
|||
}(config.Watches[i]) |
|||
} |
|||
|
|||
if 0 == len(config.Watches) { |
|||
log.Fatal("Nothing to watch") |
|||
return |
|||
} |
|||
|
|||
<-done |
|||
} |
|||
|
|||
// Dog is a watchdog for a single URL. The exported fields are its
// configuration; the unexported fields are bookkeeping maintained by the
// check and notify methods.
type Dog struct {
	// Name is the human-readable label of the watched service.
	Name string
	// CheckURL is the address fetched on every check.
	CheckURL string
	// Keywords is the text that must appear in the response body for a
	// check to pass.
	Keywords string
	// Recover is a script fed to bash when a check fails; an empty string
	// disables recovery.
	Recover string

	// error holds the most recent check failure (it is not cleared when a
	// later check passes).
	error error
	// failures and passes count failed and successful checks.
	failures int
	passes   int
	// Timestamps of the latest failed check, passed check and notification.
	lastFailed   time.Time
	lastPassed   time.Time
	lastNotified time.Time
}
|||
|
|||
func (w *Dog) execRecover() { |
|||
if "" == w.Recover { |
|||
return |
|||
} |
|||
|
|||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|||
cmd := exec.CommandContext(ctx, "bash") |
|||
pipe, err := cmd.StdinPipe() |
|||
pipe.Write([]byte(w.Recover)) |
|||
if nil != err { |
|||
fmt.Fprintf(os.Stderr, "Could not write to bash '%s': %s\n", w.Recover, err) |
|||
} |
|||
err = cmd.Start() |
|||
if nil != err { |
|||
fmt.Fprintf(os.Stderr, "Could not start '%s': %s\n", w.Recover, err) |
|||
} |
|||
err = pipe.Close() |
|||
if nil != err { |
|||
fmt.Fprintf(os.Stderr, "Could not close '%s': %s\n", w.Recover, err) |
|||
} |
|||
err = cmd.Wait() |
|||
cancel() |
|||
if nil != err { |
|||
fmt.Fprintf(os.Stderr, "'%s' failed: %s\n", w.Recover, err) |
|||
} |
|||
} |
|||
|
|||
func (w *Dog) Watch() { |
|||
w.watch() |
|||
for { |
|||
// TODO set cancellable callback ?
|
|||
time.Sleep(5 * time.Minute) |
|||
w.watch() |
|||
} |
|||
} |
|||
|
|||
func (w *Dog) watch() { |
|||
fmt.Println("Running a check") |
|||
|
|||
err := w.check() |
|||
if nil == err { |
|||
return |
|||
} |
|||
|
|||
failure := false |
|||
t := 10 |
|||
for { |
|||
w.execRecover() |
|||
time.Sleep(time.Duration(t) * time.Second) |
|||
// backoff
|
|||
t *= 2 |
|||
err := w.check() |
|||
if nil != err { |
|||
failure = true |
|||
} |
|||
// We should notify if
|
|||
// * We've had success since the last notification
|
|||
// * It's been at least 5 minutes since the last notification
|
|||
if w.lastPassed.After(w.lastNotified) && w.lastNotified.Before(time.Now().Add(-5*time.Minute)) { |
|||
err := w.notify(failure) |
|||
if nil != err { |
|||
fmt.Println("Notify:", err) |
|||
} |
|||
} |
|||
if w.failures >= 5 { |
|||
// go back to the main 5-minute loop
|
|||
break |
|||
} |
|||
} |
|||
} |
|||
|
|||
// NewHTTPClient returns an HTTP client with conservative timeouts: 10 seconds
// to establish a connection, 5 seconds for the TLS handshake, and 5 seconds
// for the request as a whole (http.Client.Timeout also covers connection
// time, so it is the effective upper bound).
//
// The transport uses DialContext rather than the deprecated Dial field, so
// request cancellation and the client timeout also apply while dialing.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}

	return &http.Client{
		Timeout: 5 * time.Second,
		Transport: &http.Transport{
			DialContext:         dialer.DialContext,
			TLSHandshakeTimeout: 5 * time.Second,
		},
	}
}
|||
|
|||
func (w *Dog) check() error { |
|||
var err error |
|||
defer func() { |
|||
if nil != err { |
|||
w.failures += 1 |
|||
w.lastFailed = time.Now() |
|||
} else { |
|||
w.lastPassed = time.Now() |
|||
w.passes += 1 |
|||
} |
|||
}() |
|||
|
|||
client := NewHTTPClient() |
|||
response, err := client.Get(w.CheckURL) |
|||
if nil != err { |
|||
w.error = fmt.Errorf("Connection Failure: " + err.Error()) |
|||
return err |
|||
} |
|||
|
|||
b, err := ioutil.ReadAll(response.Body) |
|||
if nil != err { |
|||
w.error = fmt.Errorf("Network Failure: " + err.Error()) |
|||
return err |
|||
} |
|||
|
|||
if !bytes.Contains(b, []byte(w.Keywords)) { |
|||
err = fmt.Errorf("Keywords Not Found: " + w.Keywords) |
|||
fmt.Println(err) |
|||
w.error = err |
|||
return err |
|||
} else { |
|||
fmt.Println("Happy day!") |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
func (w *Dog) notify(hardFail bool) error { |
|||
w.lastNotified = time.Now() |
|||
fmt.Println("Notifying the authorities of a failure") |
|||
return nil |
|||
/* |
|||
urlStr := "https://api.twilio.com/2010-04-01/Accounts/" + accountSid + "/Messages.json" |
|||
msgData := url.Values{} |
|||
msgData.Set("To", "+1 555 555 5555") |
|||
msgData.Set("From", "+1 555 555 1234") |
|||
if hardFail { |
|||
msgData.Set("Body", fmt.Sprintf("[%s] The system is down. The system is down.", w.Name)) |
|||
} else { |
|||
msgData.Set("Body", fmt.Sprintf("[%s] had a hiccup.", w.Name)) |
|||
} |
|||
msgDataReader := *strings.NewReader(msgData.Encode()) |
|||
client := NewHTTPClient() |
|||
req, err := http.NewRequest("POST", urlStr, &msgDataReader) |
|||
if nil != err { |
|||
fmt.Fprintf(os.Stderr, "Failed to text: %s\n", err) |
|||
} |
|||
req.SetBasicAuth(accountSid, authToken) |
|||
req.Header.Add("Accept", "application/json") |
|||
req.Header.Add("Content-Type", "application/x-www-form-urlencoded") |
|||
resp, _ := client.Do(req) |
|||
if resp.StatusCode >= 200 && resp.StatusCode < 300 { |
|||
var data map[string]interface{} |
|||
decoder := json.NewDecoder(resp.Body) |
|||
err := decoder.Decode(&data) |
|||
if err == nil { |
|||
fmt.Println(data["sid"]) |
|||
} |
|||
} else { |
|||
fmt.Println("Error:", resp.Status) |
|||
} |
|||
|
|||
return nil |
|||
*/ |
|||
} |
|||
|
|||
type (
	// Config is the top-level shape of the JSON config file. Only the
	// "watches" array is decoded; other keys in the file are ignored.
	Config struct {
		Watches []ConfigWatch `json:"watches"`
	}

	// ConfigWatch is one entry of the "watches" array: a service to poll.
	ConfigWatch struct {
		// Name is the label of the service.
		Name string `json:"name"`
		// URL is the address to fetch.
		URL string `json:"url"`
		// Keywords is text expected in the response body.
		Keywords string `json:"keywords"`
		// Webhook is the name of the webhook to use for this watch.
		Webhook string `json:"webhook"`
		// RecoverScript is a shell script to run when the check fails.
		RecoverScript string `json:"recover_script"`
	}
)
|||
|
|||
/* |
|||
request :+ http.NewRequest("POST" |
|||
curl -s --user 'api:YOUR_API_KEY' \ |
|||
https://api.mailgun.net/v3/YOUR_DOMAIN_NAME/messages \
|
|||
-F from='Excited User <mailgun@YOUR_DOMAIN_NAME>' \ |
|||
-F to=YOU@YOUR_DOMAIN_NAME \ |
|||
-F to=bar@example.com \ |
|||
-F subject='Hello' \ |
|||
-F text='Testing some Mailgun awesomeness!' |
|||
*/ |
Loading…
Reference in new issue