|
|
@ -9,8 +9,10 @@ import ( |
|
|
|
"log" |
|
|
|
"net" |
|
|
|
"net/http" |
|
|
|
"net/url" |
|
|
|
"os" |
|
|
|
"os/exec" |
|
|
|
"strings" |
|
|
|
"time" |
|
|
|
) |
|
|
|
|
|
|
@ -54,17 +56,29 @@ func main() { |
|
|
|
|
|
|
|
done := make(chan struct{}, 1) |
|
|
|
|
|
|
|
allWebhooks := make(map[string]ConfigWebhook) |
|
|
|
|
|
|
|
for i := range config.Webhooks { |
|
|
|
h := config.Webhooks[i] |
|
|
|
allWebhooks[h.Name] = h |
|
|
|
} |
|
|
|
|
|
|
|
logQueue := make(chan string, 10) |
|
|
|
go logger(logQueue) |
|
|
|
for i := range config.Watches { |
|
|
|
c := config.Watches[i] |
|
|
|
fmt.Printf("Watching '%s'", c.Name) |
|
|
|
logQueue <- fmt.Sprintf("Watching '%s'", c.Name) |
|
|
|
go func(c ConfigWatch) { |
|
|
|
w := &Dog{ |
|
|
|
Name: c.Name, |
|
|
|
CheckURL: c.URL, |
|
|
|
Keywords: c.Keywords, |
|
|
|
Recover: c.RecoverScript, |
|
|
|
} |
|
|
|
w.Watch() |
|
|
|
d := New(&Dog{ |
|
|
|
Name: c.Name, |
|
|
|
CheckURL: c.URL, |
|
|
|
Keywords: c.Keywords, |
|
|
|
Recover: c.RecoverScript, |
|
|
|
Webhooks: c.Webhooks, |
|
|
|
AllWebhooks: allWebhooks, |
|
|
|
logger: logQueue, |
|
|
|
}) |
|
|
|
d.Watch() |
|
|
|
}(config.Watches[i]) |
|
|
|
} |
|
|
|
|
|
|
@ -81,6 +95,9 @@ type Dog struct { |
|
|
|
CheckURL string |
|
|
|
Keywords string |
|
|
|
Recover string |
|
|
|
Webhooks []string |
|
|
|
AllWebhooks map[string]ConfigWebhook |
|
|
|
logger chan string |
|
|
|
error error |
|
|
|
failures int |
|
|
|
passes int |
|
|
@ -89,46 +106,24 @@ type Dog struct { |
|
|
|
lastNotified time.Time |
|
|
|
} |
|
|
|
|
|
|
|
func (w *Dog) execRecover() { |
|
|
|
if "" == w.Recover { |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|
|
|
cmd := exec.CommandContext(ctx, "bash") |
|
|
|
pipe, err := cmd.StdinPipe() |
|
|
|
pipe.Write([]byte(w.Recover)) |
|
|
|
if nil != err { |
|
|
|
fmt.Fprintf(os.Stderr, "Could not write to bash '%s': %s\n", w.Recover, err) |
|
|
|
} |
|
|
|
err = cmd.Start() |
|
|
|
if nil != err { |
|
|
|
fmt.Fprintf(os.Stderr, "Could not start '%s': %s\n", w.Recover, err) |
|
|
|
} |
|
|
|
err = pipe.Close() |
|
|
|
if nil != err { |
|
|
|
fmt.Fprintf(os.Stderr, "Could not close '%s': %s\n", w.Recover, err) |
|
|
|
} |
|
|
|
err = cmd.Wait() |
|
|
|
cancel() |
|
|
|
if nil != err { |
|
|
|
fmt.Fprintf(os.Stderr, "'%s' failed: %s\n", w.Recover, err) |
|
|
|
} |
|
|
|
func New(d *Dog) *Dog { |
|
|
|
d.lastPassed = time.Now().Add(-5 * time.Minute) |
|
|
|
return d |
|
|
|
} |
|
|
|
|
|
|
|
func (w *Dog) Watch() { |
|
|
|
w.watch() |
|
|
|
func (d *Dog) Watch() { |
|
|
|
d.watch() |
|
|
|
for { |
|
|
|
// TODO set cancellable callback ?
|
|
|
|
time.Sleep(5 * time.Minute) |
|
|
|
w.watch() |
|
|
|
d.watch() |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func (w *Dog) watch() { |
|
|
|
fmt.Println("Running a check") |
|
|
|
func (d *Dog) watch() { |
|
|
|
d.logger <- fmt.Sprintf("Check: '%s'", d.Name) |
|
|
|
|
|
|
|
err := w.check() |
|
|
|
err := d.check() |
|
|
|
if nil == err { |
|
|
|
return |
|
|
|
} |
|
|
@ -136,130 +131,212 @@ func (w *Dog) watch() { |
|
|
|
failure := false |
|
|
|
t := 10 |
|
|
|
for { |
|
|
|
w.execRecover() |
|
|
|
d.recover() |
|
|
|
time.Sleep(time.Duration(t) * time.Second) |
|
|
|
// backoff
|
|
|
|
t *= 2 |
|
|
|
err := w.check() |
|
|
|
err := d.check() |
|
|
|
if nil != err { |
|
|
|
failure = true |
|
|
|
} |
|
|
|
// We should notify if
|
|
|
|
// * We've had success since the last notification
|
|
|
|
// * It's been at least 5 minutes since the last notification
|
|
|
|
if w.lastPassed.After(w.lastNotified) && w.lastNotified.Before(time.Now().Add(-5*time.Minute)) { |
|
|
|
err := w.notify(failure) |
|
|
|
if nil != err { |
|
|
|
fmt.Println("Notify:", err) |
|
|
|
} |
|
|
|
fiveMinutesAgo := time.Now().Add(-5 * time.Minute) |
|
|
|
if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) { |
|
|
|
d.notify(failure) |
|
|
|
} |
|
|
|
if w.failures >= 5 { |
|
|
|
if d.failures >= 5 { |
|
|
|
// go back to the main 5-minute loop
|
|
|
|
break |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// NewHTTPClient returns an *http.Client with explicit timeouts so that a hung
// endpoint cannot stall the caller indefinitely.
//
// Limits applied:
//   - 10s to establish the TCP connection,
//   - 5s for the TLS handshake,
//   - 5s for the request as a whole (http.Client.Timeout), which in practice
//     is the tightest bound because it also covers connection time.
//
// Every call builds a fresh client with its own transport.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}

	transport := &http.Transport{
		// DialContext replaces the deprecated Dial field; it lets the
		// transport abandon a dial as soon as the request is cancelled.
		DialContext:         dialer.DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
	}

	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}
|
|
|
|
|
|
|
func (w *Dog) check() error { |
|
|
|
func (d *Dog) check() error { |
|
|
|
var err error |
|
|
|
defer func() { |
|
|
|
if nil != err { |
|
|
|
w.failures += 1 |
|
|
|
w.lastFailed = time.Now() |
|
|
|
d.failures += 1 |
|
|
|
d.lastFailed = time.Now() |
|
|
|
} else { |
|
|
|
w.lastPassed = time.Now() |
|
|
|
w.passes += 1 |
|
|
|
d.lastPassed = time.Now() |
|
|
|
d.passes += 1 |
|
|
|
} |
|
|
|
}() |
|
|
|
|
|
|
|
client := NewHTTPClient() |
|
|
|
response, err := client.Get(w.CheckURL) |
|
|
|
response, err := client.Get(d.CheckURL) |
|
|
|
if nil != err { |
|
|
|
w.error = fmt.Errorf("Connection Failure: " + err.Error()) |
|
|
|
d.error = fmt.Errorf("Connection Failure: " + err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
b, err := ioutil.ReadAll(response.Body) |
|
|
|
if nil != err { |
|
|
|
w.error = fmt.Errorf("Network Failure: " + err.Error()) |
|
|
|
d.error = fmt.Errorf("Network Failure: " + err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if !bytes.Contains(b, []byte(w.Keywords)) { |
|
|
|
err = fmt.Errorf("Keywords Not Found: " + w.Keywords) |
|
|
|
fmt.Println(err) |
|
|
|
w.error = err |
|
|
|
if !bytes.Contains(b, []byte(d.Keywords)) { |
|
|
|
err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name) |
|
|
|
d.logger <- fmt.Sprintf("%s", err) |
|
|
|
d.error = err |
|
|
|
return err |
|
|
|
} else { |
|
|
|
fmt.Println("Happy day!") |
|
|
|
d.logger <- fmt.Sprintf("Up: '%s'", d.Name) |
|
|
|
} |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func (w *Dog) notify(hardFail bool) error { |
|
|
|
w.lastNotified = time.Now() |
|
|
|
fmt.Println("Notifying the authorities of a failure") |
|
|
|
return nil |
|
|
|
/* |
|
|
|
urlStr := "https://api.twilio.com/2010-04-01/Accounts/" + accountSid + "/Messages.json" |
|
|
|
msgData := url.Values{} |
|
|
|
msgData.Set("To", "+1 555 555 5555") |
|
|
|
msgData.Set("From", "+1 555 555 1234") |
|
|
|
if hardFail { |
|
|
|
msgData.Set("Body", fmt.Sprintf("[%s] The system is down. The system is down.", w.Name)) |
|
|
|
} else { |
|
|
|
msgData.Set("Body", fmt.Sprintf("[%s] had a hiccup.", w.Name)) |
|
|
|
func (d *Dog) recover() { |
|
|
|
if "" == d.Recover { |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) |
|
|
|
cmd := exec.CommandContext(ctx, "bash") |
|
|
|
pipe, err := cmd.StdinPipe() |
|
|
|
pipe.Write([]byte(d.Recover)) |
|
|
|
if nil != err { |
|
|
|
d.logger <- fmt.Sprintf("[Recover] Could not write to bash '%s': %s", d.Recover, err) |
|
|
|
} |
|
|
|
err = cmd.Start() |
|
|
|
if nil != err { |
|
|
|
d.logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err) |
|
|
|
} |
|
|
|
err = pipe.Close() |
|
|
|
if nil != err { |
|
|
|
d.logger <- fmt.Sprintf("[Recover] Could not close '%s': %s", d.Recover, err) |
|
|
|
} |
|
|
|
err = cmd.Wait() |
|
|
|
cancel() |
|
|
|
if nil != err { |
|
|
|
d.logger <- fmt.Sprintf("[Recover] '%s' failed: %s", d.Recover, err) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func (d *Dog) notify(hardFail bool) { |
|
|
|
d.logger <- fmt.Sprintf("Notifying the authorities of %s's failure", d.Name) |
|
|
|
d.lastNotified = time.Now() |
|
|
|
|
|
|
|
for i := range d.Webhooks { |
|
|
|
name := d.Webhooks[i] |
|
|
|
if "" == name { |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
h, ok := d.AllWebhooks[name] |
|
|
|
if !ok { |
|
|
|
// TODO check in main when config is read
|
|
|
|
d.Webhooks[i] = "" |
|
|
|
d.logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, h.Name) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
// TODO do this in main on config init
|
|
|
|
if "" == h.Method { |
|
|
|
h.Method = "POST" |
|
|
|
} |
|
|
|
msgDataReader := *strings.NewReader(msgData.Encode()) |
|
|
|
|
|
|
|
var body *strings.Reader |
|
|
|
if 0 != len(h.Form) { |
|
|
|
form := url.Values{} |
|
|
|
for k := range h.Form { |
|
|
|
v := h.Form[k] |
|
|
|
// TODO real templates
|
|
|
|
v = strings.Replace(v, "{{ .Name }}", d.Name, -1) |
|
|
|
form.Set(k, v) |
|
|
|
} |
|
|
|
body = strings.NewReader(form.Encode()) |
|
|
|
} |
|
|
|
|
|
|
|
client := NewHTTPClient() |
|
|
|
req, err := http.NewRequest("POST", urlStr, &msgDataReader) |
|
|
|
req, err := http.NewRequest(h.Method, h.URL, body) |
|
|
|
if nil != err { |
|
|
|
log.Println("[Notify] HTTP Client Network Error:", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
if 0 != len(h.Form) { |
|
|
|
req.Header.Add("Content-Type", "application/x-www-form-urlencoded") |
|
|
|
} |
|
|
|
|
|
|
|
if 0 != len(h.Auth) { |
|
|
|
user := h.Auth["user"] |
|
|
|
pass := h.Auth["pass"] |
|
|
|
req.SetBasicAuth(user, pass) |
|
|
|
} |
|
|
|
|
|
|
|
resp, err := client.Do(req) |
|
|
|
if nil != err { |
|
|
|
fmt.Fprintf(os.Stderr, "Failed to text: %s\n", err) |
|
|
|
d.logger <- fmt.Sprintf("[Notify] HTTP Client Error: %s", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
if !(resp.StatusCode >= 200 && resp.StatusCode < 300) { |
|
|
|
d.logger <- fmt.Sprintf("[Notify] Response Error: %s", resp.Status) |
|
|
|
continue |
|
|
|
} |
|
|
|
req.SetBasicAuth(accountSid, authToken) |
|
|
|
|
|
|
|
// TODO json vs xml vs txt
|
|
|
|
var data map[string]interface{} |
|
|
|
req.Header.Add("Accept", "application/json") |
|
|
|
req.Header.Add("Content-Type", "application/x-www-form-urlencoded") |
|
|
|
resp, _ := client.Do(req) |
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 { |
|
|
|
var data map[string]interface{} |
|
|
|
decoder := json.NewDecoder(resp.Body) |
|
|
|
err := decoder.Decode(&data) |
|
|
|
if err == nil { |
|
|
|
fmt.Println(data["sid"]) |
|
|
|
} |
|
|
|
} else { |
|
|
|
fmt.Println("Error:", resp.Status) |
|
|
|
decoder := json.NewDecoder(resp.Body) |
|
|
|
err = decoder.Decode(&data) |
|
|
|
if err != nil { |
|
|
|
d.logger <- fmt.Sprintf("[Notify] Response Body Error: %s", resp.Status) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
return nil |
|
|
|
*/ |
|
|
|
// TODO some sort of way to determine if data is successful (keywords)
|
|
|
|
d.logger <- fmt.Sprintf("[Notify] Success? %#v", data) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
type Config struct { |
|
|
|
Watches []ConfigWatch `json:"watches"` |
|
|
|
Watches []ConfigWatch `json:"watches"` |
|
|
|
Webhooks []ConfigWebhook `json:"webhooks"` |
|
|
|
} |
|
|
|
|
|
|
|
// ConfigWatch describes one endpoint to monitor, as written in the JSON
// configuration. Each field is declared exactly once.
type ConfigWatch struct {
	// Name identifies the watch in log lines and notifications.
	Name string `json:"name"`

	// URL is the address that is fetched on every check.
	URL string `json:"url"`

	// Keywords is the text the response body must contain to count as up.
	Keywords string `json:"keywords"`

	// Webhook is a single webhook name (JSON key "webhook").
	// NOTE(review): nothing in the code visible here reads it; it is kept so
	// existing references and configs keep working - confirm before removing.
	Webhook string `json:"webhook"`

	// Webhooks names the webhooks to call when the watch fails.
	Webhooks []string `json:"webhooks"`

	// RecoverScript is shell text run through bash when a check fails.
	RecoverScript string `json:"recover_script"`
}
|
|
|
|
|
|
|
// ConfigWebhook describes one notification target, as written in the JSON
// configuration.
type ConfigWebhook struct {
	// Name is the key under which watches refer to this webhook.
	Name string `json:"name"`

	// Method is the HTTP method; notify falls back to "POST" when it is empty.
	Method string `json:"method"`

	// URL is the address the notification request is sent to.
	URL string `json:"url"`

	// Auth holds basic-auth credentials under the keys "user" and "pass".
	Auth map[string]string `json:"auth"`

	// Form holds the form fields to send, url-encoded. Any "{{ .Name }}" in
	// a value is replaced with the name of the failing watch.
	Form map[string]string `json:"form"`

	// Config is decoded from the "config" key.
	// NOTE(review): not read by the code visible here - purpose unconfirmed.
	Config map[string]string `json:"config"`

	// Configs is decoded from the "configs" key.
	// NOTE(review): not read by the code visible here - purpose unconfirmed.
	Configs []map[string]string `json:"configs"`
}
|
|
|
|
|
|
|
// NewHTTPClient returns an *http.Client with explicit timeouts so that a hung
// endpoint cannot stall the caller indefinitely.
//
// Limits applied:
//   - 10s to establish the TCP connection,
//   - 5s for the TLS handshake,
//   - 5s for the request as a whole (http.Client.Timeout), which in practice
//     is the tightest bound because it also covers connection time.
//
// Every call builds a fresh client with its own transport.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}

	transport := &http.Transport{
		// DialContext replaces the deprecated Dial field; it lets the
		// transport abandon a dial as soon as the request is cancelled.
		DialContext:         dialer.DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
	}

	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}
|
|
|
|
|
|
|
// logger prints every message received on msgs with log.Println, in the
// order received.
//
// It blocks while the channel is empty and returns once msgs has been closed
// and drained, so it is meant to be run in its own goroutine. Ranging over
// the channel matters: a bare receive on a closed channel yields "" without
// blocking, which would otherwise turn into an endless stream of empty lines.
func logger(msgs chan string) {
	for msg := range msgs {
		log.Println(msg)
	}
}
|
|
|
|
|
|
|
/* |
|
|
|