go-serviceman/runner/runner.go

240 lines
5.3 KiB
Go
Raw Normal View History

2019-07-03 05:51:30 +00:00
package runner
import (
"fmt"
"io/ioutil"
2019-07-03 05:51:30 +00:00
"os"
"os/exec"
"path/filepath"
"strconv"
2019-07-03 08:11:50 +00:00
"strings"
2019-07-03 05:51:30 +00:00
"time"
"git.rootprojects.org/root/go-serviceman/service"
ps "github.com/mitchellh/go-ps"
2019-07-03 05:51:30 +00:00
)
2019-07-03 08:11:50 +00:00
// shellArgs is an optional wrapper command line that is empty by default.
// Filled in on init by runner_windows.go.
// When it is non-empty and the service is not a system service, Start runs
// shellArgs[0] as the binary, passing shellArgs[1:] followed by the service's
// own binary path and arguments.
var shellArgs = []string{}
2019-07-03 05:51:30 +00:00
// Notes on spawning a child process
// https://groups.google.com/forum/#!topic/golang-nuts/shST-SDqIp4
// Start will execute the service, and write the PID and logs out to the log directory
func Start(conf *service.Service) error {
pid := os.Getpid()
2019-07-03 05:51:30 +00:00
originalBackoff := 1 * time.Second
maxBackoff := 1 * time.Minute
threshold := 5 * time.Second
backoff := originalBackoff
failures := 0
logfile := filepath.Join(conf.Logdir, conf.Name+".log")
if oldPid, exename, err := getProcess(conf); nil == err {
return fmt.Errorf("%q may already be running as %q (pid %d)", conf.Name, exename, oldPid)
}
go func() {
for {
maybeWritePidFile(pid, conf)
time.Sleep(1 * time.Second)
}
}()
2019-07-03 05:51:30 +00:00
binpath := conf.Exec
args := []string{}
if "" != conf.Interpreter {
binpath = conf.Interpreter
args = append(args, conf.Exec)
}
args = append(args, conf.Argv...)
2019-07-03 08:11:50 +00:00
if !conf.System && 0 != len(shellArgs) {
nargs := append(shellArgs[1:], binpath)
args = append(nargs, args...)
binpath = shellArgs[0]
}
2019-07-03 05:51:30 +00:00
for {
// setup the log
lf, err := os.OpenFile(logfile, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
if nil != err {
2019-07-03 08:11:50 +00:00
fmt.Fprintf(os.Stderr, "[%s] Could not open log file %q\n", time.Now(), logfile)
2019-07-03 05:51:30 +00:00
lf = os.Stderr
} else {
defer lf.Close()
}
start := time.Now()
cmd := exec.Command(binpath, args...)
2019-07-03 09:04:28 +00:00
backgroundCmd(cmd)
2019-07-03 08:11:50 +00:00
fmt.Fprintf(lf, "[%s] Starting %q %s \n", time.Now(), binpath, strings.Join(args, " "))
2019-07-03 05:51:30 +00:00
cmd.Stdin = nil
cmd.Stdout = lf
cmd.Stderr = lf
if "" != conf.Workdir {
cmd.Dir = conf.Workdir
}
2019-08-04 01:36:46 +00:00
if len(conf.Envs) > 0 {
for k, v := range conf.Envs {
cmd.Env = append(cmd.Env, k+"="+v)
}
}
2019-07-03 05:51:30 +00:00
err = cmd.Start()
if nil != err {
2019-07-03 08:11:50 +00:00
fmt.Fprintf(lf, "[%s] Could not start %q process: %s\n", time.Now(), conf.Name, err)
2019-07-03 05:51:30 +00:00
} else {
err = cmd.Wait()
if nil != err {
2019-07-03 08:11:50 +00:00
fmt.Fprintf(lf, "[%s] Process %q failed with error: %s\n", time.Now(), conf.Name, err)
2019-07-03 05:51:30 +00:00
} else {
2019-07-03 08:11:50 +00:00
fmt.Fprintf(lf, "[%s] Process %q exited cleanly\n", time.Now(), conf.Name)
2019-07-03 05:51:30 +00:00
}
}
// if this is a oneshot... so it is
if !conf.Restart {
fmt.Fprintf(lf, "Not restarting %q because `restart` set to `false`\n", conf.Name)
break
}
end := time.Now()
if end.Sub(start) > threshold {
backoff = originalBackoff
failures = 0
} else {
failures++
2019-07-03 05:51:30 +00:00
fmt.Fprintf(lf, "Waiting %s to restart %q (%d consequtive immediate exits)\n", backoff, conf.Name, failures)
time.Sleep(backoff)
backoff *= 2
if backoff > maxBackoff {
backoff = maxBackoff
}
}
}
return nil
}
// Stop finds another serviceman runner instance by the PID recorded in the
// service's PID file and stops it.
//
// It looks the process up at most three times. When the lookup succeeds the
// process is killed and Stop waits for it to disappear, returning any error
// from kill or waitForProcessToDie. ErrNoPidFile and ErrNoProcess are returned
// immediately. Any other lookup error (including ErrInvalidPidFile) causes a
// 400ms pause before the next attempt, and the last such error is returned
// once the attempts are used up.
func Stop(conf *service.Service) error {
	const maxAttempts = 3

	var err error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		var (
			oldPid  int
			exename string
		)
		oldPid, exename, err = getProcess(conf)

		switch err {
		case nil:
			fmt.Printf("killing old process %q with pid %d\n", exename, oldPid)
			if killErr := kill(oldPid); nil != killErr {
				return killErr
			}
			return waitForProcessToDie(oldPid)
		case ErrNoPidFile, ErrNoProcess:
			// Nothing to stop; retrying would not change that.
			return err
		}

		// ErrInvalidPidFile or an unexpected lookup error:
		// waiting a little bit since the PID is written every second
		time.Sleep(400 * time.Millisecond)
	}

	return err
}
// Restart calls Stop, ignoring any failure, and then Start, returning any failure.
//
// The result of Stop is discarded on purpose: Stop also reports an error when
// there is no PID file or no matching process, and that must not prevent the
// service from being started. Only the error from Start reaches the caller.
func Restart(conf *service.Service) error {
// Best effort: whatever Stop reports, still attempt the start below.
_ = Stop(conf)
return Start(conf)
}
// Sentinel errors reported when looking up the runner recorded in a PID file.
var (
	// ErrNoPidFile is returned when the service's PID file cannot be read.
	ErrNoPidFile = fmt.Errorf("no pid file")

	// ErrInvalidPidFile is returned when the PID file's contents are not an integer.
	ErrInvalidPidFile = fmt.Errorf("malformed pid file")

	// ErrNoProcess is returned when no process exists for the recorded PID.
	ErrNoProcess = fmt.Errorf("process not found by pid")
)
// waitForProcessToDie polls the process table for pid, once per second and at
// most ten times. It returns nil as soon as the lookup fails or reports no
// such process, and an error naming the last seen executable if the process
// was still present on every check.
func waitForProcessToDie(pid int) error {
	lastSeen := "unknown"

	for attempt := 0; attempt < 10; attempt++ {
		proc, err := ps.FindProcess(pid)
		// A lookup error is treated the same as "process is gone".
		if nil != err || nil == proc {
			return nil
		}

		lastSeen = proc.Executable()
		time.Sleep(1 * time.Second)
	}

	return fmt.Errorf("process %q (%d) just won't die", lastSeen, pid)
}
// getProcess reads <Logdir>/<Name>.pid and looks up the process it names.
//
// On success it returns the recorded PID and that process's executable name.
// It returns ErrNoPidFile when the file cannot be read, ErrInvalidPidFile when
// its trimmed contents are not an integer, ErrNoProcess when the process table
// has no entry for the PID, and otherwise whatever error the lookups produced.
func getProcess(conf *service.Service) (int, string, error) {
	// TODO make Pidfile() a property of conf?
	raw, readErr := ioutil.ReadFile(filepath.Join(conf.Logdir, conf.Name+".pid"))
	if nil != readErr {
		return 0, "", ErrNoPidFile
	}

	recorded, parseErr := strconv.Atoi(strings.TrimSpace(string(raw)))
	if nil != parseErr {
		return 0, "", ErrInvalidPidFile
	}

	proc, lookupErr := ps.FindProcess(recorded)
	switch {
	case nil != lookupErr:
		return 0, "", lookupErr
	case nil == proc:
		return 0, "", ErrNoProcess
	}

	if _, err := os.FindProcess(recorded); nil != err {
		return 0, "", err
	}

	return recorded, proc.Executable(), nil
}
// TODO error out if can't write to PID or log
func maybeWritePidFile(pid int, conf *service.Service) bool {
newPid := []byte(strconv.Itoa(pid))
// TODO use a specific PID dir? meh...
pidFile := filepath.Join(conf.Logdir, conf.Name+".pid")
b, err := ioutil.ReadFile(pidFile)
if nil != err {
ioutil.WriteFile(pidFile, newPid, 0644)
return true
}
s := strings.TrimSpace(string(b))
oldPid, err := strconv.Atoi(s)
if nil != err {
ioutil.WriteFile(pidFile, newPid, 0644)
return true
}
if oldPid != pid {
Stop(conf)
ioutil.WriteFile(pidFile, newPid, 0644)
return true
}
return false
2019-07-03 05:51:30 +00:00
}