Diffstat (limited to 'pkg')
-rwxr-xr-x | pkg/events/events.go | 38
-rwxr-xr-x | pkg/pipe/pipe_factory.go | 164
-rwxr-xr-x | pkg/pipe/pipe_factory_test.go | 511
-rw-r--r-- | pkg/pool/config.go | 75
-rwxr-xr-x | pkg/pool/static_pool.go | 352
-rwxr-xr-x | pkg/pool/static_pool_test.go | 557
-rwxr-xr-x | pkg/pool/supervisor_pool.go | 208
-rw-r--r-- | pkg/pool/supervisor_test.go | 154
-rwxr-xr-x | pkg/socket/socket_factory.go | 228
-rwxr-xr-x | pkg/socket/socket_factory_test.go | 590
-rwxr-xr-x | pkg/worker/sync_worker.go | 227
-rwxr-xr-x | pkg/worker/sync_worker_test.go | 37
-rwxr-xr-x | pkg/worker/worker.go | 302
-rwxr-xr-x | pkg/worker/worker_test.go | 19
-rwxr-xr-x | pkg/worker_watcher/worker_watcher.go | 310
15 files changed, 3772 insertions(+), 0 deletions(-)
diff --git a/pkg/events/events.go b/pkg/events/events.go new file mode 100755 index 00000000..92dc103a --- /dev/null +++ b/pkg/events/events.go @@ -0,0 +1,38 @@ +package events + +import ( + "sync" + + "github.com/spiral/roadrunner/v2/interfaces/events" +) + +// EventHandler helps to broadcast events to multiple listeners. +type EventHandler struct { + listeners []events.EventListener + sync.RWMutex +} + +func NewEventsHandler() events.Handler { + return &EventHandler{listeners: make([]events.EventListener, 0, 2)} +} + +// NumListeners returns number of event listeners. +func (eb *EventHandler) NumListeners() int { + return len(eb.listeners) +} + +// AddListener registers new event listener. +func (eb *EventHandler) AddListener(listener events.EventListener) { + eb.Lock() + defer eb.Unlock() + eb.listeners = append(eb.listeners, listener) +} + +// Push broadcast events across all event listeners. +func (eb *EventHandler) Push(e interface{}) { + eb.Lock() + defer eb.Unlock() + for k := range eb.listeners { + eb.listeners[k](e) + } +} diff --git a/pkg/pipe/pipe_factory.go b/pkg/pipe/pipe_factory.go new file mode 100755 index 00000000..c86d78c4 --- /dev/null +++ b/pkg/pipe/pipe_factory.go @@ -0,0 +1,164 @@ +package pipe + +import ( + "context" + "os/exec" + + "github.com/spiral/errors" + "github.com/spiral/goridge/v3" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + workerImpl "github.com/spiral/roadrunner/v2/pkg/worker" + "go.uber.org/multierr" +) + +// Factory connects to stack using standard +// streams (STDIN, STDOUT pipes). +type Factory struct{} + +// NewPipeFactory returns new factory instance and starts +// listening + +// todo: review tests +func NewPipeFactory() worker.Factory { + return &Factory{} +} + +type SpawnResult struct { + w worker.BaseProcess + err error +} + +// SpawnWorker creates new Process and connects it to goridge relay, +// method Wait() must be handled on level above. +func (f *Factory) SpawnWorkerWithContext(ctx context.Context, cmd *exec.Cmd) (worker.BaseProcess, error) { + c := make(chan SpawnResult) + const op = errors.Op("spawn worker with context") + go func() { + w, err := workerImpl.InitBaseWorker(cmd) + if err != nil { + c <- SpawnResult{ + w: nil, + err: errors.E(op, err), + } + return + } + + // TODO why out is in? + in, err := cmd.StdoutPipe() + if err != nil { + c <- SpawnResult{ + w: nil, + err: errors.E(op, err), + } + return + } + + // TODO why in is out? 
+ out, err := cmd.StdinPipe() + if err != nil { + c <- SpawnResult{ + w: nil, + err: errors.E(op, err), + } + return + } + + // Init new PIPE relay + relay := goridge.NewPipeRelay(in, out) + w.AttachRelay(relay) + + // Start the worker + err = w.Start() + if err != nil { + c <- SpawnResult{ + w: nil, + err: errors.E(op, err), + } + return + } + + // errors bundle + pid, err := internal.FetchPID(relay) + if pid != w.Pid() || err != nil { + err = multierr.Combine( + err, + w.Kill(), + w.Wait(), + ) + c <- SpawnResult{ + w: nil, + err: errors.E(op, err), + } + return + } + + // everything ok, set ready state + w.State().Set(internal.StateReady) + + // return worker + c <- SpawnResult{ + w: w, + err: nil, + } + }() + + select { + case <-ctx.Done(): + return nil, ctx.Err() + case res := <-c: + if res.err != nil { + return nil, res.err + } + return res.w, nil + } +} + +func (f *Factory) SpawnWorker(cmd *exec.Cmd) (worker.BaseProcess, error) { + const op = errors.Op("spawn worker") + w, err := workerImpl.InitBaseWorker(cmd) + if err != nil { + return nil, errors.E(op, err) + } + + // TODO why out is in? + in, err := cmd.StdoutPipe() + if err != nil { + return nil, errors.E(op, err) + } + + // TODO why in is out? + out, err := cmd.StdinPipe() + if err != nil { + return nil, errors.E(op, err) + } + + // Init new PIPE relay + relay := goridge.NewPipeRelay(in, out) + w.AttachRelay(relay) + + // Start the worker + err = w.Start() + if err != nil { + return nil, errors.E(op, err) + } + + // errors bundle + if pid, err := internal.FetchPID(relay); pid != w.Pid() { + err = multierr.Combine( + err, + w.Kill(), + w.Wait(), + ) + return nil, errors.E(op, err) + } + + // everything ok, set ready state + w.State().Set(internal.StateReady) + return w, nil +} + +// Close the factory. 
+func (f *Factory) Close(ctx context.Context) error { + return nil +} diff --git a/pkg/pipe/pipe_factory_test.go b/pkg/pipe/pipe_factory_test.go new file mode 100755 index 00000000..99212ff8 --- /dev/null +++ b/pkg/pipe/pipe_factory_test.go @@ -0,0 +1,511 @@ +package pipe + +import ( + "context" + "os/exec" + "strings" + "sync" + "testing" + "time" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/internal" + workerImpl "github.com/spiral/roadrunner/v2/pkg/worker" + "github.com/stretchr/testify/assert" +) + +func Test_GetState(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + assert.Equal(t, internal.StateStopped, w.State().Value()) + }() + + assert.NoError(t, err) + assert.NotNil(t, w) + + assert.Equal(t, internal.StateReady, w.State().Value()) + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } +} + +func Test_Kill(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + assert.Error(t, w.Wait()) + // TODO changed from stopped, discuss + assert.Equal(t, internal.StateErrored, w.State().Value()) + }() + + assert.NoError(t, err) + assert.NotNil(t, w) + + assert.Equal(t, internal.StateReady, w.State().Value()) + err = w.Kill() + if err != nil { + t.Errorf("error killing the Process: error %v", err) + } + wg.Wait() +} + +func Test_Pipe_Start(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + assert.NoError(t, err) + assert.NotNil(t, w) + + go func() { + assert.NoError(t, w.Wait()) + }() + + assert.NoError(t, w.Stop(ctx)) +} + +func Test_Pipe_StartError(t *testing.T) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + err := cmd.Start() + if err != nil { + t.Errorf("error running the command: error %v", err) + } + + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Pipe_PipeError(t *testing.T) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + _, err := cmd.StdinPipe() + if err != nil { + t.Errorf("error creating the STDIN pipe: error %v", err) + } + + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Pipe_PipeError2(t *testing.T) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + _, err := cmd.StdinPipe() + if err != nil { + t.Errorf("error creating the STDIN pipe: error %v", err) + } + + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Pipe_Failboot(t *testing.T) { + cmd := exec.Command("php", "../../tests/failboot.php") + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + + assert.Nil(t, w) + assert.Error(t, err) + assert.Contains(t, err.Error(), "failboot") +} + +func Test_Pipe_Invalid(t *testing.T) { + cmd := exec.Command("php", "../../tests/invalid.php") + ctx := 
context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Pipe_Echo(t *testing.T) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + defer func() { + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_Pipe_Broken(t *testing.T) { + cmd := exec.Command("php", "../../tests/client.php", "broken", "pipes") + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + defer func() { + time.Sleep(time.Second) + err = w.Stop(ctx) + assert.Error(t, err) + }() + + sw, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Error(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) +} + +func Benchmark_Pipe_SpawnWorker_Stop(b *testing.B) { + f := NewPipeFactory() + for n := 0; n < b.N; n++ { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + w, _ := f.SpawnWorkerWithContext(context.Background(), cmd) + go func() { + if w.Wait() != nil { + b.Fail() + } + }() + + err := w.Stop(context.Background()) + if err != nil { + b.Errorf("error stopping the worker: error %v", err) + } + } +} + +func Benchmark_Pipe_Worker_ExecEcho(b *testing.B) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(context.Background(), cmd) + sw, err := workerImpl.From(w) + if err != nil { + b.Fatal(err) + } + b.ReportAllocs() + b.ResetTimer() + go func() { + err := w.Wait() + if err != nil { + b.Errorf("error waiting the worker: error %v", err) + } + }() + defer func() { + err := w.Stop(context.Background()) + if err != nil { + b.Errorf("error stopping the worker: error %v", err) + } + }() + + for n := 0; n < b.N; n++ { + if _, err := sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} + +func Benchmark_Pipe_Worker_ExecEcho3(b *testing.B) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + + defer func() { + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := workerImpl.From(w) + if err != nil { + b.Fatal(err) + } + + for n := 0; n < b.N; n++ { + if _, err := sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} + +func Benchmark_Pipe_Worker_ExecEchoWithoutContext(b *testing.B) { + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + ctx := context.Background() + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + + defer func() { + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := workerImpl.From(w) + if err != nil { + b.Fatal(err) + } + + for n := 0; n < b.N; n++ { + if _, err := 
sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} + +func Test_Echo(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + go func() { + assert.NoError(t, syncWorker.Wait()) + }() + defer func() { + err := syncWorker.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + res, err := syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Nil(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_BadPayload(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + go func() { + assert.NoError(t, syncWorker.Wait()) + }() + defer func() { + err := syncWorker.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + res, err := syncWorker.Exec(internal.Payload{}) + + assert.Error(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + + assert.Contains(t, err.Error(), "payload can not be empty") +} + +func Test_String(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + }() + defer func() { + err := w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + assert.Contains(t, w.String(), "php ../../tests/client.php echo pipes") + assert.Contains(t, w.String(), "ready") + assert.Contains(t, w.String(), "numExecs: 0") +} + +func Test_Echo_Slow(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/slow-client.php", "echo", "pipes", "10", "10") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + }() + defer func() { + err := w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Nil(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_Broken(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "broken", "pipes") + + w, err := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + + data := "" + mu := &sync.Mutex{} + w.AddListener(func(event interface{}) { + if wev, ok := event.(events.WorkerEvent); ok { + mu.Lock() + data = string(wev.Payload.([]byte)) + mu.Unlock() + } + }) + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + assert.NotNil(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + + time.Sleep(time.Second * 3) + mu.Lock() + if strings.ContainsAny(data, "undefined_function()") == false { + t.Fail() + } + mu.Unlock() + 
assert.Error(t, w.Stop(ctx)) +} + +func Test_Error(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "error", "pipes") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + }() + + defer func() { + err := w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + assert.NotNil(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + + if errors.Is(errors.ErrSoftJob, err) == false { + t.Fatal("error should be of type errors.ErrSoftJob") + } + assert.Contains(t, err.Error(), "exec payload: SoftJobError: hello") +} + +func Test_NumExecs(t *testing.T) { + ctx := context.Background() + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + + w, _ := NewPipeFactory().SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + }() + defer func() { + err := w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + syncWorker, err := workerImpl.From(w) + if err != nil { + t.Fatal(err) + } + + _, err = syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + if err != nil { + t.Errorf("fail to execute payload: error %v", err) + } + assert.Equal(t, int64(1), w.State().NumExecs()) + + _, err = syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + if err != nil { + t.Errorf("fail to execute payload: error %v", err) + } + assert.Equal(t, int64(2), w.State().NumExecs()) + + _, err = syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + if err != nil { + t.Errorf("fail to execute payload: error %v", err) + } + assert.Equal(t, int64(3), w.State().NumExecs()) +} diff --git a/pkg/pool/config.go b/pkg/pool/config.go new file mode 100644 index 00000000..3dcc3584 --- /dev/null +++ b/pkg/pool/config.go @@ -0,0 +1,75 @@ +package pool + +import ( + "runtime" + "time" +) + +// Configures the pool behaviour. +type Config struct { + // Debug flag creates new fresh worker before every request. + Debug bool + + // NumWorkers defines how many sub-processes can be run at once. This value + // might be doubled by Swapper while hot-swap. Defaults to number of CPU cores. + NumWorkers int64 + + // MaxJobs defines how many executions is allowed for the worker until + // it's destruction. set 1 to create new process for each new task, 0 to let + // worker handle as many tasks as it can. + MaxJobs int64 + + // AllocateTimeout defines for how long pool will be waiting for a worker to + // be freed to handle the task. Defaults to 60s. + AllocateTimeout time.Duration + + // DestroyTimeout defines for how long pool should be waiting for worker to + // properly destroy, if timeout reached worker will be killed. Defaults to 60s. + DestroyTimeout time.Duration + + // Supervision config to limit worker and pool memory usage. + Supervisor *SupervisorConfig +} + +// InitDefaults enables default config values. +func (cfg *Config) InitDefaults() { + if cfg.NumWorkers == 0 { + cfg.NumWorkers = int64(runtime.NumCPU()) + } + + if cfg.AllocateTimeout == 0 { + cfg.AllocateTimeout = time.Minute + } + + if cfg.DestroyTimeout == 0 { + cfg.DestroyTimeout = time.Minute + } + if cfg.Supervisor == nil { + return + } + cfg.Supervisor.InitDefaults() +} + +type SupervisorConfig struct { + // WatchTick defines how often to check the state of worker. 
+ WatchTick uint64 + + // TTL defines maximum time worker is allowed to live. + TTL uint64 + + // IdleTTL defines maximum duration worker can spend in idle mode. Disabled when 0. + IdleTTL uint64 + + // ExecTTL defines maximum lifetime per job. + ExecTTL uint64 + + // MaxWorkerMemory limits memory per worker. + MaxWorkerMemory uint64 +} + +// InitDefaults enables default config values. +func (cfg *SupervisorConfig) InitDefaults() { + if cfg.WatchTick == 0 { + cfg.WatchTick = 1 + } +} diff --git a/pkg/pool/static_pool.go b/pkg/pool/static_pool.go new file mode 100755 index 00000000..6cc42143 --- /dev/null +++ b/pkg/pool/static_pool.go @@ -0,0 +1,352 @@ +package pool + +import ( + "context" + "os/exec" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/interfaces/pool" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + events2 "github.com/spiral/roadrunner/v2/pkg/events" + syncWorker "github.com/spiral/roadrunner/v2/pkg/worker" + workerWatcher "github.com/spiral/roadrunner/v2/pkg/worker_watcher" +) + +// StopRequest can be sent by worker to indicate that restart is required. +const StopRequest = "{\"stop\":true}" + +var bCtx = context.Background() + +// ErrorEncoder encode error or make a decision based on the error type +type ErrorEncoder func(err error, w worker.BaseProcess) (internal.Payload, error) + +// Before is set of functions that executes BEFORE Exec +type Before func(req internal.Payload) internal.Payload + +// After is set of functions that executes AFTER Exec +type After func(req internal.Payload, resp internal.Payload) internal.Payload + +type Options func(p *StaticPool) + +// StaticPool controls worker creation, destruction and task routing. Pool uses fixed amount of stack. +type StaticPool struct { + cfg Config + + // worker command creator + cmd func() *exec.Cmd + + // creates and connects to stack + factory worker.Factory + + // distributes the events + events events.Handler + + // manages worker states and TTLs + ww worker.Watcher + + // allocate new worker + allocator worker.Allocator + + errEncoder ErrorEncoder + before []Before + after []After +} + +// NewPool creates new worker pool and task multiplexer. StaticPool will initiate with one worker. 
+func NewPool(ctx context.Context, cmd func() *exec.Cmd, factory worker.Factory, cfg Config, options ...Options) (pool.Pool, error) { + const op = errors.Op("NewPool") + if factory == nil { + return nil, errors.E(op, errors.Str("no factory initialized")) + } + cfg.InitDefaults() + + if cfg.Debug { + cfg.NumWorkers = 0 + cfg.MaxJobs = 1 + } + + p := &StaticPool{ + cfg: cfg, + cmd: cmd, + factory: factory, + events: events2.NewEventsHandler(), + after: make([]After, 0, 0), + before: make([]Before, 0, 0), + } + + p.allocator = newPoolAllocator(factory, cmd) + p.ww = workerWatcher.NewWorkerWatcher(p.allocator, p.cfg.NumWorkers, p.events) + + workers, err := p.allocateWorkers(ctx, p.cfg.NumWorkers) + if err != nil { + return nil, errors.E(op, err) + } + + // put stack in the pool + err = p.ww.AddToWatch(workers) + if err != nil { + return nil, errors.E(op, err) + } + + p.errEncoder = defaultErrEncoder(p) + + // add pool options + for i := 0; i < len(options); i++ { + options[i](p) + } + + // if supervised config not nil, guess, that pool wanted to be supervised + if cfg.Supervisor != nil { + sp := newPoolWatcher(p, p.events, p.cfg.Supervisor) + // start watcher timer + sp.Start() + return sp, nil + } + + return p, nil +} + +func ExecBefore(before ...Before) Options { + return func(p *StaticPool) { + p.before = append(p.before, before...) + } +} + +func ExecAfter(after ...After) Options { + return func(p *StaticPool) { + p.after = append(p.after, after...) + } +} + +// AddListener connects event listener to the pool. +func (sp *StaticPool) AddListener(listener events.EventListener) { + sp.events.AddListener(listener) +} + +// Config returns associated pool configuration. Immutable. +func (sp *StaticPool) GetConfig() interface{} { + return sp.cfg +} + +// Workers returns worker list associated with the pool. 
+func (sp *StaticPool) Workers() (workers []worker.BaseProcess) { + return sp.ww.WorkersList() +} + +func (sp *StaticPool) RemoveWorker(wb worker.BaseProcess) error { + return sp.ww.RemoveWorker(wb) +} + +func (sp *StaticPool) Exec(p internal.Payload) (internal.Payload, error) { + const op = errors.Op("exec") + if sp.cfg.Debug { + return sp.execDebug(p) + } + ctxGetFree, cancel := context.WithTimeout(context.Background(), sp.cfg.AllocateTimeout) + defer cancel() + w, err := sp.getWorker(ctxGetFree, op) + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + + sw := w.(worker.SyncWorker) + + if len(sp.before) > 0 { + for i := 0; i < len(sp.before); i++ { + p = sp.before[i](p) + } + } + + rsp, err := sw.Exec(p) + if err != nil { + return sp.errEncoder(err, sw) + } + + // worker want's to be terminated + // TODO careful with string(rsp.Context) + if len(rsp.Body) == 0 && string(rsp.Context) == StopRequest { + sw.State().Set(internal.StateInvalid) + err = sw.Stop(bCtx) + if err != nil { + sp.events.Push(events.WorkerEvent{Event: events.EventWorkerError, Worker: sw, Payload: errors.E(op, err)}) + } + + return sp.Exec(p) + } + + if sp.cfg.MaxJobs != 0 && sw.State().NumExecs() >= sp.cfg.MaxJobs { + err = sp.ww.AllocateNew() + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + } else { + sp.ww.PushWorker(sw) + } + + if len(sp.after) > 0 { + for i := 0; i < len(sp.after); i++ { + rsp = sp.after[i](p, rsp) + } + } + + return rsp, nil +} + +func (sp *StaticPool) ExecWithContext(ctx context.Context, rqs internal.Payload) (internal.Payload, error) { + const op = errors.Op("exec with context") + ctxGetFree, cancel := context.WithTimeout(context.Background(), sp.cfg.AllocateTimeout) + defer cancel() + w, err := sp.getWorker(ctxGetFree, op) + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + + sw := w.(worker.SyncWorker) + + // apply all before function + if len(sp.before) > 0 { + for i := 0; i < len(sp.before); i++ { + rqs = sp.before[i](rqs) + } + } + + rsp, err := sw.ExecWithContext(ctx, rqs) + if err != nil { + return sp.errEncoder(err, sw) + } + + // worker want's to be terminated + if rsp.Body == nil && rsp.Context != nil && string(rsp.Context) == StopRequest { + sw.State().Set(internal.StateInvalid) + err = sw.Stop(bCtx) + if err != nil { + sp.events.Push(events.WorkerEvent{Event: events.EventWorkerError, Worker: sw, Payload: errors.E(op, err)}) + } + + return sp.Exec(rqs) + } + + if sp.cfg.MaxJobs != 0 && sw.State().NumExecs() >= sp.cfg.MaxJobs { + err = sp.ww.AllocateNew() + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + } else { + sp.ww.PushWorker(sw) + } + + // apply all after functions + if len(sp.after) > 0 { + for i := 0; i < len(sp.after); i++ { + rsp = sp.after[i](rqs, rsp) + } + } + + return rsp, nil +} + +func (sp *StaticPool) getWorker(ctxGetFree context.Context, op errors.Op) (worker.BaseProcess, error) { + // GetFreeWorker function consumes context with timeout + w, err := sp.ww.GetFreeWorker(ctxGetFree) + if err != nil { + // if the error is of kind NoFreeWorkers, it means, that we can't get worker from the stack during the allocate timeout + if errors.Is(errors.NoFreeWorkers, err) { + sp.events.Push(events.PoolEvent{Event: events.EventNoFreeWorkers, Payload: errors.E(op, err)}) + return nil, errors.E(op, err) + } + // else if err not nil - return error + return nil, errors.E(op, err) + } + return w, nil +} + +// Destroy all underlying stack (but let them to complete the task). 
+func (sp *StaticPool) Destroy(ctx context.Context) { + sp.ww.Destroy(ctx) +} + +func defaultErrEncoder(sp *StaticPool) ErrorEncoder { + return func(err error, w worker.BaseProcess) (internal.Payload, error) { + const op = errors.Op("error encoder") + // soft job errors are allowed + if errors.Is(errors.ErrSoftJob, err) { + if sp.cfg.MaxJobs != 0 && w.State().NumExecs() >= sp.cfg.MaxJobs { + err = sp.ww.AllocateNew() + if err != nil { + sp.events.Push(events.PoolEvent{Event: events.EventWorkerConstruct, Payload: errors.E(op, err)}) + } + + w.State().Set(internal.StateInvalid) + err = w.Stop(bCtx) + if err != nil { + sp.events.Push(events.WorkerEvent{Event: events.EventWorkerError, Worker: w, Payload: errors.E(op, err)}) + } + } else { + sp.ww.PushWorker(w) + } + + return internal.Payload{}, errors.E(op, err) + } + + w.State().Set(internal.StateInvalid) + sp.events.Push(events.PoolEvent{Event: events.EventWorkerDestruct, Payload: w}) + errS := w.Stop(bCtx) + + if errS != nil { + return internal.Payload{}, errors.E(op, errors.Errorf("%v, %v", err, errS)) + } + + return internal.Payload{}, errors.E(op, err) + } +} + +func newPoolAllocator(factory worker.Factory, cmd func() *exec.Cmd) worker.Allocator { + return func() (worker.BaseProcess, error) { + w, err := factory.SpawnWorkerWithContext(bCtx, cmd()) + if err != nil { + return nil, err + } + + sw, err := syncWorker.From(w) + if err != nil { + return nil, err + } + return sw, nil + } +} + +func (sp *StaticPool) execDebug(p internal.Payload) (internal.Payload, error) { + sw, err := sp.allocator() + if err != nil { + return internal.Payload{}, err + } + + r, err := sw.(worker.SyncWorker).Exec(p) + + if stopErr := sw.Stop(context.Background()); stopErr != nil { + sp.events.Push(events.WorkerEvent{Event: events.EventWorkerError, Worker: sw, Payload: err}) + } + + return r, err +} + +// allocate required number of stack +func (sp *StaticPool) allocateWorkers(ctx context.Context, numWorkers int64) ([]worker.BaseProcess, error) { + const op = errors.Op("allocate workers") + var workers []worker.BaseProcess + + // constant number of stack simplify logic + for i := int64(0); i < numWorkers; i++ { + ctx, cancel := context.WithTimeout(ctx, sp.cfg.AllocateTimeout) + w, err := sp.factory.SpawnWorkerWithContext(ctx, sp.cmd()) + if err != nil { + cancel() + return nil, errors.E(op, errors.WorkerAllocate, err) + } + workers = append(workers, w) + cancel() + } + return workers, nil +} diff --git a/pkg/pool/static_pool_test.go b/pkg/pool/static_pool_test.go new file mode 100755 index 00000000..dd33a1a6 --- /dev/null +++ b/pkg/pool/static_pool_test.go @@ -0,0 +1,557 @@ +package pool + +import ( + "context" + "log" + "os/exec" + "runtime" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/internal" + "github.com/spiral/roadrunner/v2/pkg/pipe" + "github.com/stretchr/testify/assert" +) + +var cfg = Config{ + NumWorkers: int64(runtime.NumCPU()), + AllocateTimeout: time.Second * 5, + DestroyTimeout: time.Second * 5, +} + +func Test_NewPool(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + + defer p.Destroy(ctx) + + assert.NotNil(t, p) +} + +func Test_StaticPool_Invalid(t *testing.T) { + p, err := NewPool( + context.Background(), + func() *exec.Cmd { return 
exec.Command("php", "../../tests/invalid.php") }, + pipe.NewPipeFactory(), + cfg, + ) + + assert.Nil(t, p) + assert.Error(t, err) +} + +func Test_ConfigNoErrorInitDefaults(t *testing.T) { + p, err := NewPool( + context.Background(), + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + Config{ + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + + assert.NotNil(t, p) + assert.NoError(t, err) +} + +func Test_StaticPool_Echo(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_StaticPool_Echo_NilContext(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + res, err := p.Exec(internal.Payload{Body: []byte("hello"), Context: nil}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_StaticPool_Echo_Context(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "head", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + res, err := p.Exec(internal.Payload{Body: []byte("hello"), Context: []byte("world")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.Empty(t, res.Body) + assert.NotNil(t, res.Context) + + assert.Equal(t, "world", string(res.Context)) +} + +func Test_StaticPool_JobError(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "error", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Error(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + + if errors.Is(errors.ErrSoftJob, err) == false { + t.Fatal("error should be of type errors.Exec") + } + + assert.Contains(t, err.Error(), "hello") +} + +func Test_StaticPool_Broken_Replace(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "broken", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + assert.NotNil(t, p) + + block := make(chan struct{}, 1) + + p.AddListener(func(event interface{}) { + if wev, ok := event.(events.WorkerEvent); ok { + if wev.Event == events.EventWorkerLog { + e := string(wev.Payload.([]byte)) + if strings.ContainsAny(e, "undefined_function()") { + block <- struct{}{} + return + } + } + } + }) + + res, err := p.ExecWithContext(ctx, internal.Payload{Body: []byte("hello")}) + assert.Error(t, err) + assert.Nil(t, res.Context) + assert.Nil(t, res.Body) + + <-block + + p.Destroy(ctx) +} + +func 
Test_StaticPool_Broken_FromOutside(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) + assert.Equal(t, runtime.NumCPU(), len(p.Workers())) + + // Consume pool events + wg := sync.WaitGroup{} + wg.Add(1) + p.AddListener(func(event interface{}) { + if pe, ok := event.(events.PoolEvent); ok { + if pe.Event == events.EventWorkerConstruct { + wg.Done() + } + } + }) + + // killing random worker and expecting pool to replace it + err = p.Workers()[0].Kill() + if err != nil { + t.Errorf("error killing the process: error %v", err) + } + + wg.Wait() + + list := p.Workers() + for _, w := range list { + assert.Equal(t, internal.StateReady, w.State().Value()) + } + wg.Wait() +} + +func Test_StaticPool_AllocateTimeout(t *testing.T) { + p, err := NewPool( + context.Background(), + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "delay", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + AllocateTimeout: time.Nanosecond * 1, + DestroyTimeout: time.Second * 2, + }, + ) + assert.Error(t, err) + if !errors.Is(errors.WorkerAllocate, err) { + t.Fatal("error should be of type WorkerAllocate") + } + assert.Nil(t, p) +} + +func Test_StaticPool_Replace_Worker(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "pid", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + MaxJobs: 1, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + var lastPID string + lastPID = strconv.Itoa(int(p.Workers()[0].Pid())) + + res, _ := p.Exec(internal.Payload{Body: []byte("hello")}) + assert.Equal(t, lastPID, string(res.Body)) + + for i := 0; i < 10; i++ { + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.NotEqual(t, lastPID, string(res.Body)) + lastPID = string(res.Body) + } +} + +func Test_StaticPool_Debug_Worker(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "pid", "pipes") }, + pipe.NewPipeFactory(), + Config{ + Debug: true, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + assert.Len(t, p.Workers(), 0) + + var lastPID string + res, _ := p.Exec(internal.Payload{Body: []byte("hello")}) + assert.NotEqual(t, lastPID, string(res.Body)) + + assert.Len(t, p.Workers(), 0) + + for i := 0; i < 10; i++ { + assert.Len(t, p.Workers(), 0) + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.NotEqual(t, lastPID, string(res.Body)) + lastPID = string(res.Body) + } +} + +// identical to replace but controlled on worker side +func Test_StaticPool_Stop_Worker(t *testing.T) { + ctx := context.Background() + p, err := 
NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "stop", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + var lastPID string + lastPID = strconv.Itoa(int(p.Workers()[0].Pid())) + + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, lastPID, string(res.Body)) + + for i := 0; i < 10; i++ { + res, err := p.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.NotEqual(t, lastPID, string(res.Body)) + lastPID = string(res.Body) + } +} + +// identical to replace but controlled on worker side +func Test_Static_Pool_Destroy_And_Close(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "delay", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + + assert.NotNil(t, p) + assert.NoError(t, err) + + p.Destroy(ctx) + _, err = p.Exec(internal.Payload{Body: []byte("100")}) + assert.Error(t, err) +} + +// identical to replace but controlled on worker side +func Test_Static_Pool_Destroy_And_Close_While_Wait(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "delay", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + + assert.NotNil(t, p) + assert.NoError(t, err) + + go func() { + _, err := p.Exec(internal.Payload{Body: []byte("100")}) + if err != nil { + t.Errorf("error executing payload: error %v", err) + } + }() + time.Sleep(time.Millisecond * 10) + + p.Destroy(ctx) + _, err = p.Exec(internal.Payload{Body: []byte("100")}) + assert.Error(t, err) +} + +// identical to replace but controlled on worker side +func Test_Static_Pool_Handle_Dead(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + context.Background(), + func() *exec.Cmd { return exec.Command("php", "../../tests/slow-destroy.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 5, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + assert.NoError(t, err) + defer p.Destroy(ctx) + + assert.NotNil(t, p) + + for _, w := range p.Workers() { + w.State().Set(internal.StateErrored) + } + + _, err = p.Exec(internal.Payload{Body: []byte("hello")}) + assert.Error(t, err) +} + +// identical to replace but controlled on worker side +func Test_Static_Pool_Slow_Destroy(t *testing.T) { + p, err := NewPool( + context.Background(), + func() *exec.Cmd { return exec.Command("php", "../../tests/slow-destroy.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 5, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + + assert.NoError(t, err) + assert.NotNil(t, p) + + p.Destroy(context.Background()) +} + +func Benchmark_Pool_Echo(b *testing.B) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + cfg, + ) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + if 
_, err := p.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} + +// +func Benchmark_Pool_Echo_Batched(b *testing.B) { + ctx := context.Background() + p, _ := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: int64(runtime.NumCPU()), + AllocateTimeout: time.Second * 100, + DestroyTimeout: time.Second, + }, + ) + defer p.Destroy(ctx) + + var wg sync.WaitGroup + for i := 0; i < b.N; i++ { + wg.Add(1) + go func() { + defer wg.Done() + if _, err := p.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + log.Println(err) + } + }() + } + + wg.Wait() +} + +// +func Benchmark_Pool_Echo_Replaced(b *testing.B) { + ctx := context.Background() + p, _ := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/client.php", "echo", "pipes") }, + pipe.NewPipeFactory(), + Config{ + NumWorkers: 1, + MaxJobs: 1, + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + }, + ) + defer p.Destroy(ctx) + b.ResetTimer() + b.ReportAllocs() + + for n := 0; n < b.N; n++ { + if _, err := p.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + log.Println(err) + } + } +} diff --git a/pkg/pool/supervisor_pool.go b/pkg/pool/supervisor_pool.go new file mode 100755 index 00000000..6d1f0c58 --- /dev/null +++ b/pkg/pool/supervisor_pool.go @@ -0,0 +1,208 @@ +package pool + +import ( + "context" + "sync" + "time" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/interfaces/pool" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" +) + +const MB = 1024 * 1024 + +type Supervised interface { + pool.Pool + // Start used to start watching process for all pool workers + Start() +} + +type supervised struct { + cfg *SupervisorConfig + events events.Handler + pool pool.Pool + stopCh chan struct{} + mu *sync.RWMutex +} + +func newPoolWatcher(pool pool.Pool, events events.Handler, cfg *SupervisorConfig) Supervised { + sp := &supervised{ + cfg: cfg, + events: events, + pool: pool, + mu: &sync.RWMutex{}, + stopCh: make(chan struct{}), + } + return sp +} + +type ttlExec struct { + err error + p internal.Payload +} + +func (sp *supervised) ExecWithContext(ctx context.Context, rqs internal.Payload) (internal.Payload, error) { + const op = errors.Op("exec_supervised") + if sp.cfg.ExecTTL == 0 { + return sp.pool.Exec(rqs) + } + + c := make(chan ttlExec, 1) + ctx, cancel := context.WithTimeout(ctx, time.Second*time.Duration(sp.cfg.ExecTTL)) + defer cancel() + go func() { + res, err := sp.pool.ExecWithContext(ctx, rqs) + if err != nil { + c <- ttlExec{ + err: errors.E(op, err), + p: internal.Payload{}, + } + } + + c <- ttlExec{ + err: nil, + p: res, + } + }() + + for { + select { + case <-ctx.Done(): + return internal.Payload{}, errors.E(op, errors.TimeOut, ctx.Err()) + case res := <-c: + if res.err != nil { + return internal.Payload{}, res.err + } + + return res.p, nil + } + } +} + +func (sp *supervised) Exec(p internal.Payload) (internal.Payload, error) { + const op = errors.Op("supervised exec") + rsp, err := sp.pool.Exec(p) + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + return rsp, nil +} + +func (sp *supervised) AddListener(listener events.EventListener) { + sp.pool.AddListener(listener) +} + +func (sp *supervised) GetConfig() interface{} { + return 
sp.pool.GetConfig() +} + +func (sp *supervised) Workers() (workers []worker.BaseProcess) { + sp.mu.Lock() + defer sp.mu.Unlock() + return sp.pool.Workers() +} + +func (sp *supervised) RemoveWorker(worker worker.BaseProcess) error { + return sp.pool.RemoveWorker(worker) +} + +func (sp *supervised) Destroy(ctx context.Context) { + sp.pool.Destroy(ctx) +} + +func (sp *supervised) Start() { + go func() { + watchTout := time.NewTicker(time.Second * time.Duration(sp.cfg.WatchTick)) + for { + select { + case <-sp.stopCh: + watchTout.Stop() + return + // stop here + case <-watchTout.C: + sp.mu.Lock() + sp.control() + sp.mu.Unlock() + } + } + }() +} + +func (sp *supervised) Stop() { + sp.stopCh <- struct{}{} +} + +func (sp *supervised) control() { + now := time.Now() + const op = errors.Op("supervised pool control tick") + + // THIS IS A COPY OF WORKERS + workers := sp.pool.Workers() + + for i := 0; i < len(workers); i++ { + if workers[i].State().Value() == internal.StateInvalid { + continue + } + + s, err := roadrunner.WorkerProcessState(workers[i]) + if err != nil { + // worker not longer valid for supervision + continue + } + + if sp.cfg.TTL != 0 && now.Sub(workers[i].Created()).Seconds() >= float64(sp.cfg.TTL) { + err = sp.pool.RemoveWorker(workers[i]) + if err != nil { + sp.events.Push(events.PoolEvent{Event: events.EventSupervisorError, Payload: errors.E(op, err)}) + return + } + sp.events.Push(events.PoolEvent{Event: events.EventTTL, Payload: workers[i]}) + continue + } + + if sp.cfg.MaxWorkerMemory != 0 && s.MemoryUsage >= sp.cfg.MaxWorkerMemory*MB { + err = sp.pool.RemoveWorker(workers[i]) + if err != nil { + sp.events.Push(events.PoolEvent{Event: events.EventSupervisorError, Payload: errors.E(op, err)}) + return + } + sp.events.Push(events.PoolEvent{Event: events.EventMaxMemory, Payload: workers[i]}) + continue + } + + // firs we check maxWorker idle + if sp.cfg.IdleTTL != 0 { + // then check for the worker state + if workers[i].State().Value() != internal.StateReady { + continue + } + + /* + Calculate idle time + If worker in the StateReady, we read it LastUsed timestamp as UnixNano uint64 + 2. 
For example maxWorkerIdle is equal to 5sec, then, if (time.Now - LastUsed) > maxWorkerIdle + we are guessing that worker overlap idle time and has to be killed + */ + + // get last used unix nano + lu := workers[i].State().LastUsed() + + // convert last used to unixNano and sub time.now + res := int64(lu) - now.UnixNano() + + // maxWorkerIdle more than diff between now and last used + if sp.cfg.IdleTTL-uint64(res) <= 0 { + err = sp.pool.RemoveWorker(workers[i]) + if err != nil { + sp.events.Push(events.PoolEvent{Event: events.EventSupervisorError, Payload: errors.E(op, err)}) + return + } + sp.events.Push(events.PoolEvent{Event: events.EventIdleTTL, Payload: workers[i]}) + } + } + } +} diff --git a/pkg/pool/supervisor_test.go b/pkg/pool/supervisor_test.go new file mode 100644 index 00000000..2e3e7fd2 --- /dev/null +++ b/pkg/pool/supervisor_test.go @@ -0,0 +1,154 @@ +package pool + +import ( + "context" + "os/exec" + "testing" + "time" + + "github.com/spiral/roadrunner/v2" + "github.com/spiral/roadrunner/v2/internal" + "github.com/spiral/roadrunner/v2/pkg/pipe" + "github.com/stretchr/testify/assert" +) + +var cfgSupervised = Config{ + NumWorkers: int64(1), + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + Supervisor: &SupervisorConfig{ + WatchTick: 1, + TTL: 100, + IdleTTL: 100, + ExecTTL: 100, + MaxWorkerMemory: 100, + }, +} + +func TestSupervisedPool_Exec(t *testing.T) { + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/memleak.php", "pipes") }, + pipe.NewPipeFactory(), + cfgSupervised, + ) + + assert.NoError(t, err) + assert.NotNil(t, p) + stopCh := make(chan struct{}) + defer p.Destroy(context.Background()) + + go func() { + for { + select { + case <-stopCh: + return + default: + workers := p.Workers() + if len(workers) > 0 { + s, err := roadrunner.WorkerProcessState(workers[0]) + assert.NoError(t, err) + assert.NotNil(t, s) + // since this is soft limit, double max memory limit watch + if (s.MemoryUsage / MB) > cfgSupervised.Supervisor.MaxWorkerMemory*2 { + assert.Fail(t, "max memory reached") + } + } + } + } + }() + + for i := 0; i < 100; i++ { + time.Sleep(time.Millisecond * 50) + _, err = p.Exec(internal.Payload{ + Context: []byte(""), + Body: []byte("foo"), + }) + assert.NoError(t, err) + } + + stopCh <- struct{}{} +} + +func TestSupervisedPool_ExecTTL_TimedOut(t *testing.T) { + var cfgExecTTL = Config{ + NumWorkers: int64(1), + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + Supervisor: &SupervisorConfig{ + WatchTick: 1, + TTL: 100, + IdleTTL: 100, + ExecTTL: 1, + MaxWorkerMemory: 100, + }, + } + ctx := context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/sleep.php", "pipes") }, + pipe.NewPipeFactory(), + cfgExecTTL, + ) + + assert.NoError(t, err) + assert.NotNil(t, p) + defer p.Destroy(context.Background()) + + pid := p.Workers()[0].Pid() + + resp, err := p.ExecWithContext(context.Background(), internal.Payload{ + Context: []byte(""), + Body: []byte("foo"), + }) + + assert.Error(t, err) + assert.Empty(t, resp) + + time.Sleep(time.Second * 1) + // should be new worker with new pid + assert.NotEqual(t, pid, p.Workers()[0].Pid()) +} + +func TestSupervisedPool_ExecTTL_OK(t *testing.T) { + var cfgExecTTL = Config{ + NumWorkers: int64(1), + AllocateTimeout: time.Second, + DestroyTimeout: time.Second, + Supervisor: &SupervisorConfig{ + WatchTick: 1, + TTL: 100, + IdleTTL: 100, + ExecTTL: 4, + MaxWorkerMemory: 100, + }, + } + ctx := 
context.Background() + p, err := NewPool( + ctx, + func() *exec.Cmd { return exec.Command("php", "../../tests/sleep.php", "pipes") }, + pipe.NewPipeFactory(), + cfgExecTTL, + ) + + assert.NoError(t, err) + assert.NotNil(t, p) + defer p.Destroy(context.Background()) + + pid := p.Workers()[0].Pid() + + time.Sleep(time.Millisecond * 100) + resp, err := p.Exec(internal.Payload{ + Context: []byte(""), + Body: []byte("foo"), + }) + + assert.NoError(t, err) + assert.Empty(t, resp.Body) + assert.Empty(t, resp.Context) + + time.Sleep(time.Second * 1) + // should be the same pid + assert.Equal(t, pid, p.Workers()[0].Pid()) +} diff --git a/pkg/socket/socket_factory.go b/pkg/socket/socket_factory.go new file mode 100755 index 00000000..f721ad66 --- /dev/null +++ b/pkg/socket/socket_factory.go @@ -0,0 +1,228 @@ +package socket + +import ( + "context" + "net" + "os/exec" + "sync" + "time" + + "github.com/shirou/gopsutil/process" + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + workerImpl "github.com/spiral/roadrunner/v2/pkg/worker" + + "github.com/spiral/goridge/v3" + "go.uber.org/multierr" + "golang.org/x/sync/errgroup" +) + +// Factory connects to external stack using socket server. +type Factory struct { + // listens for incoming connections from underlying processes + ls net.Listener + + // relay connection timeout + tout time.Duration + + // sockets which are waiting for process association + // relays map[int64]*goridge.SocketRelay + relays sync.Map + + ErrCh chan error +} + +// todo: review + +// NewSocketServer returns Factory attached to a given socket listener. +// tout specifies for how long factory should serve for incoming relay connection +func NewSocketServer(ls net.Listener, tout time.Duration) worker.Factory { + f := &Factory{ + ls: ls, + tout: tout, + relays: sync.Map{}, + ErrCh: make(chan error, 10), + } + + // Be careful + // https://github.com/go101/go101/wiki/About-memory-ordering-guarantees-made-by-atomic-operations-in-Go + // https://github.com/golang/go/issues/5045 + go func() { + f.ErrCh <- f.listen() + }() + + return f +} + +// blocking operation, returns an error +func (f *Factory) listen() error { + errGr := &errgroup.Group{} + errGr.Go(func() error { + for { + conn, err := f.ls.Accept() + if err != nil { + return err + } + + rl := goridge.NewSocketRelay(conn) + pid, err := internal.FetchPID(rl) + if err != nil { + return err + } + + f.attachRelayToPid(pid, rl) + } + }) + + return errGr.Wait() +} + +type socketSpawn struct { + w worker.BaseProcess + err error +} + +// SpawnWorker creates Process and connects it to appropriate relay or returns error +func (f *Factory) SpawnWorkerWithContext(ctx context.Context, cmd *exec.Cmd) (worker.BaseProcess, error) { + const op = errors.Op("spawn_worker_with_context") + c := make(chan socketSpawn) + go func() { + ctx, cancel := context.WithTimeout(ctx, f.tout) + defer cancel() + w, err := workerImpl.InitBaseWorker(cmd) + if err != nil { + c <- socketSpawn{ + w: nil, + err: err, + } + return + } + + err = w.Start() + if err != nil { + c <- socketSpawn{ + w: nil, + err: errors.E(op, err), + } + return + } + + rl, err := f.findRelayWithContext(ctx, w) + if err != nil { + err = multierr.Combine( + err, + w.Kill(), + w.Wait(), + ) + + c <- socketSpawn{ + w: nil, + err: errors.E(op, err), + } + return + } + + w.AttachRelay(rl) + w.State().Set(internal.StateReady) + + c <- socketSpawn{ + w: w, + err: nil, + } + }() + + select { + case <-ctx.Done(): + return nil, 
ctx.Err() + case res := <-c: + if res.err != nil { + return nil, res.err + } + + return res.w, nil + } +} + +func (f *Factory) SpawnWorker(cmd *exec.Cmd) (worker.BaseProcess, error) { + const op = errors.Op("spawn_worker") + w, err := workerImpl.InitBaseWorker(cmd) + if err != nil { + return nil, err + } + + err = w.Start() + if err != nil { + return nil, errors.E(op, err) + } + + rl, err := f.findRelay(w) + if err != nil { + err = multierr.Combine( + err, + w.Kill(), + w.Wait(), + ) + return nil, err + } + + w.AttachRelay(rl) + w.State().Set(internal.StateReady) + + return w, nil +} + +// Close socket factory and underlying socket connection. +func (f *Factory) Close(ctx context.Context) error { + return f.ls.Close() +} + +// waits for Process to connect over socket and returns associated relay of timeout +func (f *Factory) findRelayWithContext(ctx context.Context, w worker.BaseProcess) (*goridge.SocketRelay, error) { + ticker := time.NewTicker(time.Millisecond * 100) + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-ticker.C: + _, err := process.NewProcess(int32(w.Pid())) + if err != nil { + return nil, err + } + default: + tmp, ok := f.relays.Load(w.Pid()) + if !ok { + continue + } + return tmp.(*goridge.SocketRelay), nil + } + } +} + +func (f *Factory) findRelay(w worker.BaseProcess) (*goridge.SocketRelay, error) { + const op = errors.Op("find_relay") + // poll every 1ms for the relay + pollDone := time.NewTimer(f.tout) + for { + select { + case <-pollDone.C: + return nil, errors.E(op, errors.Str("relay timeout")) + default: + tmp, ok := f.relays.Load(w.Pid()) + if !ok { + continue + } + return tmp.(*goridge.SocketRelay), nil + } + } +} + +// chan to store relay associated with specific pid +func (f *Factory) attachRelayToPid(pid int64, relay goridge.Relay) { + f.relays.Store(pid, relay) +} + +// deletes relay chan associated with specific pid +func (f *Factory) removeRelayFromPid(pid int64) { + f.relays.Delete(pid) +} diff --git a/pkg/socket/socket_factory_test.go b/pkg/socket/socket_factory_test.go new file mode 100755 index 00000000..f1a7d637 --- /dev/null +++ b/pkg/socket/socket_factory_test.go @@ -0,0 +1,590 @@ +package socket + +import ( + "context" + "net" + "os/exec" + "sync" + "testing" + "time" + + "github.com/spiral/roadrunner/v2/internal" + "github.com/spiral/roadrunner/v2/pkg/worker" + "github.com/stretchr/testify/assert" +) + +func Test_Tcp_Start(t *testing.T) { + ctx := context.Background() + time.Sleep(time.Millisecond * 10) // to ensure free socket + + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + assert.NoError(t, err) + assert.NotNil(t, w) + + go func() { + assert.NoError(t, w.Wait()) + }() + + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } +} + +func Test_Tcp_StartCloseFactory(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp") + + f := NewSocketServer(ls, time.Minute) + defer func() { + err := ls.Close() + if 
err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + + w, err := f.SpawnWorkerWithContext(ctx, cmd) + assert.NoError(t, err) + assert.NotNil(t, w) + + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } +} + +func Test_Tcp_StartError(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "pipes") + err = cmd.Start() + if err != nil { + t.Errorf("error executing the command: error %v", err) + } + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Tcp_Failboot(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err3 := ls.Close() + if err3 != nil { + t.Errorf("error closing the listener: error %v", err3) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/failboot.php") + + w, err2 := NewSocketServer(ls, time.Second*5).SpawnWorkerWithContext(ctx, cmd) + assert.Nil(t, w) + assert.Error(t, err2) + assert.Contains(t, err2.Error(), "failboot") +} + +func Test_Tcp_Timeout(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/slow-client.php", "echo", "tcp", "200", "0") + + w, err := NewSocketServer(ls, time.Millisecond*1).SpawnWorkerWithContext(ctx, cmd) + assert.Nil(t, w) + assert.Error(t, err) + assert.Contains(t, err.Error(), "context deadline exceeded") +} + +func Test_Tcp_Invalid(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/invalid.php") + + w, err := NewSocketServer(ls, time.Second*1).SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Tcp_Broken(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "broken", "tcp") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + err := w.Wait() + assert.Error(t, err) + assert.Contains(t, err.Error(), "undefined_function()") + }() + + defer func() { + 
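// The TCP tests above share the fixed address localhost:9007 and sleep 10ms to
// wait for it to free up. A possible alternative, sketched here and not part of
// this diff, is to bind port 0 so the OS picks a free port; this only helps if
// the worker script can be told where to connect, which is not visible in this
// changeset. freeListener is a hypothetical test helper.
package socket

import (
	"net"
	"testing"
)

func freeListener(t *testing.T) net.Listener {
	t.Helper()
	ls, err := net.Listen("tcp", "127.0.0.1:0") // port 0: OS assigns a free port
	if err != nil {
		t.Skip("no free tcp port available")
	}
	t.Cleanup(func() { _ = ls.Close() })
	return ls
}
// end of sketch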
time.Sleep(time.Second) + err2 := w.Stop(ctx) + // write tcp 127.0.0.1:9007->127.0.0.1:34204: use of closed network connection + assert.Error(t, err2) + }() + + sw, err := worker.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + assert.Error(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + wg.Wait() +} + +func Test_Tcp_Echo(t *testing.T) { + time.Sleep(time.Millisecond * 10) // to ensure free socket + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if assert.NoError(t, err) { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp") + + w, _ := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + go func() { + assert.NoError(t, w.Wait()) + }() + defer func() { + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := worker.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Test_Unix_Start(t *testing.T) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "unix") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + assert.NoError(t, err) + assert.NotNil(t, w) + + go func() { + assert.NoError(t, w.Wait()) + }() + + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } +} + +func Test_Unix_Failboot(t *testing.T) { + ls, err := net.Listen("unix", "sock.unix") + ctx := context.Background() + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/failboot.php") + + w, err := NewSocketServer(ls, time.Second*5).SpawnWorkerWithContext(ctx, cmd) + assert.Nil(t, w) + assert.Error(t, err) + assert.Contains(t, err.Error(), "failboot") +} + +func Test_Unix_Timeout(t *testing.T) { + ls, err := net.Listen("unix", "sock.unix") + ctx := context.Background() + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/slow-client.php", "echo", "unix", "200", "0") + + w, err := NewSocketServer(ls, time.Millisecond*100).SpawnWorkerWithContext(ctx, cmd) + assert.Nil(t, w) + assert.Error(t, err) + assert.Contains(t, err.Error(), "context deadline exceeded") +} + +func Test_Unix_Invalid(t *testing.T) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/invalid.php") + + w, err := NewSocketServer(ls, 
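// The unix-socket tests above create "sock.unix" in the working directory and
// skip when it is busy. A sketch of an alternative (not part of this diff):
// place the socket in a per-test temp dir so repeated runs never collide.
// unixListener is a hypothetical helper and assumes the factory, not the test,
// tells the worker script which socket to dial; note also that unix socket
// paths have a platform length limit, so very deep temp dirs may not work.
package socket

import (
	"net"
	"path/filepath"
	"testing"
)

func unixListener(t *testing.T) net.Listener {
	t.Helper()
	sock := filepath.Join(t.TempDir(), "sock.unix") // removed automatically after the test
	ls, err := net.Listen("unix", sock)
	if err != nil {
		t.Fatalf("unable to open unix socket: %v", err)
	}
	t.Cleanup(func() { _ = ls.Close() })
	return ls
}
// end of sketch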
time.Second*10).SpawnWorkerWithContext(ctx, cmd) + assert.Error(t, err) + assert.Nil(t, w) +} + +func Test_Unix_Broken(t *testing.T) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "broken", "unix") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + err := w.Wait() + assert.Error(t, err) + assert.Contains(t, err.Error(), "undefined_function()") + }() + + defer func() { + time.Sleep(time.Second) + err = w.Stop(ctx) + assert.Error(t, err) + }() + + sw, err := worker.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Error(t, err) + assert.Nil(t, res.Context) + assert.Nil(t, res.Body) + wg.Wait() +} + +func Test_Unix_Echo(t *testing.T) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + t.Errorf("error closing the listener: error %v", err) + } + }() + } else { + t.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "unix") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + if err != nil { + t.Fatal(err) + } + go func() { + assert.NoError(t, w.Wait()) + }() + defer func() { + err = w.Stop(ctx) + if err != nil { + t.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := worker.From(w) + if err != nil { + t.Fatal(err) + } + + res, err := sw.Exec(internal.Payload{Body: []byte("hello")}) + + assert.NoError(t, err) + assert.NotNil(t, res) + assert.NotNil(t, res.Body) + assert.Empty(t, res.Context) + + assert.Equal(t, "hello", res.String()) +} + +func Benchmark_Tcp_SpawnWorker_Stop(b *testing.B) { + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if err == nil { + defer func() { + err = ls.Close() + if err != nil { + b.Errorf("error closing the listener: error %v", err) + } + }() + } else { + b.Skip("socket is busy") + } + + f := NewSocketServer(ls, time.Minute) + for n := 0; n < b.N; n++ { + cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp") + + w, err := f.SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + go func() { + assert.NoError(b, w.Wait()) + }() + + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + } +} + +func Benchmark_Tcp_Worker_ExecEcho(b *testing.B) { + ctx := context.Background() + ls, err := net.Listen("tcp", "localhost:9007") + if err == nil { + defer func() { + err = ls.Close() + if err != nil { + b.Errorf("error closing the listener: error %v", err) + } + }() + } else { + b.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + defer func() { + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := worker.From(w) + if err != nil { + b.Fatal(err) + } + + for n := 0; n < b.N; n++ { + if _, err := sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} + +func 
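// The benchmarks above measure listener setup and worker spawn together with
// Exec. A sketch of a variant (not part of this diff) that resets the timer
// after setup and reports allocations, so only the request/response path is
// measured; it reuses the same client script and echo payload as the tests.
package socket

import (
	"context"
	"net"
	"os/exec"
	"testing"
	"time"

	"github.com/spiral/roadrunner/v2/internal"
	"github.com/spiral/roadrunner/v2/pkg/worker"
)

func Benchmark_Tcp_ExecEcho_NoSetup(b *testing.B) {
	ctx := context.Background()
	ls, err := net.Listen("tcp", "localhost:9007")
	if err != nil {
		b.Skip("socket is busy")
	}
	defer func() { _ = ls.Close() }()

	cmd := exec.Command("php", "../../tests/client.php", "echo", "tcp")
	w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd)
	if err != nil {
		b.Fatal(err)
	}
	defer func() { _ = w.Stop(ctx) }()

	sw, err := worker.From(w)
	if err != nil {
		b.Fatal(err)
	}

	b.ReportAllocs()
	b.ResetTimer() // exclude listener and spawn cost from the measurement
	for n := 0; n < b.N; n++ {
		if _, err := sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil {
			b.Fail()
		}
	}
}
// end of sketch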
Benchmark_Unix_SpawnWorker_Stop(b *testing.B) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + b.Errorf("error closing the listener: error %v", err) + } + }() + } else { + b.Skip("socket is busy") + } + + f := NewSocketServer(ls, time.Minute) + for n := 0; n < b.N; n++ { + cmd := exec.Command("php", "../../tests/client.php", "echo", "unix") + + w, err := f.SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + } +} + +func Benchmark_Unix_Worker_ExecEcho(b *testing.B) { + ctx := context.Background() + ls, err := net.Listen("unix", "sock.unix") + if err == nil { + defer func() { + err := ls.Close() + if err != nil { + b.Errorf("error closing the listener: error %v", err) + } + }() + } else { + b.Skip("socket is busy") + } + + cmd := exec.Command("php", "../../tests/client.php", "echo", "unix") + + w, err := NewSocketServer(ls, time.Minute).SpawnWorkerWithContext(ctx, cmd) + if err != nil { + b.Fatal(err) + } + defer func() { + err = w.Stop(ctx) + if err != nil { + b.Errorf("error stopping the Process: error %v", err) + } + }() + + sw, err := worker.From(w) + if err != nil { + b.Fatal(err) + } + + for n := 0; n < b.N; n++ { + if _, err := sw.Exec(internal.Payload{Body: []byte("hello")}); err != nil { + b.Fail() + } + } +} diff --git a/pkg/worker/sync_worker.go b/pkg/worker/sync_worker.go new file mode 100755 index 00000000..1eb1396e --- /dev/null +++ b/pkg/worker/sync_worker.go @@ -0,0 +1,227 @@ +package worker + +import ( + "bytes" + "context" + "time" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + "go.uber.org/multierr" + + "github.com/spiral/goridge/v3" +) + +type syncWorker struct { + w worker.BaseProcess +} + +// From creates SyncWorker from WorkerBasa +func From(w worker.BaseProcess) (worker.SyncWorker, error) { + return &syncWorker{ + w: w, + }, nil +} + +// Exec payload without TTL timeout. +func (tw *syncWorker) Exec(p internal.Payload) (internal.Payload, error) { + const op = errors.Op("sync worker Exec") + if len(p.Body) == 0 && len(p.Context) == 0 { + return internal.Payload{}, errors.E(op, errors.Str("payload can not be empty")) + } + + if tw.w.State().Value() != internal.StateReady { + return internal.Payload{}, errors.E(op, errors.Errorf("Process is not ready (%s)", tw.w.State().String())) + } + + // set last used time + tw.w.State().SetLastUsed(uint64(time.Now().UnixNano())) + tw.w.State().Set(internal.StateWorking) + + rsp, err := tw.execPayload(p) + if err != nil { + // just to be more verbose + if errors.Is(errors.ErrSoftJob, err) == false { + tw.w.State().Set(internal.StateErrored) + tw.w.State().RegisterExec() + } + return internal.Payload{}, err + } + + tw.w.State().Set(internal.StateReady) + tw.w.State().RegisterExec() + + return rsp, nil +} + +type wexec struct { + payload internal.Payload + err error +} + +// Exec payload without TTL timeout. 
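// A minimal call-site sketch for the sync worker defined above (not part of
// this diff): wrap an already-spawned BaseProcess in the blocking SyncWorker
// API and run one request/response exchange. It assumes the code lives inside
// the roadrunner module (the internal package is not importable from outside)
// and that the process was spawned through one of the factories in this
// changeset, so it is already in StateReady.
package example

import (
	"log"

	"github.com/spiral/roadrunner/v2/interfaces/worker"
	"github.com/spiral/roadrunner/v2/internal"
	workerImpl "github.com/spiral/roadrunner/v2/pkg/worker"
)

func echoOnce(w worker.BaseProcess) {
	sw, err := workerImpl.From(w) // BaseProcess -> SyncWorker
	if err != nil {
		log.Fatal(err)
	}

	rsp, err := sw.Exec(internal.Payload{Body: []byte("hello")})
	if err != nil {
		log.Fatal(err) // soft job failures surface here as ErrSoftJob
	}
	log.Println(rsp.String())
}
// end of sketch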
+func (tw *syncWorker) ExecWithContext(ctx context.Context, p internal.Payload) (internal.Payload, error) { + const op = errors.Op("ExecWithContext") + c := make(chan wexec, 1) + go func() { + if len(p.Body) == 0 && len(p.Context) == 0 { + c <- wexec{ + payload: internal.Payload{}, + err: errors.E(op, errors.Str("payload can not be empty")), + } + return + } + + if tw.w.State().Value() != internal.StateReady { + c <- wexec{ + payload: internal.Payload{}, + err: errors.E(op, errors.Errorf("Process is not ready (%s)", tw.w.State().String())), + } + return + } + + // set last used time + tw.w.State().SetLastUsed(uint64(time.Now().UnixNano())) + tw.w.State().Set(internal.StateWorking) + + rsp, err := tw.execPayload(p) + if err != nil { + // just to be more verbose + if errors.Is(errors.ErrSoftJob, err) == false { + tw.w.State().Set(internal.StateErrored) + tw.w.State().RegisterExec() + } + c <- wexec{ + payload: internal.Payload{}, + err: errors.E(op, err), + } + return + } + + tw.w.State().Set(internal.StateReady) + tw.w.State().RegisterExec() + + c <- wexec{ + payload: rsp, + err: nil, + } + }() + + select { + case <-ctx.Done(): + err := multierr.Combine(tw.Kill()) + if err != nil { + return internal.Payload{}, multierr.Append(err, ctx.Err()) + } + return internal.Payload{}, ctx.Err() + case res := <-c: + if res.err != nil { + return internal.Payload{}, res.err + } + return res.payload, nil + } +} + +func (tw *syncWorker) execPayload(p internal.Payload) (internal.Payload, error) { + const op = errors.Op("exec payload") + + frame := goridge.NewFrame() + frame.WriteVersion(goridge.VERSION_1) + // can be 0 here + + buf := new(bytes.Buffer) + buf.Write(p.Context) + buf.Write(p.Body) + + // Context offset + frame.WriteOptions(uint32(len(p.Context))) + frame.WritePayloadLen(uint32(buf.Len())) + frame.WritePayload(buf.Bytes()) + + frame.WriteCRC() + + // empty and free the buffer + buf.Truncate(0) + + err := tw.Relay().Send(frame) + if err != nil { + return internal.Payload{}, err + } + + frameR := goridge.NewFrame() + + err = tw.w.Relay().Receive(frameR) + if err != nil { + return internal.Payload{}, errors.E(op, err) + } + if frameR == nil { + return internal.Payload{}, errors.E(op, errors.Str("nil frame received")) + } + + if !frameR.VerifyCRC() { + return internal.Payload{}, errors.E(op, errors.Str("failed to verify CRC")) + } + + flags := frameR.ReadFlags() + + if flags&byte(goridge.ERROR) != byte(0) { + return internal.Payload{}, errors.E(op, errors.ErrSoftJob, errors.Str(string(frameR.Payload()))) + } + + options := frameR.ReadOptions() + if len(options) != 1 { + return internal.Payload{}, errors.E(op, errors.Str("options length should be equal 1 (body offset)")) + } + + payload := internal.Payload{} + payload.Context = frameR.Payload()[:options[0]] + payload.Body = frameR.Payload()[options[0]:] + + return payload, nil +} + +func (tw *syncWorker) String() string { + return tw.w.String() +} + +func (tw *syncWorker) Pid() int64 { + return tw.w.Pid() +} + +func (tw *syncWorker) Created() time.Time { + return tw.w.Created() +} + +func (tw *syncWorker) AddListener(listener events.EventListener) { + tw.w.AddListener(listener) +} + +func (tw *syncWorker) State() internal.State { + return tw.w.State() +} + +func (tw *syncWorker) Start() error { + return tw.w.Start() +} + +func (tw *syncWorker) Wait() error { + return tw.w.Wait() +} + +func (tw *syncWorker) Stop(ctx context.Context) error { + return tw.w.Stop(ctx) +} + +func (tw *syncWorker) Kill() error { + return tw.w.Kill() +} + +func (tw 
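// The goridge frame built in execPayload above carries exactly one option: the
// byte length of Payload.Context. The receiver splits the concatenated bytes at
// that offset into Context and Body. A tiny illustration of that wire layout,
// independent of goridge and not part of this diff; the JSON context value is
// made up for the example.
package main

import "fmt"

// split separates a received payload into context and body using the single
// option (the context length) carried by the frame.
func split(payload []byte, contextLen uint32) (ctx, body []byte) {
	return payload[:contextLen], payload[contextLen:]
}

func main() {
	ctx := []byte(`{"headers":{}}`)
	body := []byte("hello")
	wire := append(append([]byte{}, ctx...), body...) // Context first, then Body, as in execPayload

	gotCtx, gotBody := split(wire, uint32(len(ctx)))
	fmt.Printf("context=%q body=%q\n", gotCtx, gotBody)
}
// end of sketch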
*syncWorker) Relay() goridge.Relay { + return tw.w.Relay() +} + +func (tw *syncWorker) AttachRelay(rl goridge.Relay) { + tw.w.AttachRelay(rl) +} diff --git a/pkg/worker/sync_worker_test.go b/pkg/worker/sync_worker_test.go new file mode 100755 index 00000000..e224e105 --- /dev/null +++ b/pkg/worker/sync_worker_test.go @@ -0,0 +1,37 @@ +package worker + +import ( + "os/exec" + "testing" + + "github.com/spiral/roadrunner/v2/internal" + "github.com/stretchr/testify/assert" +) + +func Test_NotStarted_String(t *testing.T) { + cmd := exec.Command("php", "tests/client.php", "echo", "pipes") + + w, _ := InitBaseWorker(cmd) + assert.Contains(t, w.String(), "php tests/client.php echo pipes") + assert.Contains(t, w.String(), "inactive") + assert.Contains(t, w.String(), "numExecs: 0") +} + +func Test_NotStarted_Exec(t *testing.T) { + cmd := exec.Command("php", "tests/client.php", "echo", "pipes") + + w, _ := InitBaseWorker(cmd) + + syncWorker, err := From(w) + if err != nil { + t.Fatal(err) + } + + res, err := syncWorker.Exec(internal.Payload{Body: []byte("hello")}) + + assert.Error(t, err) + assert.Nil(t, res.Body) + assert.Nil(t, res.Context) + + assert.Contains(t, err.Error(), "Process is not ready (inactive)") +} diff --git a/pkg/worker/worker.go b/pkg/worker/worker.go new file mode 100755 index 00000000..35d3264e --- /dev/null +++ b/pkg/worker/worker.go @@ -0,0 +1,302 @@ +package worker + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "strings" + "sync" + "time" + + "github.com/spiral/errors" + "github.com/spiral/goridge/v3" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + events2 "github.com/spiral/roadrunner/v2/pkg/events" + "go.uber.org/multierr" +) + +const ( + // WaitDuration - for how long error buffer should attempt to aggregate error messages + // before merging output together since lastError update (required to keep error update together). + WaitDuration = 25 * time.Millisecond + + // ReadBufSize used to make a slice with specified length to read from stderr + ReadBufSize = 10240 // Kb +) + +var syncPool = sync.Pool{ + New: func() interface{} { + buf := make([]byte, ReadBufSize) + return &buf + }, +} + +// Process - supervised process with api over goridge.Relay. +type Process struct { + // created indicates at what time Process has been created. + created time.Time + + // updates parent supervisor or pool about Process events + events events.Handler + + // state holds information about current Process state, + // number of Process executions, buf status change time. + // publicly this object is receive-only and protected using Mutex + // and atomic counter. + state *internal.WorkerState + + // underlying command with associated process, command must be + // provided to Process from outside in non-started form. CmdSource + // stdErr direction will be handled by Process to aggregate error message. + cmd *exec.Cmd + + // pid of the process, points to pid of underlying process and + // can be nil while process is not started. + pid int + + // stderr aggregates stderr output from underlying process. Value can be + // receive only once command is completed and all pipes are closed. + stderr *bytes.Buffer + + // channel is being closed once command is complete. + // waitDone chan interface{} + + // contains information about resulted process state. + endState *os.ProcessState + + // ensures than only one execution can be run at once. 
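// The stderr reader declared above keeps 10 KiB buffers in a sync.Pool and
// stores pointers to slices so that Get/Put avoid copying the slice header into
// a fresh interface allocation. A stripped-down sketch of that get/put pattern,
// not part of this diff:
package main

import (
	"fmt"
	"sync"
)

var bufPool = sync.Pool{
	New: func() interface{} {
		b := make([]byte, 10240)
		return &b // pointer to the slice, mirroring the syncPool above
	},
}

func main() {
	buf := bufPool.Get().(*[]byte)
	n := copy(*buf, "stderr chunk")
	fmt.Println(string((*buf)[:n]))

	*buf = (*buf)[:cap(*buf)] // restore full length before returning it to the pool
	bufPool.Put(buf)
}
// end of sketch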
+ mu sync.RWMutex + + // communication bus with underlying process. + relay goridge.Relay + // rd in a second part of pipe to read from stderr + rd io.Reader + // stop signal terminates io.Pipe from reading from stderr + stop chan struct{} +} + +// InitBaseWorker creates new Process over given exec.cmd. +func InitBaseWorker(cmd *exec.Cmd) (worker.BaseProcess, error) { + if cmd.Process != nil { + return nil, fmt.Errorf("can't attach to running process") + } + w := &Process{ + created: time.Now(), + events: events2.NewEventsHandler(), + cmd: cmd, + state: internal.NewWorkerState(internal.StateInactive), + stderr: new(bytes.Buffer), + stop: make(chan struct{}, 1), + } + + w.rd, w.cmd.Stderr = io.Pipe() + + // small buffer optimization + // at this point we know, that stderr will contain huge messages + w.stderr.Grow(ReadBufSize) + + go func() { + w.watch() + }() + + return w, nil +} + +// Pid returns worker pid. +func (w *Process) Pid() int64 { + return int64(w.pid) +} + +// Created returns time worker was created at. +func (w *Process) Created() time.Time { + return w.created +} + +// AddListener registers new worker event listener. +func (w *Process) AddListener(listener events.EventListener) { + w.events.AddListener(listener) +} + +// State return receive-only Process state object, state can be used to safely access +// Process status, time when status changed and number of Process executions. +func (w *Process) State() internal.State { + return w.state +} + +// State return receive-only Process state object, state can be used to safely access +// Process status, time when status changed and number of Process executions. +func (w *Process) AttachRelay(rl goridge.Relay) { + w.relay = rl +} + +// State return receive-only Process state object, state can be used to safely access +// Process status, time when status changed and number of Process executions. +func (w *Process) Relay() goridge.Relay { + return w.relay +} + +// String returns Process description. fmt.Stringer interface +func (w *Process) String() string { + st := w.state.String() + // we can safely compare pid to 0 + if w.pid != 0 { + st = st + ", pid:" + strconv.Itoa(w.pid) + } + + return fmt.Sprintf( + "(`%s` [%s], numExecs: %v)", + strings.Join(w.cmd.Args, " "), + st, + w.state.NumExecs(), + ) +} + +func (w *Process) Start() error { + err := w.cmd.Start() + if err != nil { + return err + } + w.pid = w.cmd.Process.Pid + return nil +} + +// Wait must be called once for each Process, call will be released once Process is +// complete and will return process error (if any), if stderr is presented it's value +// will be wrapped as WorkerError. Method will return error code if php process fails +// to find or Start the script. 
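// A lifecycle sketch implied by the methods above, not part of this diff:
// InitBaseWorker wraps a not-yet-started exec.Cmd, a factory attaches a relay,
// Start launches the process and records its pid, and Wait must be consumed
// exactly once to collect the exit status and buffered stderr. The script path
// is a placeholder.
package worker

import (
	"log"
	"os/exec"
)

func lifecycle() {
	cmd := exec.Command("php", "tests/client.php", "echo", "pipes")

	w, err := InitBaseWorker(cmd) // cmd.Process must still be nil here
	if err != nil {
		log.Fatal(err)
	}
	// in the pipe/socket flows a factory calls w.AttachRelay(...) before Start
	if err := w.Start(); err != nil {
		log.Fatal(err)
	}
	log.Println("started pid:", w.Pid())

	go func() {
		if err := w.Wait(); err != nil { // exit status plus aggregated stderr, if any
			log.Println("worker exited with:", err)
		}
	}()
}
// end of sketch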
+func (w *Process) Wait() error { + const op = errors.Op("worker process wait") + err := multierr.Combine(w.cmd.Wait()) + + // at this point according to the documentation (see cmd.Wait comment) + // if worker finishes with an error, message will be written to the stderr first + // and then w.cmd.Wait return an error + w.endState = w.cmd.ProcessState + if err != nil { + w.state.Set(internal.StateErrored) + + w.mu.RLock() + // if process return code > 0, here will be an error from stderr (if presents) + if w.stderr.Len() > 0 { + err = multierr.Append(err, errors.E(op, errors.Str(w.stderr.String()))) + // stop the stderr buffer + w.stop <- struct{}{} + } + w.mu.RUnlock() + + return multierr.Append(err, w.closeRelay()) + } + + err = multierr.Append(err, w.closeRelay()) + if err != nil { + w.state.Set(internal.StateErrored) + return err + } + + if w.endState.Success() { + w.state.Set(internal.StateStopped) + } + + return nil +} + +func (w *Process) closeRelay() error { + if w.relay != nil { + err := w.relay.Close() + if err != nil { + return err + } + } + return nil +} + +// Stop sends soft termination command to the Process and waits for process completion. +func (w *Process) Stop(ctx context.Context) error { + c := make(chan error) + + go func() { + var err error + w.state.Set(internal.StateStopping) + err = multierr.Append(err, internal.SendControl(w.relay, &internal.StopCommand{Stop: true})) + if err != nil { + w.state.Set(internal.StateKilling) + c <- multierr.Append(err, w.cmd.Process.Kill()) + } + w.state.Set(internal.StateStopped) + c <- nil + }() + + select { + case <-ctx.Done(): + return ctx.Err() + case err := <-c: + if err != nil { + return err + } + return nil + } +} + +// Kill kills underlying process, make sure to call Wait() func to gather +// error log from the stderr. Does not waits for process completion! +func (w *Process) Kill() error { + w.state.Set(internal.StateKilling) + err := w.cmd.Process.Signal(os.Kill) + if err != nil { + return err + } + w.state.Set(internal.StateStopped) + return nil +} + +// put the pointer, to not allocate new slice +// but erase it len and then return back +func (w *Process) put(data *[]byte) { + *data = (*data)[:0] + *data = (*data)[:cap(*data)] + + syncPool.Put(data) +} + +// get pointer to the byte slice +func (w *Process) get() *[]byte { + return syncPool.Get().(*[]byte) +} + +// Write appends the contents of pool to the errBuffer, growing the errBuffer as +// needed. The return value n is the length of pool; errBuffer is always nil. 
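// Stop above sends the soft stop command and already escalates to Kill when the
// relay write fails. A caller-side sketch (not part of this diff) that bounds
// the whole shutdown with a deadline and escalates on timeout; the one-second
// budget is an assumption, and Wait() is expected to run elsewhere to collect
// the exit state.
package worker

import (
	"context"
	"time"

	"github.com/spiral/roadrunner/v2/interfaces/worker"
)

func stopOrKill(w worker.BaseProcess) error {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	if err := w.Stop(ctx); err != nil {
		// deadline hit or the stop command failed: force termination
		return w.Kill()
	}
	return nil
}
// end of sketch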
+func (w *Process) watch() { + go func() { + for { + select { + case <-w.stop: + buf := w.get() + // read the last data + n, _ := w.rd.Read(*buf) + w.events.Push(events.WorkerEvent{Event: events.EventWorkerLog, Worker: w, Payload: (*buf)[:n]}) + w.mu.Lock() + // write new message + w.stderr.Write((*buf)[:n]) + w.mu.Unlock() + w.put(buf) + return + default: + // read the max 10kb of stderr per one read + buf := w.get() + n, _ := w.rd.Read(*buf) + w.events.Push(events.WorkerEvent{Event: events.EventWorkerLog, Worker: w, Payload: (*buf)[:n]}) + w.mu.Lock() + // write new message + w.stderr.Write((*buf)[:n]) + w.mu.Unlock() + w.put(buf) + } + } + }() +} diff --git a/pkg/worker/worker_test.go b/pkg/worker/worker_test.go new file mode 100755 index 00000000..805f66b5 --- /dev/null +++ b/pkg/worker/worker_test.go @@ -0,0 +1,19 @@ +package worker + +import ( + "os/exec" + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_OnStarted(t *testing.T) { + cmd := exec.Command("php", "tests/client.php", "broken", "pipes") + assert.Nil(t, cmd.Start()) + + w, err := InitBaseWorker(cmd) + assert.Nil(t, w) + assert.NotNil(t, err) + + assert.Equal(t, "can't attach to running process", err.Error()) +} diff --git a/pkg/worker_watcher/worker_watcher.go b/pkg/worker_watcher/worker_watcher.go new file mode 100755 index 00000000..8788e509 --- /dev/null +++ b/pkg/worker_watcher/worker_watcher.go @@ -0,0 +1,310 @@ +package worker_watcher //nolint:golint,stylecheck + +import ( + "context" + "runtime" + "sync" + "time" + + "github.com/spiral/errors" + "github.com/spiral/roadrunner/v2/interfaces/events" + "github.com/spiral/roadrunner/v2/interfaces/worker" + "github.com/spiral/roadrunner/v2/internal" + syncWorker "github.com/spiral/roadrunner/v2/pkg/worker" +) + +type Stack struct { + workers []worker.BaseProcess + mutex sync.RWMutex + destroy bool + actualNumOfWorkers int64 +} + +func NewWorkersStack() *Stack { + w := runtime.NumCPU() + return &Stack{ + workers: make([]worker.BaseProcess, 0, w), + actualNumOfWorkers: 0, + } +} + +func (stack *Stack) Reset() { + stack.mutex.Lock() + defer stack.mutex.Unlock() + stack.actualNumOfWorkers = 0 + stack.workers = nil +} + +// Push worker back to the stack +// If stack in destroy state, Push will provide 100ms window to unlock the mutex +func (stack *Stack) Push(w worker.BaseProcess) { + stack.mutex.Lock() + defer stack.mutex.Unlock() + stack.actualNumOfWorkers++ + stack.workers = append(stack.workers, w) +} + +func (stack *Stack) IsEmpty() bool { + stack.mutex.Lock() + defer stack.mutex.Unlock() + return len(stack.workers) == 0 +} + +func (stack *Stack) Pop() (worker.BaseProcess, bool) { + stack.mutex.Lock() + defer stack.mutex.Unlock() + + // do not release new stack + if stack.destroy { + return nil, true + } + + if len(stack.workers) == 0 { + return nil, false + } + + // move worker + w := stack.workers[len(stack.workers)-1] + stack.workers = stack.workers[:len(stack.workers)-1] + stack.actualNumOfWorkers-- + return w, false +} + +func (stack *Stack) FindAndRemoveByPid(pid int64) bool { + stack.mutex.Lock() + defer stack.mutex.Unlock() + for i := 0; i < len(stack.workers); i++ { + // worker in the stack, reallocating + if stack.workers[i].Pid() == pid { + stack.workers = append(stack.workers[:i], stack.workers[i+1:]...) 
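// Everything the child process writes to stderr is forwarded by watch() above
// as an EventWorkerLog event. A sketch of a listener that surfaces those
// messages during debugging, not part of this diff; it must be registered
// before Start so no output is missed.
package worker

import (
	"log"

	"github.com/spiral/roadrunner/v2/interfaces/events"
)

func logStderr(w *Process) {
	w.AddListener(func(e interface{}) {
		ev, ok := e.(events.WorkerEvent)
		if !ok || ev.Event != events.EventWorkerLog {
			return
		}
		if chunk, ok := ev.Payload.([]byte); ok {
			log.Printf("worker %d stderr: %s", w.Pid(), chunk)
		}
	})
}
// end of sketch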
+ stack.actualNumOfWorkers-- + // worker found and removed + return true + } + } + // no worker with such ID + return false +} + +// Workers return copy of the workers in the stack +func (stack *Stack) Workers() []worker.BaseProcess { + stack.mutex.Lock() + defer stack.mutex.Unlock() + workersCopy := make([]worker.BaseProcess, 0, 1) + // copy + for _, v := range stack.workers { + workersCopy = append(workersCopy, v) + } + + return workersCopy +} + +func (stack *Stack) isDestroying() bool { + stack.mutex.Lock() + defer stack.mutex.Unlock() + return stack.destroy +} + +// we also have to give a chance to pool to Push worker (return it) +func (stack *Stack) Destroy(ctx context.Context) { + stack.mutex.Lock() + stack.destroy = true + stack.mutex.Unlock() + + tt := time.NewTicker(time.Millisecond * 100) + for { + select { + case <-tt.C: + stack.mutex.Lock() + // that might be one of the workers is working + if len(stack.workers) != int(stack.actualNumOfWorkers) { + stack.mutex.Unlock() + continue + } + stack.mutex.Unlock() + // unnecessary mutex, but + // just to make sure. All stack at this moment are in the stack + // Pop operation is blocked, push can't be done, since it's not possible to pop + stack.mutex.Lock() + for i := 0; i < len(stack.workers); i++ { + // set state for the stack in the stack (unused at the moment) + stack.workers[i].State().Set(internal.StateDestroyed) + } + stack.mutex.Unlock() + tt.Stop() + // clear + stack.Reset() + return + } + } +} + +// workerCreateFunc can be nil, but in that case, dead stack will not be replaced +func NewWorkerWatcher(allocator worker.Allocator, numWorkers int64, events events.Handler) worker.Watcher { + ww := &workerWatcher{ + stack: NewWorkersStack(), + allocator: allocator, + initialNumWorkers: numWorkers, + actualNumWorkers: numWorkers, + events: events, + } + + return ww +} + +type workerWatcher struct { + mutex sync.RWMutex + stack *Stack + allocator worker.Allocator + initialNumWorkers int64 + actualNumWorkers int64 + events events.Handler +} + +func (ww *workerWatcher) AddToWatch(workers []worker.BaseProcess) error { + for i := 0; i < len(workers); i++ { + sw, err := syncWorker.From(workers[i]) + if err != nil { + return err + } + ww.stack.Push(sw) + sw.AddListener(ww.events.Push) + + go func(swc worker.BaseProcess) { + ww.wait(swc) + }(sw) + } + return nil +} + +func (ww *workerWatcher) GetFreeWorker(ctx context.Context) (worker.BaseProcess, error) { + const op = errors.Op("GetFreeWorker") + // thread safe operation + w, stop := ww.stack.Pop() + if stop { + return nil, errors.E(op, errors.ErrWatcherStopped) + } + + // handle worker remove state + // in this state worker is destroyed by supervisor + if w != nil && w.State().Value() == internal.StateRemove { + err := ww.RemoveWorker(w) + if err != nil { + return nil, err + } + // try to get next + return ww.GetFreeWorker(ctx) + } + // no free stack + if w == nil { + for { + select { + default: + w, stop = ww.stack.Pop() + if stop { + return nil, errors.E(op, errors.ErrWatcherStopped) + } + if w == nil { + continue + } + return w, nil + case <-ctx.Done(): + return nil, errors.E(op, errors.NoFreeWorkers, errors.Str("no free workers in the stack, timeout exceed")) + } + } + } + + return w, nil +} + +func (ww *workerWatcher) AllocateNew() error { + ww.stack.mutex.Lock() + const op = errors.Op("allocate new worker") + sw, err := ww.allocator() + if err != nil { + return errors.E(op, errors.WorkerAllocate, err) + } + + ww.addToWatch(sw) + ww.stack.mutex.Unlock() + ww.PushWorker(sw) + + 
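// NewWorkerWatcher above takes a worker.Allocator; judging by how ww.allocator()
// is called, it is a niladic function returning (worker.BaseProcess, error). A
// sketch of a typical allocator built on the pipe factory from this changeset,
// not part of this diff; the command and the 30s spawn budget are placeholders.
package worker_watcher //nolint:golint,stylecheck

import (
	"context"
	"os/exec"
	"time"

	"github.com/spiral/roadrunner/v2/interfaces/worker"
	eventsImpl "github.com/spiral/roadrunner/v2/pkg/events"
	"github.com/spiral/roadrunner/v2/pkg/pipe"
)

func newWatcher(numWorkers int64) worker.Watcher {
	factory := pipe.NewPipeFactory()

	allocator := func() (worker.BaseProcess, error) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
		defer cancel()
		cmd := exec.Command("php", "tests/client.php", "echo", "pipes")
		return factory.SpawnWorkerWithContext(ctx, cmd)
	}

	return NewWorkerWatcher(allocator, numWorkers, eventsImpl.NewEventsHandler())
}
// end of sketch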
ww.events.Push(events.PoolEvent{ + Event: events.EventWorkerConstruct, + Payload: sw, + }) + + return nil +} + +func (ww *workerWatcher) RemoveWorker(wb worker.BaseProcess) error { + ww.mutex.Lock() + defer ww.mutex.Unlock() + + const op = errors.Op("remove worker") + pid := wb.Pid() + + if ww.stack.FindAndRemoveByPid(pid) { + wb.State().Set(internal.StateInvalid) + err := wb.Kill() + if err != nil { + return errors.E(op, err) + } + return nil + } + + wb.State().Set(internal.StateRemove) + return nil +} + +// O(1) operation +func (ww *workerWatcher) PushWorker(w worker.BaseProcess) { + ww.mutex.Lock() + defer ww.mutex.Unlock() + ww.stack.Push(w) +} + +// Destroy all underlying stack (but let them to complete the task) +func (ww *workerWatcher) Destroy(ctx context.Context) { + // destroy stack, we don't use ww mutex here, since we should be able to push worker + ww.stack.Destroy(ctx) +} + +// Warning, this is O(n) operation, and it will return copy of the actual workers +func (ww *workerWatcher) WorkersList() []worker.BaseProcess { + return ww.stack.Workers() +} + +func (ww *workerWatcher) wait(w worker.BaseProcess) { + const op = errors.Op("process wait") + err := w.Wait() + if err != nil { + ww.events.Push(events.WorkerEvent{ + Event: events.EventWorkerError, + Worker: w, + Payload: errors.E(op, err), + }) + } + + if w.State().Value() == internal.StateDestroyed { + // worker was manually destroyed, no need to replace + return + } + + _ = ww.stack.FindAndRemoveByPid(w.Pid()) + err = ww.AllocateNew() + if err != nil { + ww.events.Push(events.PoolEvent{ + Event: events.EventPoolError, + Payload: errors.E(op, err), + }) + } +} + +func (ww *workerWatcher) addToWatch(wb worker.BaseProcess) { + ww.mutex.Lock() + defer ww.mutex.Unlock() + go func() { + ww.wait(wb) + }() +} |
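// The watcher reports crashed workers and failed re-allocations through the
// shared event handler: wait() pushes WorkerEvent values, AllocateNew pushes
// PoolEvent values. A sketch of a listener that distinguishes the two families,
// not part of this diff:
package worker_watcher //nolint:golint,stylecheck

import (
	"log"

	"github.com/spiral/roadrunner/v2/interfaces/events"
)

func logPoolEvents(h events.Handler) {
	h.AddListener(func(e interface{}) {
		switch ev := e.(type) {
		case events.WorkerEvent: // per-process events: crashes, stderr output
			log.Printf("worker event %v: %v", ev.Event, ev.Payload)
		case events.PoolEvent: // pool-level events: construct, allocation errors
			log.Printf("pool event %v: %v", ev.Event, ev.Payload)
		}
	})
}
// end of sketch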