diff --git a/.gitignore b/.gitignore index 482a92ea..3770c10b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ ### IntelliJ IDEA -.idea/* +**/.idea/* !/.idea/runConfigurations/ *.iml diff --git a/charts/ephemeral/Chart.yaml b/charts/ephemeral/Chart.yaml index a1674e89..67d204ab 100644 --- a/charts/ephemeral/Chart.yaml +++ b/charts/ephemeral/Chart.yaml @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 - for information on the respective copyright owner +# Copyright (c) 2021-2023 - for information on the respective copyright owner # see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. # # SPDX-License-Identifier: Apache-2.0 diff --git a/charts/ephemeral/README.md b/charts/ephemeral/README.md index a28a19eb..1190dcd7 100644 --- a/charts/ephemeral/README.md +++ b/charts/ephemeral/README.md @@ -85,15 +85,19 @@ helm install --name my-release -f values.yaml ephemeral ### Discovery Service -| Parameter | Description | Default | -| ------------------------------- | ------------------------------------------------------------ | ---------------------------------- | -| `discovery.image.registry` | Image registry used to pull the Discovery Service image | `ghcr.io` | -| `discovery.image.repository` | Discovery Image name | `carbynestack/ephemeral/discovery` | -| `discovery.image.tag` | Discovery Image tag | `latest` | -| `discovery.image.pullPolicy` | Discovery Image pull policy | `IfNotPresent` | -| `discovery.service.annotations` | Annotations that should be attached to the Discovery service | `[]` | -| `discovery.frontendUrl` | The external base URL of the VCP | \`\` | -| `discovery.master.port` | The port of the master discovery service instance | \`\` | +| Parameter | Description | Default | +| -------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------- | +| `discovery.image.registry` | Image registry used to pull the Discovery Service image | `ghcr.io` | 
+| `discovery.image.repository` | Discovery Image name | `carbynestack/ephemeral/discovery` | +| `discovery.image.tag` | Discovery Image tag | `latest` | +| `discovery.image.pullPolicy` | Discovery Image pull policy | `IfNotPresent` | +| `discovery.service.annotations` | Annotations that should be attached to the Discovery service | `[]` | +| `discovery.frontendUrl` | The external base URL of the VCP | \`\` | +| `discovery.master.port` | The port of the master discovery service instance | \`\` | +| `discovery.isMaster` | Determines whether the service acts as master or slave | `true` | +| `discovery.slave.connectTimeout` | Timeout to establish the connection to the upstream master Discovery Service | `60s` | +| `discovery.stateTimeout` | Timeout in which the transition to the next state is expected | `60s` | +| `discovery.computationTimeout` | Timeout in which the result of a game's mpc computation is expected | `600s` | ### Network Controller @@ -106,30 +110,37 @@ helm install --name my-release -f values.yaml ephemeral ### Ephemeral Service -| Parameter | Description | Default | -| ------------------------------------- | -------------------------------------------------------------------- | ---------------------------------- | -| `ephemeral.image.registry` | Image registry used to pull the Ephemeral Service image | `ghcr.io` | -| `ephemeral.image.repository` | Ephemeral Image name | `carbynestack/ephemeral/ephemeral` | -| `ephemeral.image.tag` | Ephemeral Image tag | `latest` | -| `ephemeral.image.pullPolicy` | Ephemeral Image pull policy | `IfNotPresent` | -| `ephemeral.service.annotations` | Annotations that should be attached to the Ephemeral service | `[]` | -| `ephemeral.minScale` | The minimum amount of pods to keep alive for the application | `1` | -| `ephemeral.resources.requests.cpu` | The requested CPU resources in CPU cores | `100m` | -| `ephemeral.resources.requests.memory` | The requested memory resources | `256Mi` | -| `ephemeral.resources.limits.cpu` 
| The CPU resource limit in CPU cores | \`\` | -| `ephemeral.amphora.host` | The hostname of the Amphora serivce | `amphora` | -| `ephemeral.amphora.scheme` | The scheme used to access the Amphora serivce | `http` | -| `ephemeral.amphora.path` | The path under which the Amphora serivce is available | `/` | -| `ephemeral.castor.host` | The hostname of the Castor serivce | `castor` | -| `ephemeral.castor.scheme` | The scheme used to access the Castor serivce | `http` | -| `ephemeral.castor.path` | The path under which the Castor serivce is available | `/` | -| `ephemeral.castor.tupleStock` | The number of tuples to hold in stock for each tuple type | `1000` | -| `ephemeral.frontendUrl` | The external base URL of the VCP | \`\` | -| `ephemeral.spdz.prime` | The prime used by SPDZ | \`\` | -| `ephemeral.spdz.rInv` | The rInv used by SPDZ | \`\` | -| `ephemeral.spdz.gfpMacKey` | The macKey for the prime protocol used by SPDZ | \`\` | -| `ephemeral.spdz.gf2nMacKey` | The macKey for the GF(2^n) protocol used by SPDZ | \`\` | -| `ephemeral.spdz.gf2nBitLength` | The Bit length of the GF(2^n) field used by SPDZ | \`\` | -| `ephemeral.spdz.gf2nStorageSize` | The size of GF(2^n) tuples in bytes used by SPDZ | \`\` | -| `ephemeral.spdz.prepFolder` | The directory where SPDZ expects the preprocessing data to be stored | \`Player-Data\` | -| `ephemeral.playerId` | Id of this player | \`\` | +| Parameter | Description | Default | +| --------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------- | +| `ephemeral.knative.activation.timeoutSeconds` | Timeout in seconds for the container to respond to the knative activation | `3600` | +| `ephemeral.image.registry` | Image registry used to pull the Ephemeral Service image | `ghcr.io` | +| `ephemeral.image.repository` | Ephemeral Image name | `carbynestack/ephemeral/ephemeral` | +| `ephemeral.image.tag` | Ephemeral Image tag | `latest` | +| 
`ephemeral.image.pullPolicy` | Ephemeral Image pull policy | `IfNotPresent` | +| `ephemeral.service.annotations` | Annotations that should be attached to the Ephemeral service | `[]` | +| `ephemeral.minScale` | The minimum amount of pods to keep alive for the application | `1` | +| `ephemeral.resources.requests.cpu` | The requested CPU resources in CPU cores | `100m` | +| `ephemeral.resources.requests.memory` | The requested memory resources | `256Mi` | +| `ephemeral.resources.limits.cpu` | The CPU resource limit in CPU cores | \`\` | +| `ephemeral.amphora.host` | The hostname of the Amphora service | `amphora` | +| `ephemeral.amphora.scheme` | The scheme used to access the Amphora service | `http` | +| `ephemeral.amphora.path` | The path under which the Amphora service is available | `/` | +| `ephemeral.castor.host` | The hostname of the Castor service | `castor` | +| `ephemeral.castor.scheme` | The scheme used to access the Castor service | `http` | +| `ephemeral.castor.path` | The path under which the Castor service is available | `/` | +| `ephemeral.castor.tupleStock` | The number of tuples to hold in stock for each tuple type | `1000` | +| `ephemeral.discovery.host` | The host address of the discovery service | `discovery.default.svc.cluster.local` | +| `ephemeral.discovery.port` | The port of the discovery service | `8080` | +| `ephemeral.discovery.connectTimeout` | Timeout to establish the connection to the discovery service | `60s` | +| `ephemeral.frontendUrl` | The external base URL of the VCP | \`\` | +| `ephemeral.spdz.prime` | The prime used by SPDZ | \`\` | +| `ephemeral.spdz.rInv` | The rInv used by SPDZ | \`\` | +| `ephemeral.spdz.gfpMacKey` | The macKey for the prime protocol used by SPDZ | \`\` | +| `ephemeral.spdz.gf2nMacKey` | The macKey for the GF(2^n) protocol used by SPDZ | \`\` | +| `ephemeral.spdz.gf2nBitLength` | The Bit length of the GF(2^n) field used by SPDZ | \`\` | +| `ephemeral.spdz.gf2nStorageSize` | The size of GF(2^n) tuples in 
bytes used by SPDZ | \`\` | +| `ephemeral.spdz.prepFolder` | The directory where SPDZ expects the preprocessing data to be stored | \`Player-Data\` | +| `ephemeral.playerId` | Id of this player | \`\` | +| `ephemeral.networkEstablishTimeout` | Timeout to establish network connections | `1m` | +| `ephemeral.player.stateTimeout` | Timeout in which the transition to the next state is expected | `60s` | +| `ephemeral.player.computationTimeout` | Timeout in which the result of a game's mpc computation is expected | `600s` | diff --git a/charts/ephemeral/templates/discovery.yaml b/charts/ephemeral/templates/discovery.yaml index 82905970..3de65483 100644 --- a/charts/ephemeral/templates/discovery.yaml +++ b/charts/ephemeral/templates/discovery.yaml @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 - for information on the respective copyright owner +# Copyright (c) 2021-2023 - for information on the respective copyright owner # see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
# # SPDX-License-Identifier: Apache-2.0 @@ -75,7 +75,10 @@ data: "masterHost": "{{ .Values.discovery.master.host }}", "masterPort": "{{ .Values.discovery.master.port }}", "slave": {{ if .Values.discovery.isMaster }}false{{ else }}true{{ end }}, - "playerCount": {{ .Values.playerCount }} + "playerCount": {{ .Values.playerCount }}, + "stateTimeout": "{{ .Values.discovery.stateTimeout }}", + "computationTimeout": "{{ .Values.discovery.computationTimeout }}", + "connectTimeout": "{{ .Values.discovery.slave.connectTimeout }}" } --- apiVersion: networking.istio.io/v1alpha3 diff --git a/charts/ephemeral/templates/ephemeral.yaml b/charts/ephemeral/templates/ephemeral.yaml index b50b4134..6acd4f8a 100644 --- a/charts/ephemeral/templates/ephemeral.yaml +++ b/charts/ephemeral/templates/ephemeral.yaml @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 - for information on the respective copyright owner +# Copyright (c) 2021-2023 - for information on the respective copyright owner # see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
# # SPDX-License-Identifier: Apache-2.0 @@ -18,6 +18,7 @@ spec: {{- end}} autoscaling.knative.dev/minScale: "{{ .Values.ephemeral.minScale }}" spec: + timeoutSeconds: {{ .Values.ephemeral.knative.activation.timeoutSeconds }} {{- if .Values.ephemeral.image.pullSecrets }} imagePullSecrets: {{- range .Values.ephemeral.image.pullSecrets }} @@ -72,7 +73,7 @@ data: config.json: |- { "retrySleep": "50ms", - "retryTimeout": "1m", + "networkEstablishTimeout": "{{ .Values.ephemeral.networkEstablishTimeout }}", "prime": "{{ .Values.ephemeral.spdz.prime }}", "rInv": "{{ .Values.ephemeral.spdz.rInv }}", "gfpMacKey": "{{ .Values.ephemeral.spdz.gfpMacKey }}", @@ -92,7 +93,13 @@ data: "tupleStock": {{ .Values.ephemeral.castor.tupleStock }} }, "frontendURL": "{{ .Values.ephemeral.frontendUrl }}", - "discoveryAddress": "{{ .Values.ephemeral.discoveryAddress }}", + "discoveryConfig": { + "host": "{{ .Values.ephemeral.discovery.host }}", + "port": "{{ .Values.ephemeral.discovery.port }}", + "connectTimeout": "{{ .Values.ephemeral.discovery.connectTimeout }}" + }, "playerID": {{ .Values.ephemeral.playerId }}, - "playerCount": {{ .Values.playerCount }} + "playerCount": {{ .Values.playerCount }}, + "stateTimeout": "{{ .Values.ephemeral.player.stateTimeout }}", + "computationTimeout": "{{ .Values.ephemeral.player.computationTimeout }}" } diff --git a/charts/ephemeral/values.yaml b/charts/ephemeral/values.yaml index 3811536b..e36aa185 100644 --- a/charts/ephemeral/values.yaml +++ b/charts/ephemeral/values.yaml @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 - for information on the respective copyright owner +# Copyright (c) 2021-2023 - for information on the respective copyright owner # see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
# # SPDX-License-Identifier: Apache-2.0 @@ -22,10 +22,17 @@ discovery: master: host: port: + stateTimeout : "60s" + computationTimeout : "600s" + slave: + connectTimeout: "60s" ephemeral: service: annotations: [] + knative: + activation: + timeoutSeconds: 3600 image: registry: ghcr.io repository: carbynestack/ephemeral/ephemeral @@ -49,8 +56,12 @@ ephemeral: path: "/" tupleStock: 1000 frontendUrl: - discoveryAddress: discovery.default.svc.cluster.local + discovery: + host: discovery.default.svc.cluster.local + port: 8080 + connectTimeout: "60s" playerId: + networkEstablishTimeout: "1m" spdz: prime: rInv: @@ -59,6 +70,9 @@ ephemeral: gf2nBitLength: gf2nStorageSize: prepFolder: "Player-Data" + player: + stateTimeout: "60s" + computationTimeout: "600s" networkController: image: diff --git a/cmd/discovery/main.go b/cmd/discovery/main.go index 3c311eca..2ed253a5 100644 --- a/cmd/discovery/main.go +++ b/cmd/discovery/main.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -8,6 +8,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "github.com/carbynestack/ephemeral/pkg/discovery" c "github.com/carbynestack/ephemeral/pkg/discovery/transport/client" cl "github.com/carbynestack/ephemeral/pkg/discovery/transport/client" @@ -16,13 +17,11 @@ import ( "github.com/carbynestack/ephemeral/pkg/discovery/transport/server" l "github.com/carbynestack/ephemeral/pkg/logger" "github.com/carbynestack/ephemeral/pkg/types" - "time" - . 
"github.com/carbynestack/ephemeral/pkg/types" "github.com/carbynestack/ephemeral/pkg/utils" - mb "github.com/vardius/message-bus" "go.uber.org/zap" + "time" ) const ( @@ -32,7 +31,6 @@ const ( DefaultBusSize = 10000 // DefaultPortRange is the range of ports used for MCP communication between the players. DefaultPortRange = "30000:30100" - defaultStateTimeout = 60 * time.Second defaultConfigLocation = "/etc/config/config.json" ) @@ -51,22 +49,26 @@ func main() { tr := NewTransportServer(logger, config.Port) pb := discovery.NewPublisher(bus) doneCh := make(chan string) - errCh := make(chan error) + errCh := make(chan error, 1) n, err := discovery.NewIstioNetworker(logger, config.PortRange, doneCh) if err != nil { panic(err) } - stateTimeout, err := getStateTimeout(config) - if err != nil { - panic(err) + var upstreamConfig *DiscoveryClientTypedConfig + if config.Slave { + upstreamConfig = &DiscoveryClientTypedConfig{ + Host: config.MasterHost, + Port: config.MasterPort, + ConnectTimeout: config.ConnectTimeout, + } } - client, mode, err := NewClient(config, stateTimeout, logger, errCh) + client, mode, err := NewClient(upstreamConfig, logger, errCh) if err != nil { panic(err) } // TODO: extract this Istio address dynamically. - s := discovery.NewServiceNG(bus, pb, stateTimeout, tr, n, config.FrontendURL, logger, mode, client, config.PlayerCount) + s := discovery.NewServiceNG(bus, pb, config.StateTimeout, config.ComputationTimeout, tr, n, config.FrontendURL, logger, mode, client, config.PlayerCount) if err != nil { panic(err) } @@ -81,32 +83,28 @@ func main() { } } -func getStateTimeout(conf *DiscoveryConfig) (time.Duration, error) { - if conf.StateTimeout == "" { - return defaultStateTimeout, nil - } - return time.ParseDuration(conf.StateTimeout) -} - -// NewClient returns a new client with parameters specific to the server mode. 
-func NewClient(config *types.DiscoveryConfig, stateTimeout time.Duration, logger *zap.SugaredLogger, errCh chan error) (*cl.Client, string, error) { +// NewClient returns a new client with parameters specific to the server mode. If upstreamConfig is defined, the client +// will be configured to forward incoming events to an upstream master server. With upstreamConfig set to nil, the +// service is considered to be the master service. +func NewClient(upstreamConfig *types.DiscoveryClientTypedConfig, logger *zap.SugaredLogger, errCh chan error) (*cl.Client, string, error) { + logger.Debug("Creating new discovery client") mode := ModeMaster client := &cl.Client{} var err error - if config.Slave { // If Follower/Slave -> Open GRPc Connection to Master + if upstreamConfig != nil { // If Follower/Slave -> Open GRPc Connection to Master inCh := make(chan *proto.Event) outCh := make(chan *proto.Event) grpcClientConf := &c.TransportClientConfig{ - In: inCh, - Out: outCh, - ErrCh: errCh, - Host: config.MasterHost, - Port: config.MasterPort, - EventScope: EventScopeAll, - ConnID: "slave", - Timeout: stateTimeout, - Logger: logger, - Context: context.Background(), + In: inCh, + Out: outCh, + ErrCh: errCh, + Host: upstreamConfig.Host, + Port: upstreamConfig.Port, + EventScope: EventScopeAll, + ConnID: "slave", + ConnectTimeout: upstreamConfig.ConnectTimeout, + Logger: logger, + Context: context.Background(), } client, err = c.NewClient(grpcClientConf) if err != nil { @@ -146,7 +144,7 @@ func RunDeletion(doneCh chan string, errCh chan error, logger *zap.SugaredLogger } // ParseConfig parses the configuration file of the discovery service. 
-func ParseConfig(path string) (*DiscoveryConfig, error) { +func ParseConfig(path string) (*DiscoveryTypedConfig, error) { bytes, err := utils.ReadFile(path) if err != nil { panic(err) @@ -171,11 +169,35 @@ func ParseConfig(path string) (*DiscoveryConfig, error) { if conf.PlayerCount < 2 { return nil, errors.New("invalid config error, PlayerCount must be 2 or higher") } - return &conf, nil + stateTimeout, err := time.ParseDuration(conf.StateTimeout) + if err != nil { + return nil, errors.New(fmt.Sprintf("invalid state timeout format: %v", err)) + } + computationTimeout, err := time.ParseDuration(conf.ComputationTimeout) + if err != nil { + return nil, errors.New(fmt.Sprintf("invalid computation timeout format: %v", err)) + } + connectTimeout, err := time.ParseDuration(conf.ConnectTimeout) + if err != nil { + return nil, errors.New(fmt.Sprintf("invalid connection timeout format: %v", err)) + } + return &DiscoveryTypedConfig{ + FrontendURL: conf.FrontendURL, + MasterHost: conf.MasterHost, + MasterPort: conf.MasterPort, + Slave: conf.Slave, + StateTimeout: stateTimeout, + ComputationTimeout: computationTimeout, + ConnectTimeout: connectTimeout, + Port: conf.Port, + BusSize: conf.BusSize, + PortRange: conf.PortRange, + PlayerCount: conf.PlayerCount, + }, nil } // SetDefaults sets the default values for config properties if they are not set. -func SetDefaults(conf *DiscoveryConfig) { +func SetDefaults(conf *DiscoveryTypedConfig) { if conf.Port == "" { conf.Port = DefaultPort } diff --git a/cmd/discovery/main_test.go b/cmd/discovery/main_test.go index e9906b6a..19b905b7 100644 --- a/cmd/discovery/main_test.go +++ b/cmd/discovery/main_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -23,16 +23,14 @@ import ( var _ = Describe("Main", func() { It("returns a new client", func() { - conf := &DiscoveryConfig{ - Slave: true, - FrontendURL: "abc", - MasterHost: "abc", - MasterPort: "8080", - PlayerCount: 2, + conf := &DiscoveryClientTypedConfig{ + Host: "abc", + Port: "8080", + ConnectTimeout: time.Second, } logger := zap.NewNop().Sugar() errCh := make(chan error) - cl, mode, err := NewClient(conf, time.Second, logger, errCh) + cl, mode, err := NewClient(conf, logger, errCh) Expect(err).NotTo(HaveOccurred()) Expect(mode).To(Equal(ModeSlave)) Expect(cl).NotTo(BeNil()) @@ -62,7 +60,7 @@ var _ = Describe("Main", func() { Context("parameters are plausible", func() { It("succeeds", func() { data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", - "masterPort": "31400","slave": false, "playerCount": 2}`) + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) err := ioutil.WriteFile(path, data, 0644) Expect(err).NotTo(HaveOccurred()) conf, err := ParseConfig(path) @@ -78,7 +76,7 @@ var _ = Describe("Main", func() { Context("playerCount is invalid", func() { It("returns an error on PlayerCount == 1", func() { data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", - "masterPort": "31400","slave": false, "playerCount": 1}`) + "masterPort": "31400","slave": false, "playerCount": 1, "stateTimeout": "1s", "connectTimeout": "2s"}`) err := ioutil.WriteFile(path, data, 0644) Expect(err).NotTo(HaveOccurred()) _, err = ParseConfig(path) @@ -86,67 +84,113 @@ var _ = Describe("Main", func() { }) It("returns an error on negative PlayerCount", func() { data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", - "masterPort": "31400","slave": false, "playerCount": -2}`) + "masterPort": "31400","slave": false, "playerCount": 
-2, "stateTimeout": "1s", "connectTimeout": "2s"}`) err := ioutil.WriteFile(path, data, 0644) Expect(err).NotTo(HaveOccurred()) _, err = ParseConfig(path) Expect(err).To(HaveOccurred()) }) }) + Context("stateTimeout is invalid", func() { + It("returns an error on invalid format", func() { + data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err := ioutil.WriteFile(path, data, 0644) + Expect(err).NotTo(HaveOccurred()) + conf, err := ParseConfig(path) + Expect(conf).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("invalid state timeout format: time: missing unit in duration 1")) + }) + }) + Context("connectTimeout is invalid", func() { + It("returns an error on invalid format", func() { + data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2", "computationTimeout": "3s"}`) + err := ioutil.WriteFile(path, data, 0644) + Expect(err).NotTo(HaveOccurred()) + conf, err := ParseConfig(path) + Expect(conf).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("invalid connection timeout format: time: missing unit in duration 2")) + }) + }) + Context("computationTimeout is invalid", func() { + It("returns an error on invalid format", func() { + data := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud", + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3"}`) + err := ioutil.WriteFile(path, data, 0644) + Expect(err).NotTo(HaveOccurred()) + conf, err := ParseConfig(path) + Expect(conf).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("invalid computation timeout 
format: time: missing unit in duration 3")) + }) + }) }) }) Context("one of the required parameters is missing", func() { - Context("when no frontendURL is defined", func() { - AfterEach(func() { - _, _, err := cmder.CallCMD(context.TODO(), []string{fmt.Sprintf("rm %s", path)}, "./") - Expect(err).NotTo(HaveOccurred()) - }) - It("returns an error", func() { - path := fmt.Sprintf("/tmp/test-%d", random) - noFrontendURLConfig := []byte(`{"masterHost": "apollo.test.specs.cloud", - "masterPort": "31400","slave": false, "playerCount": 2}`) - err := ioutil.WriteFile(path, noFrontendURLConfig, 0644) - Expect(err).NotTo(HaveOccurred()) - _, err = ParseConfig(path) - Expect(err).To(HaveOccurred()) + AfterEach(func() { + _, _, err := cmder.CallCMD(context.TODO(), []string{fmt.Sprintf("rm %s", path)}, "./") + Expect(err).NotTo(HaveOccurred()) + }) + It("returns an error", func() { + path := fmt.Sprintf("/tmp/test-%d", random) + noFrontendURLConfig := []byte(`{"masterHost": "apollo.test.specs.cloud", + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err := ioutil.WriteFile(path, noFrontendURLConfig, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) - noMasterHostConfigSlave := []byte(`{"frontendURL": "apollo.test.specs.cloud", - "masterPort": "31400","slave": true, "playerCount": 2}`) - err = ioutil.WriteFile(path, noMasterHostConfigSlave, 0644) - Expect(err).NotTo(HaveOccurred()) - _, err = ParseConfig(path) - Expect(err).To(HaveOccurred()) + noMasterHostConfigSlave := []byte(`{"frontendURL": "apollo.test.specs.cloud", + "masterPort": "31400","slave": true, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err = ioutil.WriteFile(path, noMasterHostConfigSlave, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) - noMasterHostConfigMaster := 
[]byte(`{"frontendURL": "apollo.test.specs.cloud", - "masterPort": "31400","slave": false, "playerCount": 2}`) - err = ioutil.WriteFile(path, noMasterHostConfigMaster, 0644) - Expect(err).NotTo(HaveOccurred()) - conf, err := ParseConfig(path) - Expect(err).NotTo(HaveOccurred()) - Expect(conf).NotTo(BeNil()) + noMasterHostConfigMaster := []byte(`{"frontendURL": "apollo.test.specs.cloud", + "masterPort": "31400","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err = ioutil.WriteFile(path, noMasterHostConfigMaster, 0644) + Expect(err).NotTo(HaveOccurred()) + conf, err := ParseConfig(path) + Expect(err).NotTo(HaveOccurred()) + Expect(conf).NotTo(BeNil()) - noMasterPortConfigSlave := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "playerCount": 2}`) - err = ioutil.WriteFile(path, noMasterPortConfigSlave, 0644) - Expect(err).NotTo(HaveOccurred()) - _, err = ParseConfig(path) - Expect(err).To(HaveOccurred()) + noMasterPortConfigSlave := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "playerCount": 2, "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err = ioutil.WriteFile(path, noMasterPortConfigSlave, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) - noPlayerCountConfig := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "masterPort": "31400"}`) - err = ioutil.WriteFile(path, noPlayerCountConfig, 0644) - Expect(err).NotTo(HaveOccurred()) - _, err = ParseConfig(path) - Expect(err).To(HaveOccurred()) - }) + noPlayerCountConfig := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "masterPort": "31400", "stateTimeout": "1s", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err = 
ioutil.WriteFile(path, noPlayerCountConfig, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) + + noStateTimeoutConfig := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "masterPort": "31400", "playerCount": "2", "connectTimeout": "2s", "computationTimeout": "3s"}`) + err = ioutil.WriteFile(path, noStateTimeoutConfig, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) + + noConnectTimeoutConfig := []byte(`{"frontendURL": "apollo.test.specs.cloud","masterHost": "apollo.test.specs.cloud","slave": false, "masterPort": "31400", "playerCount": "2", "stateTimeout": "2s", "computationTimeout": "3s"}`) + err = ioutil.WriteFile(path, noConnectTimeoutConfig, 0644) + Expect(err).NotTo(HaveOccurred()) + _, err = ParseConfig(path) + Expect(err).To(HaveOccurred()) }) - Context("when port|busSize|portRange|configLocation are not defined", func() { - It("sets the default values", func() { - conf := &DiscoveryConfig{} - SetDefaults(conf) - Expect(conf.Port).To(Equal(DefaultPort)) - Expect(conf.BusSize).To(Equal(DefaultBusSize)) - Expect(conf.PortRange).To(Equal(DefaultPortRange)) - }) + }) + Context("when port|busSize|portRange|configLocation are not defined", func() { + It("sets the default values", func() { + conf := &DiscoveryTypedConfig{} + SetDefaults(conf) + Expect(conf.Port).To(Equal(DefaultPort)) + Expect(conf.BusSize).To(Equal(DefaultBusSize)) + Expect(conf.PortRange).To(Equal(DefaultPortRange)) }) }) Context("when initializing the gRPC server", func() { @@ -178,38 +222,4 @@ var _ = Describe("Main", func() { }) }) }) - - Context("when getting the stateTimeout", func() { - Context("no stateTimeout was provided in the config", func() { - It("will use the defaultStateTimeout", func() { - var config = &DiscoveryConfig{} - timeout, err := getStateTimeout(config) - Expect(err).NotTo(HaveOccurred()) - 
Expect(timeout).To(Equal(defaultStateTimeout)) - }) - }) - Context("an empty stateTimeout was provided in the config", func() { - It("will use the defaultStateTimeout", func() { - var config = &DiscoveryConfig{StateTimeout: ""} - timeout, err := getStateTimeout(config) - Expect(err).NotTo(HaveOccurred()) - Expect(timeout).To(Equal(defaultStateTimeout)) - }) - }) - Context("a valid stateTimeout was provided in the config", func() { - It("will use the provided stateTimeout", func() { - var config = &DiscoveryConfig{StateTimeout: "5m"} - timeout, err := getStateTimeout(config) - Expect(err).NotTo(HaveOccurred()) - Expect(timeout).To(Equal(5 * time.Minute)) - }) - }) - Context("an invalid stateTimeout was provided in the config", func() { - It("will return an error", func() { - var config = &DiscoveryConfig{StateTimeout: "invalid"} - _, err := getStateTimeout(config) - Expect(err).To(HaveOccurred()) - }) - }) - }) }) diff --git a/cmd/ephemeral/main.go b/cmd/ephemeral/main.go index 9cfe2da6..603d8140 100644 --- a/cmd/ephemeral/main.go +++ b/cmd/ephemeral/main.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -90,10 +90,6 @@ func ParseConfig(path string) (*SPDZEngineConfig, error) { // InitTypedConfig converts the string parameters that were parsed by standard json parser to // the parameters which are used internally, e.g. string -> time.Duration. 
func InitTypedConfig(conf *SPDZEngineConfig) (*SPDZEngineTypedConfig, error) { - retryTimeout, err := time.ParseDuration(conf.RetryTimeout) - if err != nil { - return nil, err - } retrySleep, err := time.ParseDuration(conf.RetrySleep) if err != nil { return nil, err @@ -111,6 +107,22 @@ func InitTypedConfig(conf *SPDZEngineConfig) (*SPDZEngineTypedConfig, error) { if !ok { return nil, errors.New("wrong gfpMacKey format") } + stateTimeout, err := time.ParseDuration(conf.StateTimeout) + if err != nil { + return nil, err + } + computationTimeout, err := time.ParseDuration(conf.ComputationTimeout) + if err != nil { + return nil, err + } + connectTimeout, err := time.ParseDuration(conf.DiscoveryConfig.ConnectTimeout) + if err != nil { + return nil, err + } + networkEstablishTimeout, err := time.ParseDuration(conf.NetworkEstablishTimeout) + if err != nil { + return nil, err + } amphoraURL := url.URL{ Host: conf.AmphoraConfig.Host, @@ -133,22 +145,28 @@ func InitTypedConfig(conf *SPDZEngineConfig) (*SPDZEngineTypedConfig, error) { } return &SPDZEngineTypedConfig{ - RetryTimeout: retryTimeout, - RetrySleep: retrySleep, - Prime: p, - RInv: rInv, - GfpMacKey: gfpMacKey, - Gf2nMacKey: conf.Gf2nMacKey, - Gf2nBitLength: conf.Gf2nBitLength, - Gf2nStorageSize: conf.Gf2nStorageSize, - PrepFolder: conf.PrepFolder, - AmphoraClient: amphoraClient, - CastorClient: castorClient, - TupleStock: conf.CastorConfig.TupleStock, - PlayerID: conf.PlayerID, - PlayerCount: conf.PlayerCount, - FrontendURL: conf.FrontendURL, - MaxBulkSize: conf.MaxBulkSize, - DiscoveryAddress: conf.DiscoveryAddress, + NetworkEstablishTimeout: networkEstablishTimeout, + RetrySleep: retrySleep, + Prime: p, + RInv: rInv, + GfpMacKey: gfpMacKey, + Gf2nMacKey: conf.Gf2nMacKey, + Gf2nBitLength: conf.Gf2nBitLength, + Gf2nStorageSize: conf.Gf2nStorageSize, + PrepFolder: conf.PrepFolder, + AmphoraClient: amphoraClient, + CastorClient: castorClient, + TupleStock: conf.CastorConfig.TupleStock, + PlayerID: conf.PlayerID, + 
PlayerCount: conf.PlayerCount, + FrontendURL: conf.FrontendURL, + MaxBulkSize: conf.MaxBulkSize, + DiscoveryConfig: DiscoveryClientTypedConfig{ + Host: conf.DiscoveryConfig.Host, + Port: conf.DiscoveryConfig.Port, + ConnectTimeout: connectTimeout, + }, + StateTimeout: stateTimeout, + ComputationTimeout: computationTimeout, }, nil } diff --git a/cmd/ephemeral/main_test.go b/cmd/ephemeral/main_test.go index 9b36bf7e..ace22e12 100644 --- a/cmd/ephemeral/main_test.go +++ b/cmd/ephemeral/main_test.go @@ -1,8 +1,8 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 -package main_test +package main import ( "context" @@ -15,7 +15,6 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - . "github.com/carbynestack/ephemeral/cmd/ephemeral" . "github.com/carbynestack/ephemeral/pkg/types" "github.com/carbynestack/ephemeral/pkg/utils" @@ -51,7 +50,7 @@ var _ = Describe("Main", func() { data := []byte( `{ "retrySleep":"50ms", - "retryTimeout":"1m", + "networkEstablishTimeout":"1m", "prime":"p", "rInv":"r", "gfpMacKey":"gfpKey", @@ -72,7 +71,12 @@ var _ = Describe("Main", func() { "frontendURL":"apollo.test.specs.cloud", "playerID":0, "maxBulkSize":32000, - "discoveryAddress":"discovery.default.svc.cluster.local" + "discoveryConfig": { + "Host":"discovery.default.svc.cluster.local", + "Port":"8080", + "connectTimeout":"2s" + }, + "stateTimeout":"5s" }`) err := ioutil.WriteFile(path, data, 0644) Expect(err).NotTo(HaveOccurred()) @@ -103,13 +107,13 @@ var _ = Describe("Main", func() { Context("when initializing typed config", func() { It("succeeds when all parameters are specified", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "198766463529478683931867765928436695041", - RInv: 
"133854242216446749056083838363708373830", - GfpMacKey: "1113507028231509545156335486838233835", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, AmphoraConfig: AmphoraConfig{ Host: "localhost", Scheme: "http", @@ -120,17 +124,26 @@ var _ = Describe("Main", func() { Scheme: "http", Path: "castorPath", }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "5s", + ComputationTimeout: "10s", } typedConf, err := InitTypedConfig(conf) Expect(err).NotTo(HaveOccurred()) - Expect(typedConf.RetryTimeout).To(Equal(2 * time.Second)) + Expect(typedConf.NetworkEstablishTimeout).To(Equal(2 * time.Second)) Expect(typedConf.RetrySleep).To(Equal(1 * time.Second)) + Expect(typedConf.StateTimeout).To(Equal(5 * time.Second)) + Expect(typedConf.ComputationTimeout).To(Equal(10 * time.Second)) }) Context("when non-valid parameters are specified", func() { Context("retry timeout format is corrupt", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2", + NetworkEstablishTimeout: "2", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -140,8 +153,8 @@ var _ = Describe("Main", func() { Context("retry sleep format is corrupt", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1", + NetworkEstablishTimeout: "2s", + RetrySleep: "1", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -151,9 +164,9 @@ var _ = Describe("Main", func() { Context("prime number is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "", + NetworkEstablishTimeout: "2s", + RetrySleep: 
"1s", + Prime: "", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -164,10 +177,10 @@ var _ = Describe("Main", func() { Context("inverse R is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "", + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -178,11 +191,11 @@ var _ = Describe("Main", func() { Context("gfpMacKey is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "123", - GfpMacKey: "", + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "123", + GfpMacKey: "", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -193,13 +206,13 @@ var _ = Describe("Main", func() { Context("amphora URL is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "123", - GfpMacKey: "123", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "123", + GfpMacKey: "123", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, AmphoraConfig: AmphoraConfig{ Host: "", }, @@ -209,6 +222,13 @@ var _ = Describe("Main", func() { Path: "castorPath", TupleStock: 1000, }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -219,13 +239,13 @@ var _ = Describe("Main", func() { Context("amphora scheme is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "123", - GfpMacKey: "123", - Gf2nBitLength: 
40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "123", + GfpMacKey: "123", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, AmphoraConfig: AmphoraConfig{ Host: "localhost", Scheme: "", @@ -236,6 +256,13 @@ var _ = Describe("Main", func() { Path: "castorPath", TupleStock: 1000, }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -246,13 +273,13 @@ var _ = Describe("Main", func() { Context("castor URL is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "123", - GfpMacKey: "123", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "123", + GfpMacKey: "123", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, AmphoraConfig: AmphoraConfig{ Host: "localhost", Scheme: "http", @@ -261,6 +288,13 @@ var _ = Describe("Main", func() { CastorConfig: CastorConfig{ Host: "", }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -271,13 +305,13 @@ var _ = Describe("Main", func() { Context("castor scheme is not specified", func() { It("returns an error", func() { conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "123", - RInv: "123", - GfpMacKey: "123", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "123", + RInv: "123", + GfpMacKey: "123", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, AmphoraConfig: AmphoraConfig{ Host: "localhost", Scheme: "http", @@ -287,6 +321,13 @@ var _ = Describe("Main", func() { Host: "localhost", Scheme: "", 
}, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } typedConf, err := InitTypedConfig(conf) Expect(err).To(HaveOccurred()) @@ -294,6 +335,141 @@ var _ = Describe("Main", func() { Expect(typedConf).To(BeNil()) }) }) + Context("stateTimeout format is corrupt", func() { + It("returns an error", func() { + conf := &SPDZEngineConfig{ + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, + AmphoraConfig: AmphoraConfig{ + Host: "localhost", + Scheme: "http", + Path: "amphoraPath", + }, + CastorConfig: CastorConfig{ + Host: "localhost", + Scheme: "http", + Path: "castorPath", + }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "corrupt", + } + typedConf, err := InitTypedConfig(conf) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("time: invalid duration corrupt")) + Expect(typedConf).To(BeNil()) + }) + }) + Context("discovery config's connect timeout format is corrupt", func() { + It("returns an error", func() { + conf := &SPDZEngineConfig{ + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, + AmphoraConfig: AmphoraConfig{ + Host: "localhost", + Scheme: "http", + Path: "amphoraPath", + }, + CastorConfig: CastorConfig{ + Host: "localhost", + Scheme: "http", + Path: "castorPath", + }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "corrupt", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", + } + typedConf, err := 
InitTypedConfig(conf) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("time: invalid duration corrupt")) + Expect(typedConf).To(BeNil()) + }) + }) + Context("computationTimeout format is corrupt", func() { + It("returns an error", func() { + conf := &SPDZEngineConfig{ + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, + AmphoraConfig: AmphoraConfig{ + Host: "localhost", + Scheme: "http", + Path: "amphoraPath", + }, + CastorConfig: CastorConfig{ + Host: "localhost", + Scheme: "http", + Path: "castorPath", + }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "corrupt", + } + typedConf, err := InitTypedConfig(conf) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("time: invalid duration corrupt")) + Expect(typedConf).To(BeNil()) + }) + }) + Context("networkEstablishTimeout format is corrupt", func() { + It("returns an error", func() { + conf := &SPDZEngineConfig{ + NetworkEstablishTimeout: "corrupt", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, + AmphoraConfig: AmphoraConfig{ + Host: "localhost", + Scheme: "http", + Path: "amphoraPath", + }, + CastorConfig: CastorConfig{ + Host: "localhost", + Scheme: "http", + Path: "castorPath", + }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", + } + typedConf, err := InitTypedConfig(conf) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("time: invalid duration corrupt")) + Expect(typedConf).To(BeNil()) + 
}) + }) }) }) }) @@ -304,16 +480,16 @@ var _ = Describe("Main", func() { defer os.RemoveAll(tmpPrepDir) logger := zap.NewNop().Sugar() conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "198766463529478683931867765928436695041", - RInv: "133854242216446749056083838363708373830", - GfpMacKey: "1113507028231509545156335486838233835", - Gf2nMacKey: "0xb660b323e6", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, - PlayerCount: 2, - PrepFolder: tmpPrepDir, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nMacKey: "0xb660b323e6", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, + PlayerCount: 2, + PrepFolder: tmpPrepDir, AmphoraConfig: AmphoraConfig{ Host: "localhost", Scheme: "http", @@ -325,23 +501,30 @@ var _ = Describe("Main", func() { Path: "castorPath", TupleStock: 1000, }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } handler, err := GetHandlerChain(conf, logger) Expect(err).NotTo(HaveOccurred()) Expect(handler).NotTo(BeNil()) }) }) - Context("when an error in config convertion happens", func() { + Context("when an error in config conversion happens", func() { It("is returned", func() { logger := zap.NewNop().Sugar() conf := &SPDZEngineConfig{ - RetryTimeout: "2s", - RetrySleep: "1s", - Prime: "198766463529478683931867765928436695041", - RInv: "133854242216446749056083838363708373830", - GfpMacKey: "1113507028231509545156335486838233835", - Gf2nBitLength: 40, - Gf2nStorageSize: 8, + NetworkEstablishTimeout: "2s", + RetrySleep: "1s", + Prime: "198766463529478683931867765928436695041", + RInv: "133854242216446749056083838363708373830", + GfpMacKey: "1113507028231509545156335486838233835", + Gf2nBitLength: 40, + Gf2nStorageSize: 8, // an empty amphora config is given to 
provoke an error. AmphoraConfig: AmphoraConfig{}, CastorConfig: CastorConfig{ @@ -350,6 +533,13 @@ var _ = Describe("Main", func() { Path: "castorPath", TupleStock: 1000, }, + DiscoveryConfig: DiscoveryClientConfig{ + Host: "localhost", + Port: "8080", + ConnectTimeout: "0s", + }, + StateTimeout: "0s", + ComputationTimeout: "0s", } handler, err := GetHandlerChain(conf, logger) Expect(err).To(HaveOccurred()) diff --git a/pkg/discovery/discovery.go b/pkg/discovery/discovery.go index 1e85a250..6199282b 100644 --- a/pkg/discovery/discovery.go +++ b/pkg/discovery/discovery.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -39,7 +39,7 @@ type Event struct { type PlayerID int32 // NewServiceNG returns a new instance of discovery service. -func NewServiceNG(bus mb.MessageBus, pub *Publisher, timeout time.Duration, tr t.Transport, n Networker, frontendAddress string, logger *zap.SugaredLogger, mode string, client DiscoveryClient, playerCount int) *ServiceNG { +func NewServiceNG(bus mb.MessageBus, pub *Publisher, stateTimeout time.Duration, computationTimeout time.Duration, tr t.Transport, n Networker, frontendAddress string, logger *zap.SugaredLogger, mode string, client DiscoveryClient, playerCount int) *ServiceNG { games := map[string]*Game{} players := map[string]map[PlayerID]*pb.Player{} pods := map[string]int32{} @@ -50,7 +50,8 @@ func NewServiceNG(bus mb.MessageBus, pub *Publisher, timeout time.Duration, tr t games: games, errCh: errCh, pb: pub, - timeout: timeout, + stateTimeout: stateTimeout, + computationTimeout: computationTimeout, transport: tr, players: players, playerCount: playerCount, @@ -76,7 +77,8 @@ type ServiceNG struct { networks map[string]int32 mux sync.Mutex errCh chan error - timeout time.Duration 
+ stateTimeout time.Duration + computationTimeout time.Duration transport t.Transport networker Networker homeFrontendAddress string @@ -156,6 +158,7 @@ func (s *ServiceNG) writeToWire() { outCh := s.transport.GetOut() s.bus.Subscribe(ClientOutgoingEventsTopic, func(e interface{}) { ev := e.(*pb.Event) + s.logger.Debugw("Forwarding message from wire to clients", "Event", ev) // TODO: do not broadcast to all current games. outCh <- ev }) @@ -186,9 +189,11 @@ func (s *ServiceNG) registerPlayer(pl *pb.Player, gameID string) error { // Set the port of the player every time this message is called. pl.Port = s.networks[pl.Pod] }() + s.logger.Debug("Register PLayer", "player", pl, "gameId", gameID) p, ok := s.players[gameID] // Create a new map for the GameID if !ok { + s.logger.Debug("Create new Player map") players := map[PlayerID]*pb.Player{} s.players[gameID] = players } @@ -203,9 +208,10 @@ func (s *ServiceNG) registerPlayer(pl *pb.Player, gameID string) error { // Create a new network if it doesn't exist yet. 
_, ok = s.networks[pl.Pod] if !ok { + s.logger.Debug("Create new network") port, err := s.createNetwork(pl) if err != nil { - s.logger.Errorf("error creating network %v", err) + s.logger.Errorf("Error creating network %v", err) return err } s.networks[pl.Pod] = port @@ -224,7 +230,7 @@ func (s *ServiceNG) createNetwork(pl *pb.Player) (int32, error) { } return port, err } - s.logger.Debug("Do not create the network for the foreign player.") + s.logger.Debug("Do not create the network for the foreign player") return pl.Port, nil } @@ -249,11 +255,11 @@ func (s *ServiceNG) processIn(e interface{}) { s.registerPlayer(player, ev.GameID) g, ok := s.games[ev.GameID] if !ok { // If game does not exist, create it - g, err := NewGame(ctx, ev.GameID, s.bus, s.timeout, s.logger, s.playerCount) + g, err := NewGame(ctx, ev.GameID, s.bus, s.stateTimeout, s.computationTimeout, s.logger, s.playerCount) if err != nil { s.errCh <- err } - gameErrCh := make(chan error) + gameErrCh := make(chan error, 1) go func() { // Do not propagate this error to the client. // Since should not be related to the client code, but would indicate a bug in the Game FSM. diff --git a/pkg/discovery/discovery_test.go b/pkg/discovery/discovery_test.go index d9f42a2a..9d6d8f54 100644 --- a/pkg/discovery/discovery_test.go +++ b/pkg/discovery/discovery_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -28,17 +28,18 @@ var _ = Describe("DiscoveryNG", func() { func generateDiscoveryNGTestsWithPlayerCount(playerCount int) { var ( - bus mb.MessageBus - timeout = 1 * time.Second - done chan struct{} - pb *Publisher - s *ServiceNG - g *GamesWithBus - stateTimeout time.Duration - tr t.Transport - n *FakeNetworker - frontendAddress string - logger = zap.NewNop().Sugar() + bus mb.MessageBus + timeout = 1 * time.Second + done chan struct{} + pb *Publisher + s *ServiceNG + g *GamesWithBus + stateTimeout time.Duration + computationTimeout time.Duration + tr t.Transport + n *FakeNetworker + frontendAddress string + logger = zap.NewNop().Sugar() ) BeforeEach(func() { @@ -49,6 +50,7 @@ func generateDiscoveryNGTestsWithPlayerCount(playerCount int) { Fsm: &fsm.FSM{}, } stateTimeout = 10 * time.Second + computationTimeout = 20 * time.Second tr = &FakeTransport{} n = &FakeNetworker{ FreePorts: make([]int32, playerCount), @@ -59,7 +61,7 @@ func generateDiscoveryNGTestsWithPlayerCount(playerCount int) { frontendAddress = "192.168.0.1" conf := &FakeDClient{} - s = NewServiceNG(bus, pb, stateTimeout, tr, n, frontendAddress, logger, ModeMaster, conf, playerCount) + s = NewServiceNG(bus, pb, stateTimeout, computationTimeout, tr, n, frontendAddress, logger, ModeMaster, conf, playerCount) g = &GamesWithBus{ Games: s.games, Bus: bus, @@ -212,7 +214,7 @@ func generateDiscoveryNGTestsWithPlayerCount(playerCount int) { assertExternalEvent(gameError[0], ClientOutgoingEventsTopic, g, done, func(states []string) {}) assertExternalEvent(gameError[1], ClientOutgoingEventsTopic, g, done, func(states []string) {}) // Make state timeout smaller to cause the error. 
- s.timeout = 100 * time.Millisecond + s.stateTimeout = 100 * time.Millisecond go s.Start() s.WaitUntilReady(timeout) pb.PublishExternalEvent(ready[0], ClientIncomingEventsTopic) diff --git a/pkg/discovery/fsm/fsm.go b/pkg/discovery/fsm/fsm.go index 013e4252..b8d50096 100644 --- a/pkg/discovery/fsm/fsm.go +++ b/pkg/discovery/fsm/fsm.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -20,9 +20,9 @@ const ( ) // NewFSM returns a new finate state machine. -func NewFSM(ctx context.Context, initState string, trn map[TransitionID]*Transition, cb map[string][]*Callback, timeout time.Duration, logger *zap.SugaredLogger) (*FSM, error) { +func NewFSM(ctx context.Context, initState string, trn map[TransitionID]*Transition, cb map[string][]*Callback, stateTimeout time.Duration, logger *zap.SugaredLogger) (*FSM, error) { var stateTimeoutCb *Callback - timer := time.NewTimer(timeout) + timer := time.NewTimer(stateTimeout) beforeCallbacks := make(map[string][]*Callback) afterCallbacks := make(map[string][]*Callback) for k, c := range cb { @@ -52,7 +52,7 @@ func NewFSM(ctx context.Context, initState string, trn map[TransitionID]*Transit history: history, stateTimeoutCallback: stateTimeoutCb, timer: timer, - timeout: timeout, + stateTimeout: stateTimeout, pingCh: make(chan struct{}), doneCh: make(chan struct{}, 1), queue: []*Event{}, @@ -74,7 +74,7 @@ type FSM struct { pingCh chan struct{} doneCh chan struct{} timer *time.Timer - timeout time.Duration + stateTimeout time.Duration queue []*Event logger *zap.SugaredLogger mux sync.Mutex @@ -107,17 +107,11 @@ func (f *FSM) Current() string { // The error is caused either by an unregistered event or by the callback itself. 
// If the FSM was stopped its state is updated, the timer is stopped and the error channel is closed. // The method is blocking and must be started exactly once. +// +// `errChan` is expected to be a buffered channel with minimum capacity of "1". func (f *FSM) Run(errChan chan error) { for { select { - case <-f.pingCh: - if err := f.process(); err != nil { - f.current = Stopped - errChan <- err - return - } - case <-f.timer.C: - f.stateTimeoutCallback.Action(f.stateTimeoutEvent()) case <-f.ctx.Done(): f.current = Stopped f.timer.Stop() @@ -126,6 +120,22 @@ func (f *FSM) Run(errChan chan error) { f.current = Stopped f.timer.Stop() return + case <-f.pingCh: + if err := f.process(); err != nil { + f.current = Stopped + select { + case errChan <- err: + default: + // The ErrCh is a buffered channel potentially shared by multiple subroutines. Any error written to + // the channel indicates that the current procedure has failed. + // While the "root" error is sufficient to indicate that the routine failed, it may cause further + // errors in other routines. If write to ErrCh fails, err is classified as a consequent error. In + // this case, "err" is discarded to prevent the routine from blocking. + } + return + } + case <-f.timer.C: + f.stateTimeoutCallback.Action(f.stateTimeoutEvent()) } } @@ -146,15 +156,16 @@ func (f *FSM) process() error { return errors.New("the number of events is out of sync with received pings") } event := f.queue[0] + f.logger.Debugf("FSM process event %v", event) f.queue = f.queue[1:] f.history.AddEvent(event) trID := TransitionID{ Source: f.current, Event: event.Name, } - // Specific state transition superceeds the general one, e.g. + // Specific state transition supersedes the general one, e.g. // if there is a transition with a specified source state it is followed, - // otherwise a transition mathing any state "*" is specified. + // otherwise a transition matching any state "*" is specified. 
tr, ok := f.transitions[trID] if !ok { trID = TransitionID{ @@ -191,7 +202,12 @@ func (f *FSM) doTransition(tr *Transition, event *Event) error { if !f.timer.Stop() && len(f.timer.C) > 0 { <-f.timer.C } - f.timer.Reset(f.timeout) + timeout := f.stateTimeout + // Specific state timeout overrides the fsm's default state timeout. + if tr.Timeout > 0 { + timeout = tr.Timeout + } + f.timer.Reset(timeout) // Run callbacks after state transition. err = f.runCallbackIfExists(f.afterCallbacks, f.current, event) if err != nil { @@ -206,6 +222,7 @@ func (f *FSM) runCallbackIfExists(callbacks map[string][]*Callback, state string callbacksBySource, ok := callbacks[state] if ok { for _, cb := range callbacksBySource { + f.logger.Debugw(fmt.Sprintf("Execute Callback %s", cb.Type), "state", state, "event", event) err := cb.Action(event) if err != nil { return err @@ -308,6 +325,7 @@ type TransitionID struct { type Transition struct { ID TransitionID Event, Src, Dst string + Timeout time.Duration } // WhenIn specifies the source state of the transition. @@ -342,6 +360,12 @@ func (i *Transition) Stay() *Transition { return i } +// WithTimeout defines an individual timeout within which the transition to the next state is expected. +func (i *Transition) WithTimeout(d time.Duration) *Transition { + i.Timeout = d + return i +} + // Action is a user defined function executed in the callback. type Action func(interface{}) error diff --git a/pkg/discovery/fsm/fsm_test.go b/pkg/discovery/fsm/fsm_test.go index 61333aa8..fbcfbdd2 100644 --- a/pkg/discovery/fsm/fsm_test.go +++ b/pkg/discovery/fsm/fsm_test.go @@ -127,6 +127,35 @@ var _ = Describe("FSM", func() { }) }) + Context("when individual state timeout is set", func() { + It("overrides default timeout and transitions to another state when the timeout is reached", func() { + respCh := make(chan string) + respond := func(interface{}) error { + respCh <- "timeout" + return nil + } + tr := WhenIn("Init").
+ GotEvent("StartTest").GoTo("AwaitTimeout").WithTimeout(5 * time.Millisecond) + cb := WhenStateTimeout().Do(respond) + callbacks := map[string][]*Callback{cb.Src: {cb}} + transitions := map[TransitionID]*Transition{tr.ID: tr} + timeout := 1 * time.Hour + fsm, _ := NewFSM(ctx, "Init", transitions, callbacks, timeout, logger) + go fsm.Run(errChan) + fsm.Write(&Event{ + Name: "StartTest", + Meta: &Metadata{FSM: fsm}, + }) + var resp string + select { + case resp = <-respCh: + case <-time.After(2 * time.Second): + Fail("timeout exceeded - individual timeout not triggered") + } + Expect(resp).To(Equal("timeout")) + }) + }) + Context("when staying the same state", func() { It("executes registered callbacks for the state", func() { respCh := make(chan string) diff --git a/pkg/discovery/game.go b/pkg/discovery/game.go index ca0c5e78..625d3d4d 100644 --- a/pkg/discovery/game.go +++ b/pkg/discovery/game.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -24,14 +24,15 @@ type FSMWithBus interface { // Game is a single execution of MPC. type Game struct { - id string - fsm *fsm.FSM - bus mb.MessageBus - pb *Publisher - logger *zap.SugaredLogger + id string + fsm *fsm.FSM + bus mb.MessageBus + pb *Publisher } // Init starts the fsm of the Game with its initial state. +// +// `errCh` is expected to be a buffered channel with minimum capacity of "1". func (g *Game) Init(errCh chan error) { // TODO: Think of another option how to assign the fsm to the publisher. g.pb.Fsm = g.fsm @@ -49,13 +50,14 @@ func (g *Game) Bus() mb.MessageBus { } // NewGame returns an instance of Game.
-func NewGame(ctx context.Context, id string, bus mb.MessageBus, timeout time.Duration, logger *zap.SugaredLogger, playerCount int) (*Game, error) { +func NewGame(ctx context.Context, id string, bus mb.MessageBus, stateTimeout time.Duration, computationTimeout time.Duration, logger *zap.SugaredLogger, playerCount int) (*Game, error) { publisher := &Publisher{ Bus: bus, } callbacker := GameCallbacker{ pb: publisher, gameID: id, + logger: logger.With("gameID", id), } cb := []*fsm.Callback{ fsm.AfterEnter(WaitPlayersReady).Do(callbacker.sendRegistered()), @@ -71,7 +73,7 @@ func NewGame(ctx context.Context, id string, bus mb.MessageBus, timeout time.Dur fsm.WhenIn(WaitPlayersReady).GotEvent(PlayerReady).Stay(), fsm.WhenIn(WaitPlayersReady).GotEvent(PlayersReady).GoTo(WaitTCPCheck), fsm.WhenIn(WaitTCPCheck).GotEvent(TCPCheckSuccess).Stay(), - fsm.WhenIn(WaitTCPCheck).GotEvent(TCPCheckSuccessAll).GoTo(Playing), + fsm.WhenIn(WaitTCPCheck).GotEvent(TCPCheckSuccessAll).GoTo(Playing).WithTimeout(computationTimeout), fsm.WhenIn(WaitTCPCheck).GotEvent(TCPCheckFailure).GoTo(GameError), fsm.WhenIn(Playing).GotEvent(GameFinishedWithSuccess).Stay(), fsm.WhenIn(Playing).GotEvent(GameFinishedWithError).GoTo(GameError), @@ -81,7 +83,7 @@ func NewGame(ctx context.Context, id string, bus mb.MessageBus, timeout time.Dur fsm.WhenInAnyState().GotEvent(GameDone).GoTo(GameDone), } callbacks, transitions := fsm.InitCallbacksAndTransitions(cb, trs) - f, err := fsm.NewFSM(ctx, Init, transitions, callbacks, timeout, logger) + f, err := fsm.NewFSM(ctx, Init, transitions, callbacks, stateTimeout, logger) if err != nil { return nil, err } @@ -104,12 +106,14 @@ func NewGame(ctx context.Context, id string, bus mb.MessageBus, timeout time.Dur type GameCallbacker struct { pb *Publisher gameID string + logger *zap.SugaredLogger } // sendRegistered notifies the client that it was registered for the game. 
func (c *GameCallbacker) sendRegistered() func(e interface{}) error { return func(e interface{}) error { meta := e.(*fsm.Event).Meta + c.logger.Debugw("Client registered", "meta", meta) c.pb.Publish(Registered, ServiceEventsTopic, meta.TargetTopic) return nil } @@ -119,6 +123,7 @@ func (c *GameCallbacker) sendRegistered() func(e interface{}) error { func (c *GameCallbacker) gameDone() func(e interface{}) error { return func(e interface{}) error { meta := e.(*fsm.Event).Meta + c.logger.Debugw("Game done", "meta", meta) c.pb.Publish(GameDone, ServiceEventsTopic, meta.TargetTopic) meta.FSM.Stop() return nil @@ -129,6 +134,11 @@ func (c *GameCallbacker) gameDone() func(e interface{}) error { func (c *GameCallbacker) gameError() func(e interface{}) error { return func(e interface{}) error { meta := e.(*fsm.Event).Meta + var history *fsm.History + if meta.FSM != nil { + history = meta.FSM.History() + } + c.logger.Debugw("Game failed", "meta", meta, "event history", history) c.pb.Publish(GameError, DiscoveryTopic, meta.TargetTopic) c.pb.Publish(GameDone, c.gameID) return nil @@ -138,6 +148,7 @@ func (c *GameCallbacker) gameError() func(e interface{}) error { // stateTimeout sends out a StateTimeoutError. func (c *GameCallbacker) stateTimeout() func(e interface{}) error { return func(e interface{}) error { + c.logger.Debug("Send state timeout") c.pb.Publish(StateTimeoutError, c.gameID) return nil } @@ -147,6 +158,7 @@ func (c *GameCallbacker) stateTimeout() func(e interface{}) error { // And if it is the case, it sends out the state "out" to discovery and to itself. 
func (c *GameCallbacker) checkSomethingReady(players int, in string, out string) func(e interface{}) error { return func(e interface{}) error { + c.logger.Debugw("Check readiness", "Players", players, "Event", in) meta := e.(*fsm.Event).Meta f := meta.FSM if f == nil { @@ -156,6 +168,7 @@ func (c *GameCallbacker) checkSomethingReady(players int, in string, out string) events := f.History().GetEvents() readyPlayers := countEvents(events, in) if readyPlayers == players { + c.logger.Debugf("Players ready - sending message %v", out) // the targetTopic of previous event includes the game id we would need for further event forwarding. c.pb.Publish(out, DiscoveryTopic, meta.TargetTopic) c.pb.Publish(out, c.gameID) diff --git a/pkg/discovery/game_test.go b/pkg/discovery/game_test.go index cf8f607f..b68423be 100644 --- a/pkg/discovery/game_test.go +++ b/pkg/discovery/game_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -38,7 +38,7 @@ func generateTestsWithPlayerCount(playerCount int) { bus = mb.New(10000) timeout = 10 * time.Second gameID = "71b2a100-f3f6-11e9-81b4-2a2ae2dbcce4" - game, _ = NewGame(ctx, gameID, bus, timeout, logger, playerCount) + game, _ = NewGame(ctx, gameID, bus, timeout, timeout, logger, playerCount) pb = Publisher{ Bus: bus, Fsm: game.fsm, @@ -155,7 +155,7 @@ func generateTestsWithPlayerCount(playerCount int) { Context("state timeout occurs", func() { It("transitions to the GameError state", func() { timeout := 10 * time.Millisecond - game, _ := NewGame(ctx, gameID, bus, timeout, logger, playerCount) + game, _ := NewGame(ctx, gameID, bus, timeout, timeout, logger, playerCount) // No player publishes an event, simulate a state timeout. 
Assert(GameDone, game, done, func(states []string) { Expect(states[0]).To(Equal(Init)) diff --git a/pkg/discovery/transport/client/client.go b/pkg/discovery/transport/client/client.go index d796c0c4..e7ebfe4f 100644 --- a/pkg/discovery/transport/client/client.go +++ b/pkg/discovery/transport/client/client.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -23,7 +23,7 @@ type TransportClientConfig struct { // In, Out is the external interface for the libraries that would like to use this client. Events received from "In" are forwarded to the server. The responses are sent back to "Out" In, Out chan *pb.Event - // ErrCh is the sink for all errors from the client. + // ErrCh is the sink for all errors from the client. It is supposed to be a buffered channel with a minimum capacity of `1`. ErrCh chan error // Host, Port - the server endpoint to connect to. @@ -35,8 +35,8 @@ type TransportClientConfig struct { // ConnID is the ID of the connection. In case of pure discovery clients, it is equal the gameID. ConnID string - // Timeout is the gRPC dial timeout. - Timeout time.Duration + // ConnectTimeout is the gRPC dial timeout. + ConnectTimeout time.Duration Logger *zap.SugaredLogger @@ -99,19 +99,24 @@ func (c *Client) GetOut() chan *pb.Event { // Connect dials the server and returns a connection. 
func (c *Client) Connect() (*grpc.ClientConn, error) { - conn, err := grpc.Dial(c.conf.Host+":"+c.conf.Port, grpc.WithInsecure(), grpc.WithBlock(), grpc.WithTimeout(c.conf.Timeout)) + ctx, cancelConnect := context.WithTimeout(context.Background(), c.conf.ConnectTimeout) + defer cancelConnect() + conn, err := grpc.DialContext(ctx, c.conf.Host+":"+c.conf.Port, grpc.WithBlock(), grpc.WithInsecure()) if err != nil { - c.conf.Logger.Error("error establishing a gRPC connection") + c.conf.Logger.Errorf("Error establishing a gRPC connection: %v", err) return nil, err } c.conn = conn + c.conf.Logger.Debug("Client gRPC connection established") return conn, nil } -// Run starts forwarding of the events. It blocks until the gRPC channel is closed or an error occurs. +// Run starts forwarding of the events. The functionality is started as separate go routines which run until the given +// context is closed, or a communication error occurs. func (c *Client) Run(client pb.DiscoveryClient) { ctx := c.conf.Context ctx = metadata.AppendToOutgoingContext(ctx, ConnID, c.conf.ConnID, EventScope, c.conf.EventScope) + c.conf.Logger.Debug("Register client to events", ConnID, c.conf.ConnID, EventScope, c.conf.EventScope) stream, err := client.Events(ctx) if err != nil { c.conf.ErrCh <- err @@ -119,13 +124,25 @@ func (c *Client) Run(client pb.DiscoveryClient) { } c.stream = stream + go func() { + for { + select { + case <-ctx.Done(): + err := c.Stop() + if err != nil { + c.conf.Logger.Errorf("Error stopping gRPC client %v", err) + } + return + } + } + }() go c.streamIn() go c.streamOut() } // Stop closes the underlying gRPC stream and its TCP connection. 
func (c *Client) Stop() error { - c.conf.Logger.Debug("Stopping the gRPC client") + c.conf.Logger.Debug("Stopping client connection") err := c.stream.CloseSend() if err != nil { return err @@ -140,14 +157,25 @@ func (c *Client) Stop() error { func (c *Client) streamOut() error { for { select { + case <-c.conf.Context.Done(): + c.conf.Logger.Debug("Close the event forwarding as context is done") + return nil case ev := <-c.conf.Out: + c.conf.Logger.Debugf("Sending event %v", ev) err := c.stream.Send(ev) if err != nil { - c.conf.ErrCh <- err + c.conf.Logger.Errorf("Close the event forwarding as an error occurred: %v", err) + select { + case c.conf.ErrCh <- err: + default: + // The ErrCh is a buffered channel shared by multiple subroutines. Any error written to the channel + // indicates that the current procedure has failed. + // While the "root" error is sufficient to indicate that the routine failed, it may cause further + // errors in other routines. If write to ErrCh fails, err is classified as a consequent error. In + // this case, "err" is discarded to prevent the routine from blocking. + } return nil } - case <-c.conf.Context.Done(): - return nil } } } @@ -160,25 +188,32 @@ func (c *Client) streamIn() error { defer func() { err := c.Stop() if err != nil { - c.conf.Logger.Errorf("error stopping gRPC client %v", err) + c.conf.Logger.Errorf("Error stopping gRPC client %v", err) } }() for { + ev, err := c.stream.Recv() select { case <-c.conf.Context.Done(): - return nil - case <-c.stream.Context().Done(): - c.conf.Logger.Errorf("The gRPC stream was closed") + c.conf.Logger.Debugf("Stop receiiving events as context is done. 
(err: %v)", err) return nil default: - ev, err := c.stream.Recv() + c.conf.Logger.Debugf("Received event %v", ev) if err == io.EOF { - c.conf.Logger.Debug("server closed the connection") + c.conf.Logger.Debug("Server closed the connection") return nil } if err != nil { - c.conf.Logger.Errorf("error from the gRPC stream %s", err.Error()) - c.conf.ErrCh <- err + c.conf.Logger.Errorf("Error from the gRPC stream %s", err.Error()) + select { + case c.conf.ErrCh <- err: + default: + // The ErrCh is a buffered channel shared by multiple subroutines. Any error written to the channel + // indicates that the current procedure has failed. + // While the "root" error is sufficient to indicate that the routine failed, it may cause further + // errors in other routines. If write to ErrCh fails, err is classified as a consequent error. In + // this case, "err" is discarded to prevent the routine from blocking. + } return nil } c.conf.In <- ev diff --git a/pkg/discovery/transport/client/client_test.go b/pkg/discovery/transport/client/client_test.go index 08b07a7b..dc208ef9 100644 --- a/pkg/discovery/transport/client/client_test.go +++ b/pkg/discovery/transport/client/client_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -8,16 +8,15 @@ import ( "context" pb "github.com/carbynestack/ephemeral/pkg/discovery/transport/proto" . "github.com/carbynestack/ephemeral/pkg/discovery/transport/server" - "time" - "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + "time" . "github.com/carbynestack/ephemeral/pkg/types" . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" "go.uber.org/zap" - "go.uber.org/zap/zaptest/observer" ) var _ = Describe("Client", func() { @@ -33,11 +32,11 @@ var _ = Describe("Client", func() { ) Context("when using the client", func() { BeforeEach(func() { - serverIn = make(chan *pb.Event) - serverOut = make(chan *pb.Event) - clientIn = make(chan *pb.Event) - clientOut = make(chan *pb.Event) - errCh = make(chan error) + serverIn = make(chan *pb.Event, 1) + serverOut = make(chan *pb.Event, 1) + clientIn = make(chan *pb.Event, 1) + clientOut = make(chan *pb.Event, 1) + errCh = make(chan error, 1) logger := zap.NewNop().Sugar() serverConf := &TransportConfig{ In: serverIn, @@ -49,16 +48,16 @@ var _ = Describe("Client", func() { gameID = "71b2a100-f3f6-11e9-81b4-2a2ae2dbcce4" tr = NewTransportServer(serverConf) conf := &TransportClientConfig{ - In: clientIn, - Out: clientOut, - ErrCh: errCh, - Host: "localhost", - Port: port, - EventScope: EventScopeAll, - ConnID: gameID, - Logger: logger, - Timeout: 10 * time.Second, - Context: context.TODO(), + In: clientIn, + Out: clientOut, + ErrCh: errCh, + Host: "localhost", + Port: port, + EventScope: EventScopeAll, + ConnID: gameID, + Logger: logger, + ConnectTimeout: 10 * time.Second, + Context: context.TODO(), } client, _ = NewClient(conf) }) @@ -151,19 +150,20 @@ var _ = Describe("Client", func() { cancel context.CancelFunc ) BeforeEach(func() { - outCh = make(chan *pb.Event) - errCh = make(chan error) + outCh = make(chan *pb.Event, 1) + errCh = make(chan error, 1) ctx, cancel = context.WithCancel(context.Background()) conf = &TransportClientConfig{ Out: outCh, ErrCh: errCh, Context: ctx, + Logger: zap.NewNop().Sugar(), } }) Context("when no error occurs", func() { It("sends an event to the stream", func() { st := &FakeStream{ - sendCh: make(chan struct{}), + sendCh: make(chan struct{}, 1), } cl := Client{ conf: conf, @@ -212,8 +212,8 @@ var _ = Describe("Client", func() { cancel context.CancelFunc ) BeforeEach(func() { - outCh = make(chan 
*pb.Event) - errCh = make(chan error) + outCh = make(chan *pb.Event, 1) + errCh = make(chan error, 1) ctx, cancel = context.WithCancel(context.Background()) conf = &TransportClientConfig{ Out: outCh, @@ -250,7 +250,7 @@ var _ = Describe("Client", func() { err := cl.streamIn() Expect(err).To(BeNil()) Expect(recorded.Len()).To(Equal(1)) - Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("error stopping gRPC client " + st.CloseSend().Error())) + Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("Error stopping gRPC client " + st.CloseSend().Error())) }) }) }) @@ -266,14 +266,14 @@ var _ = Describe("Client", func() { } err := cl.streamIn() Expect(err).To(BeNil()) - Expect(recorded.Len()).To(Equal(3)) - Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("server closed the connection")) + Expect(recorded.Len()).To(Equal(4)) + Expect(recorded.AllUntimed()[1].Entry.Message).To(Equal("Server closed the connection")) }) }) }) Context("when using client interfaces", func() { It("returns In channel", func() { - inCh := make(chan *pb.Event) + inCh := make(chan *pb.Event, 1) conf := &TransportClientConfig{ In: inCh, } @@ -283,7 +283,7 @@ var _ = Describe("Client", func() { Expect(cl.GetIn()).To(Equal(inCh)) }) It("returns Out channel", func() { - outCh := make(chan *pb.Event) + outCh := make(chan *pb.Event, 1) conf := &TransportClientConfig{ Out: outCh, } @@ -296,8 +296,8 @@ var _ = Describe("Client", func() { Context("when establishing a connection fails", func() { It("sends a message to the error channel", func() { conf := &TransportClientConfig{ - Timeout: 1 * time.Millisecond, - Logger: zap.NewNop().Sugar(), + ConnectTimeout: 1 * time.Millisecond, + Logger: zap.NewNop().Sugar(), } cl := Client{ conf: conf, @@ -313,6 +313,7 @@ var _ = Describe("Client", func() { conf := &TransportClientConfig{ ErrCh: errCh, Context: context.TODO(), + Logger: zap.NewNop().Sugar(), } client := Client{ conf: conf, diff --git a/pkg/discovery/transport/server/server.go 
b/pkg/discovery/transport/server/server.go index 99db2075..848b9837 100644 --- a/pkg/discovery/transport/server/server.go +++ b/pkg/discovery/transport/server/server.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -7,11 +7,10 @@ package server import ( "context" "errors" + pb "github.com/carbynestack/ephemeral/pkg/discovery/transport/proto" "io" "net" - pb "github.com/carbynestack/ephemeral/pkg/discovery/transport/proto" - . "github.com/carbynestack/ephemeral/pkg/types" mb "github.com/vardius/message-bus" @@ -47,6 +46,7 @@ type Transport interface { // NewTransportServer returns a new transport server. func NewTransportServer(conf *TransportConfig) *TransportServer { + conf.Logger.Debug("Creating new TransportServer") tr := &TransportServer{ conf: conf, mb: mb.New(10000), @@ -78,6 +78,7 @@ func (d *TransportServer) Run(cb func()) error { if err != nil { return err } + d.conf.Logger.Debugf("Started TransportServer listening on %s", lis.Addr()) pb.RegisterDiscoveryServer(d.grpcServer, d) done := make(chan struct{}, 1) go d.broadcast(done) @@ -88,23 +89,27 @@ func (d *TransportServer) Run(cb func()) error { // Stop stops the transport server. func (d *TransportServer) Stop() { + d.conf.Logger.Debug("Stopping the gRPC Server") d.grpcServer.Stop() } -// Events is a Remote Procedure that is executed by GRPC clietns. it instantiates the communication with the server. The messages are sent and read from In and Out channels instead of manipulating the GRPC stream directly. +// Events is a Remote Procedure that is executed by GRPC clients. it instantiates the communication with the server. +// The messages are sent and read from In and Out channels instead of manipulating the GRPC stream directly. 
func (d *TransportServer) Events(stream pb.Discovery_EventsServer) error { ctx := stream.Context() connID, scope, err := d.extractMeta(ctx) if err != nil { return err } + d.conf.Logger.Debugw("Start handling events", ConnID, connID, EventScope, scope) // Read all outgoing events from the broadcast topic. - d.mb.Subscribe(broadcastTopic, d.forwardToStream(stream, scope, connID)) + _ = d.mb.Subscribe(broadcastTopic, d.forwardToStream(stream, scope, connID)) errCh := make(chan error) go d.forwardFromStream(stream, errCh) // Block until we receive an error. err = <-errCh - d.mb.Unsubscribe(broadcastTopic, d.forwardToStream(stream, scope, connID)) + d.conf.Logger.Debugw("Event handling received error", "Error", err, ConnID, connID, EventScope, scope) + _ = d.mb.Unsubscribe(broadcastTopic, d.forwardToStream(stream, scope, connID)) d.conf.Logger.Debug("Unsubscribed forwardToStream from the broadcast topic") return err } @@ -122,8 +127,10 @@ func (d *TransportServer) broadcast(done chan struct{}) { for { select { case ev := <-d.conf.Out: + d.conf.Logger.Debugw("Broadcast outgoing event", "Event", ev) d.mb.Publish(broadcastTopic, ev) case <-done: + d.conf.Logger.Debug("Stopped broadcasting") return } } @@ -163,16 +170,17 @@ func (d *TransportServer) forwardToStream(stream pb.Discovery_EventsServer, scop d.sendEvent(stream, ev) } default: - d.conf.Logger.Errorf("unknown event scope %v", scope) + d.conf.Logger.Errorf("Unknown event scope %v", scope) } } } // sendEvent sents out an event and potentially prints an error. 
func (d *TransportServer) sendEvent(stream pb.Discovery_EventsServer, ev *pb.Event) { + d.conf.Logger.Debugw("Broadcasting event", "Event", ev) err := stream.Send(ev) if err != nil { - d.conf.Logger.Errorf("error broadcasting the event %s", ev.Name) + d.conf.Logger.Errorf("Error broadcasting the event %s", ev.Name) } } @@ -187,13 +195,15 @@ func (d *TransportServer) forwardFromStream(stream pb.Discovery_EventsServer, er default: ev, err := stream.Recv() if err == io.EOF { - d.conf.Logger.Debugf("server is exiting due to an EOF") + d.conf.Logger.Debugf("Server is exiting due to an EOF") return } if err != nil { + d.conf.Logger.Errorw("Received error from stream", "Error", err) errCh <- err return } + d.conf.Logger.Debugw("Received event from stream", "Event", ev) d.conf.In <- ev } } diff --git a/pkg/discovery/transport/server/server_test.go b/pkg/discovery/transport/server/server_test.go index ce128c01..7679fbb9 100644 --- a/pkg/discovery/transport/server/server_test.go +++ b/pkg/discovery/transport/server/server_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -309,7 +309,7 @@ var _ = Describe("Server", func() { ev := &pb.Event{} f(ev) Expect(recorded.Len()).To(Equal(1)) - Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("unknown event scope " + invalidScope)) + Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("Unknown event scope " + invalidScope)) }) }) }) @@ -326,7 +326,7 @@ var _ = Describe("Server", func() { st := &BrokenStream{} ts.sendEvent(st, &pb.Event{Name: "abc"}) Expect(recorded.Len()).To(Equal(1)) - Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("error broadcasting the event abc")) + Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("Error broadcasting the event abc")) }) }) }) @@ -343,8 +343,10 @@ var _ = Describe("Server", func() { }) Context("when broadcasting events", func() { It("exits upon a message from 'done' channel", func() { + core, recorded := observer.New(zapcore.DebugLevel) conf := &TransportConfig{ - Out: make(chan *pb.Event), + Out: make(chan *pb.Event), + Logger: zap.New(core).Sugar(), } ts := TransportServer{ conf: conf, @@ -353,6 +355,8 @@ var _ = Describe("Server", func() { done <- struct{}{} // The command below should not block. ts.broadcast(done) + Expect(recorded.Len()).To(Equal(1)) + Expect(recorded.AllUntimed()[0].Entry.Message).To(Equal("Stopped broadcasting")) }) }) diff --git a/pkg/ephemeral/fake_spdz_test.go b/pkg/ephemeral/fake_spdz_test.go index 90b958b9..54e43d9a 100644 --- a/pkg/ephemeral/fake_spdz_test.go +++ b/pkg/ephemeral/fake_spdz_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -69,9 +69,10 @@ func (f *BrokenFakeTransportClient) Stop() error { type FakePlayer struct { Initialized bool + history *fsm.History } -func (f *FakePlayer) Init(errCh chan error) { +func (f *FakePlayer) Init() { f.Initialized = true return } @@ -79,7 +80,7 @@ func (f *FakePlayer) Stop() { return } func (f *FakePlayer) History() *fsm.History { - return nil + return f.history } func (f *FakePlayer) Bus() mb.MessageBus { return nil diff --git a/pkg/ephemeral/forwarder_test.go b/pkg/ephemeral/forwarder_test.go index 016a6872..52f6d0e6 100644 --- a/pkg/ephemeral/forwarder_test.go +++ b/pkg/ephemeral/forwarder_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -22,16 +22,18 @@ import ( var _ = Describe("Forwarder", func() { Context("when forwarding events to player", func() { var ( - inCh chan *pb.Event - outCh chan *pb.Event - doneCh chan struct{} - bus mb.MessageBus - spdz MPCEngine - params *PlayerParams - logger *zap.SugaredLogger - forwarder *Forwarder - playerName = "0" - timeout = 10 * time.Second + inCh chan *pb.Event + outCh chan *pb.Event + doneCh chan struct{} + errCh chan error + bus mb.MessageBus + spdz MPCEngine + params *PlayerParams + logger *zap.SugaredLogger + forwarder *Forwarder + playerName = "0" + stateTimeout = 10 * time.Second + computationTimeout = 20 * time.Second ) BeforeEach(func() { @@ -55,7 +57,7 @@ var _ = Describe("Forwarder", func() { It("forwards events in both directions", func() { ctx := context.TODO() testEvent := "test" - pl, _ := NewPlayer(ctx, bus, timeout, spdz, params, logger) + pl, _ := NewPlayer(ctx, bus, stateTimeout, computationTimeout, spdz, params, errCh, logger) event := &pb.Event{ Name: testEvent, } @@ -77,7 +79,7 @@ var 
_ = Describe("Forwarder", func() { Context("when the context is canceled", func() { It("stops the player", func() { ctx, cancel := context.WithCancel(context.Background()) - pl, _ := NewPlayer(ctx, bus, timeout, spdz, params, logger) + pl, _ := NewPlayer(ctx, bus, stateTimeout, computationTimeout, spdz, params, errCh, logger) cancel() forwarder.conf.Ctx = ctx forwarder.conf.Player = pl diff --git a/pkg/ephemeral/io/carrier.go b/pkg/ephemeral/io/carrier.go index 9116d4d2..8c84007c 100644 --- a/pkg/ephemeral/io/carrier.go +++ b/pkg/ephemeral/io/carrier.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -10,9 +10,11 @@ import ( "errors" "fmt" "github.com/carbynestack/ephemeral/pkg/amphora" + "go.uber.org/zap" "io" "io/ioutil" "net" + "sync" ) // Result contains the response from SPDZ runtime computation. @@ -20,6 +22,13 @@ type Result struct { Response []string `json:"response"` } +var connectionInfo = "ConnectionInfo" + +type ConnectionInfo struct { + Host string + Port string +} + // AbstractCarrier is the carriers interface. type AbstractCarrier interface { Connect(context.Context, int32, string, string) error @@ -30,25 +39,29 @@ type AbstractCarrier interface { // Carrier is a TCP client for TCP sockets. type Carrier struct { - Dialer func(ctx context.Context, addr, port string) (net.Conn, error) - Conn net.Conn - Packer Packer - connected bool -} - -// Config contains TCP connection properties of Carrier. -type Config struct { - Port string - Host string + Dialer func(ctx context.Context, addr, port string) (net.Conn, error) + Conn net.Conn + Packer Packer + connection *ConnectionInfo + Logger *zap.SugaredLogger + mux sync.Mutex } // Connect establishes a TCP connection to a socket on a given host and port. 
func (c *Carrier) Connect(ctx context.Context, playerID int32, host string, port string) error { + c.Logger.Debugf("Connecting to %s:%s", host, port) + c.mux.Lock() + defer c.mux.Unlock() + if c.Conn != nil { + c.Logger.Debugw("Cancel connection attempt as carrier already has an active connection", connectionInfo, c.connection) + return nil + } conn, err := c.Dialer(ctx, host, port) - c.Conn = conn if err != nil { return err } + c.connection = &ConnectionInfo{host, port} + c.Conn = conn _, err = conn.Write(c.buildHeader(playerID)) if err != nil { return err @@ -59,7 +72,6 @@ func (c *Carrier) Connect(ctx context.Context, playerID int32, host string, port return err } } - c.connected = true return nil } @@ -93,10 +105,17 @@ func (c Carrier) readPrime() error { // Close closes the underlying TCP connection. func (c *Carrier) Close() error { - if c.connected { - c.Conn.Close() + c.Logger.Debugw("Closing connection", connectionInfo, c.connection) + c.mux.Lock() + defer c.mux.Unlock() + var err error + if c.connection != nil { + err = c.Conn.Close() + c.Logger.Debug("Carrier connection closed") } - return nil + c.connection = nil + c.Conn = nil + return err } // Send transmits Amphora secret shares to a TCP socket opened by an MPC runtime. 
@@ -114,6 +133,7 @@ func (c *Carrier) Send(secret []amphora.SecretShare) error { if err != nil { return err } + c.Logger.Debugw("Secret data written to socket", connectionInfo, c.connection) return nil } @@ -133,6 +153,7 @@ func (c *Carrier) Read(conv ResponseConverter, bulkObjects bool) (*Result, error resp := []byte{} resp, err := ioutil.ReadAll(c.Conn) if len(resp) == 0 { + c.Logger.Errorw("Carrier read closed with empty response", connectionInfo, c.connection) return nil, errors.New("empty result from socket") } if err != nil { diff --git a/pkg/ephemeral/io/carrier_test.go b/pkg/ephemeral/io/carrier_test.go index ac294ec5..aaea4912 100644 --- a/pkg/ephemeral/io/carrier_test.go +++ b/pkg/ephemeral/io/carrier_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -11,6 +11,7 @@ import ( . "github.com/carbynestack/ephemeral/pkg/ephemeral/io" . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" + "go.uber.org/zap" "net" "sync" ) @@ -28,6 +29,7 @@ var _ = Describe("Carrier", func() { } carrier := Carrier{ Dialer: fakeDialer, + Logger: zap.NewNop().Sugar(), } err := carrier.Connect(context.TODO(), playerID, "", "") Expect(connected).To(BeTrue()) @@ -40,6 +42,7 @@ var _ = Describe("Carrier", func() { } carrier := Carrier{ Dialer: fakeDialer, + Logger: zap.NewNop().Sugar(), } err := carrier.Connect(context.TODO(), playerID, "", "") Expect(err).NotTo(HaveOccurred()) @@ -75,6 +78,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -90,6 +94,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -106,6 +111,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -127,6 +133,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: &packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -148,6 +155,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: &packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -162,6 +170,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: packer, + Logger: zap.NewNop().Sugar(), } go server.Read(connectionOutput) carrier.Connect(ctx, playerID, "", "") @@ -185,6 +194,7 @@ var _ = Describe("Carrier", func() { carrier := Carrier{ Dialer: dialer, Packer: packer, + Logger: zap.NewNop().Sugar(), } waitGroup := sync.WaitGroup{} waitGroup.Add(1) diff --git 
a/pkg/ephemeral/io/feeder.go b/pkg/ephemeral/io/feeder.go index 60c0be59..f30ee2b5 100644 --- a/pkg/ephemeral/io/feeder.go +++ b/pkg/ephemeral/io/feeder.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -24,15 +24,14 @@ type Feeder interface { // NewAmphoraFeeder returns a new instance of amphora feeder. func NewAmphoraFeeder(l *zap.SugaredLogger, conf *SPDZEngineTypedConfig) *AmphoraFeeder { - dialer := network.RetryingDialerWithContext(conf.RetrySleep, conf.RetryTimeout, func() { - l.Debug(fmt.Sprintf("retrying to connect to SPDZ socket after %s", conf.RetrySleep)) - }) + dialer := network.RetryingDialerWithContext(conf.RetrySleep, conf.NetworkEstablishTimeout, l) carrier := &Carrier{ Dialer: dialer, Packer: &SPDZPacker{ MaxBulkSize: conf.MaxBulkSize, }, + Logger: l, } return &AmphoraFeeder{ logger: l, @@ -93,13 +92,14 @@ func (f *AmphoraFeeder) LoadFromRequestAndFeed(act *Activation, feedPort string, // Close closes the underlying socket connection. func (f *AmphoraFeeder) Close() error { + f.logger.Debug("Close connections") return f.carrier.Close() } // feedAndRead takes a slice of base64 encoded secret shared parameters along with the port where SPDZ runtime is listening for the input. The base64 input params are converted into a form digestable by SPDZ and sent to the socket. The runtime must send back a response for this function to finish without an error. 
-func (f *AmphoraFeeder) feedAndRead(params []string, port string, ctx *CtxConfig) (*Result, error) { +func (f *AmphoraFeeder) feedAndRead(params []string, feedPort string, ctx *CtxConfig) (*Result, error) { var conv ResponseConverter - f.logger.Debugw(fmt.Sprintf("Received secret shared parameters %s", params), GameID, ctx.Act.GameID) + f.logger.Debugw(fmt.Sprintf("Received secret shared parameters \"%.10s...\" (len: %d)", params, len(params)), GameID, ctx.Act.GameID) isBulk := false // It must be defined in the Activation whether plaintext or secret shared output is expected. switch strings.ToUpper(ctx.Act.Output.Type) { @@ -116,12 +116,13 @@ func (f *AmphoraFeeder) feedAndRead(params []string, port string, ctx *CtxConfig default: return nil, fmt.Errorf("no output config is given, either %s, %s or %s must be defined", PlainText, SecretShare, AmphoraSecret) } - err := f.carrier.Connect(ctx.Context, ctx.Spdz.PlayerID, "localhost", port) + err := f.carrier.Connect(ctx.Context, ctx.Spdz.PlayerID, "localhost", feedPort) defer f.carrier.Close() if err != nil { return nil, err } - secrets := []amphora.SecretShare{} + f.logger.Debug("Carrier connected") + var secrets []amphora.SecretShare for i := range params { secret := amphora.SecretShare{ Data: params[i], @@ -132,6 +133,7 @@ func (f *AmphoraFeeder) feedAndRead(params []string, port string, ctx *CtxConfig if err != nil { return nil, err } + f.logger.Debug("Parameters written to carrier") return f.carrier.Read(conv, isBulk) } diff --git a/pkg/ephemeral/io/feeder_test.go b/pkg/ephemeral/io/feeder_test.go index 80401bcd..38c13e9f 100644 --- a/pkg/ephemeral/io/feeder_test.go +++ b/pkg/ephemeral/io/feeder_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -10,7 +10,6 @@ import ( "errors" "github.com/carbynestack/ephemeral/pkg/amphora" . "github.com/carbynestack/ephemeral/pkg/types" - . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "go.uber.org/zap" diff --git a/pkg/ephemeral/io/tuple_streamer.go b/pkg/ephemeral/io/tuple_streamer.go index 329eb05d..9abf43f5 100644 --- a/pkg/ephemeral/io/tuple_streamer.go +++ b/pkg/ephemeral/io/tuple_streamer.go @@ -1,5 +1,5 @@ // -// Copyright (c) 2022 - for information on the respective copyright owner +// Copyright (c) 2022-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -114,8 +114,6 @@ type TupleStreamer interface { StartStreamTuples(terminateCh chan struct{}, errCh chan error, wg *sync.WaitGroup) } -const defaultWriteDeadline = 5 * time.Second - // GetTupleFileName returns the filename for a given tuple type, spdz configuration and thread number func GetTupleFileName(tt castor.TupleType, conf *SPDZEngineTypedConfig, threadNr int) string { return fmt.Sprintf("%s-%s-P%d-T%d", @@ -133,7 +131,7 @@ func NewCastorTupleStreamerWithWriterFactory(l *zap.SugaredLogger, tt castor.Tup loggerWithContext := l.With(GameID, gameID, TupleType, tt, "ThreadNr", threadNr) tupleFileName := GetTupleFileName(tt, conf, threadNr) filePath := filepath.Join(playerDataDir, tupleFileName) - pipeWriter, err := pipeWriterFactory(loggerWithContext, filePath, defaultWriteDeadline) + pipeWriter, err := pipeWriterFactory(loggerWithContext, filePath, conf.ComputationTimeout) if err != nil { return nil, fmt.Errorf("error creating pipe writer: %v", err) } @@ -155,15 +153,24 @@ func NewCastorTupleStreamerWithWriterFactory(l *zap.SugaredLogger, tt castor.Tup // CastorTupleStreamer provides tuples to the SPDZ execution for the given type and configuration. 
type CastorTupleStreamer struct { - logger *zap.SugaredLogger - pipeWriter PipeWriter - tupleType castor.TupleType - stockSize int32 - castorClient castor.AbstractClient - baseRequestID uuid.UUID - requestCycle int - headerData []byte - streamData []byte + logger *zap.SugaredLogger + pipeWriter PipeWriter + tupleType castor.TupleType + stockSize int32 + castorClient castor.AbstractClient + baseRequestID uuid.UUID + requestCycle int + headerData []byte + streamData []byte + streamerDoneCh chan struct{} + tupleBufferCh chan []byte + fetchTuplesCh chan struct{} + // bufferLckCh is used as a synchronization lock, where one routine can lock the channel by writing to it. Each + // consecutive write will block the writing routine until the channel has been unlocked by reading from it. In + // combination with a "select" statement, the channel can be used as a "timeout-able" lock. + // + // Reading is supposed to be performed by the initial routine which wrote to the channel. + bufferLckCh chan struct{} streamedBytes int } @@ -171,23 +178,44 @@ type CastorTupleStreamer struct { // required by MP-SPDZ func (ts *CastorTupleStreamer) StartStreamTuples(terminateCh chan struct{}, errCh chan error, wg *sync.WaitGroup) { ts.streamData = append(ts.streamData, ts.headerData...) - pipeWriterReady := make(chan struct{}) + ts.streamerDoneCh = make(chan struct{}) + ts.fetchTuplesCh = make(chan struct{}, 1) + ts.bufferLckCh = make(chan struct{}, 1) + ts.tupleBufferCh = make(chan []byte, 1) + ts.fetchTuplesCh <- struct{}{} go func() { defer func() { - var streamedTupleBytes, discardedTupleBytes int + close(ts.streamerDoneCh) + select { + case ts.bufferLckCh <- struct{}{}: + // Lock the buffer routine or wait in case the channel is currently "locked". A blocking write indicates + // that the bufferData routine is currently fetching new tuples from castor. 
As we want the information + // on discarded bytes to be as accurate as possible, we will wait some time for the tuples to be fetched + // before computing discardedTupleBytes. + case <-time.After(10 * time.Second): + // However, we will not wait for too long for the bufferData routine to finish + } + discardedTupleBytes := 0 + select { + case buffered := <-ts.tupleBufferCh: + discardedTupleBytes = len(buffered) + default: + } + var streamedTupleBytes int if ts.streamedBytes > len(ts.headerData) { streamedTupleBytes = ts.streamedBytes - len(ts.headerData) } if streamedTupleBytes > 0 { - discardedTupleBytes = len(ts.streamData) - } - if streamedTupleBytes > 0 || discardedTupleBytes > 0 { - ts.logger.Debugw("Terminate tuple streamer.", - "Provided bytes", streamedTupleBytes, "Discarded bytes", discardedTupleBytes) + discardedTupleBytes += len(ts.streamData) + } else { + discardedTupleBytes += len(ts.streamData) - len(ts.headerData) + ts.streamedBytes } + ts.logger.Debugw("Terminate tuple streamer", + "Provided bytes", streamedTupleBytes, "Discarded bytes", discardedTupleBytes) _ = ts.pipeWriter.Close() wg.Done() }() + pipeWriterReady := make(chan struct{}) go func() { err := ts.pipeWriter.Open() if err != nil { @@ -196,58 +224,108 @@ func (ts *CastorTupleStreamer) StartStreamTuples(terminateCh chan struct{}, errC } close(pipeWriterReady) }() - for { - select { - case <-terminateCh: - return - case <-pipeWriterReady: - err := ts.writeDataToPipe() - if err != nil { - if errors.Is(err, syscall.EPIPE) { - // pipe error (most likely "broken pipe") is considered to indicate the computation to be - // terminated and therefore won't cause the tuple streamer to an errant termination . 
In case - // the pipe was closed because of a computation error this will be reported by the MPC execution - // itself - ts.logger.Debugw("received pipe error for tuple stream", "Error", err) - return - } - errCh <- err - return - } - } + select { + case <-terminateCh: + return + case <-pipeWriterReady: + } + streamerErrorCh := make(chan error, 1) + jobsDoneCh := make(chan struct{}, 2) + go ts.bufferData(terminateCh, streamerErrorCh, jobsDoneCh) + go ts.writeDataToPipe(terminateCh, jobsDoneCh) + select { + case <-terminateCh: + case <-jobsDoneCh: + case err := <-streamerErrorCh: + errCh <- err } + return }() } -// writeDataToPipe pulls more tuples from Castor if required and the data to the pipe -func (ts *CastorTupleStreamer) writeDataToPipe() error { - if ts.streamData == nil || len(ts.streamData) == 0 { - requestID := uuid.NewMD5(ts.baseRequestID, []byte(strconv.Itoa(ts.requestCycle))) - ts.requestCycle++ - tupleList, err := ts.castorClient.GetTuples(ts.stockSize, ts.tupleType, requestID) - if err != nil { - return err - } - ts.logger.Debugw("Fetched new tuples from Castor", "RequestID", requestID) - ts.streamData, err = ts.tupleListToByteArray(tupleList) - if err != nil { - return fmt.Errorf("error parsing received tuple list: %v", err) +func (ts *CastorTupleStreamer) bufferData(terminateCh chan struct{}, streamerErrorCh chan error, doneCh chan struct{}) { + defer func() { + ts.logger.Debug("Buffer job done") + doneCh <- struct{}{} + }() + for { + select { + case <-terminateCh: + return + case <-ts.streamerDoneCh: + return + case <-ts.fetchTuplesCh: + ts.bufferLckCh <- struct{}{} + tupleData, err := ts.getTupleData() + if err == nil { + ts.tupleBufferCh <- tupleData + } + <-ts.bufferLckCh + if err != nil { + ts.logger.Debugf("Error fetching tuples: %v", err) + streamerErrorCh <- err + return + } } } - c, err := ts.pipeWriter.Write(ts.streamData) +} + +func (ts *CastorTupleStreamer) getTupleData() ([]byte, error) { + requestID := uuid.NewMD5(ts.baseRequestID, 
[]byte(strconv.Itoa(ts.requestCycle))) + ts.requestCycle++ + tupleList, err := ts.castorClient.GetTuples(ts.stockSize, ts.tupleType, requestID) + if err != nil { + return nil, err + } + ts.logger.Debugw("Fetched new tuples from Castor", "RequestID", requestID) + tupleData, err := ts.tupleListToByteArray(tupleList) if err != nil { - // if pipe error occurred it is most likely a "broken pipe" indicating file has been closed on opposing side - // tuple streamer will terminate in this case as computation is considered terminated and tuple streamer is no - // longer required. - // in all other cases the tuple streamer will retry - if errors.Is(err, syscall.EPIPE) { - return err + return nil, fmt.Errorf("error parsing received tuple list: %v", err) + } + return tupleData, nil +} + +// writeDataToPipe pulls more tuples from Castor if required and writes the data to the pipe +func (ts *CastorTupleStreamer) writeDataToPipe(terminateCh chan struct{}, doneCh chan struct{}) { + defer func() { + ts.logger.Debug("Write job done") + doneCh <- struct{}{} + }() + for { + select { + case <-terminateCh: + return + case <-ts.streamerDoneCh: + return + default: + if ts.streamData == nil || len(ts.streamData) == 0 { + select { + case <-terminateCh: + return + case <-ts.streamerDoneCh: + return + case tuples := <-ts.tupleBufferCh: + ts.streamData = append(ts.streamData, tuples...) + ts.fetchTuplesCh <- struct{}{} + } + } + c, err := ts.pipeWriter.Write(ts.streamData) + ts.streamData = ts.streamData[c:] + ts.streamedBytes += c + if err != nil { + // pipe error (most likely "broken pipe") is considered to indicate the computation to be + // finished and therefore terminate the streamer, but won't cause the tuple streamer to an errant + // termination . 
In case the pipe was closed because of a computation error this will be reported by + // the MPC execution itself + // in all other cases the tuple streamer will retry + if errors.Is(err, syscall.EPIPE) { + ts.logger.Debugw("Received pipe error for tuple stream", "Error", err) + return + } + ts.logger.Debugf("Pipe broke while streaming: %v", err.Error()) + } } - ts.logger.Debugw("Pipe broke while streaming: %v", err.Error()) } - ts.streamData = ts.streamData[c:] - ts.streamedBytes += c - return nil } // tupleListToByteArray converts a given list of tuple to a byte array diff --git a/pkg/ephemeral/io/tuple_streamer_test.go b/pkg/ephemeral/io/tuple_streamer_test.go index 1552e0cf..7ec5ab9b 100644 --- a/pkg/ephemeral/io/tuple_streamer_test.go +++ b/pkg/ephemeral/io/tuple_streamer_test.go @@ -1,5 +1,5 @@ // -// Copyright (c) 2022 - for information on the respective copyright owner +// Copyright (c) 2022-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -32,11 +32,11 @@ import ( const tupleStock = 42 var _ = Describe("Tuple Streamer", func() { - + var defaultWriteDeadline = 5 * time.Second Context("when StartStreamTuples", func() { var ( fcpw *FakeConsumingPipeWriter - ts CastorTupleStreamer + ts *CastorTupleStreamer cc *FakeCastorClient terminate chan struct{} errCh chan error @@ -50,7 +50,7 @@ var _ = Describe("Tuple Streamer", func() { isClosed: false, } cc = &FakeCastorClient{} - ts = CastorTupleStreamer{ + ts = &CastorTupleStreamer{ logger: zap.NewNop().Sugar(), pipeWriter: fcpw, tupleType: castor.BitGfp, @@ -63,7 +63,7 @@ var _ = Describe("Tuple Streamer", func() { expectedError := errors.New("expected error") fcpw.openError = expectedError wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) var err error select { case err = <-errCh: @@ -81,7 +81,7 @@ var _ = Describe("Tuple Streamer", func() { fbwpw := &FakeBlockingWritePipeWriter{} ts.pipeWriter = fbwpw wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) close(terminate) wg.Wait() close(errCh) @@ -91,14 +91,13 @@ var _ = Describe("Tuple Streamer", func() { }) }) Context("when streamData is empty", func() { - ts.streamData = make([]byte, 0) Context("when castor client returns an error", func() { BeforeEach(func() { ts.castorClient = &BrokenDownloadCastorClient{} }) It("writes error to error channel and stops", func() { wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) wg.Wait() close(terminate) close(errCh) @@ -117,7 +116,7 @@ var _ = Describe("Tuple Streamer", func() { tuples[0] = castor.Tuple{Shares: shares} cc.TupleList = &castor.TupleList{Tuples: tuples} wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) wg.Wait() close(terminate) close(errCh) @@ -138,7 +137,7 @@ var _ = Describe("Tuple Streamer", func() { 
tuples[0] = castor.Tuple{Shares: shares} cc.TupleList = &castor.TupleList{Tuples: tuples} wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) wg.Wait() close(terminate) close(errCh) @@ -171,7 +170,7 @@ var _ = Describe("Tuple Streamer", func() { }) It("return without error", func() { wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) wg.Wait() close(terminate) close(errCh) @@ -187,7 +186,7 @@ var _ = Describe("Tuple Streamer", func() { }) It("update fields accordingly", func() { wg.Add(1) - go ts.StartStreamTuples(terminate, errCh, wg) + ts.StartStreamTuples(terminate, errCh, wg) wg.Wait() close(terminate) close(errCh) diff --git a/pkg/ephemeral/network/ping_aware_target.go b/pkg/ephemeral/network/ping_aware_target.go index fcdafcee..daa0b059 100644 --- a/pkg/ephemeral/network/ping_aware_target.go +++ b/pkg/ephemeral/network/ping_aware_target.go @@ -36,7 +36,7 @@ func (n *PingAwareTarget) HandleConn(conn net.Conn) { br := bufio.NewReader(conn) ping, err := n.read(br, PingMsg) if err != nil { - n.Logger.Errorf("error while handling a ping message: %s", err) + n.Logger.Errorf("Error while handling a ping message: %s", err) conn.Close() return } @@ -46,7 +46,7 @@ func (n *PingAwareTarget) HandleConn(conn net.Conn) { pong := []byte(PongMsg) _, err := conn.Write(pong) if err != nil { - n.Logger.Errorf("error while writing pong message: %s", err) + n.Logger.Errorf("Error while writing pong message: %s", err) } n.Logger.Debug("Responded with pong message") return @@ -73,7 +73,7 @@ func (n *PingAwareTarget) WaitUntilStarted(address, port string, timeout time.Du defer func() { err := conn.Close() if err != nil { - n.Logger.Errorf("error closing ping connection", err) + n.Logger.Errorf("Error closing ping connection: %v", err) } n.Logger.Debug("Closing the ping connection") }() diff --git a/pkg/ephemeral/network/proxy.go b/pkg/ephemeral/network/proxy.go index 
f010ffe1..8d8fb5b6 100644 --- a/pkg/ephemeral/network/proxy.go +++ b/pkg/ephemeral/network/proxy.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -33,7 +33,7 @@ func NewProxy(lg *zap.SugaredLogger, conf *SPDZEngineTypedConfig, checker Networ return &Proxy{ logger: lg, retrySleep: conf.RetrySleep, - retryTimeout: conf.RetryTimeout, + retryTimeout: conf.NetworkEstablishTimeout, tcpChecker: checker, } } @@ -47,6 +47,9 @@ type Proxy struct { proxy *tcpproxy.Proxy ctx *CtxConfig tcpChecker NetworkChecker + // activeProxyIndicatorCh indicates that proxy was successfully started (see [tcpproxy.Proxy.Start]) if the channel + // is closed. + activeProxyIndicatorCh chan struct{} } // Run start the tcpproxy, makes sure it has started by means of a ping. 
@@ -67,11 +70,17 @@ func (p *Proxy) Run(ctx *CtxConfig, errCh chan error) error { p.logger.Infow("Starting TCP Proxy", GameID, ctx.Act.GameID) go func() { - err := p.proxy.Run() + defer close(errCh) + p.activeProxyIndicatorCh = make(chan struct{}) + err := p.proxy.Start() + if err == nil { + close(p.activeProxyIndicatorCh) + err = p.proxy.Wait() + } errCh <- err }() dialer := RetryingDialer(p.retrySleep, p.retryTimeout, func() { - p.logger.Debugw(fmt.Sprintf("retrying to ping after %s", p.retrySleep), GameID, p.ctx.Act.GameID) + p.logger.Debugw(fmt.Sprintf("Retrying to ping after %s", p.retrySleep), GameID, p.ctx.Act.GameID) }) for i, pat := range pats { @@ -147,7 +156,11 @@ func (p *Proxy) checkTCPConnectionToPeer(config *ProxyConfig) error { func (p *Proxy) Stop() { p.logger.Debugw("Waiting for TCP proxy to stop", GameID, p.ctx.Act.GameID) p.proxy.Close() - p.proxy.Wait() + select { + case <-p.activeProxyIndicatorCh: + p.proxy.Wait() + default: + } p.logger.Debugw("Stopped the TCP proxy", GameID, p.ctx.Act.GameID) } @@ -174,14 +187,27 @@ func RetryingDialer(sleep, timeout time.Duration, sideEffect func()) func(addr, } // RetryingDialerWithContext tries to establish a TCP connection to a socket until the timeout is reached or the context is cancelled. -func RetryingDialerWithContext(sleep, timeout time.Duration, sideEffect func()) func(ctx context.Context, addr, port string) (conn net.Conn, err error) { +func RetryingDialerWithContext(sleep time.Duration, timeout time.Duration, l *zap.SugaredLogger) func(ctx context.Context, addr, port string) (conn net.Conn, err error) { + return RetryingDialerWithContextAndLogTimeout(sleep, timeout, l, 5*time.Second) +} + +// RetryingDialerWithContextAndLogTimeout uses an individual log message timer. 
+// +// Used for testing +func RetryingDialerWithContextAndLogTimeout(sleep time.Duration, timeout time.Duration, l *zap.SugaredLogger, logPeriod time.Duration) func(ctx context.Context, addr, port string) (conn net.Conn, err error) { return func(ctx context.Context, addr, port string) (conn net.Conn, err error) { started := time.Now() + logTicker := time.NewTicker(logPeriod) + connectTimer := time.NewTimer(0) + defer logTicker.Stop() + defer connectTimer.Stop() for { select { case <-ctx.Done(): - return conn, errors.New("context cancelled") - default: + return conn, errors.New(fmt.Sprintf("cancelled connection attempt for %s:%s - context done", addr, port)) + case <-logTicker.C: + l.Debugf("Connection attempt to %s:%s active for %s", addr, port, time.Now().Sub(started)) + case <-connectTimer.C: var tcpAddr *net.TCPAddr tcpAddr, err = net.ResolveTCPAddr("tcp", addr+":"+port) if err != nil { @@ -189,10 +215,15 @@ func RetryingDialerWithContext(sleep, timeout time.Duration, sideEffect func()) } conn, err = net.DialTCP("tcp", nil, tcpAddr) if err != nil && time.Now().Sub(started) < timeout { - sideEffect() - time.Sleep(sleep) + connectTimer.Reset(sleep) continue } + if conn.(*net.TCPConn) != nil { + if err := conn.(*net.TCPConn).SetKeepAlive(true); err != nil { + return nil, err + } + } + l.Debugw("Dialer done", "Conn", conn, "Err", err) return conn, err } } diff --git a/pkg/ephemeral/network/proxy_test.go b/pkg/ephemeral/network/proxy_test.go index c8ac4742..e503619b 100644 --- a/pkg/ephemeral/network/proxy_test.go +++ b/pkg/ephemeral/network/proxy_test.go @@ -1,11 +1,15 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 package network import ( + "context" . 
"github.com/carbynestack/ephemeral/pkg/types" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + "strings" "time" . "github.com/onsi/ginkgo" @@ -20,9 +24,9 @@ var _ = Describe("Proxy", func() { logger := zap.NewNop().Sugar() tcpChecker := &NoopChecker{} spdzConfig := &SPDZEngineTypedConfig{ - PlayerID: 0, - RetrySleep: 50 * time.Millisecond, - RetryTimeout: 10 * time.Second, + PlayerID: 0, + RetrySleep: 50 * time.Millisecond, + NetworkEstablishTimeout: 10 * time.Second, } p := NewProxy(logger, spdzConfig, tcpChecker) ctx := &CtxConfig{ @@ -56,5 +60,51 @@ var _ = Describe("Proxy", func() { Expect(counter).To(BeNumerically(">", 0)) Expect(err).To(HaveOccurred()) }) + It("fails if address cannot be resolved", func() { + dialer := RetryingDialer(1*time.Millisecond, 50*time.Millisecond, func() {}) + conn, err := dialer("invaid", "5555") + Expect(conn).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("lookup invaid: no such host")) + }) + }) + Context("when using the retrying dialer with context", func() { + It("fails if address cannot be resolved", func() { + ctx := context.TODO() + logger := zap.NewNop().Sugar() + dialer := RetryingDialerWithContext(1*time.Millisecond, 50*time.Millisecond, logger) + conn, err := dialer(ctx, "invaid", "5555") + Expect(conn).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("lookup invaid: no such host")) + }) + It("returns error when context is done", func() { + logger := zap.NewNop().Sugar() + ctx, cancel := context.WithCancel(context.TODO()) + cancel() + dialer := RetryingDialerWithContext(0, 0, logger) + conn, err := dialer(ctx, "localhost", "5555") + Expect(conn).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("cancelled connection attempt for localhost:5555 - context done")) + }) + }) + Context("when using the retrying dialer with context", func() { + It("periodically logs status messages", func() { + core, recorded := 
observer.New(zapcore.DebugLevel) + logger := zap.New(core).Sugar() + ctx := context.TODO() + dialer := RetryingDialerWithContextAndLogTimeout(100*time.Millisecond, 50*time.Millisecond, logger, 5*time.Millisecond) + conn, err := dialer(ctx, "localhost", "5555") + Expect(conn).To(BeNil()) + Expect(err).To(HaveOccurred()) + var logs []string + for _, l := range recorded.All() { + if strings.HasPrefix(l.Entry.Message, "Connection attempt") { + logs = append(logs, l.Message) + } + } + Expect(recorded.Len() > 1).To(BeTrue()) + }) }) }) diff --git a/pkg/ephemeral/network/tcpchecker.go b/pkg/ephemeral/network/tcpchecker.go index 994e4c4c..0a30e29e 100644 --- a/pkg/ephemeral/network/tcpchecker.go +++ b/pkg/ephemeral/network/tcpchecker.go @@ -78,12 +78,12 @@ func (t *TCPChecker) tryToConnect(host, port string) bool { }() conn, err = net.DialTimeout("tcp", host+":"+port, t.conf.DialTimeout) if err != nil { - t.conf.Logger.Debugf("error getting tcp connection %s", err.Error()) + t.conf.Logger.Debugf("Error getting tcp connection %s", err.Error()) return false } err = conn.SetReadDeadline(time.Now().Add(t.conf.DialTimeout)) if err != nil { - t.conf.Logger.Errorf("error setting read deadline, %s\n", err.Error()) + t.conf.Logger.Errorf("Error setting read deadline, %s\n", err.Error()) return false } @@ -113,5 +113,5 @@ func (t *TCPChecker) tryToConnect(host, port string) bool { func (t *TCPChecker) sleepAndIncrement() { t.retries++ time.Sleep(t.conf.DialTimeout) - t.conf.Logger.Debugf("retrying TCPCheck after %s", t.conf.DialTimeout) + t.conf.Logger.Debugf("Retrying TCPCheck after %s", t.conf.DialTimeout) } diff --git a/pkg/ephemeral/player.go b/pkg/ephemeral/player.go index b794de27..de7156b9 100644 --- a/pkg/ephemeral/player.go +++ b/pkg/ephemeral/player.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository 
https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -6,11 +6,14 @@ package ephemeral import ( "context" + "errors" + "fmt" d "github.com/carbynestack/ephemeral/pkg/discovery" "github.com/carbynestack/ephemeral/pkg/discovery/fsm" pb "github.com/carbynestack/ephemeral/pkg/discovery/transport/proto" "github.com/carbynestack/ephemeral/pkg/ephemeral/network" . "github.com/carbynestack/ephemeral/pkg/types" + "strings" "time" mb "github.com/vardius/message-bus" @@ -32,25 +35,27 @@ type PlayerParams struct { } // NewPlayer returns an fsm based model of the MPC player. -func NewPlayer(ctx context.Context, bus mb.MessageBus, timeout time.Duration, me MPCEngine, playerParams *PlayerParams, logger *zap.SugaredLogger) (*Player1, error) { - call := NewCallbacker(bus, playerParams, logger) +func NewPlayer(ctx context.Context, bus mb.MessageBus, stateTimeout time.Duration, computationTimeout time.Duration, me MPCEngine, playerParams *PlayerParams, errCh chan error, logger *zap.SugaredLogger) (*Player1, error) { + call := NewCallbacker(bus, playerParams, errCh, logger) cbs := []*fsm.Callback{ fsm.AfterEnter(Registering).Do(call.sendPlayerReady()), fsm.AfterEnter(Playing).Do(call.playing(playerParams.Name, me)), fsm.AfterEnter(PlayerFinishedWithError).Do(call.finishWithError(playerParams.Name)), fsm.AfterEnter(PlayerFinishedWithSuccess).Do(call.finishWithSuccess(playerParams.Name)), fsm.AfterEnter(PlayerDone).Do(call.done()), + fsm.WhenStateTimeout().Do(call.finishWithError(playerParams.Name)), } trs := []*fsm.Transition{ fsm.WhenIn(Init).GotEvent(Register).GoTo(Registering), - fsm.WhenIn(Registering).GotEvent(PlayersReady).GoTo(Playing), + fsm.WhenIn(Registering).GotEvent(PlayersReady).GoTo(Playing).WithTimeout(computationTimeout), fsm.WhenIn(Playing).GotEvent(PlayerFinishedWithSuccess).GoTo(PlayerFinishedWithSuccess), fsm.WhenIn(Playing).GotEvent(PlayingError).GoTo(PlayerFinishedWithError), 
fsm.WhenInAnyState().GotEvent(GameError).GoTo(PlayerFinishedWithError), fsm.WhenInAnyState().GotEvent(PlayerDone).GoTo(PlayerDone), + fsm.WhenInAnyState().GotEvent(StateTimeoutError).GoTo(PlayerFinishedWithError), } callbacks, transitions := fsm.InitCallbacksAndTransitions(cbs, trs) - f, err := fsm.NewFSM(ctx, "Init", transitions, callbacks, timeout, logger) + f, err := fsm.NewFSM(ctx, "Init", transitions, callbacks, stateTimeout, logger) // We can only update publisher's FSM after fsm is created. call.pb.Fsm = f if err != nil { @@ -74,6 +79,7 @@ func NewPlayer(ctx context.Context, bus mb.MessageBus, timeout time.Duration, me me: me, params: playerParams, call: call, + errCh: errCh, logger: logger, ctx: ctx, }, nil @@ -81,7 +87,7 @@ func NewPlayer(ctx context.Context, bus mb.MessageBus, timeout time.Duration, me // AbstractPlayer is an interface of a player. type AbstractPlayer interface { - Init(errCh chan error) + Init() Stop() History() *fsm.History Bus() mb.MessageBus @@ -102,13 +108,13 @@ type Player1 struct { me MPCEngine params *PlayerParams call *Callbacker + errCh chan error ctx context.Context } // Init starts FSM and triggers the registration of the player. 
-func (p *Player1) Init(errCh chan error) { - go p.fsm.Run(errCh) - time.Sleep(500 * time.Millisecond) +func (p *Player1) Init() { + go p.fsm.Run(p.errCh) p.call.sendEvent(Register, p.name, struct{}{}) } @@ -134,10 +140,11 @@ func (p *Player1) PublishEvent(name, topic string, event *pb.Event) { } // NewCallbacker returns a new instance of callbacker -func NewCallbacker(bus mb.MessageBus, playerParams *PlayerParams, logger *zap.SugaredLogger) *Callbacker { +func NewCallbacker(bus mb.MessageBus, playerParams *PlayerParams, errCh chan error, logger *zap.SugaredLogger) *Callbacker { return &Callbacker{ pb: d.NewPublisher(bus), playerParams: playerParams, + errCh: errCh, logger: logger, } } @@ -146,10 +153,11 @@ func NewCallbacker(bus mb.MessageBus, playerParams *PlayerParams, logger *zap.Su type Callbacker struct { pb *d.Publisher playerParams *PlayerParams + errCh chan error logger *zap.SugaredLogger } -// registration forwards registeration request to the discovery service. +// registration forwards registration request to the discovery service. func (c *Callbacker) registration() func(e interface{}) error { return func(e interface{}) error { c.sendEvent(Register, DiscoveryTopic, e) @@ -165,13 +173,13 @@ func (c *Callbacker) sendPlayerReady() func(e interface{}) error { } } -// playing signals itself the state of the execution. +// playing triggers the MPC computation and signals itself the state of the execution. 
func (c *Callbacker) playing(id string, me MPCEngine) func(e interface{}) error { return func(e interface{}) error { ev := e.(*fsm.Event) err := me.Execute(ev.Meta.TransportMsg) if err != nil { - c.logger.Errorf("error during code execution: %v", err) + c.logger.Errorf("Error during code execution: %v", err) c.sendEvent(PlayingError, id, e) return nil } @@ -185,6 +193,21 @@ func (c *Callbacker) finishWithError(id string) func(e interface{}) error { return func(e interface{}) error { c.sendEvent(GameFinishedWithError, DiscoveryTopic, e) c.sendEvent(PlayerDone, id, e) + event := e.(*fsm.Event) + msg := fmt.Sprintf("game failed with error: %s", event.Name) + if event.Meta != nil && event.Meta.FSM != nil && event.Meta.FSM.History() != nil { + eventDetails := make([]string, len(event.Meta.FSM.History().GetEvents())) + for _, s := range event.Meta.FSM.History().GetStates() { + eventDetails = append(eventDetails, s) + } + msg = fmt.Sprintf("%s\n\tHistory: %s", msg, strings.Join(eventDetails, " -> ")) + } + err := errors.New(msg) + c.logger.Debugf("Player finished with error: %v", err) + select { + case c.errCh <- err: + default: + } return nil } } diff --git a/pkg/ephemeral/player_test.go b/pkg/ephemeral/player_test.go index 015de3c9..cabc1459 100644 --- a/pkg/ephemeral/player_test.go +++ b/pkg/ephemeral/player_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -51,7 +51,7 @@ var _ = Describe("Player", func() { Context("when game is successful", func() { It("notifies discovery and transitions to PlayerFinishedWithSuccess", func() { client := NewFakeDiscoveryClient(bus, id) - pl, _ := NewPlayer(ctx, bus, timeout, &me, params, logger) + pl, _ := NewPlayer(ctx, bus, timeout, timeout, &me, params, errCh, logger) client.Run() Assert(GameFinishedWithSuccess, pl, done, func(states []string) { Expect(states[0]).To(Equal(Init)) @@ -59,7 +59,7 @@ var _ = Describe("Player", func() { Expect(states[2]).To(Equal("Playing")) Expect(states[3]).To(Equal("PlayerFinishedWithSuccess")) }) - pl.Init(errCh) + pl.Init() WaitDoneOrTimeout(done) }) }) @@ -67,7 +67,7 @@ var _ = Describe("Player", func() { It("transitions to the PlayerDone state", func() { client := NewFakeDiscoveryClient(bus, id) me := BrokenSPDZEngine{} - pl, _ := NewPlayer(ctx, bus, timeout, &me, params, logger) + pl, _ := NewPlayer(ctx, bus, timeout, timeout, &me, params, errCh, logger) client.Run() Assert(PlayerDone, pl, done, func(states []string) { Expect(states[0]).To(Equal(Init)) @@ -76,7 +76,7 @@ var _ = Describe("Player", func() { Expect(states[3]).To(Equal(PlayerFinishedWithError)) Expect(states[4]).To(Equal(PlayerDone)) }, ServiceEventsTopic) - pl.Init(errCh) + pl.Init() WaitDoneOrTimeout(done) }) }) @@ -85,28 +85,28 @@ var _ = Describe("Player", func() { Context("in Registering state", func() { It("transitions to the PlayerDone state", func() { client := NewFakeBrokenDiscoveryClient(bus, id, false, false) - pl, _ := NewPlayer(ctx, bus, timeout, &me, params, logger) + pl, _ := NewPlayer(ctx, bus, timeout, timeout, &me, params, errCh, logger) client.Run() Assert(PlayerDone, pl, done, func(states []string) { Expect(states[0]).To(Equal(Init)) Expect(states[1]).To(Equal(Registering)) Expect(states[2]).To(Equal(PlayerFinishedWithError)) }, ServiceEventsTopic) - pl.Init(errCh) + pl.Init() WaitDoneOrTimeout(done) }) }) Context("in 
WaitPlayersReady state", func() { It("transitions to the PlayerFinishedWithError state", func() { client := NewFakeBrokenDiscoveryClient(bus, id, true, false) - pl, _ := NewPlayer(ctx, bus, timeout, &me, params, logger) + pl, _ := NewPlayer(ctx, bus, timeout, timeout, &me, params, errCh, logger) client.Run() Assert(GameFinishedWithError, pl, done, func(states []string) { Expect(states[0]).To(Equal(Init)) Expect(states[1]).To(Equal(Registering)) Expect(states[2]).To(Equal(PlayerFinishedWithError)) }) - pl.Init(errCh) + pl.Init() WaitDoneOrTimeout(done) }) }) diff --git a/pkg/ephemeral/server.go b/pkg/ephemeral/server.go index b9cbc7c8..2f7bf0a8 100644 --- a/pkg/ephemeral/server.go +++ b/pkg/ephemeral/server.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -10,7 +10,7 @@ import ( "encoding/base64" "encoding/json" "fmt" - "github.com/carbynestack/ephemeral/pkg/ephemeral/io" + "github.com/carbynestack/ephemeral/pkg/discovery/fsm" . "github.com/carbynestack/ephemeral/pkg/types" . "github.com/carbynestack/ephemeral/pkg/utils" "io/ioutil" @@ -36,12 +36,9 @@ const paramsMsg = "either secret params or amphora secret share UUIDs must be sp var ( // The number of parallel games that could run per container. - parallelGames = 1 - retryInterval = 100 * time.Millisecond - fsmStateTimeout = 20 * time.Second - defaultBusSize = 10000 - DoneTopic = "Done" - ctxConf = contextConf("contextConf") + parallelGames = 1 + defaultBusSize = 10000 + ctxConf = contextConf("contextConf") ) // NewServer returns a new server. 
@@ -155,6 +152,7 @@ func (s *Server) BodyFilter(next http.Handler) http.Handler { } con = context.WithValue(con, ctxConf, ctx) r := req.Clone(con) + s.logger.Debug("Bodyfilter handler done") next.ServeHTTP(writer, r) }) } @@ -165,9 +163,10 @@ func (s *Server) CompilationHandler(next http.Handler) http.Handler { conf, ok := req.Context().Value(ctxConf).(*CtxConfig) if !ok { writer.WriteHeader(http.StatusBadRequest) - s.logger.Error("no context config provided") + s.logger.Error("No context config provided") return } + s.logger.Debugf("Executing Compilation Handler: %v", conf.Act) // These channels initialized here, because they must be unique // for each incoming request. s.respCh = make(chan []byte) @@ -186,7 +185,7 @@ func (s *Server) CompilationHandler(next http.Handler) http.Handler { return } if compile { - s.logger.Infow("Compiling the application.", GameID, conf.Act.GameID) + s.logger.Infow("Compiling the application", GameID, conf.Act.GameID) err := s.compile(conf) if err != nil { msg := fmt.Sprintf("error compiling the code: %s\n", err) @@ -198,6 +197,7 @@ func (s *Server) CompilationHandler(next http.Handler) http.Handler { s.logger.Debugw("Finished compiling the application", GameID, conf.Act.GameID) } } + s.logger.Debug("Compilation handler done") next.ServeHTTP(writer, req) }) } @@ -206,31 +206,29 @@ func (s *Server) CompilationHandler(next http.Handler) http.Handler { func (s *Server) ActivationHandler(writer http.ResponseWriter, req *http.Request) { ctx := req.Context() ctxConfig := ctx.Value(ctxConf).(*CtxConfig) - activationContext := context.Background() - con, cancel := context.WithTimeout(activationContext, ctxConfig.Spdz.RetryTimeout) + con, cancel := context.WithTimeout(ctx, ctxConfig.Spdz.StateTimeout*3+ctxConfig.Spdz.ComputationTimeout) + defer cancel() + deadline, _ := con.Deadline() + s.logger.Debugw("Created Activation context", "Context", con, "Deadline", deadline) ctxConfig.Context = con pod, err := s.getPodName() if err != nil { 
writer.WriteHeader(http.StatusInternalServerError) - s.logger.Errorw(fmt.Sprintf("error retrieving pod name: %s", err), GameID, ctxConfig.Act.GameID) + s.logger.Errorw(fmt.Sprintf("Error retrieving pod name: %s", err), GameID, ctxConfig.Act.GameID) } s.logger.Debugf("Retrieved pod name %v", pod) spdz := NewSPDZWrapper(ctxConfig, s.respCh, s.execErrCh, s.logger, s.activate) - conf := &io.Config{ - Host: s.config.DiscoveryAddress, - Port: "8080", - } plIO := s.getPlayer(func() AbstractPlayerWithIO { - pl, err := NewPlayerWithIO(ctxConfig, conf, pod, spdz, s.errCh, s.logger) + pl, err := NewPlayerWithIO(ctxConfig, &s.config.DiscoveryConfig, pod, spdz, s.config.StateTimeout, s.config.ComputationTimeout, s.errCh, s.logger) if err != nil { - cancel() - s.logger.Error(err) + s.logger.Errorf("Failed to initialize Player: %v", err) } return pl }) plIO.Start() + select { case stdout := <-s.respCh: writer.WriteHeader(http.StatusOK) @@ -245,13 +243,13 @@ func (s *Server) ActivationHandler(writer http.ResponseWriter, req *http.Request writer.WriteHeader(http.StatusInternalServerError) writer.Write([]byte(msg)) s.logger.Errorw(msg, GameID, ctxConfig.Act.GameID) - case <-time.After(ctxConfig.Spdz.RetryTimeout): - msg := fmt.Sprintf("timeout during MPC execution") + case <-con.Done(): + msg := fmt.Sprintf("timeout during activation procedure") writer.WriteHeader(http.StatusInternalServerError) writer.Write([]byte(msg)) - s.logger.Errorw(msg, GameID, ctxConfig.Act.GameID) + s.logger.Errorw(msg, GameID, ctxConfig.Act.GameID, "FSM History", plIO.History()) } - cancel() + s.logger.Debug("Activation finalized") } // getPlayer is main purpose to test activation handler using a custom PlayerWithIO @@ -267,10 +265,11 @@ func (s *Server) getPlayer(initializer func() AbstractPlayerWithIO) AbstractPlay // AbstractPlayerWithIO is an interface type for a PlayerWithIO. 
type AbstractPlayerWithIO interface { Start() + History() *fsm.History } // NewPlayerWithIO returns a new instance of PlayerWithIO. -func NewPlayerWithIO(ctx *CtxConfig, conf *io.Config, pod string, spdz MPCEngine, errCh chan error, logger *zap.SugaredLogger) (*PlayerWithIO, error) { +func NewPlayerWithIO(ctx *CtxConfig, dcConf *DiscoveryClientTypedConfig, pod string, spdz MPCEngine, stateTimeout time.Duration, computationTimeout time.Duration, errCh chan error, logger *zap.SugaredLogger) (*PlayerWithIO, error) { bus := mb.New(defaultBusSize) name := NewTopicFromPlayerID(ctx) @@ -283,7 +282,7 @@ func NewPlayerWithIO(ctx *CtxConfig, conf *io.Config, pod string, spdz MPCEngine GameID: ctx.Act.GameID, Name: name, } - pl, _ := NewPlayer(ctx.Context, bus, fsmStateTimeout, spdz, params, logger) + pl, _ := NewPlayer(ctx.Context, bus, stateTimeout, computationTimeout, spdz, params, errCh, logger) wires := &Wires{ In: make(chan *pb.Event, 1), @@ -300,7 +299,7 @@ func NewPlayerWithIO(ctx *CtxConfig, conf *io.Config, pod string, spdz MPCEngine } forwarder := NewForwarder(fConf) - cl, err := NewTransportClientFromDiverseConfigs(conf, ctx, fsmStateTimeout, logger, wires) + cl, err := NewTransportClientFromDiverseConfigs(dcConf, ctx, logger, wires) if err != nil { return nil, err } @@ -331,7 +330,12 @@ func (p *PlayerWithIO) Start() { } dc := pb.NewDiscoveryClient(conn) go p.Client.Run(dc) - p.Player.Init(p.Wires.Err) + p.Player.Init() +} + +// History returns the [fsm.History] of the game's statemachine. +func (p *PlayerWithIO) History() *fsm.History { + return p.Player.History() } func (s *Server) getPodName() (string, error) { @@ -378,18 +382,18 @@ type Wires struct { } // NewTransportClientFromDiverseConfigs returns a new transport client. 
-func NewTransportClientFromDiverseConfigs(ioConfig *io.Config, ctx *CtxConfig, timeout time.Duration, logger *zap.SugaredLogger, ch *Wires) (*c.Client, error) { +func NewTransportClientFromDiverseConfigs(dcConf *DiscoveryClientTypedConfig, ctx *CtxConfig, logger *zap.SugaredLogger, ch *Wires) (*c.Client, error) { clientConf := &c.TransportClientConfig{ - In: ch.In, - Out: ch.Out, - ErrCh: ch.Err, - Host: ioConfig.Host, - Port: ioConfig.Port, - Logger: logger, - ConnID: ctx.Act.GameID, - EventScope: EventScopeSelf, - Timeout: timeout, - Context: ctx.Context, + In: ch.In, + Out: ch.Out, + ErrCh: ch.Err, + Host: dcConf.Host, + Port: dcConf.Port, + Logger: logger, + ConnID: ctx.Act.GameID, + EventScope: EventScopeSelf, + ConnectTimeout: dcConf.ConnectTimeout, + Context: ctx.Context, } cl, err := c.NewClient(clientConf) if err != nil { diff --git a/pkg/ephemeral/server_test.go b/pkg/ephemeral/server_test.go index 1ca2a9cf..578ca6b6 100644 --- a/pkg/ephemeral/server_test.go +++ b/pkg/ephemeral/server_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -9,13 +9,13 @@ import ( "context" "encoding/json" "errors" + "github.com/carbynestack/ephemeral/pkg/discovery/fsm" "time" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "go.uber.org/zap" - "github.com/carbynestack/ephemeral/pkg/ephemeral/io" . 
"github.com/carbynestack/ephemeral/pkg/types" "net/http" "net/http/httptest" @@ -27,6 +27,7 @@ var _ = Describe("Server", func() { act *Activation handler200 http.Handler rr *httptest.ResponseRecorder + config *SPDZEngineTypedConfig s *Server l *zap.SugaredLogger ) @@ -43,7 +44,12 @@ var _ = Describe("Server", func() { rr = httptest.NewRecorder() l = zap.NewNop().Sugar() - s = NewServer(func(*CtxConfig) error { return nil }, func(*CtxConfig) ([]byte, error) { return nil, nil }, l, &SPDZEngineTypedConfig{}) + config = &SPDZEngineTypedConfig{ + ComputationTimeout: 10 * time.Second, + StateTimeout: 10 * time.Second, + NetworkEstablishTimeout: 10 * time.Second, + } + s = NewServer(func(*CtxConfig) error { return nil }, func(*CtxConfig) ([]byte, error) { return nil, nil }, l, config) }) Context("when going through body filter", func() { @@ -265,7 +271,11 @@ var _ = Describe("Server", func() { GameID: gameID, }, Context: context.Background(), - Spdz: &SPDZEngineTypedConfig{}, + Spdz: &SPDZEngineTypedConfig{ + ComputationTimeout: 10 * time.Second, + StateTimeout: 10 * time.Second, + NetworkEstablishTimeout: 10 * time.Second, + }, } ctx := context.Background() ctx = context.WithValue(ctx, ctxConf, conf) @@ -306,13 +316,13 @@ var _ = Describe("Server", func() { Context("when the timeout is reached during the execution", func() { It("responds with a 500", func() { conf.Spdz = &SPDZEngineTypedConfig{ - RetryTimeout: 1 * time.Millisecond, + NetworkEstablishTimeout: 1 * time.Millisecond, } s.ActivationHandler(rr, req) code := rr.Code respBody := rr.Body.String() Expect(code).To(Equal(http.StatusInternalServerError)) - Expect(respBody).To(Equal("timeout during MPC execution")) + Expect(respBody).To(Equal("timeout during activation procedure")) }) }) }) @@ -320,16 +330,16 @@ var _ = Describe("Server", func() { }) Context("when getting the discovery client", func() { var ( - ioConf *io.Config - ctx *CtxConfig - timeout time.Duration - logger *zap.SugaredLogger - wr *Wires + 
dcConfig *DiscoveryClientTypedConfig + ctx *CtxConfig + logger *zap.SugaredLogger + wr *Wires ) BeforeEach(func() { - ioConf = &io.Config{ - Host: "host", - Port: "port", + dcConfig = &DiscoveryClientTypedConfig{ + Host: "host", + Port: "port", + ConnectTimeout: time.Second, } ctx = &CtxConfig{ Act: &Activation{ @@ -337,18 +347,17 @@ var _ = Describe("Server", func() { }, Context: context.TODO(), } - timeout = time.Second logger = zap.NewNop().Sugar() wr = &Wires{} }) It("succeeds when all required properties are set", func() { - cl, err := NewTransportClientFromDiverseConfigs(ioConf, ctx, timeout, logger, wr) + cl, err := NewTransportClientFromDiverseConfigs(dcConfig, ctx, logger, wr) Expect(err).NotTo(HaveOccurred()) Expect(cl).NotTo(BeNil()) }) It("returns an error when some client properties are missing", func() { - ioConf.Host = "" - cl, err := NewTransportClientFromDiverseConfigs(ioConf, ctx, timeout, logger, wr) + dcConfig.Host = "" + cl, err := NewTransportClientFromDiverseConfigs(dcConfig, ctx, logger, wr) Expect(err).To(HaveOccurred()) Expect(cl).To(BeNil()) }) @@ -399,15 +408,18 @@ var _ = Describe("Server", func() { GameID: gameID, }, } - conf := &io.Config{ - Host: "host", - Port: "port", + conf := &DiscoveryClientTypedConfig{ + Host: "host", + Port: "port", + ConnectTimeout: 0, } pod := "somePod" spdz := &SPDZWrapper{} - errCh := make(chan error) + stateTimeout := time.Second + computationTimeout := time.Second + errCh := make(chan error, 1) logger := zap.NewNop().Sugar() - pl, err := NewPlayerWithIO(ctx, conf, pod, spdz, errCh, logger) + pl, err := NewPlayerWithIO(ctx, conf, pod, spdz, stateTimeout, computationTimeout, errCh, logger) Expect(err).NotTo(HaveOccurred()) Expect(pl).NotTo(BeNil()) }) @@ -415,6 +427,23 @@ var _ = Describe("Server", func() { }) }) +var _ = Describe("PlayerWithIO", func() { + Context("when fetching state machine history", func() { + It("returns the player's state machine history", func() { + history := &fsm.History{} + 
history.AddEvent(&fsm.Event{ + GameID: "71b2a100-f3f6-11e9-81b4-2a2ae2dbcce4", + Name: "DummyEvent", + }) + fakePlayer := &FakePlayer{ + history: history, + } + p := &PlayerWithIO{Player: fakePlayer} + Expect(p.History()).To(Equal(history)) + }) + }) +}) + type FakePlayerWithIO struct { respCh chan []byte errCh chan error @@ -424,6 +453,10 @@ func (f *FakePlayerWithIO) Start() { return } +func (f *FakePlayerWithIO) History() *fsm.History { + return nil +} + func requestWithContext(path string, act *Activation) *http.Request { body, _ := json.Marshal(&act) req, _ := http.NewRequest("POST", path, bytes.NewReader(body)) diff --git a/pkg/ephemeral/spdz.go b/pkg/ephemeral/spdz.go index 82067b9e..0949c34f 100644 --- a/pkg/ephemeral/spdz.go +++ b/pkg/ephemeral/spdz.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. 
// // SPDX-License-Identifier: Apache-2.0 @@ -34,7 +34,6 @@ const ( appName = "mpc-program" baseDir = "/mp-spdz" ipFile = baseDir + "/ip-file" - timeout = 20 * time.Second tcpCheckerTimeout = 50 * time.Millisecond defaultPath = baseDir + "/Programs/Source/" + appName + ".mpc" defaultSchedulePath = baseDir + "/Programs/Schedules/" + appName + ".sch" @@ -135,7 +134,7 @@ func DefaultCastorTupleStreamerFactory(l *zap.SugaredLogger, tt castor.TupleType func NewSPDZEngine(logger *zap.SugaredLogger, cmder Executor, config *SPDZEngineTypedConfig) (*SPDZEngine, error) { c := &network.TCPCheckerConf{ DialTimeout: tcpCheckerTimeout, - RetryTimeout: timeout, + RetryTimeout: config.NetworkEstablishTimeout, Logger: logger, } feeder := NewAmphoraFeeder(logger, config) @@ -165,7 +164,6 @@ type SPDZEngine struct { logger *zap.SugaredLogger cmder Executor config *SPDZEngineTypedConfig - doneCh chan struct{} checker network.NetworkChecker feeder Feeder playerDataPaths map[castor.SPDZProtocol]string @@ -179,22 +177,15 @@ type SPDZEngine struct { // Activate starts a proxy, writes an IP file, start SPDZ execution, unpacks inputs parameters, sends them to the runtime and waits for the response. 
func (s *SPDZEngine) Activate(ctx *CtxConfig) ([]byte, error) { - errCh := make(chan error, 1) + proxyErrCh := make(chan error, 1) act := ctx.Act - err := s.proxy.Run(ctx, errCh) + err := s.proxy.Run(ctx, proxyErrCh) + defer s.proxy.Stop() if err != nil { msg := "error starting the tcp proxy" s.logger.Errorw(msg, GameID, act.GameID) return nil, fmt.Errorf("%s: %s", msg, err) } - defer func() { - select { - case err := <-errCh: - s.logger.Errorw(err.Error(), GameID, act.GameID) - default: - s.proxy.Stop() - } - }() err = s.writeIPFile(s.ipFile, proxyAddress, ctx.Spdz.PlayerCount) if err != nil { msg := "error due to writing to the ip file" @@ -202,27 +193,37 @@ func (s *SPDZEngine) Activate(ctx *CtxConfig) ([]byte, error) { return nil, fmt.Errorf("%s: %s", msg, err) } go s.startMPC(ctx) - + defer s.feeder.Close() feedPort := s.getFeedPort() + doneCh := make(chan struct{}) + var activationResult []byte = nil + var activationErr error = nil go func() { - select { - case <-ctx.Context.Done(): - s.logger.Debug("Closing the TCP socket connection - context cancelled") - _ = s.feeder.Close() - case <-time.After(s.config.RetryTimeout): - s.logger.Debug("Closing the TCP socket connection - retry timeout exceeded") - _ = s.feeder.Close() + defer close(doneCh) + // Read the secret shares either from Amphora or from the http request. + if len(act.AmphoraParams) > 0 { + activationResult, activationErr = s.feeder.LoadFromSecretStoreAndFeed(act, feedPort, ctx) + } else if len(act.SecretParams) > 0 { + activationResult, activationErr = s.feeder.LoadFromRequestAndFeed(act, feedPort, ctx) + } else { + activationErr = errors.New("no MPC parameters specified") } }() - // Read the secret shares either from Amphora or from the http request. 
- if len(act.AmphoraParams) > 0 { - return s.feeder.LoadFromSecretStoreAndFeed(act, feedPort, ctx) - } - if len(act.SecretParams) > 0 { - return s.feeder.LoadFromRequestAndFeed(act, feedPort, ctx) + select { + case <-doneCh: + if activationErr == nil { + s.logger.Debugw("Activation finished successful", GameID, act.GameID) + } else { + s.logger.Errorw("Activation finished with error", GameID, act.GameID, "Error", activationErr) + } + return activationResult, activationErr + case err := <-proxyErrCh: + s.logger.Errorw("Activation finished with proxy error", GameID, act.GameID, "ProxyError", err) + return nil, err + case <-ctx.Context.Done(): + s.logger.Debug("Stopping SPDZ activation - context closed") + return nil, errors.New("SPDZ activation cancelled due to closed context") } - // The line below should be never reached, since we check activations parameters in the request handlers. However, leaving it here for completeness. - return nil, errors.New("no MPC parameters specified") } func (s *SPDZEngine) getNumberOfThreads() (int, error) { @@ -294,10 +295,10 @@ func (s *SPDZEngine) startMPC(ctx *CtxConfig) { } for _, tt := range castor.SupportedTupleTypes { for thread := 0; thread < nThreads; thread++ { - s.logger.Debugw("Creating new tuple streamer", TupleType, tt, "Config", s.config, "Player-Data", s.playerDataPaths[tt.SpdzProtocol], GameID, gameUUID, "ThreadNr", thread) + s.logger.Debugw("Creating new tuple streamer", TupleType, tt, "TupleStock", s.config.TupleStock, "Player-Data", s.playerDataPaths[tt.SpdzProtocol], GameID, gameUUID, "ThreadNr", thread) streamer, err := s.streamerFactory(s.logger, tt, s.config, s.playerDataPaths[tt.SpdzProtocol], gameUUID, thread) if err != nil { - s.logger.Errorw("error when initializing tuple streamer", GameID, ctx.Act.GameID, TupleType, tt, "Error", err) + s.logger.Errorw("Error when initializing tuple streamer", GameID, ctx.Act.GameID, TupleType, tt, "Error", err) ctx.ErrCh <- err return } @@ -306,6 +307,7 @@ func (s 
*SPDZEngine) startMPC(ctx *CtxConfig) { } computationFinished := make(chan struct{}) terminateStreams := make(chan struct{}) + defer close(terminateStreams) streamErrCh := make(chan error, len(castor.SupportedTupleTypes)) for _, s := range tupleStreamers { wg.Add(1) @@ -316,7 +318,7 @@ func (s *SPDZEngine) startMPC(ctx *CtxConfig) { go func() { stdout, stderr, err := s.cmder.CallCMD(ctx.Context, command, s.baseDir) if err != nil { - s.logger.Errorw("error while executing the user code", GameID, ctx.Act.GameID, "StdErr", string(stderr), "StdOut", string(stdout), "error", err) + s.logger.Errorw("Error while executing the user code", GameID, ctx.Act.GameID, "StdErr", string(stderr), "StdOut", string(stdout), "error", err) err := fmt.Errorf("error while executing the user code: %v", err) ctx.ErrCh <- err } else { @@ -331,7 +333,6 @@ func (s *SPDZEngine) startMPC(ctx *CtxConfig) { s.logger.Error(error) ctx.ErrCh <- error } - close(terminateStreams) } func (s *SPDZEngine) writeIPFile(path string, addr string, parties int32) error { diff --git a/pkg/integration/discovery_ephemeral_integration.go b/pkg/integration/discovery_ephemeral_integration.go index c9f97ba3..e11d6293 100644 --- a/pkg/integration/discovery_ephemeral_integration.go +++ b/pkg/integration/discovery_ephemeral_integration.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -11,7 +11,6 @@ import ( pb "github.com/carbynestack/ephemeral/pkg/discovery/transport/proto" "github.com/carbynestack/ephemeral/pkg/discovery/transport/server" p "github.com/carbynestack/ephemeral/pkg/ephemeral" - "github.com/carbynestack/ephemeral/pkg/ephemeral/io" . 
"github.com/carbynestack/ephemeral/pkg/types" "time" @@ -33,9 +32,10 @@ func generateEphemeralIntegrationTestsWithPlayerCount(playerCount int) { Context("when connecting ephemeral to discovery", func() { It("finishes the game successfully", func() { port := "8080" - conf := &io.Config{ - Host: "localhost", - Port: port, + conf := &DiscoveryClientTypedConfig{ + Host: "localhost", + Port: port, + ConnectTimeout: 2 * time.Second, } logger := zap.NewNop().Sugar() doneCh := make(chan struct{}) @@ -43,9 +43,9 @@ func generateEphemeralIntegrationTestsWithPlayerCount(playerCount int) { doneCh: doneCh, } bus := mb.New(10000) - in := make(chan *pb.Event) - out := make(chan *pb.Event) - errCh := make(chan error) + in := make(chan *pb.Event, 1) + out := make(chan *pb.Event, 1) + errCh := make(chan error, playerCount) serverConf := &server.TransportConfig{ In: in, Out: out, @@ -56,11 +56,12 @@ func generateEphemeralIntegrationTestsWithPlayerCount(playerCount int) { tr := server.NewTransportServer(serverConf) pb := discovery.NewPublisher(bus) stateTimeout := 10 * time.Second + computationTimeout := 20 * time.Second n := &discovery.FakeNetworker{ FreePorts: []int32{30000, 30001, 30002, 30003, 30004, 30005}, } cl := &discovery.FakeDClient{} - s := discovery.NewServiceNG(bus, pb, stateTimeout, tr, n, frontendAddress, logger, ModeMaster, cl, playerCount) + s := discovery.NewServiceNG(bus, pb, stateTimeout, computationTimeout, tr, n, frontendAddress, logger, ModeMaster, cl, playerCount) defer s.Stop() go s.Start() s.WaitUntilReady(5 * time.Second) @@ -78,7 +79,7 @@ func generateEphemeralIntegrationTestsWithPlayerCount(playerCount int) { Context: context.TODO(), } pod := fmt.Sprintf("abc%d", i) - player, err := p.NewPlayerWithIO(ctxConf, conf, pod, spdz, errCh, logger) + player, err := p.NewPlayerWithIO(ctxConf, conf, pod, spdz, stateTimeout, computationTimeout, errCh, logger) Expect(err).NotTo(HaveOccurred()) players[i] = player } diff --git 
a/pkg/integration/discovery_master_slave_integration.go b/pkg/integration/discovery_master_slave_integration.go index c22947b6..62cb15c5 100644 --- a/pkg/integration/discovery_master_slave_integration.go +++ b/pkg/integration/discovery_master_slave_integration.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -87,6 +87,7 @@ func getDiscovery(port string, logger *zap.SugaredLogger, bus mb.MessageBus, fro tr := server.NewTransportServer(serverConf) pb := d.NewPublisher(bus) stateTimeout := 10 * time.Second + connectTimeout := 10 * time.Second n := &d.FakeNetworker{ FreePorts: []int32{30000, 30001, 30002}, } @@ -94,19 +95,19 @@ func getDiscovery(port string, logger *zap.SugaredLogger, bus mb.MessageBus, fro outClient := make(chan *proto.Event) clientConf := &c.TransportClientConfig{ - In: inClient, - Out: outClient, - ErrCh: errCh, - Host: "localhost", - Port: "8081", - Logger: logger, - ConnID: "abc", - EventScope: EventScopeAll, - Timeout: 10 * time.Second, - Context: context.TODO(), + In: inClient, + Out: outClient, + ErrCh: errCh, + Host: "localhost", + Port: "8081", + Logger: logger, + ConnID: "abc", + EventScope: EventScopeAll, + ConnectTimeout: 10 * time.Second, + Context: context.TODO(), } cl, _ := c.NewClient(clientConf) playerCount := 2 - s := d.NewServiceNG(bus, pb, stateTimeout, tr, n, frontend, logger, mode, cl, playerCount) + s := d.NewServiceNG(bus, pb, stateTimeout, connectTimeout, tr, n, frontend, logger, mode, cl, playerCount) return s } diff --git a/pkg/network-controller/controller/network/network_controller.go b/pkg/network-controller/controller/network/network_controller.go index 26f8cbbe..d09232fd 100644 --- a/pkg/network-controller/controller/network/network_controller.go +++ 
b/pkg/network-controller/controller/network/network_controller.go @@ -9,6 +9,7 @@ package network import ( "context" + "fmt" mpcv1alpha1 "github.com/carbynestack/ephemeral/pkg/network-controller/apis/mpc/v1alpha1" clientset "github.com/knative/pkg/client/clientset/versioned" corev1 "k8s.io/api/core/v1" @@ -29,7 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) -var log = logf.Log.WithName("controller_network") +var log = logf.ZapLogger(true).WithName("controller_network") var istioGW = "test" @@ -177,7 +178,7 @@ func (r *ReconcileNetwork) Reconcile(request reconcile.Request) (reconcile.Resul gatewayName := gatewayName(instance.Name) _, err = r.sharedClientSet.NetworkingV1alpha3().Gateways(request.Namespace).Get(gatewayName, metav1.GetOptions{}) if err != nil && errors.IsNotFound(err) { - reqLogger.Info("Creating a new gateway ", gatewayName) + reqLogger.Info(fmt.Sprintf("Creating a new gateway \"%s\"", gatewayName)) _, err := r.sharedClientSet.NetworkingV1alpha3().Gateways(request.Namespace).Create(gw) if err != nil { reqLogger.Error(err, "not able to create the gateway") diff --git a/pkg/types/consts.go b/pkg/types/consts.go index 805f23d2..5f04d43f 100644 --- a/pkg/types/consts.go +++ b/pkg/types/consts.go @@ -1,11 +1,9 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 package types -import "time" - const ( // DiscoveryServiceStarted indicates the discovery service has started and ready to processIn events. DiscoveryServiceStarted = "DiscoveryServiceStarted" @@ -17,8 +15,6 @@ const ( ClientOutgoingEventsTopic = "clientOutgoingEvents" MasterOutgoingEventsTopic = "masterOutgoingEvents" DiscoveryTopic = "discovery" - // TODO: read this param from the config. 
- Timeout = 20 * time.Second Init = "Init" Registering = "Registering" diff --git a/pkg/types/types.go b/pkg/types/types.go index 475c9d32..03af0659 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021 - for information on the respective copyright owner +// Copyright (c) 2021-2023 - for information on the respective copyright owner // see the NOTICE file and/or the repository https://github.com/carbynestack/ephemeral. // // SPDX-License-Identifier: Apache-2.0 @@ -29,17 +29,34 @@ type DiscoveryClient interface { GetOut() chan *pb.Event } -// DiscoveryConfig represents the condig of discovery service. +// DiscoveryConfig represents the config of discovery service. type DiscoveryConfig struct { - FrontendURL string `json:"frontendURL"` - MasterHost string `json:"masterHost"` - MasterPort string `json:"masterPort"` - Slave bool `json:"slave"` - StateTimeout string `json:"stateTimeout"` - Port string `json:"port"` - BusSize int `json:"busSize"` - PortRange string `json:"portRange"` - PlayerCount int `json:"playerCount"` + FrontendURL string `json:"frontendURL"` + MasterHost string `json:"masterHost"` + MasterPort string `json:"masterPort"` + Slave bool `json:"slave"` + StateTimeout string `json:"stateTimeout"` + ComputationTimeout string `json:"computationTimeout"` + ConnectTimeout string `json:"connectTimeout"` + Port string `json:"port"` + BusSize int `json:"busSize"` + PortRange string `json:"portRange"` + PlayerCount int `json:"playerCount"` +} + +// DiscoveryTypedConfig reflects DiscoveryConfig, but it contains the real property types +type DiscoveryTypedConfig struct { + FrontendURL string + MasterHost string + MasterPort string + Slave bool + StateTimeout time.Duration + ComputationTimeout time.Duration + ConnectTimeout time.Duration + Port string + BusSize int + PortRange string + PlayerCount int } // Activation is an object that is received as an input from the Ephemeral client. 
@@ -69,24 +86,26 @@ type CtxConfig struct { // SPDZEngineConfig is the VPC specific configuration. type SPDZEngineConfig struct { - RetrySleep string `json:"retrySleep"` - RetryTimeout string `json:"retryTimeout"` - Prime string `json:"prime"` - RInv string `json:"rInv"` - GfpMacKey string `json:"gfpMacKey"` - Gf2nMacKey string `json:"gf2nMacKey"` - Gf2nBitLength int32 `json:"gf2nBitLength"` + RetrySleep string `json:"retrySleep"` + NetworkEstablishTimeout string `json:"networkEstablishTimeout"` + Prime string `json:"prime"` + RInv string `json:"rInv"` + GfpMacKey string `json:"gfpMacKey"` + Gf2nMacKey string `json:"gf2nMacKey"` + Gf2nBitLength int32 `json:"gf2nBitLength"` // Gf2nStorageSize represents the size in bytes for each gf2n element e.g. depending on the 'USE_GF2N_LONG' flag // being set when compiling SPDZ where storage size is 16 for USE_GF2N_LONG=1, or 8 if set to 0 - Gf2nStorageSize int32 `json:"gf2nStorageSize"` - PrepFolder string `json:"prepFolder"` - AmphoraConfig AmphoraConfig `json:"amphoraConfig"` - CastorConfig CastorConfig `json:"castorConfig"` - FrontendURL string `json:"frontendURL"` - PlayerID int32 `json:"playerID"` - PlayerCount int32 `json:"playerCount"` - MaxBulkSize int32 `json:"maxBulkSize"` - DiscoveryAddress string `json:"discoveryAddress"` + Gf2nStorageSize int32 `json:"gf2nStorageSize"` + PrepFolder string `json:"prepFolder"` + AmphoraConfig AmphoraConfig `json:"amphoraConfig"` + CastorConfig CastorConfig `json:"castorConfig"` + FrontendURL string `json:"frontendURL"` + PlayerID int32 `json:"playerID"` + PlayerCount int32 `json:"playerCount"` + MaxBulkSize int32 `json:"maxBulkSize"` + DiscoveryConfig DiscoveryClientConfig `json:"discoveryConfig"` + StateTimeout string `json:"stateTimeout"` + ComputationTimeout string `json:"computationTimeout"` } // AmphoraConfig specifies the amphora host parameters. 
@@ -104,6 +123,20 @@ type CastorConfig struct { TupleStock int32 `json:"tupleStock"` } +// Config contains TCP connection properties of Carrier. +type DiscoveryClientConfig struct { + Port string `json:"port"` + Host string `json:"host"` + ConnectTimeout string `json:"connectTimeout"` +} + +// DiscoveryClientTypedConfig reflects DiscoveryClientConfig, but it contains the real property types. +type DiscoveryClientTypedConfig struct { + Port string + Host string + ConnectTimeout time.Duration +} + // OutputConfig defines how the output of the app execution is treated. type OutputConfig struct { Type string `json:"type"` @@ -112,29 +145,23 @@ type OutputConfig struct { // SPDZEngineTypedConfig reflects SPDZEngineConfig, but it contains the real property types. // We need this type, since the default json decoder doesn't know how to deserialize big.Int. type SPDZEngineTypedConfig struct { - RetrySleep time.Duration - RetryTimeout time.Duration - Prime big.Int - RInv big.Int - GfpMacKey big.Int - Gf2nMacKey string - Gf2nBitLength int32 - Gf2nStorageSize int32 - PrepFolder string - AmphoraClient amphora.AbstractClient - CastorClient castor.AbstractClient - TupleStock int32 - PlayerID int32 - PlayerCount int32 - FrontendURL string - MaxBulkSize int32 - DiscoveryAddress string + RetrySleep time.Duration + NetworkEstablishTimeout time.Duration + Prime big.Int + RInv big.Int + GfpMacKey big.Int + Gf2nMacKey string + Gf2nBitLength int32 + Gf2nStorageSize int32 + PrepFolder string + AmphoraClient amphora.AbstractClient + CastorClient castor.AbstractClient + TupleStock int32 + PlayerID int32 + PlayerCount int32 + FrontendURL string + MaxBulkSize int32 + DiscoveryConfig DiscoveryClientTypedConfig + StateTimeout time.Duration + ComputationTimeout time.Duration } - -type contextKey string - -var ( - ActCtx = contextKey("activation") - SpdzCtx = contextKey("spdz") - ProxyCtx = contextKey("proxy") -)