diff --git a/dev/provision-netglue.sh b/dev/provision-netglue.sh index e3c19dfe..bde019c3 100644 --- a/dev/provision-netglue.sh +++ b/dev/provision-netglue.sh @@ -64,6 +64,9 @@ rm -f crictl-$VERSION-linux-$ARCH.tar.gz cp ./10-ignore.link /etc/systemd/network/10-ignore.link systemctl restart systemd-networkd +groupadd platformd -g 9012 +useradd platformd -u 9012 -g 9012 + crictl pull docker.io/nginx:stable-alpine-slim pod=$(crictl -t 1m runp pod.json) ctr=$(crictl -t 1m create $pod ctr.json pod.json) diff --git a/platformd/server.go b/platformd/server.go index c9a0becc..dfa457de 100644 --- a/platformd/server.go +++ b/platformd/server.go @@ -9,6 +9,7 @@ import ( "net/netip" "os" "path/filepath" + "sync" "time" "github.com/envoyproxy/go-control-plane/pkg/cache/v3" @@ -201,7 +202,54 @@ func (s *Server) Run(ctx context.Context, cfg Config) error { return fmt.Errorf("create checkpoint location dir: %w", err) } - // before we start our grpc services make sure our system workloads are running + var ( + g multierror.Group + wg sync.WaitGroup + ) + + g.Go(func() error { + checkSock, err := net.Listen("tcp", cfg.CheckpointConfig.ListenAddr) + if err != nil { + s.stopCh <- struct{}{} + return fmt.Errorf("failed to listen on unix socket: %v", err) + } + + if err := checkGRPCServer.Serve(checkSock); err != nil { + s.stopCh <- struct{}{} + return fmt.Errorf("failed to serve mgmt server: %w", err) + } + return nil + }) + + wg.Add(1) + + g.Go(func() error { + unixSock, err := net.Listen("unix", cfg.ManagementServerListenSock) + if err != nil { + s.stopCh <- struct{}{} + return fmt.Errorf("failed to listen on unix socket: %v", err) + } + + if err := os.Chown( + cfg.ManagementServerListenSock, + int(cfg.ManagementSocketUID), + int(cfg.ManagementSocketGID), + ); err != nil { + s.stopCh <- struct{}{} + return fmt.Errorf("failed to chown mgmt server socket: %w", err) + } + + wg.Done() + + if err := mgmtServer.Serve(unixSock); err != nil { + s.stopCh <- struct{}{} + return fmt.Errorf("failed to serve mgmt server: %w", err) + } + return nil + }) + + s.logger.Info("waiting for sockets") + wg.Wait() if err := criSvc.EnsurePod(ctx, cri.RunOptions{ PodConfig: &runtimev1.PodSandboxConfig{ @@ -232,6 +280,20 @@ func (s *Server) Run(ctx context.Context, cfg Config) error { HostPath: "/etc/platformd/proxy.conf", ContainerPath: "/etc/envoy/config.yaml", }, + { + HostPath: cfg.ManagementServerListenSock, + ContainerPath: cfg.ManagementServerListenSock, + }, + }, + Linux: &runtimev1.LinuxContainerConfig{ + SecurityContext: &runtimev1.LinuxContainerSecurityContext{ + RunAsUser: &runtimev1.Int64Value{ + Value: int64(cfg.ManagementSocketUID), + }, + RunAsGroup: &runtimev1.Int64Value{ + Value: int64(cfg.ManagementSocketGID), + }, + }, }, }, }); err != nil { @@ -274,45 +336,6 @@ func (s *Server) Run(ctx context.Context, cfg Config) error { return fmt.Errorf("ensure coredns: %w", err) } - var g multierror.Group - - g.Go(func() error { - checkSock, err := net.Listen("tcp", cfg.CheckpointConfig.ListenAddr) - if err != nil { - s.stopCh <- struct{}{} - return fmt.Errorf("failed to listen on unix socket: %v", err) - } - - if err := checkGRPCServer.Serve(checkSock); err != nil { - s.stopCh <- struct{}{} - return fmt.Errorf("failed to serve mgmt server: %w", err) - } - return nil - }) - - g.Go(func() error { - unixSock, err := net.Listen("unix", cfg.ManagementServerListenSock) - if err != nil { - s.stopCh <- struct{}{} - return fmt.Errorf("failed to listen on unix socket: %v", err) - } - - if err := os.Chown( - cfg.ManagementServerListenSock, - int(cfg.ManagementSocketUID), - int(cfg.ManagementSocketGID), - ); err != nil { - s.stopCh <- struct{}{} - return fmt.Errorf("failed to chown mgmt server socket: %w", err) - } - - if err := mgmtServer.Serve(unixSock); err != nil { - s.stopCh <- struct{}{} - return fmt.Errorf("failed to serve mgmt server: %w", err) - } - return nil - }) - // start reconciler after mgmt server has been started, // because otherwise creating pending instances will // fail as netglue is not able to retrieve the allocated