diff --git a/internal/inventory/events.go b/internal/inventory/events.go index f305fc4a..77457c94 100644 --- a/internal/inventory/events.go +++ b/internal/inventory/events.go @@ -85,8 +85,6 @@ func (e HostUpdated) validateHostUpdated() error { // Handle the HostUpdate event func (e HostUpdated) Handle(ctx context.Context) error { - slog.Debug("HostUpdate", "HostId", e.HostId, "ProjectId", e.ProjectId, "Labels", e.Labels) - // Validate all fields if err := e.validateHostUpdated(); err != nil { return err diff --git a/internal/inventory/grpc.go b/internal/inventory/grpc.go index d3542961..92afdb83 100644 --- a/internal/inventory/grpc.go +++ b/internal/inventory/grpc.go @@ -195,7 +195,6 @@ func (c *InventoryClient) WatchHosts(hostEvents chan<- events.Event) { } case inventoryv1.SubscribeEventsResponse_EVENT_KIND_UPDATED: - slog.Debug("host updated event", "name", host.Name, "hostid", host.ResourceId) l, err := JsonStringToMap(host.Metadata) if err != nil { diff --git a/internal/k8s/client.go b/internal/k8s/client.go index f766627a..dbfc440e 100644 --- a/internal/k8s/client.go +++ b/internal/k8s/client.go @@ -458,6 +458,15 @@ func (cli *Client) CreateMachineBinding(ctx context.Context, namespace string, b return err } +// DeleteMachineBinding deletes the machine binding object with the given name in the given namespace +func (cli *Client) DeleteMachineBinding(ctx context.Context, namespace, bindingName string) error { + deletePolicy := metav1.DeletePropagationForeground + deleteOptions := metav1.DeleteOptions{ + PropagationPolicy: &deletePolicy, + } + return cli.Dyn.Resource(bindingsResourceSchema).Namespace(namespace).Delete(ctx, bindingName, deleteOptions) +} + // IntelMachines returns all IntelMachine objects in the given namespace for the given cluster func (cli *Client) IntelMachines(ctx context.Context, namespace, clusterName string) ([]intelProvider.IntelMachine, error) { return providerMachines[intelProvider.IntelMachine](ctx, cli, namespace, clusterName, 
IntelMachineResourceSchema) diff --git a/internal/rest/postv2clusters.go b/internal/rest/postv2clusters.go index c5057c94..a88cb3cb 100644 --- a/internal/rest/postv2clusters.go +++ b/internal/rest/postv2clusters.go @@ -94,29 +94,39 @@ func (s *Server) PostV2Clusters(ctx context.Context, request api.PostV2ClustersR return api.PostV2Clusters400JSONResponse{N400BadRequestJSONResponse: api.N400BadRequestJSONResponse{Message: &msg}}, nil } - // create cluster - slog.Debug("creating cluster", "namespace", namespace) - createdClusterName, err := s.createCluster(ctx, cli, namespace, clusterName, template, nodes, clusterLabels) - if err != nil { - slog.Error("failed to create cluster", "namespace", namespace, "name", clusterName, "error", err) - return api.PostV2Clusters500JSONResponse{ - N500InternalServerErrorJSONResponse: api.N500InternalServerErrorJSONResponse{ - Message: ptr(fmt.Sprintf("failed to create cluster: %v", err)), - }, - }, nil - } - // create machine binding for Intel infra provider if api.TemplateInfoInfraprovidertype(template.Spec.InfraProviderType) == api.Intel { + slog.Debug("creating machine bindings for Intel infra provider", "namespace", namespace, "clusterName", clusterName, "templateName", template.Name) err := createBindings(ctx, cli, namespace, clusterName, template.Name, nodes) if err != nil { msg := fmt.Sprintf("failed to create machine bindings: %v", err) slog.Error(msg) return api.PostV2Clusters500JSONResponse{N500InternalServerErrorJSONResponse: api.N500InternalServerErrorJSONResponse{Message: &msg}}, nil } + slog.Info("Machine bindings created for Intel infra provider", "namespace", namespace, "clusterName", clusterName, "templateName", template.Name) } - slog.Info("Cluster created", "namespace", namespace, "name", createdClusterName) + // create cluster + slog.Debug("creating cluster", "namespace", namespace, "name", clusterName, "template", template.Name, "nodes", nodes, "labels", clusterLabels) + createdClusterName, err := 
s.createCluster(ctx, cli, namespace, clusterName, template, nodes, clusterLabels) + if err != nil { + slog.Error("failed to create cluster", "namespace", namespace, "name", clusterName, "error", err) + // rollback machine bindings if cluster creation fails + if api.TemplateInfoInfraprovidertype(template.Spec.InfraProviderType) == api.Intel { + slog.Warn("rolling back machine bindings due to cluster creation failure", "namespace", namespace, "clusterName", clusterName) + if rollbackErr := deleteBindings(ctx, cli, namespace, clusterName, nodes); rollbackErr != nil { + slog.Error("failed to rollback machine bindings", "namespace", namespace, "clusterName", clusterName, "error", rollbackErr) + } else { + slog.Info("machine bindings rolled back successfully", "namespace", namespace, "clusterName", clusterName) + } + } + return api.PostV2Clusters500JSONResponse{ + N500InternalServerErrorJSONResponse: api.N500InternalServerErrorJSONResponse{ + Message: ptr(fmt.Sprintf("failed to create cluster: %v", err)), + }, + }, nil + } + slog.Info("cluster created successfully", "namespace", namespace, "name", createdClusterName) return api.PostV2Clusters201JSONResponse(fmt.Sprintf("successfully created cluster %s", createdClusterName)), nil } @@ -220,6 +230,16 @@ func createBindings(ctx context.Context, cli *k8s.Client, namespace, clusterName return nil } +func deleteBindings(ctx context.Context, cli *k8s.Client, namespace, clusterName string, nodes []api.NodeSpec) error { + for _, node := range nodes { + bindingName := fmt.Sprintf("%s-%s", clusterName, node.Id) + if err := cli.DeleteMachineBinding(ctx, namespace, bindingName); err != nil { + return fmt.Errorf("failed to delete machine binding %s: %w", bindingName, err) + } + } + return nil +} + func (s *Server) enableAirGapInstall(ctx context.Context, cli *k8s.Client, namespace, clusterName string, template ct.ClusterTemplate) (bool, error) { // Fetch the cluster template clusterTemplate, err := cli.GetClusterTemplate(ctx, 
namespace, template.Name)