Skip to content

Commit 7a058aa

Browse files
authored
CLOUDP-328217: Automation agent password secret (#566)
# Summary If a deployment is moved to a different project, the automation agent password is regenerated, which triggers a password change in the automation plan. In a sharded cluster this causes a deadlock, because multiple components require automation at the same time. It does not cause issues in replica sets. This is a blocker for migrating projects in sharded deployments. ## Proof of Work For SCRAM (the only auth mechanism that regenerates the password), we now save the automation agent's password in a secret. During migration, the stored secret is used to preserve the password, making project migration possible. ## Observed problems For LDAP (Sharded + Replica), the project switch tests are failing, even though the only modification made is updating the MongoDB resource's project reference, with no other changes applied. To help further investigation, I have commented out certain code in the tests (which can make them fail) so the issue can be consistently reproduced. While the deployment returns to the "running" state, the users are missing from the automation configuration. Additionally, certain flaky parts in the SHA1 and SHA256 tests, as well as the LDAP and X509 project switch tests, have been commented out/disabled due to the lack of support for project migrations. These tests should be re-enabled once project migration functionality has been implemented. ## Checklist - [x] Have you linked a jira ticket and/or is the ticket in the title? - [x] Have you checked whether your jira ticket required DOCSP changes? - [x] Have you added a changelog file? - use `skip-changelog` label if not needed - refer to [Changelog files and Release Notes](https://github.com/mongodb/mongodb-kubernetes/blob/master/CONTRIBUTING.md#changelog-files-and-release-notes) section in CONTRIBUTING.md for more details
1 parent 0b18efc commit 7a058aa

34 files changed

+1196
-49
lines changed

.evergreen-tasks.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,46 @@ tasks:
690690
commands:
691691
- func: "e2e_test"
692692

693+
- name: e2e_sharded_cluster_scram_sha_256_switch_project
694+
tags: [ "patch-run" ]
695+
commands:
696+
- func: "e2e_test"
697+
698+
- name: e2e_sharded_cluster_scram_sha_1_switch_project
699+
tags: [ "patch-run" ]
700+
commands:
701+
- func: "e2e_test"
702+
703+
- name: e2e_sharded_cluster_x509_switch_project
704+
tags: [ "patch-run" ]
705+
commands:
706+
- func: "e2e_test"
707+
708+
- name: e2e_replica_set_scram_sha_256_switch_project
709+
tags: [ "patch-run" ]
710+
commands:
711+
- func: "e2e_test"
712+
713+
- name: e2e_replica_set_scram_sha_1_switch_project
714+
tags: [ "patch-run" ]
715+
commands:
716+
- func: "e2e_test"
717+
718+
- name: e2e_replica_set_x509_switch_project
719+
tags: [ "patch-run" ]
720+
commands:
721+
- func: "e2e_test"
722+
723+
- name: e2e_replica_set_ldap_switch_project
724+
tags: [ "patch-run" ]
725+
commands:
726+
- func: "e2e_test"
727+
728+
- name: e2e_sharded_cluster_ldap_switch_project
729+
tags: [ "patch-run" ]
730+
commands:
731+
- func: "e2e_test"
732+
693733
# TODO: not used in any variant
694734
- name: e2e_replica_set_scram_x509_internal_cluster
695735
tags: [ "patch-run" ]

.evergreen.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,15 @@ task_groups:
745745
- e2e_sharded_cluster_scram_sha_1_user_connectivity
746746
- e2e_sharded_cluster_scram_x509_ic_manual_certs
747747
- e2e_sharded_cluster_external_access
748+
- e2e_replica_set_scram_sha_256_switch_project
749+
- e2e_sharded_cluster_scram_sha_256_switch_project
750+
- e2e_replica_set_scram_sha_1_switch_project
751+
- e2e_sharded_cluster_scram_sha_1_switch_project
752+
# TODO CLOUDP-349093 - Disabled these tests as they don't use the password secret, and project migrations aren't fully supported yet.
753+
# e2e_sharded_cluster_x509_switch_project
754+
# e2e_replica_set_x509_switch_project
755+
# e2e_replica_set_ldap_switch_project
756+
# e2e_sharded_cluster_ldap_switch_project
748757
# e2e_auth_transitions_task_group
749758
- e2e_replica_set_scram_sha_and_x509
750759
- e2e_replica_set_x509_to_scram_transition
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
kind: fix
3+
date: 2025-11-27
4+
---
5+
6+
* Backed up the agent password in a secret for SCRAM authentication to prevent unnecessary password rotations.

controllers/om/automation_config.go

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
package om
22

33
import (
4+
"context"
45
"encoding/json"
6+
"fmt"
57

68
"github.com/google/go-cmp/cmp"
79
"github.com/spf13/cast"
810
"k8s.io/apimachinery/pkg/api/equality"
11+
"k8s.io/apimachinery/pkg/types"
12+
"sigs.k8s.io/controller-runtime/pkg/client"
913

1014
"github.com/mongodb/mongodb-kubernetes/controllers/operator/ldap"
1115
"github.com/mongodb/mongodb-kubernetes/controllers/operator/oidc"
16+
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/secret"
17+
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/util/constants"
1218
"github.com/mongodb/mongodb-kubernetes/pkg/util"
1319
"github.com/mongodb/mongodb-kubernetes/pkg/util/generate"
1420
"github.com/mongodb/mongodb-kubernetes/pkg/util/maputil"
@@ -426,19 +432,57 @@ func (ac *AutomationConfig) EnsureKeyFileContents() error {
426432
return nil
427433
}
428434

435+
// AuthSecretName returns the name of the secret that stores the automation
436+
// agent password for the MongoDB resource named mdbName.
437+
func AuthSecretName(mdbName string) string {
438+
return fmt.Sprintf("%s-agent-auth-secret", mdbName)
439+
}
440+
429441
// EnsurePassword makes sure that there is an Automation Agent password
430442
// that the agents will use to communicate with the deployments. The password
431443
// is returned, so it can be provided to the other agents
432-
func (ac *AutomationConfig) EnsurePassword() (string, error) {
433-
if ac.Auth.AutoPwd == "" || ac.Auth.AutoPwd == util.InvalidAutomationAgentPassword {
434-
automationAgentBackupPassword, err := generate.KeyFileContents()
435-
if err != nil {
436-
return "", err
444+
// A password stored in the agent auth secret takes precedence. If that secret does
445+
// not exist, a valid password already in the automation config is reused. Otherwise
446+
// a new password is generated. The result is saved to the secret and to ac.Auth.AutoPwd.
447+
func (ac *AutomationConfig) EnsurePassword(ctx context.Context, k8sClient secret.GetUpdateCreator, mdbNamespacedName types.NamespacedName) (string, error) {
448+
secretName := AuthSecretName(mdbNamespacedName.Name)
449+
secretNamespacedName := client.ObjectKey{Name: secretName, Namespace: mdbNamespacedName.Namespace}
450+
var password string
451+
452+
data, err := secret.ReadStringData(ctx, k8sClient, secretNamespacedName)
453+
if err == nil {
454+
if val, ok := data[constants.AutomationAgentAuthSecretKey]; ok && len(val) > 0 {
455+
password = val
456+
}
457+
} else if secret.SecretNotExist(err) {
458+
if ac.Auth.AutoPwd != "" && ac.Auth.AutoPwd != util.InvalidAutomationAgentPassword {
459+
password = ac.Auth.AutoPwd
460+
}
461+
}
462+
463+
if password == "" {
464+
generatedPassword, genErr := generate.KeyFileContents()
465+
if genErr != nil {
466+
return "", genErr
437467
}
438-
ac.Auth.AutoPwd = automationAgentBackupPassword
439-
return automationAgentBackupPassword, nil
468+
password = generatedPassword
469+
}
470+
471+
dataFields := map[string]string{
472+
constants.AutomationAgentAuthSecretKey: password,
473+
}
474+
475+
passwordSecret := secret.Builder().
476+
SetName(secretNamespacedName.Name).
477+
SetNamespace(secretNamespacedName.Namespace).
478+
SetStringMapToData(dataFields).
479+
Build()
480+
481+
if err := secret.CreateOrUpdateIfNeeded(ctx, k8sClient, passwordSecret); err != nil {
482+
return "", fmt.Errorf("failed to update password field in shared secret %s/%s: %w", secretNamespacedName.Namespace, secretNamespacedName.Name, err)
440483
}
441-
return ac.Auth.AutoPwd, nil
484+
ac.Auth.AutoPwd = password
485+
return password, nil
442486
}
443487

444488
func (ac *AutomationConfig) CanEnableX509ProjectAuthentication() (bool, string) {

controllers/operator/appdbreplicaset_controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1666,8 +1666,9 @@ func (r *ReconcileAppDbReplicaSet) tryConfigureMonitoringInOpsManager(ctx contex
16661666
AutoUser: util.AutomationAgentUserName,
16671667
AutoPEMKeyFilePath: agentCertPath,
16681668
CAFilePath: util.CAFilePathInContainer,
1669+
MongoDBResource: types.NamespacedName{Namespace: opsManager.Namespace, Name: opsManager.Name},
16691670
}
1670-
err = authentication.Configure(conn, opts, false, log)
1671+
err = authentication.Configure(ctx, r.client, conn, opts, false, log)
16711672
if err != nil {
16721673
log.Errorf("Could not set Automation Authentication options in Ops/Cloud Manager for the Application Database. "+
16731674
"Application Database is always configured with authentication enabled, but this will not be "+

controllers/operator/authentication/authentication.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
package authentication
22

33
import (
4+
"context"
5+
46
"go.uber.org/zap"
57
"golang.org/x/xerrors"
8+
"k8s.io/apimachinery/pkg/types"
69

710
mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb"
811
"github.com/mongodb/mongodb-kubernetes/controllers/om"
912
"github.com/mongodb/mongodb-kubernetes/controllers/operator/ldap"
1013
"github.com/mongodb/mongodb-kubernetes/controllers/operator/oidc"
14+
kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client"
1115
"github.com/mongodb/mongodb-kubernetes/pkg/util"
1216
)
1317

@@ -63,6 +67,8 @@ type Options struct {
6367
AutoPwd string
6468

6569
AutoLdapGroupDN string
70+
71+
MongoDBResource types.NamespacedName
6672
}
6773

6874
func Redact(o Options) Options {
@@ -82,7 +88,7 @@ type UserOptions struct {
8288

8389
// Configure will configure all the specified authentication Mechanisms. We need to ensure we wait for
8490
// the agents to reach ready state after each operation as prematurely updating the automation config can cause the agents to get stuck.
85-
func Configure(conn om.Connection, opts Options, isRecovering bool, log *zap.SugaredLogger) error {
91+
func Configure(ctx context.Context, client kubernetesClient.Client, conn om.Connection, opts Options, isRecovering bool, log *zap.SugaredLogger) error {
8692
log.Infow("ensuring correct deployment mechanisms", "ProcessNames", opts.ProcessNames, "Mechanisms", opts.Mechanisms)
8793

8894
// In case we're recovering, we can push all changes at once, because the mechanism is triggered after 20min by default.
@@ -113,7 +119,7 @@ func Configure(conn om.Connection, opts Options, isRecovering bool, log *zap.Sug
113119

114120
// once we have made sure that the deployment authentication mechanism array contains the desired auth mechanism
115121
// we can then configure the agent authentication.
116-
if err := enableAgentAuthentication(conn, opts, log); err != nil {
122+
if err := enableAgentAuthentication(ctx, client, conn, opts, log); err != nil {
117123
return xerrors.Errorf("error enabling agent authentication: %w", err)
118124
}
119125
if err := waitForReadyStateIfNeeded(); err != nil {
@@ -151,7 +157,7 @@ func Configure(conn om.Connection, opts Options, isRecovering bool, log *zap.Sug
151157

152158
// Disable disables all authentication mechanisms, and waits for the agents to reach goal state. It is still required to provide
153159
// automation agent username, password and keyfile contents to ensure a valid Automation Config.
154-
func Disable(conn om.Connection, opts Options, deleteUsers bool, log *zap.SugaredLogger) error {
160+
func Disable(ctx context.Context, client kubernetesClient.Client, conn om.Connection, opts Options, deleteUsers bool, log *zap.SugaredLogger) error {
155161
ac, err := conn.ReadAutomationConfig()
156162
if err != nil {
157163
return xerrors.Errorf("error reading automation config: %w", err)
@@ -181,7 +187,7 @@ func Disable(conn om.Connection, opts Options, deleteUsers bool, log *zap.Sugare
181187
if err := ac.EnsureKeyFileContents(); err != nil {
182188
return xerrors.Errorf("error ensuring keyfile contents: %w", err)
183189
}
184-
if _, err := ac.EnsurePassword(); err != nil {
190+
if _, err := ac.EnsurePassword(ctx, client, opts.MongoDBResource); err != nil {
185191
return xerrors.Errorf("error ensuring agent password: %w", err)
186192
}
187193

@@ -258,7 +264,7 @@ func removeUnsupportedAgentMechanisms(conn om.Connection, opts Options, log *zap
258264

259265
// enableAgentAuthentication determines which agent authentication mechanism should be configured
260266
// and enables it in Ops Manager
261-
func enableAgentAuthentication(conn om.Connection, opts Options, log *zap.SugaredLogger) error {
267+
func enableAgentAuthentication(ctx context.Context, client kubernetesClient.Client, conn om.Connection, opts Options, log *zap.SugaredLogger) error {
262268
ac, err := conn.ReadAutomationConfig()
263269
if err != nil {
264270
return xerrors.Errorf("error reading automation config: %w", err)
@@ -267,7 +273,7 @@ func enableAgentAuthentication(conn om.Connection, opts Options, log *zap.Sugare
267273
// we then configure the agent authentication for that type
268274
mechanism := convertToMechanismOrPanic(opts.AgentMechanism, ac)
269275

270-
if err := ensureAgentAuthenticationIsConfigured(conn, opts, ac, mechanism, log); err != nil {
276+
if err := ensureAgentAuthenticationIsConfigured(ctx, client, conn, opts, ac, mechanism, log); err != nil {
271277
return xerrors.Errorf("error ensuring agent authentication is configured: %w", err)
272278
}
273279

@@ -365,14 +371,14 @@ func addOrRemoveAgentClientCertificate(conn om.Connection, opts Options, log *za
365371
}
366372

367373
// ensureAgentAuthenticationIsConfigured will configure the agent authentication settings based on the desiredAgentAuthMechanism
368-
func ensureAgentAuthenticationIsConfigured(conn om.Connection, opts Options, ac *om.AutomationConfig, mechanism Mechanism, log *zap.SugaredLogger) error {
374+
func ensureAgentAuthenticationIsConfigured(ctx context.Context, client kubernetesClient.Client, conn om.Connection, opts Options, ac *om.AutomationConfig, mechanism Mechanism, log *zap.SugaredLogger) error {
369375
if mechanism.IsAgentAuthenticationConfigured(ac, opts) {
370376
log.Infof("Agent authentication mechanism %s is already configured", mechanism.GetName())
371377
return nil
372378
}
373379

374380
log.Infof("Enabling %s agent authentication", mechanism.GetName())
375-
return mechanism.EnableAgentAuthentication(conn, opts, log)
381+
return mechanism.EnableAgentAuthentication(ctx, client, conn, opts, log)
376382
}
377383

378384
// ensureDeploymentMechanisms configures the given AutomationConfig to allow deployments to

controllers/operator/authentication/authentication_mechanism.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
package authentication
22

33
import (
4+
"context"
45
"slices"
56
"strings"
67

78
"go.uber.org/zap"
89
"golang.org/x/xerrors"
910

1011
"github.com/mongodb/mongodb-kubernetes/controllers/om"
12+
kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client"
1113
"github.com/mongodb/mongodb-kubernetes/pkg/util"
1214
)
1315

1416
// Mechanism is an interface that needs to be implemented for any Ops Manager authentication mechanism
1517
type Mechanism interface {
16-
EnableAgentAuthentication(conn om.Connection, opts Options, log *zap.SugaredLogger) error
18+
EnableAgentAuthentication(ctx context.Context, client kubernetesClient.Client, conn om.Connection, opts Options, log *zap.SugaredLogger) error
1719
DisableAgentAuthentication(conn om.Connection, log *zap.SugaredLogger) error
1820
EnableDeploymentAuthentication(conn om.Connection, opts Options, log *zap.SugaredLogger) error
1921
DisableDeploymentAuthentication(conn om.Connection, log *zap.SugaredLogger) error

0 commit comments

Comments
 (0)