Skip to content

Commit 1061839

Browse files
committed
[Feat] Support StormService pause rollout in upgrade
* Update stormservice golang client
* Improve the test coverage
* Refactor the API to support manual resume

Signed-off-by: Jiaxin Shan <[email protected]>
1 parent 922477a commit 1061839

File tree

21 files changed

+3163
-18
lines changed

21 files changed

+3163
-18
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ AIBRIX_IMAGES := $(foreach img,$(IMAGES),$(AIBRIX_CONTAINER_REGISTRY_NAMESPACE)/
1414
IMG ?= ${AIBRIX_CONTAINER_REGISTRY_NAMESPACE}/controller-manager:${IMAGE_TAG}
1515

1616
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
17-
ENVTEST_K8S_VERSION = 1.29.0
17+
ENVTEST_K8S_VERSION = 1.30.0
1818

1919
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
2020
ifeq (,$(shell go env GOBIN))

api/orchestration/v1alpha1/stormservice_types.go

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ limitations under the License.
1717
package v1alpha1
1818

1919
import (
20+
"strconv"
21+
"time"
22+
2023
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2124
"k8s.io/apimachinery/pkg/util/intstr"
2225
)
@@ -119,6 +122,10 @@ type StormServiceStatus struct {
119122

120123
// The label selector information of the pods belonging to the StormService object.
121124
ScalingTargetSelector string `json:"scalingTargetSelector,omitempty"`
125+
126+
// CanaryStatus tracks the progress of canary deployments.
127+
// +optional
128+
CanaryStatus *CanaryStatus `json:"canaryStatus,omitempty"`
122129
}
123130

124131
// These are valid conditions of a stormService.
@@ -146,6 +153,10 @@ type StormServiceUpdateStrategy struct {
146153

147154
// +optional
148155
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty" protobuf:"bytes,2,opt,name=maxSurge"`
156+
157+
// Canary defines the canary deployment strategy for gradual rollouts.
158+
// +optional
159+
Canary *CanaryUpdateStrategy `json:"canary,omitempty"`
149160
}
150161

151162
// +enum
@@ -181,6 +192,137 @@ type StormServiceList struct {
181192
Items []StormService `json:"items"`
182193
}
183194

195+
// CanaryUpdateStrategy defines the canary deployment configuration.
// It is referenced from StormServiceUpdateStrategy.Canary.
196+
type CanaryUpdateStrategy struct {
197+
// Steps defines the sequence of canary deployment steps.
// Each entry may carry a weight, a pause, or both (both CanaryStep fields are optional).
// NOTE(review): presumably the steps are processed in list order — confirm against the controller.
198+
Steps []CanaryStep `json:"steps,omitempty"`
199+
}
200+
201+
// CanaryStep defines a single step in the canary deployment process.
// Both fields are optional pointers, so a step can set a weight, a pause, or both.
202+
type CanaryStep struct {
203+
// SetWeight defines the percentage of traffic/replicas to route to the new version.
// The validation markers below restrict it to the range 0-100.
// NOTE(review): a nil value presumably means the step leaves the weight unchanged — confirm.
204+
// +kubebuilder:validation:Minimum=0
205+
// +kubebuilder:validation:Maximum=100
206+
// +optional
207+
SetWeight *int32 `json:"setWeight,omitempty"`
208+
209+
// Pause defines a pause in the canary deployment.
// See PauseStep for how the pause duration is interpreted.
210+
// +optional
211+
Pause *PauseStep `json:"pause,omitempty"`
212+
}
213+
214+
// PauseStep defines pause behavior in canary deployments.
215+
type PauseStep struct {
216+
// Duration specifies how long to pause.
217+
// - String: "30s", "5m", etc. (parsed as time.Duration), or a bare integer
//   string such as "30", which DurationSeconds interprets as seconds
218+
// - Int: seconds as integer
219+
// - nil: manual pause requiring user intervention
220+
// Resume manual pause by setting duration to "0" or 0.
221+
// +optional
222+
Duration *intstr.IntOrString `json:"duration,omitempty"`
223+
}
224+
225+
// DurationSeconds converts the pause duration to seconds
226+
// Returns:
227+
// - >= 0: pause duration in seconds
228+
// - 0: manual pause (nil duration) or resume (duration "0"/0)
229+
// - -1: invalid duration string
230+
func (p *PauseStep) DurationSeconds() int32 {
231+
if p.Duration == nil {
232+
return 0 // Manual pause
233+
}
234+
235+
if p.Duration.Type == intstr.String {
236+
// Try parsing as integer first
237+
if s, err := strconv.ParseInt(p.Duration.StrVal, 10, 32); err == nil {
238+
return int32(s)
239+
}
240+
// Try parsing as duration string
241+
if d, err := time.ParseDuration(p.Duration.StrVal); err == nil {
242+
return int32(d.Seconds())
243+
}
244+
return -1 // Invalid string
245+
}
246+
247+
return p.Duration.IntVal
248+
}
249+
250+
// IsManualPause returns true if this is a manual pause (nil duration)
251+
func (p *PauseStep) IsManualPause() bool {
252+
return p.Duration == nil
253+
}
254+
255+
// IsResume returns true if this represents a resume action (duration 0 or "0")
256+
func (p *PauseStep) IsResume() bool {
257+
if p.Duration == nil {
258+
return false
259+
}
260+
return p.DurationSeconds() == 0
261+
}
262+
263+
// CanaryStatus tracks the progress of a canary deployment.
// It is exposed through StormServiceStatus.CanaryStatus; every field is optional.
264+
type CanaryStatus struct {
265+
// CurrentStep is the index of the current step in the canary deployment.
// Because of omitempty, a value of 0 is omitted from the serialized status.
// NOTE(review): presumably an index into CanaryUpdateStrategy.Steps — confirm.
266+
// +optional
267+
CurrentStep int32 `json:"currentStep,omitempty"`
268+
269+
// CurrentWeight is the current percentage of traffic/replicas on the new version.
// Because of omitempty, a value of 0 is omitted from the serialized status.
270+
// +optional
271+
CurrentWeight int32 `json:"currentWeight,omitempty"`
272+
273+
// PausedAt indicates when the canary deployment was paused.
// It is a pointer, so it is absent when no pause timestamp is recorded.
274+
// +optional
275+
PausedAt *metav1.Time `json:"pausedAt,omitempty"`
276+
277+
// PauseConditions indicates the reasons why the canary deployment is paused.
// Each entry carries a PauseReason and the time the condition was added.
278+
// +optional
279+
PauseConditions []PauseCondition `json:"pauseConditions,omitempty"`
280+
281+
// StableRevision is the revision of the stable/old version.
282+
// +optional
283+
StableRevision string `json:"stableRevision,omitempty"`
284+
285+
// CanaryRevision is the revision of the canary/new version.
286+
// +optional
287+
CanaryRevision string `json:"canaryRevision,omitempty"`
288+
289+
// Phase indicates the current phase of the canary deployment.
// See the CanaryPhase constants for the defined values.
290+
// +optional
291+
Phase CanaryPhase `json:"phase,omitempty"`
292+
}
293+
294+
// CanaryPhase represents the phase of a canary deployment.
// It is the type of CanaryStatus.Phase and is serialized as a plain string.
295+
// +enum
296+
type CanaryPhase string
297+
298+
const (
299+
// CanaryPhaseInitializing indicates the canary deployment is starting.
300+
CanaryPhaseInitializing CanaryPhase = "Initializing"
301+
// CanaryPhaseProgressing indicates the canary deployment is progressing through steps.
302+
CanaryPhaseProgressing CanaryPhase = "Progressing"
303+
// CanaryPhasePaused indicates the canary deployment is paused.
304+
CanaryPhasePaused CanaryPhase = "Paused"
305+
// CanaryPhaseCompleted indicates the canary deployment has completed successfully.
306+
CanaryPhaseCompleted CanaryPhase = "Completed"
307+
)
308+
309+
// PauseReason represents the reason for a pause condition.
// It is the type of PauseCondition.Reason and is serialized as a plain string.
310+
// +enum
311+
type PauseReason string
312+
313+
const (
314+
// PauseReasonCanaryPauseStep indicates a pause at a canary step.
// It is the only reason defined in this block.
315+
PauseReasonCanaryPauseStep PauseReason = "CanaryPauseStep"
316+
)
317+
318+
// PauseCondition represents a pause condition in the canary deployment.
// Entries are listed in CanaryStatus.PauseConditions. Neither field uses
// omitempty, so both are always present in the serialized form.
319+
type PauseCondition struct {
320+
// Reason indicates why the canary deployment was paused.
321+
Reason PauseReason `json:"reason"`
322+
// StartTime is when the pause condition was added.
323+
StartTime metav1.Time `json:"startTime"`
324+
}
325+
184326
// init registers the StormService and StormServiceList types with SchemeBuilder.
func init() {
185327
SchemeBuilder.Register(&StormService{}, &StormServiceList{})
186328
}

api/orchestration/v1alpha1/zz_generated.deepcopy.go

Lines changed: 119 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/orchestration/orchestration.aibrix.ai_stormservices.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3889,6 +3889,24 @@ spec:
38893889
type: object
38903890
updateStrategy:
38913891
properties:
3892+
canary:
3893+
properties:
3894+
steps:
3895+
items:
3896+
properties:
3897+
pause:
3898+
properties:
3899+
# PauseStep.Duration is an intstr.IntOrString in the Go API, so the schema
# must accept both integers and strings (same form as maxSurge).
duration:
  anyOf:
  - type: integer
  - type: string
  x-kubernetes-int-or-string: true
3901+
type: object
3902+
setWeight:
3903+
format: int32
3904+
maximum: 100
3905+
minimum: 0
3906+
type: integer
3907+
type: object
3908+
type: array
3909+
type: object
38923910
maxSurge:
38933911
anyOf:
38943912
- type: integer
@@ -3912,6 +3930,24 @@ spec:
39123930
type: object
39133931
status:
39143932
properties:
3933+
# Mirrors the Go CanaryStatus type, including pauseConditions; a field missing
# from the schema would be pruned from status by the API server.
canaryStatus:
  properties:
    canaryRevision:
      type: string
    currentStep:
      format: int32
      type: integer
    currentWeight:
      format: int32
      type: integer
    pauseConditions:
      items:
        properties:
          reason:
            type: string
          startTime:
            format: date-time
            type: string
        required:
        - reason
        - startTime
        type: object
      type: array
    pausedAt:
      format: date-time
      type: string
    phase:
      type: string
    stableRevision:
      type: string
  type: object
39153951
collisionCount:
39163952
format: int32
39173953
type: integer

0 commit comments

Comments
 (0)