tutus-chain/pkg/core/native/canary_deployment.go

653 lines
18 KiB
Go

package native
import (
"encoding/binary"
"errors"
"github.com/tutus-one/tutus-chain/pkg/core/dao"
"github.com/tutus-one/tutus-chain/pkg/core/storage"
"github.com/tutus-one/tutus-chain/pkg/util"
)
// ARCH-004: Canary Deployment System
// Provides infrastructure for safe, gradual rollouts of new features
// with automatic rollback capabilities based on error rates and invariant violations.
// FeatureStatus is the lifecycle state of a feature flag.
//
// The numeric value is stored as a single byte inside serialized feature
// flags and history records, so the values are spelled out explicitly and
// must never be renumbered.
type FeatureStatus uint8

// Lifecycle states a feature flag can be in.
const (
	// FeatureDisabled: the feature is not active for anyone.
	FeatureDisabled FeatureStatus = 0
	// FeatureCanary: the feature is active for a fixed percentage of users.
	FeatureCanary FeatureStatus = 1
	// FeatureRollingOut: the active percentage is being raised step by step.
	FeatureRollingOut FeatureStatus = 2
	// FeatureEnabled: the feature is fully active.
	FeatureEnabled FeatureStatus = 3
	// FeatureRolledBack: the feature was active but has been reverted.
	FeatureRolledBack FeatureStatus = 4
)
// RollbackReason records why a feature was rolled back.
//
// The value is stored as a single byte in the serialized feature flag, so
// the numbering is explicit and must stay stable.
type RollbackReason uint8

// Known rollback reasons.
const (
	// RollbackReasonManual is also the zero value of RollbackReason.
	RollbackReasonManual RollbackReason = 0
	// RollbackReasonErrorRate is used by the automatic error-rate rollback.
	RollbackReasonErrorRate          RollbackReason = 1
	RollbackReasonInvariantViolation RollbackReason = 2
	RollbackReasonCircuitBreaker     RollbackReason = 3
	RollbackReasonConsensusFailure   RollbackReason = 4
	RollbackReasonPerformance        RollbackReason = 5
)
// FeatureFlag is the persisted state of one canary-managed feature: its
// identity, rollout progress, health counters and rollback bookkeeping.
// Every field is written by serializeFeature and restored by
// deserializeFeature.
type FeatureFlag struct {
	// Name is the unique identifier for this feature; it is also the
	// variable part of the feature's storage key.
	Name string
	// Description explains what this feature does.
	Description string
	// Status is the current deployment status.
	Status FeatureStatus
	// RolloutPercent is the percentage of users who see this feature (0-100).
	// Rollback and Disable reset it to 0, Enable sets it to 100.
	RolloutPercent uint8
	// TargetPercent is the goal percentage for gradual rollout.
	TargetPercent uint8
	// IncrementPercent is how much RolloutPercent grows per rollout step.
	IncrementPercent uint8
	// IncrementBlocks is the number of blocks between rollout increments.
	IncrementBlocks uint32
	// StartBlock is the block at which the canary was started.
	StartBlock uint32
	// LastIncrementBlock is the block of the last rollout increment
	// (or of the canary/rollout start).
	LastIncrementBlock uint32
	// ErrorCount is the number of errors recorded since the canary started.
	ErrorCount uint64
	// SuccessCount is the number of successful operations recorded since
	// the canary started.
	SuccessCount uint64
	// MaxErrorRate is the error rate, in errors per 10000 operations,
	// above which the feature is rolled back.
	MaxErrorRate uint32
	// MinSuccessCount is the minimum number of successes required before
	// a gradual rollout may be started.
	MinSuccessCount uint64
	// EnabledBy is the account recorded when the feature was created.
	EnabledBy util.Uint160
	// RollbackReason is the reason given to the most recent Rollback call.
	RollbackReason RollbackReason
}
// FeatureMetrics tracks canary performance metrics for a single feature.
// It is stored separately from the FeatureFlag, under its own key prefix.
type FeatureMetrics struct {
	// FeatureName is the name of the feature these metrics belong to.
	FeatureName string
	// TotalOperations counts every recorded operation (success or error).
	TotalOperations uint64
	// SuccessfulOps is incremented by RecordSuccess.
	SuccessfulOps uint64
	// FailedOps is incremented by RecordError.
	FailedOps uint64
	AverageLatency uint64 // In block units
	// PeakLatency — presumably in the same block units as AverageLatency;
	// nothing in this file updates it, so confirm against callers.
	PeakLatency uint64
	// InvariantChecks and InvariantFailures are not updated anywhere in this
	// file. NOTE(review): presumably maintained by an invariant checker
	// elsewhere — confirm.
	InvariantChecks uint64
	InvariantFailures uint64
}
// Storage prefixes for canary deployment. Each prefix is the first byte of a
// storage key in the contract's storage; the comment on each line gives the
// key layout after the prefix and the stored value.
const (
	canaryPrefixFeature byte = 0xD0 // name -> FeatureFlag
	canaryPrefixMetrics byte = 0xD1 // name -> FeatureMetrics
	canaryPrefixHistory byte = 0xD2 // name + block -> status change
	canaryPrefixUserEnabled byte = 0xD3 // name + address -> enabled flag
	canaryPrefixGlobal byte = 0xD4 // -> GlobalCanaryState
)
// Canary deployment errors. These are sentinel values; compare with errors.Is.
var (
	// ErrFeatureNotFound is returned when no flag is stored under the given name.
	ErrFeatureNotFound = errors.New("feature flag not found")
	// ErrFeatureAlreadyExists is returned by CreateFeature for a name that is already in use.
	ErrFeatureAlreadyExists = errors.New("feature flag already exists")
	// ErrFeatureDisabled reports that a feature is disabled.
	ErrFeatureDisabled = errors.New("feature is disabled")
	// ErrRolloutInProgress is returned by StartRollout when the feature is already rolling out.
	ErrRolloutInProgress = errors.New("rollout already in progress")
	// ErrInsufficientData is returned by StartRollout when too few successes have been recorded.
	ErrInsufficientData = errors.New("insufficient data for rollout decision")
	// ErrErrorRateExceeded reports that the error rate went above the configured threshold.
	ErrErrorRateExceeded = errors.New("error rate exceeded threshold")
)
// Default canary settings. CreateFeature applies the error-rate, success-count
// and increment defaults to every new feature flag.
const (
	// DefaultMaxErrorRate is the rollback threshold in errors per 10000 operations.
	DefaultMaxErrorRate = 100 // 1% (per 10000)
	// DefaultMinSuccessCount is the number of successes required before a rollout may start.
	DefaultMinSuccessCount = 100
	// DefaultIncrementPercent is the rollout percentage added per increment step.
	DefaultIncrementPercent = 10
	// DefaultIncrementBlocks is the number of blocks between increment steps.
	DefaultIncrementBlocks = 8640 // ~1 day at 10s blocks
	// DefaultCanaryPercent is a suggested initial canary percentage; it is
	// not referenced elsewhere in this file.
	DefaultCanaryPercent = 5
)
// CanaryDeployment manages feature flags and canary deployments.
// It holds no state of its own besides the contract ID; all flag data is
// read from and written to the DAO passed into each method.
type CanaryDeployment struct {
	// contractID selects the contract storage that holds the canary records.
	contractID int32
}
// NewCanaryDeployment returns a canary deployment manager that keeps its
// records in the storage of the contract identified by contractID.
func NewCanaryDeployment(contractID int32) *CanaryDeployment {
	cd := new(CanaryDeployment)
	cd.contractID = contractID
	return cd
}
// CreateFeature registers a new, disabled feature flag under name and
// initializes an empty metrics record for it. The flag starts at 0% rollout
// with a 100% target and the package default thresholds.
//
// It returns ErrFeatureAlreadyExists if a flag with that name is already stored.
func (cd *CanaryDeployment) CreateFeature(d *dao.Simple, name, description string, enabledBy util.Uint160) error {
	if existing := cd.GetFeature(d, name); existing != nil {
		return ErrFeatureAlreadyExists
	}

	flag := new(FeatureFlag)
	flag.Name = name
	flag.Description = description
	flag.EnabledBy = enabledBy

	// Deployment state: switched off, nothing rolled out yet.
	flag.Status = FeatureDisabled
	flag.RolloutPercent = 0
	flag.TargetPercent = 100

	// Rollout pacing and health thresholds come from the package defaults.
	flag.IncrementPercent = DefaultIncrementPercent
	flag.IncrementBlocks = DefaultIncrementBlocks
	flag.MaxErrorRate = DefaultMaxErrorRate
	flag.MinSuccessCount = DefaultMinSuccessCount

	cd.putFeature(d, flag)
	cd.initMetrics(d, name)
	return nil
}
// GetFeature loads the feature flag stored under name. It returns nil when
// no record exists or when the stored bytes cannot be decoded.
func (cd *CanaryDeployment) GetFeature(d *dao.Simple, name string) *FeatureFlag {
	if raw := d.GetStorageItem(cd.contractID, cd.makeFeatureKey(name)); raw != nil {
		return cd.deserializeFeature(raw)
	}
	return nil
}
// StartCanary puts the named feature into canary mode for percent of users
// (values above 100 are treated as 100), resets its error and success
// counters, and records the status change in the history.
//
// It returns ErrFeatureNotFound if the feature does not exist.
func (cd *CanaryDeployment) StartCanary(d *dao.Simple, name string, percent uint8, currentBlock uint32) error {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return ErrFeatureNotFound
	}

	if percent > 100 {
		percent = 100
	}

	flag.Status, flag.RolloutPercent = FeatureCanary, percent
	flag.StartBlock, flag.LastIncrementBlock = currentBlock, currentBlock
	// A new canary run starts with clean health counters.
	flag.ErrorCount, flag.SuccessCount = 0, 0

	cd.putFeature(d, flag)
	cd.recordHistory(d, name, currentBlock, FeatureCanary)
	return nil
}
// StartRollout begins a gradual rollout of the named feature towards
// targetPercent. Values above 100 are treated as 100, matching StartCanary;
// an unclamped target would let the uint8 rollout percentage wrap around
// during later increments.
//
// It returns ErrFeatureNotFound if the feature does not exist,
// ErrRolloutInProgress if it is already rolling out, and ErrInsufficientData
// if fewer than MinSuccessCount successes have been recorded.
func (cd *CanaryDeployment) StartRollout(d *dao.Simple, name string, targetPercent uint8, currentBlock uint32) error {
	feature := cd.GetFeature(d, name)
	if feature == nil {
		return ErrFeatureNotFound
	}
	if feature.Status == FeatureRollingOut {
		return ErrRolloutInProgress
	}
	// Require minimum success count before rollout.
	if feature.SuccessCount < feature.MinSuccessCount {
		return ErrInsufficientData
	}
	if targetPercent > 100 {
		targetPercent = 100
	}

	feature.Status = FeatureRollingOut
	feature.TargetPercent = targetPercent
	// The first increment is due IncrementBlocks after the rollout starts.
	feature.LastIncrementBlock = currentBlock
	cd.putFeature(d, feature)
	cd.recordHistory(d, name, currentBlock, FeatureRollingOut)
	return nil
}
// ProcessRolloutIncrement advances a feature that is rolling out by one
// step, provided at least IncrementBlocks blocks have passed since the last
// step. If the error rate is above the threshold the feature is rolled back
// instead. Features that do not exist or are not rolling out are left alone.
//
// NOTE(review): reaching a TargetPercent below 100 still switches the status
// to FeatureEnabled, and IsEnabled treats that status as on for every user —
// confirm this is intended.
func (cd *CanaryDeployment) ProcessRolloutIncrement(d *dao.Simple, name string, currentBlock uint32) {
	feature := cd.GetFeature(d, name)
	if feature == nil || feature.Status != FeatureRollingOut {
		return
	}

	// Check if enough blocks have passed. The sum is computed in 64 bits so
	// it cannot wrap around for large block heights or intervals.
	nextDue := uint64(feature.LastIncrementBlock) + uint64(feature.IncrementBlocks)
	if uint64(currentBlock) < nextDue {
		return
	}

	// Check error rate.
	if cd.shouldRollback(feature) {
		// Rollback only fails when the feature is missing, and it was
		// loaded just above, so the error carries no information here.
		_ = cd.Rollback(d, name, RollbackReasonErrorRate, currentBlock)
		return
	}

	// Increment rollout. Widen before adding: two uint8 percentages can
	// exceed 255 and would otherwise wrap to a small value.
	next := uint16(feature.RolloutPercent) + uint16(feature.IncrementPercent)
	if next >= uint16(feature.TargetPercent) {
		next = uint16(feature.TargetPercent)
		feature.Status = FeatureEnabled
	}
	feature.RolloutPercent = uint8(next) // next <= TargetPercent or < 256 - fits in uint8
	feature.LastIncrementBlock = currentBlock
	cd.putFeature(d, feature)

	if feature.Status == FeatureEnabled {
		cd.recordHistory(d, name, currentBlock, FeatureEnabled)
	}
}
// IsEnabled reports whether the named feature is active for user.
// A fully enabled feature is on for everyone; a feature in canary or rollout
// is on only for users inside the current rollout group; anything else
// (unknown, disabled, rolled back) is off.
func (cd *CanaryDeployment) IsEnabled(d *dao.Simple, name string, user util.Uint160) bool {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return false
	}
	if flag.Status == FeatureEnabled {
		return true
	}
	partial := flag.Status == FeatureCanary || flag.Status == FeatureRollingOut
	return partial && cd.isInRolloutGroup(user, flag.RolloutPercent)
}
// isInRolloutGroup reports whether user falls into the first percent of the
// 100 deterministic rollout buckets. percent >= 100 includes everyone and
// percent == 0 includes no one.
//
// The bucket is derived from the first four bytes of the address. A single
// byte is not enough: 256 values do not divide evenly into 100 buckets, so
// buckets 0-55 would be hit three times and 56-99 only twice, which turns a
// nominal 5% canary into about 5.9% and 50% into about 58.6%.
func (cd *CanaryDeployment) isInRolloutGroup(user util.Uint160, percent uint8) bool {
	if percent >= 100 {
		return true
	}
	if percent == 0 {
		return false
	}
	// With 2^32 inputs the remaining modulo bias is negligible.
	bucket := binary.BigEndian.Uint32(user[:4]) % 100
	return bucket < uint32(percent)
}
// RecordSuccess counts one successful operation for the named feature, both
// on the feature flag and, if a metrics record exists, in its metrics.
// Unknown features are ignored.
func (cd *CanaryDeployment) RecordSuccess(d *dao.Simple, name string) {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return
	}
	flag.SuccessCount++
	cd.putFeature(d, flag)

	stats := cd.GetMetrics(d, name)
	if stats == nil {
		return
	}
	stats.TotalOperations++
	stats.SuccessfulOps++
	cd.putMetrics(d, stats)
}
// RecordError counts one failed operation for the named feature, both on the
// feature flag and, if a metrics record exists, in its metrics. If the
// feature is currently active and its error rate is above the threshold, it
// is rolled back with RollbackReasonErrorRate. Unknown features are ignored.
func (cd *CanaryDeployment) RecordError(d *dao.Simple, name string, currentBlock uint32) {
	feature := cd.GetFeature(d, name)
	if feature == nil {
		return
	}
	feature.ErrorCount++
	cd.putFeature(d, feature)

	if metrics := cd.GetMetrics(d, name); metrics != nil {
		metrics.TotalOperations++
		metrics.FailedOps++
		cd.putMetrics(d, metrics)
	}

	// A feature that is already off has nothing to roll back. Rolling it
	// back again would turn a disabled flag into a rolled-back one and
	// overwrite the reason recorded by an earlier rollback.
	if feature.Status == FeatureDisabled || feature.Status == FeatureRolledBack {
		return
	}

	// Check if should auto-rollback.
	if cd.shouldRollback(feature) {
		// Rollback only fails when the feature is missing, and it was
		// loaded just above, so the error carries no information here.
		_ = cd.Rollback(d, name, RollbackReasonErrorRate, currentBlock)
	}
}
// shouldRollback reports whether the feature's observed error rate, in
// errors per 10000 operations, is strictly above its MaxErrorRate. With fewer
// than 10 recorded operations there is not enough data and it reports false.
func (cd *CanaryDeployment) shouldRollback(feature *FeatureFlag) bool {
	total := feature.SuccessCount + feature.ErrorCount
	if total < 10 {
		return false
	}
	ratePer10k := feature.ErrorCount * 10000 / total
	return uint32(ratePer10k) > feature.MaxErrorRate
}
// Rollback switches the named feature off, marks it as rolled back for the
// given reason, and records the status change in the history.
//
// It returns ErrFeatureNotFound if the feature does not exist.
func (cd *CanaryDeployment) Rollback(d *dao.Simple, name string, reason RollbackReason, currentBlock uint32) error {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return ErrFeatureNotFound
	}

	flag.Status, flag.RolloutPercent = FeatureRolledBack, 0
	flag.RollbackReason = reason

	cd.putFeature(d, flag)
	cd.recordHistory(d, name, currentBlock, FeatureRolledBack)
	return nil
}
// Enable turns the named feature on for all users (100% rollout) and records
// the status change in the history.
//
// It returns ErrFeatureNotFound if the feature does not exist.
func (cd *CanaryDeployment) Enable(d *dao.Simple, name string, currentBlock uint32) error {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return ErrFeatureNotFound
	}

	flag.Status, flag.RolloutPercent = FeatureEnabled, 100

	cd.putFeature(d, flag)
	cd.recordHistory(d, name, currentBlock, FeatureEnabled)
	return nil
}
// Disable turns the named feature off for all users (0% rollout) and records
// the status change in the history.
//
// It returns ErrFeatureNotFound if the feature does not exist.
func (cd *CanaryDeployment) Disable(d *dao.Simple, name string, currentBlock uint32) error {
	flag := cd.GetFeature(d, name)
	if flag == nil {
		return ErrFeatureNotFound
	}

	flag.Status, flag.RolloutPercent = FeatureDisabled, 0

	cd.putFeature(d, flag)
	cd.recordHistory(d, name, currentBlock, FeatureDisabled)
	return nil
}
// GetMetrics loads the metrics record stored for the named feature. It
// returns nil when no record exists or when the stored bytes cannot be decoded.
func (cd *CanaryDeployment) GetMetrics(d *dao.Simple, name string) *FeatureMetrics {
	if raw := d.GetStorageItem(cd.contractID, cd.makeMetricsKey(name)); raw != nil {
		return cd.deserializeMetrics(raw)
	}
	return nil
}
// GetAllFeatures returns every feature flag stored under the feature prefix.
// Records that cannot be decoded are skipped. The result is nil when there
// are no flags.
func (cd *CanaryDeployment) GetAllFeatures(d *dao.Simple) []*FeatureFlag {
	var all []*FeatureFlag

	// The key is not needed: the flag's name is part of the stored value.
	collect := func(_, raw []byte) bool {
		flag := cd.deserializeFeature(raw)
		if flag != nil {
			all = append(all, flag)
		}
		return true // keep iterating
	}

	d.Seek(cd.contractID, storage.SeekRange{Prefix: []byte{canaryPrefixFeature}}, collect)
	return all
}
// GetActiveCanaries returns the feature flags whose status is FeatureCanary
// or FeatureRollingOut. Records that cannot be decoded are skipped. The
// result is nil when no feature is in either state.
func (cd *CanaryDeployment) GetActiveCanaries(d *dao.Simple) []*FeatureFlag {
	var active []*FeatureFlag

	d.Seek(cd.contractID, storage.SeekRange{Prefix: []byte{canaryPrefixFeature}}, func(_, raw []byte) bool {
		flag := cd.deserializeFeature(raw)
		if flag == nil {
			return true
		}
		switch flag.Status {
		case FeatureCanary, FeatureRollingOut:
			active = append(active, flag)
		}
		return true // keep iterating
	})

	return active
}
// Helper methods.
// makeFeatureKey builds the storage key of a feature flag: the feature
// prefix byte followed by the raw bytes of name.
func (cd *CanaryDeployment) makeFeatureKey(name string) []byte {
	return append([]byte{canaryPrefixFeature}, name...)
}
// makeMetricsKey builds the storage key of a metrics record: the metrics
// prefix byte followed by the raw bytes of name.
func (cd *CanaryDeployment) makeMetricsKey(name string) []byte {
	return append([]byte{canaryPrefixMetrics}, name...)
}
// putFeature serializes feature and stores it under the key derived from
// its name, replacing any previous record.
func (cd *CanaryDeployment) putFeature(d *dao.Simple, feature *FeatureFlag) {
	d.PutStorageItem(cd.contractID, cd.makeFeatureKey(feature.Name), cd.serializeFeature(feature))
}
// initMetrics stores a metrics record for name with all counters at zero.
func (cd *CanaryDeployment) initMetrics(d *dao.Simple, name string) {
	fresh := new(FeatureMetrics)
	fresh.FeatureName = name
	cd.putMetrics(d, fresh)
}
// putMetrics serializes metrics and stores it under the key derived from
// its feature name, replacing any previous record.
func (cd *CanaryDeployment) putMetrics(d *dao.Simple, metrics *FeatureMetrics) {
	d.PutStorageItem(cd.contractID, cd.makeMetricsKey(metrics.FeatureName), cd.serializeMetrics(metrics))
}
// recordHistory stores a status change for the named feature. The key is the
// history prefix, the feature name and the big-endian block height; the
// value is the single status byte. A second change for the same feature in
// the same block overwrites the first.
func (cd *CanaryDeployment) recordHistory(d *dao.Simple, name string, blockHeight uint32, status FeatureStatus) {
	var height [4]byte
	binary.BigEndian.PutUint32(height[:], blockHeight)

	key := make([]byte, 0, 1+len(name)+len(height))
	key = append(key, canaryPrefixHistory)
	key = append(key, name...)
	key = append(key, height[:]...)

	d.PutStorageItem(cd.contractID, key, []byte{byte(status)})
}
// Serialization helpers.
// serializeFeature encodes f into its storage representation. All integers
// are big-endian. Layout:
//
//	uint32 name length, name bytes
//	uint32 description length, description bytes
//	Status, RolloutPercent, TargetPercent, IncrementPercent (1 byte each)
//	IncrementBlocks, StartBlock, LastIncrementBlock (uint32 each)
//	ErrorCount, SuccessCount (uint64 each)
//	MaxErrorRate (uint32), MinSuccessCount (uint64)
//	EnabledBy (20 bytes, big-endian), RollbackReason (1 byte)
//
// NOTE(review): the fixed-width area is allocated as 70 bytes although the
// fields above occupy only 65, so every record ends in 5 zero bytes. The
// size is kept as is because it determines the bytes written to storage.
func (cd *CanaryDeployment) serializeFeature(f *FeatureFlag) []byte {
	const fixedArea = 70

	name, desc := []byte(f.Name), []byte(f.Description)
	out := make([]byte, 4+len(name)+4+len(desc)+fixedArea)

	// w is a write cursor: the not-yet-written tail of out.
	w := out

	binary.BigEndian.PutUint32(w, uint32(len(name)))
	w = w[4:]
	copy(w, name)
	w = w[len(name):]

	binary.BigEndian.PutUint32(w, uint32(len(desc)))
	w = w[4:]
	copy(w, desc)
	w = w[len(desc):]

	w[0], w[1], w[2], w[3] = byte(f.Status), f.RolloutPercent, f.TargetPercent, f.IncrementPercent
	w = w[4:]

	binary.BigEndian.PutUint32(w[0:], f.IncrementBlocks)
	binary.BigEndian.PutUint32(w[4:], f.StartBlock)
	binary.BigEndian.PutUint32(w[8:], f.LastIncrementBlock)
	binary.BigEndian.PutUint64(w[12:], f.ErrorCount)
	binary.BigEndian.PutUint64(w[20:], f.SuccessCount)
	binary.BigEndian.PutUint32(w[28:], f.MaxErrorRate)
	binary.BigEndian.PutUint64(w[32:], f.MinSuccessCount)
	w = w[40:]

	copy(w, f.EnabledBy.BytesBE())
	w = w[20:]

	w[0] = byte(f.RollbackReason)
	return out
}
// deserializeFeature decodes a feature flag from its storage representation
// (see serializeFeature for the layout). It returns nil if data is truncated
// or otherwise malformed; it never panics on short input. Bytes after the
// last field are ignored.
func (cd *CanaryDeployment) deserializeFeature(data []byte) *FeatureFlag {
	// Size of the fixed-width fields that follow the two strings:
	// 4x1 (status and percentages) + 3x4 (block fields) + 2x8 (counters)
	// + 4 (MaxErrorRate) + 8 (MinSuccessCount) + 20 (EnabledBy) + 1 (reason).
	const fixedSize = 65

	if len(data) < 8 {
		return nil
	}
	f := &FeatureFlag{}
	offset := 0

	// Length checks compare against the bytes remaining rather than adding
	// the untrusted length to offset, so they cannot overflow int.
	nameLen := binary.BigEndian.Uint32(data[offset:])
	offset += 4
	if uint64(nameLen) > uint64(len(data)-offset) {
		return nil
	}
	f.Name = string(data[offset : offset+int(nameLen)])
	offset += int(nameLen)

	if len(data)-offset < 4 {
		return nil
	}
	descLen := binary.BigEndian.Uint32(data[offset:])
	offset += 4
	if uint64(descLen) > uint64(len(data)-offset) {
		return nil
	}
	f.Description = string(data[offset : offset+int(descLen)])
	offset += int(descLen)

	// Everything below reads exactly fixedSize bytes.
	if len(data)-offset < fixedSize {
		return nil
	}
	f.Status = FeatureStatus(data[offset])
	offset++
	f.RolloutPercent = data[offset]
	offset++
	f.TargetPercent = data[offset]
	offset++
	f.IncrementPercent = data[offset]
	offset++
	f.IncrementBlocks = binary.BigEndian.Uint32(data[offset:])
	offset += 4
	f.StartBlock = binary.BigEndian.Uint32(data[offset:])
	offset += 4
	f.LastIncrementBlock = binary.BigEndian.Uint32(data[offset:])
	offset += 4
	f.ErrorCount = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	f.SuccessCount = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	f.MaxErrorRate = binary.BigEndian.Uint32(data[offset:])
	offset += 4
	f.MinSuccessCount = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	enabledBy, err := util.Uint160DecodeBytesBE(data[offset : offset+20])
	if err != nil {
		return nil
	}
	f.EnabledBy = enabledBy
	offset += 20
	f.RollbackReason = RollbackReason(data[offset])
	return f
}
// serializeMetrics encodes m into its storage representation: a big-endian
// uint32 name length, the name bytes, then the seven uint64 counters in
// struct order, each big-endian.
//
// InvariantFailures is written last, after the six counters that
// deserializeMetrics has always required, so a reader that only knows those
// six still decodes the record and simply ignores the trailing field.
func (cd *CanaryDeployment) serializeMetrics(m *FeatureMetrics) []byte {
	const counterBytes = 7 * 8

	nameBytes := []byte(m.FeatureName)
	data := make([]byte, 4+len(nameBytes)+counterBytes)
	offset := 0

	binary.BigEndian.PutUint32(data[offset:], uint32(len(nameBytes)))
	offset += 4
	copy(data[offset:], nameBytes)
	offset += len(nameBytes)

	counters := [...]uint64{
		m.TotalOperations,
		m.SuccessfulOps,
		m.FailedOps,
		m.AverageLatency,
		m.PeakLatency,
		m.InvariantChecks,
		m.InvariantFailures,
	}
	for _, v := range counters {
		binary.BigEndian.PutUint64(data[offset:], v)
		offset += 8
	}
	return data
}
// deserializeMetrics decodes a metrics record: a big-endian uint32 name
// length, the name bytes, then big-endian uint64 counters in struct order.
// It returns nil if data is truncated or malformed.
//
// The first six counters are mandatory. InvariantFailures is read only when
// eight more bytes follow them; records that end after InvariantChecks are
// still accepted and leave InvariantFailures at zero.
func (cd *CanaryDeployment) deserializeMetrics(data []byte) *FeatureMetrics {
	const requiredCounterBytes = 6 * 8

	if len(data) < 8 {
		return nil
	}
	m := &FeatureMetrics{}
	offset := 0

	// Compare against the bytes remaining rather than adding the untrusted
	// length to offset, so the check cannot overflow int.
	nameLen := binary.BigEndian.Uint32(data[offset:])
	offset += 4
	if uint64(nameLen) > uint64(len(data)-offset) {
		return nil
	}
	m.FeatureName = string(data[offset : offset+int(nameLen)])
	offset += int(nameLen)

	if len(data)-offset < requiredCounterBytes {
		return nil
	}
	m.TotalOperations = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	m.SuccessfulOps = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	m.FailedOps = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	m.AverageLatency = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	m.PeakLatency = binary.BigEndian.Uint64(data[offset:])
	offset += 8
	m.InvariantChecks = binary.BigEndian.Uint64(data[offset:])
	offset += 8

	// Optional trailing field.
	if len(data)-offset >= 8 {
		m.InvariantFailures = binary.BigEndian.Uint64(data[offset:])
	}
	return m
}
// StandardFeatureFlags defines common feature flag names. Each field holds
// the string to pass as the name argument of the CanaryDeployment methods,
// so callers can refer to a flag without repeating the literal.
// NOTE(review): this is a mutable package-level variable; the names are
// presumably meant to be treated as constants — confirm nothing reassigns them.
var StandardFeatureFlags = struct {
	VitaRecoveryV2 string
	TributeGamingDetect string
	CrossChainProofs string
	CommitRevealInvest string
	EnhancedAuditLogging string
	CircuitBreakerAuto string
}{
	VitaRecoveryV2: "vita_recovery_v2",
	TributeGamingDetect: "tribute_gaming_detect",
	CrossChainProofs: "cross_chain_proofs",
	CommitRevealInvest: "commit_reveal_invest",
	EnhancedAuditLogging: "enhanced_audit_logging",
	CircuitBreakerAuto: "circuit_breaker_auto",
}