Skip to content

Commit a7ee2b2

Browse files
authored
Merge pull request #7116 from comtalyst/comtalyst/azure-managed-instance-cache-master
refactor: upstream Azure managed instance cache refactor
2 parents cc6ddad + 98257a8 commit a7ee2b2

7 files changed

+1112
-327
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
Copyright 2022 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package azure
18+
19+
import (
20+
"context"
21+
"strings"
22+
23+
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
24+
"github.com/Azure/go-autorest/autorest/azure"
25+
26+
"k8s.io/klog/v2"
27+
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
28+
)
29+
30+
// When Azure Dedicated Host is enabled or using isolated vm skus, force deleting a VMSS fails with the following error:
31+
//
32+
// "predominantErrorDetail": {
33+
// "innererror": {
34+
// "internalErrorCode": "OperationNotAllowedOnResourceThatManagesUpdatesWithMaintenanceControl"
35+
// },
36+
// "code": "OperationNotAllowed",
37+
// "message": "Operation 'ForceDelete' is not allowed on resource 'aks-newnp-11436513-vmss' since it manages updates using maintenance control."
38+
// },
39+
//
40+
// A programmatically way to determine if a VM size is isolated or not has not been found. The isolated VM documentation:
41+
// https://docs.microsoft.com/en-us/azure/virtual-machines/isolation
42+
// has the current list of isolated VM sizes, but new isolated VM size could be introduced in the future.
43+
//
44+
// As a result of not being able to find out if a VM size is isolated or not, we'll do the following:
45+
// - if scaleSet has isolated vm size or dedicated host, disable forDelete
46+
// - else use forceDelete
47+
// - if new isolated sku were added or dedicatedHost was not updated properly, this forceDelete call will fail with above error.
48+
// In that case, call normal delete (fall-back)
49+
50+
var isolatedVMSizes = map[string]bool{
51+
strings.ToLower("Standard_E80ids_v4"): true,
52+
strings.ToLower("Standard_E80is_v4"): true,
53+
strings.ToLower("Standard_E104i_v5"): true,
54+
strings.ToLower("Standard_E104is_v5"): true,
55+
strings.ToLower("Standard_E104id_v5"): true,
56+
strings.ToLower("Standard_E104ids_v5"): true,
57+
strings.ToLower("Standard_M192is_v2"): true,
58+
strings.ToLower("Standard_M192ims_v2"): true,
59+
strings.ToLower("Standard_M192ids_v2"): true,
60+
strings.ToLower("Standard_M192idms_v2"): true,
61+
strings.ToLower("Standard_F72s_v2"): true,
62+
strings.ToLower("Standard_M128ms"): true,
63+
}
64+
65+
func (scaleSet *ScaleSet) deleteInstances(ctx context.Context, requiredIds *compute.VirtualMachineScaleSetVMInstanceRequiredIDs, commonAsgId string) (*azure.Future, *retry.Error) {
66+
scaleSet.instanceMutex.Lock()
67+
defer scaleSet.instanceMutex.Unlock()
68+
69+
skuName := scaleSet.getSKU()
70+
resourceGroup := scaleSet.manager.config.ResourceGroup
71+
forceDelete := shouldForceDelete(skuName, scaleSet)
72+
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsgId, *requiredIds, forceDelete)
73+
if forceDelete && isOperationNotAllowed(rerr) {
74+
klog.Infof("falling back to normal delete for instances %v for %s", requiredIds.InstanceIds, scaleSet.Name)
75+
return scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsgId, *requiredIds, false)
76+
}
77+
return future, rerr
78+
}
79+
80+
func shouldForceDelete(skuName string, scaleSet *ScaleSet) bool {
81+
return scaleSet.enableForceDelete && !isolatedVMSizes[strings.ToLower(skuName)] && !scaleSet.dedicatedHost
82+
}
83+
84+
func isOperationNotAllowed(rerr *retry.Error) bool {
85+
return rerr != nil && rerr.ServiceErrorCode() == retry.OperationNotAllowed
86+
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
Copyright 2022 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package azure
18+
19+
import (
20+
"net/http"
21+
"testing"
22+
23+
"github.com/Azure/go-autorest/autorest/azure"
24+
"github.com/stretchr/testify/assert"
25+
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
26+
)
27+
28+
func TestShouldForceDelete(t *testing.T) {
29+
skuName := "test-vmssSku"
30+
31+
t.Run("should return true", func(t *testing.T) {
32+
scaleSet := &ScaleSet{}
33+
scaleSet.enableForceDelete = true
34+
assert.Equal(t, shouldForceDelete(skuName, scaleSet), true)
35+
})
36+
37+
t.Run("should return false because of dedicated hosts", func(t *testing.T) {
38+
scaleSet := &ScaleSet{}
39+
scaleSet.enableForceDelete = true
40+
scaleSet.dedicatedHost = true
41+
assert.Equal(t, shouldForceDelete(skuName, scaleSet), false)
42+
})
43+
44+
t.Run("should return false because of isolated sku", func(t *testing.T) {
45+
scaleSet := &ScaleSet{}
46+
scaleSet.enableForceDelete = true
47+
skuName = "Standard_F72s_v2" // belongs to the map isolatedVMSizes
48+
assert.Equal(t, shouldForceDelete(skuName, scaleSet), false)
49+
})
50+
51+
}
52+
53+
func TestIsOperationNotAllowed(t *testing.T) {
54+
t.Run("should return false because it's not OperationNotAllowed error", func(t *testing.T) {
55+
error := &retry.Error{
56+
HTTPStatusCode: http.StatusBadRequest,
57+
}
58+
assert.Equal(t, isOperationNotAllowed(error), false)
59+
})
60+
61+
t.Run("should return false because error is nil", func(t *testing.T) {
62+
assert.Equal(t, isOperationNotAllowed(nil), false)
63+
})
64+
65+
t.Run("should return true if error is OperationNotAllowed", func(t *testing.T) {
66+
sre := &azure.ServiceError{
67+
Code: retry.OperationNotAllowed,
68+
Message: "error-message",
69+
}
70+
error := &retry.Error{
71+
RawError: sre,
72+
}
73+
assert.Equal(t, isOperationNotAllowed(error), false)
74+
})
75+
76+
// It is difficult to condition the case where return error matched expected error string for forceDelete and the
77+
// function should return true.
78+
79+
}

cluster-autoscaler/cloudprovider/azure/azure_manager_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,8 +703,8 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
703703
enableForceDelete: manager.config.EnableForceDelete,
704704
curSize: 3,
705705
sizeRefreshPeriod: manager.azureCache.refreshInterval,
706-
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
707-
getVmssSizeRefreshPeriod: time.Duration(VmssSizeRefreshPeriodDefault) * time.Second,
706+
getVmssSizeRefreshPeriod: time.Duration(manager.azureCache.refreshInterval) * time.Second,
707+
InstanceCache: InstanceCache{instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod},
708708
}}
709709
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
710710
}
@@ -751,8 +751,8 @@ func TestGetFilteredAutoscalingGroupsVmssWithConfiguredSizes(t *testing.T) {
751751
enableForceDelete: manager.config.EnableForceDelete,
752752
curSize: 3,
753753
sizeRefreshPeriod: manager.azureCache.refreshInterval,
754-
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
755-
getVmssSizeRefreshPeriod: time.Duration(VmssSizeRefreshPeriodDefault) * time.Second,
754+
getVmssSizeRefreshPeriod: time.Duration(manager.azureCache.refreshInterval) * time.Second,
755+
InstanceCache: InstanceCache{instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod},
756756
}}
757757
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
758758
}

0 commit comments

Comments
 (0)