diff --git a/.golangci.yaml b/.golangci.yaml index 0df22aa411..5a9983982c 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -93,7 +93,7 @@ linters: - path-except: bundle/direct/dresources linters: - exhaustruct - - path: bundle/direct/dresources/all_test.go + - path: bundle/direct/dresources/.*_test.go linters: - exhaustruct - path-except: ^cmd diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 491321a48c..adad66afd3 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,6 +8,7 @@ * Skip non-exportable objects (e.g., `MLFLOW_EXPERIMENT`) during `workspace export-dir` instead of failing ([#4081](https://github.com/databricks/cli/issues/4081)) ### Bundles +* Fix app deployment failure when app is in `DELETING` state ([#4176](https://github.com/databricks/cli/pull/4176)) ### Dependency updates diff --git a/acceptance/bundle/resources/apps/create_already_exists/app/app.py b/acceptance/bundle/resources/apps/create_already_exists/app/app.py new file mode 100644 index 0000000000..f1a18139c8 --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/app/app.py @@ -0,0 +1 @@ +print("Hello world!") diff --git a/acceptance/bundle/resources/apps/create_already_exists/databricks.yml b/acceptance/bundle/resources/apps/create_already_exists/databricks.yml new file mode 100644 index 0000000000..5e9a37ef20 --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: test-app-already-exists + +resources: + apps: + myapp: + name: test-app-already-exists + source_code_path: ./app diff --git a/acceptance/bundle/resources/apps/create_already_exists/out.test.toml b/acceptance/bundle/resources/apps/create_already_exists/out.test.toml new file mode 100644 index 0000000000..54146af564 --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/apps/create_already_exists/output.txt b/acceptance/bundle/resources/apps/create_already_exists/output.txt new file mode 100644 index 0000000000..de5ccd70ac --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/output.txt @@ -0,0 +1,32 @@ + +>>> [CLI] apps create test-app-already-exists +{ + "app_status": { + "message":"Application is running.", + "state":"RUNNING" + }, + "compute_status": { + "message":"App compute is active.", + "state":"ACTIVE" + }, + "id":"1000", + "name":"test-app-already-exists", + "url":"test-app-already-exists-123.cloud.databricksapps.com" +} + +>>> musterr [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-app-already-exists/default/files... +Deploying resources... +Error: cannot create resources.apps.myapp: An app with the same name already exists: test-app-already-exists (409 RESOURCE_ALREADY_EXISTS) + +Endpoint: POST [DATABRICKS_URL]/api/2.0/apps?no_compute=true +HTTP Status: 409 Conflict +API error_code: RESOURCE_ALREADY_EXISTS +API message: An app with the same name already exists: test-app-already-exists + +Updating deployment state... + +>>> [CLI] apps delete test-app-already-exists +{ + "name":"" +} diff --git a/acceptance/bundle/resources/apps/create_already_exists/script b/acceptance/bundle/resources/apps/create_already_exists/script new file mode 100644 index 0000000000..df26cb8895 --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/script @@ -0,0 +1,8 @@ +# Create an app with the same name outside of bundle +trace $CLI apps create test-app-already-exists + +# Bundle deploy should fail because app already exists and is not in DELETING state +trace musterr $CLI bundle deploy + +# Cleanup: delete the app we created +trace $CLI apps delete test-app-already-exists diff --git a/acceptance/bundle/resources/apps/create_already_exists/test.toml b/acceptance/bundle/resources/apps/create_already_exists/test.toml new file mode 100644 index 0000000000..613ff598f6 --- /dev/null +++ b/acceptance/bundle/resources/apps/create_already_exists/test.toml @@ -0,0 +1,6 @@ +Local = true +Cloud = false +RecordRequests = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index e2ffdb9496..2de5af879c 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -2,6 +2,7 @@ package dresources import ( "context" + "errors" "fmt" "time" @@ -36,12 +37,36 @@ func (r *ResourceApp) DoCreate(ctx context.Context, config *apps.App) (string, * NoCompute: true, ForceSendFields: nil, } - waiter, err := r.client.Apps.Create(ctx, request) + + retrier := retries.New[apps.App](retries.WithTimeout(15*time.Minute), retries.WithRetryFunc(shouldRetry)) + app, err := retrier.Run(ctx, func(ctx context.Context) (*apps.App, error) { + waiter, err := r.client.Apps.Create(ctx, request) + if err != nil { + if errors.Is(err, apierr.ErrResourceAlreadyExists) { + // Check if the app is in DELETING state - only then should we retry + existingApp, getErr := r.client.Apps.GetByName(ctx, config.Name) + if getErr != nil { + // If we can't get the app (e.g., it was just deleted), retry the create + if apierr.IsMissing(getErr) { + return nil, retries.Continues("app was deleted, retrying create") + } + return nil, retries.Halt(err) + } + if existingApp.ComputeStatus != nil && existingApp.ComputeStatus.State == apps.ComputeStateDeleting { + return nil, retries.Continues("app is deleting, retrying create") + } + // App exists and is not being deleted - this is a hard error + return nil, retries.Halt(err) + } + return nil, retries.Halt(err) + } + return waiter.Response, nil + }) if err != nil { return "", nil, err } - return waiter.Response.Name, nil, nil + return app.Name, nil, nil } func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *apps.App, _ *Changes) (*apps.App, error) { @@ -63,10 +88,7 @@ func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *apps.App, func (r *ResourceApp) DoDelete(ctx context.Context, id string) error { _, err := r.client.Apps.DeleteByName(ctx, id) - if err != nil { - return err - } - return r.waitForDeletion(ctx, id) + return err } func (*ResourceApp) FieldTriggers(_ bool) map[string]deployplan.ActionType { @@ -79,34 +101,6 @@ func (r *ResourceApp) WaitAfterCreate(ctx context.Context, config *apps.App) (*a return r.waitForApp(ctx, r.client, config.Name) } -func (r *ResourceApp) waitForDeletion(ctx context.Context, name string) error { - retrier := retries.New[struct{}](retries.WithTimeout(10*time.Minute), retries.WithRetryFunc(shouldRetry)) - _, err := retrier.Run(ctx, func(ctx context.Context) (*struct{}, error) { - app, err := r.client.Apps.GetByName(ctx, name) - if err != nil { - if apierr.IsMissing(err) { - return nil, nil - } - return nil, retries.Halt(err) - } - - if app.ComputeStatus == nil { - return nil, retries.Continues("waiting for compute status") - } - - switch app.ComputeStatus.State { - case apps.ComputeStateDeleting: - return nil, retries.Continues("app is deleting") - case apps.ComputeStateActive, apps.ComputeStateStopped, apps.ComputeStateError: - err := fmt.Errorf("app %s was not deleted, current state: %s", name, app.ComputeStatus.State) - return nil, retries.Halt(err) - default: - return nil, retries.Continues(fmt.Sprintf("app is in %s state", app.ComputeStatus.State)) - } - }) - return err -} - // waitForApp waits for the app to reach the target state. The target state is either ACTIVE or STOPPED. // Apps with no_compute set to true will reach the STOPPED state, otherwise they will reach the ACTIVE state. // We can't use the default waiter from SDK because it only waits on ACTIVE state but we need also STOPPED state. diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go new file mode 100644 index 0000000000..a844ee41e1 --- /dev/null +++ b/bundle/direct/dresources/app_test.go @@ -0,0 +1,123 @@ +package dresources + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/testserver" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestAppDoCreate_RetriesWhenAppIsDeleting verifies that DoCreate retries when +// an app already exists but is in DELETING state. +func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { + server := testserver.New(t) + + createCallCount := 0 + getCallCount := 0 + + server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { + createCallCount++ + if createCallCount == 1 { + return testserver.Response{ + StatusCode: 409, + Body: map[string]string{ + "error_code": "RESOURCE_ALREADY_EXISTS", + "message": "An app with the same name already exists.", + }, + } + } + return apps.App{ + Name: "test-app", + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + } + }) + + server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { + getCallCount++ + return apps.App{ + Name: req.Vars["name"], + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateDeleting, + }, + } + }) + + testserver.AddDefaultHandlers(server) + + client, err := databricks.NewWorkspaceClient(&databricks.Config{ + Host: server.URL, + Token: "testtoken", + }) + require.NoError(t, err) + + r := (&ResourceApp{}).New(client) + ctx := context.Background() + name, _, err := r.DoCreate(ctx, &apps.App{Name: "test-app"}) + + require.NoError(t, err) + assert.Equal(t, "test-app", name) + assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)") + assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") +} + +// TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries +// when the app was just deleted between the create call and the get call. +func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { + server := testserver.New(t) + + createCallCount := 0 + getCallCount := 0 + + server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { + createCallCount++ + if createCallCount == 1 { + return testserver.Response{ + StatusCode: 409, + Body: map[string]string{ + "error_code": "RESOURCE_ALREADY_EXISTS", + "message": "An app with the same name already exists.", + }, + } + } + return apps.App{ + Name: "test-app", + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + } + }) + + server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { + getCallCount++ + return testserver.Response{ + StatusCode: 404, + Body: map[string]string{ + "error_code": "RESOURCE_DOES_NOT_EXIST", + "message": "App not found.", + }, + } + }) + + testserver.AddDefaultHandlers(server) + + client, err := databricks.NewWorkspaceClient(&databricks.Config{ + Host: server.URL, + Token: "testtoken", + }) + require.NoError(t, err) + + r := (&ResourceApp{}).New(client) + ctx := context.Background() + name, _, err := r.DoCreate(ctx, &apps.App{Name: "test-app"}) + + require.NoError(t, err) + assert.Equal(t, "test-app", name) + assert.Equal(t, 2, createCallCount, "expected Create to be called twice") + assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") +} diff --git a/libs/testserver/apps.go b/libs/testserver/apps.go index 8b8dd0bf7c..1f7a39dd8d 100644 --- a/libs/testserver/apps.go +++ b/libs/testserver/apps.go @@ -36,6 +36,16 @@ func (s *FakeWorkspace) AppsUpsert(req Request, name string) Response { Body: "name is required", } } + // Check if app already exists on create + if _, exists := s.Apps[name]; exists { + return Response{ + StatusCode: 409, + Body: map[string]string{ + "error_code": "RESOURCE_ALREADY_EXISTS", + "message": "An app with the same name already exists: " + name, + }, + } + } } app.AppStatus = &apps.ApplicationStatus{