From 3a3b6f54daaf75343a2f35920786e2430ce3eca6 Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Mon, 15 Dec 2025 20:35:03 +0530 Subject: [PATCH 1/5] Add KillCaller feature to test server for CLI crash recovery testing --- acceptance/internal/config.go | 5 ++ acceptance/internal/prepare_server.go | 38 +++++++++ .../kill_caller/mid_request/out.test.toml | 5 ++ .../kill_caller/mid_request/output.txt | 5 ++ .../selftest/kill_caller/mid_request/script | 1 + .../kill_caller/mid_request/test.toml | 10 +++ acceptance/selftest/kill_caller/out.test.toml | 5 ++ acceptance/selftest/kill_caller/output.txt | 5 ++ acceptance/selftest/kill_caller/script | 1 + acceptance/selftest/kill_caller/test.toml | 11 +++ cmd/pipelines/root/root.go | 2 + cmd/root/root.go | 2 + libs/testserver/server.go | 82 ++++++++++++++++++- 13 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 acceptance/selftest/kill_caller/mid_request/out.test.toml create mode 100644 acceptance/selftest/kill_caller/mid_request/output.txt create mode 100644 acceptance/selftest/kill_caller/mid_request/script create mode 100644 acceptance/selftest/kill_caller/mid_request/test.toml create mode 100644 acceptance/selftest/kill_caller/out.test.toml create mode 100644 acceptance/selftest/kill_caller/output.txt create mode 100644 acceptance/selftest/kill_caller/script create mode 100644 acceptance/selftest/kill_caller/test.toml diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 2129fa5573..ed5d1cd03c 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -156,6 +156,11 @@ type ServerStub struct { // Configure as "1ms", "2s", "3m", etc. // See [time.ParseDuration] for details. Delay time.Duration + + // If set, send this signal to the caller process instead of returning a response. + // Use signal numbers: 9 for SIGKILL, 15 for SIGTERM, etc. + // Requires DATABRICKS_CLI_TEST_PID=1 to be set. + KillCaller int } // FindConfigs finds all the config relevant for this test, diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index a401a1cac8..17bc29e118 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -10,6 +10,7 @@ import ( "slices" "strings" "sync" + "syscall" "testing" "time" "unicode/utf8" @@ -209,6 +210,43 @@ func startLocalServer(t *testing.T, } } + if stub.KillCaller > 0 { + pid := testserver.ExtractPidFromHeaders(req.Headers) + if pid == 0 { + t.Errorf("KillCaller configured but test-pid not found in User-Agent") + return testserver.Response{ + StatusCode: http.StatusBadRequest, + Body: "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)", + } + } + + signal := syscall.Signal(stub.KillCaller) + t.Logf("KillCaller: sending signal %d to PID %d (pattern: %s)", signal, pid, stub.Pattern) + + process, err := os.FindProcess(pid) + if err != nil { + t.Errorf("Failed to find process %d: %s", pid, err) + return testserver.Response{ + StatusCode: http.StatusInternalServerError, + Body: fmt.Sprintf("failed to find process: %s", err), + } + } + + if err := process.Signal(signal); err != nil { + t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + return testserver.Response{ + StatusCode: http.StatusInternalServerError, + Body: fmt.Sprintf("failed to send signal: %s", err), + } + } + + // Return a response (the CLI will likely be killed before it receives this) + return testserver.Response{ + StatusCode: http.StatusOK, + Body: fmt.Sprintf("sent signal %d to PID %d", signal, pid), + } + } + return stub.Response }) } diff --git a/acceptance/selftest/kill_caller/mid_request/out.test.toml b/acceptance/selftest/kill_caller/mid_request/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/kill_caller/mid_request/output.txt b/acceptance/selftest/kill_caller/mid_request/output.txt new file mode 100644 index 0000000000..8ae1495a64 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/output.txt @@ -0,0 +1,5 @@ + +>>> errcode [CLI] workspace list / +script: line [N]: [PID] Killed "$@" + +Exit code: 137 diff --git a/acceptance/selftest/kill_caller/mid_request/script b/acceptance/selftest/kill_caller/mid_request/script new file mode 100644 index 0000000000..817a90cc76 --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/script @@ -0,0 +1 @@ +trace errcode $CLI workspace list / diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml new file mode 100644 index 0000000000..7a4894b38d --- /dev/null +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -0,0 +1,10 @@ +Local = true +Env.DATABRICKS_CLI_TEST_PID = "1" + +[[Server]] +Pattern = "GET /api/2.0/workspace/list" +KillCaller = 9 + +[[Repls]] +Old = 'script: line \d+: \d+ Killed' +New = 'script: line [N]: [PID] Killed' diff --git a/acceptance/selftest/kill_caller/out.test.toml b/acceptance/selftest/kill_caller/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/kill_caller/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/kill_caller/output.txt b/acceptance/selftest/kill_caller/output.txt new file mode 100644 index 0000000000..6bbd632062 --- /dev/null +++ b/acceptance/selftest/kill_caller/output.txt @@ -0,0 +1,5 @@ + +>>> errcode [CLI] current-user me +script: line [N]: [PID] Killed "$@" + +Exit code: 137 diff --git a/acceptance/selftest/kill_caller/script b/acceptance/selftest/kill_caller/script new file mode 100644 index 0000000000..b6e8c070f6 --- /dev/null +++ b/acceptance/selftest/kill_caller/script @@ -0,0 +1 @@ +trace errcode $CLI current-user me diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml new file mode 100644 index 0000000000..f7d2b34354 --- /dev/null +++ b/acceptance/selftest/kill_caller/test.toml @@ -0,0 +1,11 @@ +Local = true +Env.DATABRICKS_CLI_TEST_PID = "1" + +# Kill the CLI when it calls /Me endpoint +[[Server]] +Pattern = "GET /api/2.0/preview/scim/v2/Me" +KillCaller = 9 + +[[Repls]] +Old = 'script: line \d+: \d+ Killed' +New = 'script: line [N]: [PID] Killed' diff --git a/cmd/pipelines/root/root.go b/cmd/pipelines/root/root.go index 72d0b52664..1eb874067c 100644 --- a/cmd/pipelines/root/root.go +++ b/cmd/pipelines/root/root.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -72,6 +73,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) + ctx = testserver.InjectPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/cmd/root/root.go b/cmd/root/root.go index 96fde20846..20bf96691d 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -79,6 +80,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) + ctx = testserver.InjectPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/libs/testserver/server.go b/libs/testserver/server.go index 8b8e346e99..99ec2b56e6 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -9,15 +9,47 @@ import ( "net/http" "net/http/httptest" "net/url" + "os" "reflect" + "regexp" + "strconv" "strings" "sync" + "syscall" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/useragent" "github.com/gorilla/mux" +) - "github.com/databricks/cli/internal/testutil" +const ( + TestPidEnvVar = "DATABRICKS_CLI_TEST_PID" + testPidKey = "test-pid" ) +var testPidRegex = regexp.MustCompile(testPidKey + `/(\d+)`) + +func InjectPidToUserAgent(ctx context.Context) context.Context { + if env.Get(ctx, TestPidEnvVar) != "1" { + return ctx + } + return useragent.InContext(ctx, testPidKey, strconv.Itoa(os.Getpid())) +} + +func ExtractPidFromHeaders(headers http.Header) int { + ua := headers.Get("User-Agent") + matches := testPidRegex.FindStringSubmatch(ua) + if len(matches) < 2 { + return 0 + } + pid, err := strconv.Atoi(matches[1]) + if err != nil { + return 0 + } + return pid +} + type Server struct { *httptest.Server Router *mux.Router @@ -274,6 +306,9 @@ func (s *Server) Handle(method, path string, handler HandlerFunc) { StatusCode: 500, Body: []byte("INJECTED"), } + } else if bytes.Contains(request.Body, []byte("KILL_CALLER")) { + s.handleKillCaller(&request, w) + return } else { respAny := handler(request) if respAny == nil && request.Context.Err() != nil { @@ -322,3 +357,48 @@ func isNil(i any) bool { return false } } + +func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { + pid := ExtractPidFromHeaders(request.Headers) + if pid == 0 { + s.t.Errorf("KILL_CALLER requested but test-pid not found in User-Agent") + w.WriteHeader(http.StatusBadRequest) + _, _ = fmt.Fprint(w, "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)") + return + } + + signal := syscall.SIGKILL + bodyStr := string(request.Body) + if idx := strings.Index(bodyStr, "KILL_CALLER:"); idx != -1 { + signalPart := bodyStr[idx+len("KILL_CALLER:"):] + endIdx := 0 + for endIdx < len(signalPart) && signalPart[endIdx] >= '0' && signalPart[endIdx] <= '9' { + endIdx++ + } + if endIdx > 0 { + if sigNum, err := strconv.Atoi(signalPart[:endIdx]); err == nil { + signal = syscall.Signal(sigNum) + } + } + } + + s.t.Logf("KILL_CALLER: sending signal %d to PID %d", signal, pid) + + process, err := os.FindProcess(pid) + if err != nil { + s.t.Errorf("Failed to find process %d: %s", pid, err) + w.WriteHeader(http.StatusInternalServerError) + _, _ = fmt.Fprintf(w, "failed to find process: %s", err) + return + } + + if err := process.Signal(signal); err != nil { + s.t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + w.WriteHeader(http.StatusInternalServerError) + _, _ = fmt.Fprintf(w, "failed to send signal: %s", err) + return + } + + w.WriteHeader(http.StatusOK) + _, _ = fmt.Fprintf(w, "sent signal %d to PID %d", signal, pid) +} From 617d1d3dcfa1f2edac5c4af9040017450f19ae1e Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Tue, 16 Dec 2025 21:27:04 +0530 Subject: [PATCH 2/5] Removed other signals and now only killing is supported --- acceptance/internal/config.go | 7 ++--- acceptance/internal/prepare_server.go | 16 +++++------ .../kill_caller/mid_request/output.txt | 4 +-- .../kill_caller/mid_request/test.toml | 19 +++++++++++-- acceptance/selftest/kill_caller/output.txt | 4 +-- acceptance/selftest/kill_caller/test.toml | 19 +++++++++++-- libs/testserver/server.go | 28 +++++-------------- 7 files changed, 54 insertions(+), 43 deletions(-) diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index ed5d1cd03c..3b5a3aadc2 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -157,10 +157,9 @@ type ServerStub struct { // See [time.ParseDuration] for details. Delay time.Duration - // If set, send this signal to the caller process instead of returning a response. - // Use signal numbers: 9 for SIGKILL, 15 for SIGTERM, etc. - // Requires DATABRICKS_CLI_TEST_PID=1 to be set. - KillCaller int + // If true, kill the caller process instead of returning a response. + // Requires DATABRICKS_CLI_TEST_PID=1 to be set in the test environment. + KillCaller bool } // FindConfigs finds all the config relevant for this test, diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index 17bc29e118..200532e7e4 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -10,7 +10,6 @@ import ( "slices" "strings" "sync" - "syscall" "testing" "time" "unicode/utf8" @@ -210,7 +209,7 @@ func startLocalServer(t *testing.T, } } - if stub.KillCaller > 0 { + if stub.KillCaller { pid := testserver.ExtractPidFromHeaders(req.Headers) if pid == 0 { t.Errorf("KillCaller configured but test-pid not found in User-Agent") @@ -220,8 +219,7 @@ func startLocalServer(t *testing.T, } } - signal := syscall.Signal(stub.KillCaller) - t.Logf("KillCaller: sending signal %d to PID %d (pattern: %s)", signal, pid, stub.Pattern) + t.Logf("KillCaller: killing PID %d (pattern: %s)", pid, stub.Pattern) process, err := os.FindProcess(pid) if err != nil { @@ -232,18 +230,20 @@ func startLocalServer(t *testing.T, } } - if err := process.Signal(signal); err != nil { - t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + // Use process.Kill() for cross-platform compatibility. + // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. + if err := process.Kill(); err != nil { + t.Errorf("Failed to kill process %d: %s", pid, err) return testserver.Response{ StatusCode: http.StatusInternalServerError, - Body: fmt.Sprintf("failed to send signal: %s", err), + Body: fmt.Sprintf("failed to kill process: %s", err), } } // Return a response (the CLI will likely be killed before it receives this) return testserver.Response{ StatusCode: http.StatusOK, - Body: fmt.Sprintf("sent signal %d to PID %d", signal, pid), + Body: fmt.Sprintf("killed PID %d", pid), } } diff --git a/acceptance/selftest/kill_caller/mid_request/output.txt b/acceptance/selftest/kill_caller/mid_request/output.txt index 8ae1495a64..fac04c326a 100644 --- a/acceptance/selftest/kill_caller/mid_request/output.txt +++ b/acceptance/selftest/kill_caller/mid_request/output.txt @@ -1,5 +1,5 @@ >>> errcode [CLI] workspace list / -script: line [N]: [PID] Killed "$@" +[PROCESS_KILLED] -Exit code: 137 +Exit code: [KILLED] diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml index 7a4894b38d..c264c61861 100644 --- a/acceptance/selftest/kill_caller/mid_request/test.toml +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -3,8 +3,21 @@ Env.DATABRICKS_CLI_TEST_PID = "1" [[Server]] Pattern = "GET /api/2.0/workspace/list" -KillCaller = 9 +KillCaller = true [[Repls]] -Old = 'script: line \d+: \d+ Killed' -New = 'script: line [N]: [PID] Killed' +# macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" +# Normalize the whole killed line to a placeholder +Old = 'script: line \d+:\s+\d+ Killed(: 9)?\s+"\$@"' +New = '[PROCESS_KILLED]' + +[[Repls]] +# On Windows, there's no "Killed" message - just empty line before Exit code +# Insert [PROCESS_KILLED] placeholder for consistency +Old = '(\n>>> errcode [^\n]+\n)\nExit code:' +New = '${1}[PROCESS_KILLED]\n\nExit code:' + +[[Repls]] +# Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows +Old = 'Exit code: (137|1)' +New = 'Exit code: [KILLED]' diff --git a/acceptance/selftest/kill_caller/output.txt b/acceptance/selftest/kill_caller/output.txt index 6bbd632062..2e413420b8 100644 --- a/acceptance/selftest/kill_caller/output.txt +++ b/acceptance/selftest/kill_caller/output.txt @@ -1,5 +1,5 @@ >>> errcode [CLI] current-user me -script: line [N]: [PID] Killed "$@" +[PROCESS_KILLED] -Exit code: 137 +Exit code: [KILLED] diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index f7d2b34354..c7a106e144 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -4,8 +4,21 @@ Env.DATABRICKS_CLI_TEST_PID = "1" # Kill the CLI when it calls /Me endpoint [[Server]] Pattern = "GET /api/2.0/preview/scim/v2/Me" -KillCaller = 9 +KillCaller = true [[Repls]] -Old = 'script: line \d+: \d+ Killed' -New = 'script: line [N]: [PID] Killed' +# macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" +# Normalize the whole killed line to a placeholder +Old = 'script: line \d+:\s+\d+ Killed(: 9)?\s+"\$@"' +New = '[PROCESS_KILLED]' + +[[Repls]] +# On Windows, there's no "Killed" message - just empty line before Exit code +# Insert [PROCESS_KILLED] placeholder for consistency +Old = '(\n>>> errcode [^\n]+\n)\nExit code:' +New = '${1}[PROCESS_KILLED]\n\nExit code:' + +[[Repls]] +# Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows +Old = 'Exit code: (137|1)' +New = 'Exit code: [KILLED]' diff --git a/libs/testserver/server.go b/libs/testserver/server.go index 99ec2b56e6..d9f3407858 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -15,7 +15,6 @@ import ( "strconv" "strings" "sync" - "syscall" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" @@ -367,22 +366,7 @@ func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { return } - signal := syscall.SIGKILL - bodyStr := string(request.Body) - if idx := strings.Index(bodyStr, "KILL_CALLER:"); idx != -1 { - signalPart := bodyStr[idx+len("KILL_CALLER:"):] - endIdx := 0 - for endIdx < len(signalPart) && signalPart[endIdx] >= '0' && signalPart[endIdx] <= '9' { - endIdx++ - } - if endIdx > 0 { - if sigNum, err := strconv.Atoi(signalPart[:endIdx]); err == nil { - signal = syscall.Signal(sigNum) - } - } - } - - s.t.Logf("KILL_CALLER: sending signal %d to PID %d", signal, pid) + s.t.Logf("KILL_CALLER: killing PID %d", pid) process, err := os.FindProcess(pid) if err != nil { @@ -392,13 +376,15 @@ func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { return } - if err := process.Signal(signal); err != nil { - s.t.Errorf("Failed to send signal %d to process %d: %s", signal, pid, err) + // Use process.Kill() for cross-platform compatibility. + // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. + if err := process.Kill(); err != nil { + s.t.Errorf("Failed to kill process %d: %s", pid, err) w.WriteHeader(http.StatusInternalServerError) - _, _ = fmt.Fprintf(w, "failed to send signal: %s", err) + _, _ = fmt.Fprintf(w, "failed to kill process: %s", err) return } w.WriteHeader(http.StatusOK) - _, _ = fmt.Fprintf(w, "sent signal %d to PID %d", signal, pid) + _, _ = fmt.Fprintf(w, "killed PID %d", pid) } From ba053c39b1c386f0dbc93a1590284a4ad775b97d Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Wed, 17 Dec 2025 13:27:36 +0530 Subject: [PATCH 3/5] Fixed windows tests --- acceptance/selftest/kill_caller/mid_request/test.toml | 9 ++++++++- acceptance/selftest/kill_caller/test.toml | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml index c264c61861..dedb9fd7a3 100644 --- a/acceptance/selftest/kill_caller/mid_request/test.toml +++ b/acceptance/selftest/kill_caller/mid_request/test.toml @@ -15,9 +15,16 @@ New = '[PROCESS_KILLED]' # On Windows, there's no "Killed" message - just empty line before Exit code # Insert [PROCESS_KILLED] placeholder for consistency Old = '(\n>>> errcode [^\n]+\n)\nExit code:' -New = '${1}[PROCESS_KILLED]\n\nExit code:' +New = """${1}[PROCESS_KILLED] + +Exit code:""" [[Repls]] # Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows Old = 'Exit code: (137|1)' New = 'Exit code: [KILLED]' + +[[Repls]] +# Normalize Windows line endings (CRLF -> LF) - must be LAST +Old = "\r" +New = '' diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index c7a106e144..0fd483f97a 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -16,9 +16,16 @@ New = '[PROCESS_KILLED]' # On Windows, there's no "Killed" message - just empty line before Exit code # Insert [PROCESS_KILLED] placeholder for consistency Old = '(\n>>> errcode [^\n]+\n)\nExit code:' -New = '${1}[PROCESS_KILLED]\n\nExit code:' +New = """${1}[PROCESS_KILLED] + +Exit code:""" [[Repls]] # Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows Old = 'Exit code: (137|1)' New = 'Exit code: [KILLED]' + +[[Repls]] +# Normalize Windows line endings (CRLF -> LF) - must be LAST +Old = "\r" +New = '' From 8bd1547f8036cd5ef92342e37f987a72b6d553f1 Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Tue, 30 Dec 2025 23:42:45 +0530 Subject: [PATCH 4/5] Updated KillCaller to int and addressed comments Signed-off-by: Varun Deep Saini --- acceptance/internal/config.go | 6 ++-- acceptance/internal/prepare_server.go | 19 +++++++++++- .../out.test.toml | 0 .../kill_caller/{ => currentuser}/output.txt | 1 + .../kill_caller/{ => currentuser}/script | 1 + .../kill_caller/currentuser/test.toml | 4 +++ .../kill_caller/mid_request/test.toml | 30 ------------------- .../kill_caller/{ => multiple}/out.test.toml | 0 .../selftest/kill_caller/multiple/output.txt | 25 ++++++++++++++++ .../selftest/kill_caller/multiple/script | 13 ++++++++ .../selftest/kill_caller/multiple/test.toml | 10 +++++++ acceptance/selftest/kill_caller/test.toml | 5 ---- .../kill_caller/workspace/out.test.toml | 5 ++++ .../{mid_request => workspace}/output.txt | 1 + .../{mid_request => workspace}/script | 1 + .../selftest/kill_caller/workspace/test.toml | 4 +++ cmd/pipelines/root/root.go | 4 +-- cmd/root/root.go | 3 +- cmd/root/user_agent_test_pid.go | 28 +++++++++++++++++ libs/testserver/server.go | 14 +-------- 20 files changed, 119 insertions(+), 55 deletions(-) rename acceptance/selftest/kill_caller/{mid_request => currentuser}/out.test.toml (100%) rename acceptance/selftest/kill_caller/{ => currentuser}/output.txt (72%) rename acceptance/selftest/kill_caller/{ => currentuser}/script (50%) create mode 100644 acceptance/selftest/kill_caller/currentuser/test.toml delete mode 100644 acceptance/selftest/kill_caller/mid_request/test.toml rename acceptance/selftest/kill_caller/{ => multiple}/out.test.toml (100%) create mode 100644 acceptance/selftest/kill_caller/multiple/output.txt create mode 100644 acceptance/selftest/kill_caller/multiple/script create mode 100644 acceptance/selftest/kill_caller/multiple/test.toml create mode 100644 acceptance/selftest/kill_caller/workspace/out.test.toml rename acceptance/selftest/kill_caller/{mid_request => workspace}/output.txt (72%) rename acceptance/selftest/kill_caller/{mid_request => workspace}/script (50%) create mode 100644 acceptance/selftest/kill_caller/workspace/test.toml create mode 100644 cmd/root/user_agent_test_pid.go diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 3b5a3aadc2..3910107eb9 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -157,9 +157,11 @@ type ServerStub struct { // See [time.ParseDuration] for details. Delay time.Duration - // If true, kill the caller process instead of returning a response. + // Number of times to kill the caller process before returning normal responses. + // 0 = never kill (default), 1 = kill once then allow, 2 = kill twice then allow, etc. + // Useful for testing crash recovery scenarios where first deploy crashes but retry succeeds. // Requires DATABRICKS_CLI_TEST_PID=1 to be set in the test environment. - KillCaller bool + KillCaller int } // FindConfigs finds all the config relevant for this test, diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index 200532e7e4..7ac2f457ad 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -184,6 +184,9 @@ func startLocalServer(t *testing.T, s.ResponseCallback = logResponseCallback(t) } + // Track remaining kill counts per pattern (for KillCaller > 0) + killCounters := make(map[string]int) + for ind := range stubs { // We want later stubs takes precedence, because then leaf configs take precedence over parent directory configs // In gorilla/mux earlier handlers take precedence, so we need to reverse the order @@ -191,6 +194,12 @@ func startLocalServer(t *testing.T, require.NotEmpty(t, stub.Pattern) items := strings.Split(stub.Pattern, " ") require.Len(t, items, 2) + + // Initialize kill counter for this pattern + if stub.KillCaller > 0 { + killCounters[stub.Pattern] = stub.KillCaller + } + s.Handle(items[0], items[1], func(req testserver.Request) any { if stub.Delay > 0 { ctx := req.Context @@ -209,7 +218,15 @@ func startLocalServer(t *testing.T, } } - if stub.KillCaller { + // Check if we should kill the caller + shouldKill := false + if stub.KillCaller > 0 && killCounters[stub.Pattern] > 0 { + killCounters[stub.Pattern]-- + shouldKill = true + t.Logf("KillCaller: will kill (remaining kills for %s: %d)", stub.Pattern, killCounters[stub.Pattern]) + } + + if shouldKill { pid := testserver.ExtractPidFromHeaders(req.Headers) if pid == 0 { t.Errorf("KillCaller configured but test-pid not found in User-Agent") diff --git a/acceptance/selftest/kill_caller/mid_request/out.test.toml b/acceptance/selftest/kill_caller/currentuser/out.test.toml similarity index 100% rename from acceptance/selftest/kill_caller/mid_request/out.test.toml rename to acceptance/selftest/kill_caller/currentuser/out.test.toml diff --git a/acceptance/selftest/kill_caller/output.txt b/acceptance/selftest/kill_caller/currentuser/output.txt similarity index 72% rename from acceptance/selftest/kill_caller/output.txt rename to acceptance/selftest/kill_caller/currentuser/output.txt index 2e413420b8..637ea530b4 100644 --- a/acceptance/selftest/kill_caller/output.txt +++ b/acceptance/selftest/kill_caller/currentuser/output.txt @@ -3,3 +3,4 @@ [PROCESS_KILLED] Exit code: [KILLED] +Script continued after kill diff --git a/acceptance/selftest/kill_caller/script b/acceptance/selftest/kill_caller/currentuser/script similarity index 50% rename from acceptance/selftest/kill_caller/script rename to acceptance/selftest/kill_caller/currentuser/script index b6e8c070f6..821c42d8cf 100644 --- a/acceptance/selftest/kill_caller/script +++ b/acceptance/selftest/kill_caller/currentuser/script @@ -1 +1,2 @@ trace errcode $CLI current-user me +echo "Script continued after kill" diff --git a/acceptance/selftest/kill_caller/currentuser/test.toml b/acceptance/selftest/kill_caller/currentuser/test.toml new file mode 100644 index 0000000000..b76fe401fc --- /dev/null +++ b/acceptance/selftest/kill_caller/currentuser/test.toml @@ -0,0 +1,4 @@ +# Kill the CLI when it calls /Me endpoint (once, then allow) +[[Server]] +Pattern = "GET /api/2.0/preview/scim/v2/Me" +KillCaller = 1 diff --git a/acceptance/selftest/kill_caller/mid_request/test.toml b/acceptance/selftest/kill_caller/mid_request/test.toml deleted file mode 100644 index dedb9fd7a3..0000000000 --- a/acceptance/selftest/kill_caller/mid_request/test.toml +++ /dev/null @@ -1,30 +0,0 @@ -Local = true -Env.DATABRICKS_CLI_TEST_PID = "1" - -[[Server]] -Pattern = "GET /api/2.0/workspace/list" -KillCaller = true - -[[Repls]] -# macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" -# Normalize the whole killed line to a placeholder -Old = 'script: line \d+:\s+\d+ Killed(: 9)?\s+"\$@"' -New = '[PROCESS_KILLED]' - -[[Repls]] -# On Windows, there's no "Killed" message - just empty line before Exit code -# Insert [PROCESS_KILLED] placeholder for consistency -Old = '(\n>>> errcode [^\n]+\n)\nExit code:' -New = """${1}[PROCESS_KILLED] - -Exit code:""" - -[[Repls]] -# Normalize exit code: 137 on Unix (128 + SIGKILL), 1 on Windows -Old = 'Exit code: (137|1)' -New = 'Exit code: [KILLED]' - -[[Repls]] -# Normalize Windows line endings (CRLF -> LF) - must be LAST -Old = "\r" -New = '' diff --git a/acceptance/selftest/kill_caller/out.test.toml b/acceptance/selftest/kill_caller/multiple/out.test.toml similarity index 100% rename from acceptance/selftest/kill_caller/out.test.toml rename to acceptance/selftest/kill_caller/multiple/out.test.toml diff --git a/acceptance/selftest/kill_caller/multiple/output.txt b/acceptance/selftest/kill_caller/multiple/output.txt new file mode 100644 index 0000000000..538672bf86 --- /dev/null +++ b/acceptance/selftest/kill_caller/multiple/output.txt @@ -0,0 +1,25 @@ + +>>> errcode [CLI] current-user me +[PROCESS_KILLED] + +Exit code: [KILLED] +Attempt 1 done + +>>> errcode [CLI] current-user me +[PROCESS_KILLED] + +Exit code: [KILLED] +Attempt 2 done + +>>> errcode [CLI] current-user me +[PROCESS_KILLED] + +Exit code: [KILLED] +Attempt 3 done + +>>> [CLI] current-user me +{ + "id":"123", + "userName":"test@example.com" +} +Attempt 4 done - success! diff --git a/acceptance/selftest/kill_caller/multiple/script b/acceptance/selftest/kill_caller/multiple/script new file mode 100644 index 0000000000..03628e203e --- /dev/null +++ b/acceptance/selftest/kill_caller/multiple/script @@ -0,0 +1,13 @@ +# First 3 attempts should be killed +trace errcode $CLI current-user me +echo "Attempt 1 done" + +trace errcode $CLI current-user me +echo "Attempt 2 done" + +trace errcode $CLI current-user me +echo "Attempt 3 done" + +# 4th attempt should succeed +trace $CLI current-user me +echo "Attempt 4 done - success!" diff --git a/acceptance/selftest/kill_caller/multiple/test.toml b/acceptance/selftest/kill_caller/multiple/test.toml new file mode 100644 index 0000000000..5485fc6a6b --- /dev/null +++ b/acceptance/selftest/kill_caller/multiple/test.toml @@ -0,0 +1,10 @@ +# Kill the CLI 3 times, then allow the 4th request to succeed +[[Server]] +Pattern = "GET /api/2.0/preview/scim/v2/Me" +KillCaller = 3 +Response.Body = ''' +{ + "id": "123", + "userName": "test@example.com" +} +''' diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index 0fd483f97a..ea21861a4a 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -1,11 +1,6 @@ Local = true Env.DATABRICKS_CLI_TEST_PID = "1" -# Kill the CLI when it calls /Me endpoint -[[Server]] -Pattern = "GET /api/2.0/preview/scim/v2/Me" -KillCaller = true - [[Repls]] # macOS bash shows "Killed: 9" (with signal number), Linux shows "Killed" # Normalize the whole killed line to a placeholder diff --git a/acceptance/selftest/kill_caller/workspace/out.test.toml b/acceptance/selftest/kill_caller/workspace/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/kill_caller/workspace/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/kill_caller/mid_request/output.txt b/acceptance/selftest/kill_caller/workspace/output.txt similarity index 72% rename from acceptance/selftest/kill_caller/mid_request/output.txt rename to acceptance/selftest/kill_caller/workspace/output.txt index fac04c326a..57eb88c9ad 100644 --- a/acceptance/selftest/kill_caller/mid_request/output.txt +++ b/acceptance/selftest/kill_caller/workspace/output.txt @@ -3,3 +3,4 @@ [PROCESS_KILLED] Exit code: [KILLED] +Script continued after kill diff --git a/acceptance/selftest/kill_caller/mid_request/script b/acceptance/selftest/kill_caller/workspace/script similarity index 50% rename from acceptance/selftest/kill_caller/mid_request/script rename to acceptance/selftest/kill_caller/workspace/script index 817a90cc76..076972136c 100644 --- a/acceptance/selftest/kill_caller/mid_request/script +++ b/acceptance/selftest/kill_caller/workspace/script @@ -1 +1,2 @@ trace errcode $CLI workspace list / +echo "Script continued after kill" diff --git a/acceptance/selftest/kill_caller/workspace/test.toml b/acceptance/selftest/kill_caller/workspace/test.toml new file mode 100644 index 0000000000..eac10a6329 --- /dev/null +++ b/acceptance/selftest/kill_caller/workspace/test.toml @@ -0,0 +1,4 @@ +# Kill the CLI when it calls workspace list endpoint (once, then allow) +[[Server]] +Pattern = "GET /api/2.0/workspace/list" +KillCaller = 1 diff --git a/cmd/pipelines/root/root.go b/cmd/pipelines/root/root.go index 1eb874067c..ade98d899e 100644 --- a/cmd/pipelines/root/root.go +++ b/cmd/pipelines/root/root.go @@ -8,9 +8,9 @@ import ( "os" "strings" + "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -73,7 +73,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) - ctx = testserver.InjectPidToUserAgent(ctx) + ctx = root.InjectTestPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/cmd/root/root.go b/cmd/root/root.go index 20bf96691d..39c36e86d3 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -17,7 +17,6 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/telemetry" "github.com/databricks/cli/libs/telemetry/protos" - "github.com/databricks/cli/libs/testserver" "github.com/spf13/cobra" ) @@ -80,7 +79,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) - ctx = testserver.InjectPidToUserAgent(ctx) + ctx = InjectTestPidToUserAgent(ctx) cmd.SetContext(ctx) return nil } diff --git a/cmd/root/user_agent_test_pid.go b/cmd/root/user_agent_test_pid.go new file mode 100644 index 0000000000..7148fb3674 --- /dev/null +++ b/cmd/root/user_agent_test_pid.go @@ -0,0 +1,28 @@ +package root + +import ( + "context" + "os" + "strconv" + + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/useragent" +) + +const ( + // TestPidEnvVar is the environment variable that enables PID injection into the user agent. + // When set to "1", the CLI will include its process ID in the user agent string. + // This is used by the test server to identify and signal the CLI process. + TestPidEnvVar = "DATABRICKS_CLI_TEST_PID" + testPidKey = "test-pid" +) + +// InjectTestPidToUserAgent adds the current process ID to the user agent if +// DATABRICKS_CLI_TEST_PID=1 is set. This enables the test server to identify +// and signal this process during acceptance tests. +func InjectTestPidToUserAgent(ctx context.Context) context.Context { + if env.Get(ctx, TestPidEnvVar) != "1" { + return ctx + } + return useragent.InContext(ctx, testPidKey, strconv.Itoa(os.Getpid())) +} diff --git a/libs/testserver/server.go b/libs/testserver/server.go index d9f3407858..81d11e704c 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -17,25 +17,13 @@ import ( "sync" "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/env" - "github.com/databricks/databricks-sdk-go/useragent" "github.com/gorilla/mux" ) -const ( - TestPidEnvVar = "DATABRICKS_CLI_TEST_PID" - testPidKey = "test-pid" -) +const testPidKey = "test-pid" var testPidRegex = regexp.MustCompile(testPidKey + `/(\d+)`) -func InjectPidToUserAgent(ctx context.Context) context.Context { - if env.Get(ctx, TestPidEnvVar) != "1" { - return ctx - } - return useragent.InContext(ctx, testPidKey, strconv.Itoa(os.Getpid())) -} - func ExtractPidFromHeaders(headers http.Header) int { ua := headers.Get("User-Agent") matches := testPidRegex.FindStringSubmatch(ua) From 1ad08e2d42e3849d6bc05461c0e376fc9ca3b996 Mon Sep 17 00:00:00 2001 From: Varun Deep Saini Date: Wed, 31 Dec 2025 00:05:51 +0530 Subject: [PATCH 5/5] cleanup Signed-off-by: Varun Deep Saini --- acceptance/internal/prepare_server.go | 78 ++++++++++------------- acceptance/selftest/kill_caller/test.toml | 4 ++ libs/testserver/server.go | 36 ----------- 3 files changed, 38 insertions(+), 80 deletions(-) diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index 7ac2f457ad..5ce41378e7 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -218,50 +218,9 @@ func startLocalServer(t *testing.T, } } - // Check if we should kill the caller - shouldKill := false - if stub.KillCaller > 0 && killCounters[stub.Pattern] > 0 { - killCounters[stub.Pattern]-- - shouldKill = true - t.Logf("KillCaller: will kill (remaining kills for %s: %d)", stub.Pattern, killCounters[stub.Pattern]) - } - - if shouldKill { - pid := testserver.ExtractPidFromHeaders(req.Headers) - if pid == 0 { - t.Errorf("KillCaller configured but test-pid not found in User-Agent") - return testserver.Response{ - StatusCode: http.StatusBadRequest, - Body: "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)", - } - } - - t.Logf("KillCaller: killing PID %d (pattern: %s)", pid, stub.Pattern) - - process, err := os.FindProcess(pid) - if err != nil { - t.Errorf("Failed to find process %d: %s", pid, err) - return testserver.Response{ - StatusCode: http.StatusInternalServerError, - Body: fmt.Sprintf("failed to find process: %s", err), - } - } - - // Use process.Kill() for cross-platform compatibility. - // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. - if err := process.Kill(); err != nil { - t.Errorf("Failed to kill process %d: %s", pid, err) - return testserver.Response{ - StatusCode: http.StatusInternalServerError, - Body: fmt.Sprintf("failed to kill process: %s", err), - } - } - - // Return a response (the CLI will likely be killed before it receives this) - return testserver.Response{ - StatusCode: http.StatusOK, - Body: fmt.Sprintf("killed PID %d", pid), - } + if shouldKillCaller(stub, killCounters) { + killCaller(t, stub.Pattern, req.Headers) + return testserver.Response{StatusCode: http.StatusOK} } return stub.Response @@ -273,6 +232,37 @@ func startLocalServer(t *testing.T, return s.URL } +func shouldKillCaller(stub ServerStub, killCounters map[string]int) bool { + if stub.KillCaller <= 0 || killCounters[stub.Pattern] <= 0 { + return false + } + killCounters[stub.Pattern]-- + return true +} + +func killCaller(t *testing.T, pattern string, headers http.Header) { + pid := testserver.ExtractPidFromHeaders(headers) + if pid == 0 { + t.Errorf("KillCaller configured but test-pid not found in User-Agent") + return + } + + process, err := os.FindProcess(pid) + if err != nil { + t.Errorf("Failed to find process %d: %s", pid, err) + return + } + + // Use process.Kill() for cross-platform compatibility. + // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. + if err := process.Kill(); err != nil { + t.Errorf("Failed to kill process %d: %s", pid, err) + return + } + + t.Logf("KillCaller: killed PID %d (pattern: %s)", pid, pattern) +} + func startProxyServer(t *testing.T, logRequests bool, includeHeaders []string, diff --git a/acceptance/selftest/kill_caller/test.toml b/acceptance/selftest/kill_caller/test.toml index ea21861a4a..faa7caf7b0 100644 --- a/acceptance/selftest/kill_caller/test.toml +++ b/acceptance/selftest/kill_caller/test.toml @@ -1,3 +1,7 @@ +# KillCaller tests verify the test server's ability to terminate CLI processes mid-request. +# This enables testing crash recovery scenarios, e.g., "bundle deploy" fails on first attempt +# but succeeds on retry. Each subdirectory tests a different endpoint or retry count. + Local = true Env.DATABRICKS_CLI_TEST_PID = "1" diff --git a/libs/testserver/server.go b/libs/testserver/server.go index 81d11e704c..735b43cadc 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -9,7 +9,6 @@ import ( "net/http" "net/http/httptest" "net/url" - "os" "reflect" "regexp" "strconv" @@ -293,9 +292,6 @@ func (s *Server) Handle(method, path string, handler HandlerFunc) { StatusCode: 500, Body: []byte("INJECTED"), } - } else if bytes.Contains(request.Body, []byte("KILL_CALLER")) { - s.handleKillCaller(&request, w) - return } else { respAny := handler(request) if respAny == nil && request.Context.Err() != nil { @@ -344,35 +340,3 @@ func isNil(i any) bool { return false } } - -func (s *Server) handleKillCaller(request *Request, w http.ResponseWriter) { - pid := ExtractPidFromHeaders(request.Headers) - if pid == 0 { - s.t.Errorf("KILL_CALLER requested but test-pid not found in User-Agent") - w.WriteHeader(http.StatusBadRequest) - _, _ = fmt.Fprint(w, "test-pid not found in User-Agent (set DATABRICKS_CLI_TEST_PID=1)") - return - } - - s.t.Logf("KILL_CALLER: killing PID %d", pid) - - process, err := os.FindProcess(pid) - if err != nil { - s.t.Errorf("Failed to find process %d: %s", pid, err) - w.WriteHeader(http.StatusInternalServerError) - _, _ = fmt.Fprintf(w, "failed to find process: %s", err) - return - } - - // Use process.Kill() for cross-platform compatibility. - // On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess. - if err := process.Kill(); err != nil { - s.t.Errorf("Failed to kill process %d: %s", pid, err) - w.WriteHeader(http.StatusInternalServerError) - _, _ = fmt.Fprintf(w, "failed to kill process: %s", err) - return - } - - w.WriteHeader(http.StatusOK) - _, _ = fmt.Fprintf(w, "killed PID %d", pid) -}