From 44d6c603cfaa54eb0981066c97a3ee0a8bba2d15 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 11 Nov 2025 15:27:30 -0800 Subject: [PATCH 01/10] Initial PoC --- pom.xml | 14 +++++++++++++- .../io/stargate/sgv2/jsonapi/mcp/PingTool.java | 12 ++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java diff --git a/pom.xml b/pom.xml index 437723ef8e..6998805391 100644 --- a/pom.xml +++ b/pom.xml @@ -71,7 +71,15 @@ pom import - + + + io.quarkiverse.mcp + quarkus-mcp-server-bom + 1.7.1 + pom + import + + software.amazon.awssdk bom @@ -149,6 +157,10 @@ io.quarkus quarkus-logging-json + + io.quarkiverse.mcp + quarkus-mcp-server-sse + jakarta.validation jakarta.validation-api diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java new file mode 100644 index 0000000000..9fe33fcb04 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java @@ -0,0 +1,12 @@ +package io.stargate.sgv2.jsonapi.mcp; + +import io.quarkiverse.mcp.server.Tool; +import io.quarkiverse.mcp.server.ToolArg; + +public class PingTool { + @Tool(description = "Simple Ping tool (with configurable message)") + String ping( + @ToolArg(description = "Response message", defaultValue = "OK") String responseMessage) { + return responseMessage; + } +} From 17bb78b3a812bbfa36ee49a23311a60e0bc10073 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 11 Nov 2025 15:44:51 -0800 Subject: [PATCH 02/10] Add sysinfo tool --- .../stargate/sgv2/jsonapi/mcp/PingTool.java | 12 ---- .../stargate/sgv2/jsonapi/mcp/PoCTools.java | 55 +++++++++++++++++++ 2 files changed, 55 insertions(+), 12 deletions(-) delete mode 100644 src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java 
b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java deleted file mode 100644 index 9fe33fcb04..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PingTool.java +++ /dev/null @@ -1,12 +0,0 @@ -package io.stargate.sgv2.jsonapi.mcp; - -import io.quarkiverse.mcp.server.Tool; -import io.quarkiverse.mcp.server.ToolArg; - -public class PingTool { - @Tool(description = "Simple Ping tool (with configurable message)") - String ping( - @ToolArg(description = "Response message", defaultValue = "OK") String responseMessage) { - return responseMessage; - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java new file mode 100644 index 0000000000..df75e60e05 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java @@ -0,0 +1,55 @@ +package io.stargate.sgv2.jsonapi.mcp; + +import io.micrometer.core.instrument.MeterRegistry; +import io.quarkiverse.mcp.server.Tool; +import io.quarkiverse.mcp.server.ToolArg; +import io.stargate.sgv2.jsonapi.ConfigPreLoader; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; +import io.stargate.sgv2.jsonapi.api.request.RequestContext; +import io.stargate.sgv2.jsonapi.metrics.JsonProcessingMetricsReporter; +import io.stargate.sgv2.jsonapi.service.cqldriver.CqlSessionCacheSupplier; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.SchemaCache; +import io.stargate.sgv2.jsonapi.service.processor.MeteredCommandProcessor; +import jakarta.inject.Inject; +import java.util.Map; + +public class PoCTools { + @Inject private RequestContext requestContext; + @Inject private SchemaCache schemaCache; + + private final CommandContext.BuilderSupplier contextBuilderSupplier; + private final MeteredCommandProcessor meteredCommandProcessor; + + @Inject + public PoCTools( + MeteredCommandProcessor meteredCommandProcessor, + MeterRegistry meterRegistry, + JsonProcessingMetricsReporter jsonProcessingMetricsReporter, + 
+ CqlSessionCacheSupplier sessionCacheSupplier) { + this.meteredCommandProcessor = meteredCommandProcessor; + + contextBuilderSupplier = + CommandContext.builderSupplier() + .withJsonProcessingMetricsReporter(jsonProcessingMetricsReporter) + .withCqlSessionCache(sessionCacheSupplier.get()) + .withCommandConfig(ConfigPreLoader.getPreLoadOrEmpty()) + .withMeterRegistry(meterRegistry); + } + + @Tool(description = "Simple Ping tool (with configurable message)") + String ping( + @ToolArg(description = "Response message", defaultValue = "OK") String responseMessage) { + return responseMessage; + } + + @Tool(description = "System info printer") + Map sysinfo() { + return Map.of( + "requestId", + requestContext.getRequestId(), + "tenantId", + requestContext.getTenantId().orElse("N/A"), + "token", // presence marker only: the credential itself is never echoed + requestContext.getCassandraToken().map(t -> "[REDACTED]").orElse("N/A")); + } +} From 0643062279b3881b81be381530d3202cd85b1c4c Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 11 Nov 2025 16:45:56 -0800 Subject: [PATCH 03/10] ... 
--- src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java index df75e60e05..583ba8d982 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java @@ -36,10 +36,9 @@ public PoCTools( .withMeterRegistry(meterRegistry); } - @Tool(description = "Simple Ping tool (with configurable message)") - String ping( - @ToolArg(description = "Response message", defaultValue = "OK") String responseMessage) { - return responseMessage; + @Tool(description = "Simple Echo tool") + String echo(@ToolArg(description = "Response", defaultValue = "OK") String response) { + return response; } @Tool(description = "System info printer") From 4612401a903105debbd12e2ecf3442202d398d5a Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 11 Nov 2025 16:58:51 -0800 Subject: [PATCH 04/10] Add "addTool" tool --- .../stargate/sgv2/jsonapi/mcp/PoCTools.java | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java index 583ba8d982..eacdc7f1fd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java @@ -3,6 +3,8 @@ import io.micrometer.core.instrument.MeterRegistry; import io.quarkiverse.mcp.server.Tool; import io.quarkiverse.mcp.server.ToolArg; +import io.quarkiverse.mcp.server.ToolManager; +import io.quarkiverse.mcp.server.ToolResponse; import io.stargate.sgv2.jsonapi.ConfigPreLoader; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.request.RequestContext; @@ -14,6 +16,8 @@ import java.util.Map; public class PoCTools { + @Inject private ToolManager toolManager; + 
@Inject private RequestContext requestContext; @Inject private SchemaCache schemaCache; @@ -37,11 +41,12 @@ public PoCTools( } @Tool(description = "Simple Echo tool") - String echo(@ToolArg(description = "Response", defaultValue = "OK") String response) { + String echo( + @ToolArg(description = "Response", defaultValue = "OK", required = false) String response) { return response; } - @Tool(description = "System info printer") + @Tool(description = "System info printer tool") Map sysinfo() { return Map.of( "requestId", @@ -51,4 +56,18 @@ Map sysinfo() { "token", // presence marker only: the credential itself is never echoed requestContext.getCassandraToken().map(t -> "[REDACTED]").orElse("N/A")); } + + @Tool(description = "Add Tool tool") + String addTool(@ToolArg(description = "Name") String name) { + if (toolManager.getTool(name) != null) { + return "ALREADY_EXISTS"; + } + toolManager + .newTool(name) + .setDescription("Tool '" + name + "': lower-cases given String") + .addArgument("value", "Value to convert", true, String.class) + .setHandler(ta -> ToolResponse.success(ta.args().get("value").toString().toLowerCase())) + .register(); + return "ADDED"; + } } From 6ccf3c078480c3e265551a63770f431ec3df41f1 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Thu, 13 Nov 2025 17:22:50 -0800 Subject: [PATCH 05/10] Change MCP path to `/v1/mcp` (partially overlapping Data API endpoints) --- src/main/resources/application.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml index 40a1095df8..daa56f9b1c 100644 --- a/src/main/resources/application.yaml +++ b/src/main/resources/application.yaml @@ -124,6 +124,15 @@ quarkus: min-level: trace + # MCP PoC + mcp: + server: + traffic-logging: + enabled: true + text-limit: 500 + sse: + root-path: /v1/mcp + # built-in micrometer properties micrometer: # exports at prometheus default path From f126533707f14dad4f9d6688d747c71f3a995682 Mon Sep 17 00:00:00 2001 From: Tatu 
Saloranta Date: Thu, 13 Nov 2025 17:29:13 -0800 Subject: [PATCH 
06/10] Add project summary generated by BOB --- BOB.md | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 BOB.md diff --git a/BOB.md b/BOB.md new file mode 100644 index 0000000000..97946e5972 --- /dev/null +++ b/BOB.md @@ -0,0 +1,178 @@ +# Stargate Data API - Project Analysis Summary + +## Project Overview + +**Stargate Data API** is a standalone HTTP microservice that provides a JSON Document-based interface for accessing data stored in Apache Cassandra clusters. Built with Quarkus 3.29.2 and Java 21, it targets JavaScript developers who interact through client libraries like stargate-mongoose. + +## Core Technology Stack + +- **Framework**: Quarkus 3.29.2 +- **Language**: Java 21 +- **Build Tool**: Maven +- **Backend Databases**: + - DataStax Enterprise (DSE) 6.9.15 + - HyperConverged Database (HCD) 1.2.3 + - Apache Cassandra with Storage Attached Index (SAI) +- **Driver**: Cassandra Java Driver 4.17.0 + custom QueryBuilder 4.19.0-preview1 +- **Container**: Docker with native image support + +## Key Features + +1. **JSON Document API**: MongoDB-like API for document operations on Cassandra +2. **Vector Search**: Embeddings up to 4096 dimensions with cosine, euclidean, dot_product metrics +3. **Embedding Providers**: OpenAI, AWS Bedrock, NVIDIA, Mistral integration +4. **Lexical Search**: Full-text search with configurable analyzers +5. **Reranking**: Document reranking with NVIDIA models +6. **Tables API**: Structured table operations alongside collections +7. 
**MCP Server**: Model Context Protocol integration (Quarkus MCP 1.7.1) + +## Architecture Components + +### API Layer (`api/`) +- REST endpoints with OpenAPI/Swagger documentation +- Security, authentication, and tenant management +- Request validation and token handling + +### Service Layer (`service/`) +- **Operations**: CRUD for collections and tables +- **Embedding**: Server-side vectorization +- **Reranking**: Document reranking services +- **Schema**: Collection and table schema management +- **Shredding**: Document decomposition for Cassandra storage +- **CQL**: Query building and execution + +### Configuration (`config/`) +- Document limits: 4MB size, 16 levels depth, 2000 properties +- Database limits: 5 collections per database +- Operation limits: 20 documents per insert/update/delete +- Vectorization and feature flags + +## Supported Commands + +**Collections**: `find`, `findOne`, `insertOne`, `insertMany`, `updateOne`, `updateMany`, `deleteOne`, `deleteMany`, `findOneAndUpdate`, `findOneAndReplace`, `findOneAndDelete`, `countDocuments`, `estimatedDocumentCount`, `createCollection`, `findCollections`, `deleteCollection` + +**Tables**: `createTable`, `dropTable`, `alterTable`, `listTables`, `createIndex`, `createVectorIndex`, `createTextIndex`, `dropIndex`, `listIndexes`, `createType`, `dropType`, `alterType`, `listTypes` + +**Keyspaces**: `createKeyspace`, `findKeyspaces`, `dropKeyspace` + +## Document Model + +- **Field Names**: `[a-zA-Z0-9_-]+` pattern (except reserved `_id`) +- **Paths**: Dotted notation (e.g., `address.suburb`, `tags.0`) +- **Filter Operators**: `$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`, `$exists`, `$not`, `$and`, `$or`, `$nor`, `$all`, `$elemMatch`, `$size` +- **Array Support**: Zero-based indexing with array-specific operators + +## Deployment Options + +1. **Docker Compose**: Quick start with `./start_hcd.sh` or `./start_dse69.sh` +2. **Kubernetes**: Helm charts in `helm/jsonapi/` +3. 
**Native Executable**: GraalVM support +4. **Dev Mode**: `./mvnw compile quarkus:dev` + +## Testing Infrastructure + +- **Unit Tests**: JUnit 5 + Mockito (5.20.0) +- **Integration Tests**: Testcontainers with DSE/HCD +- **Profiles**: `dse69-it`, `hcd-it` +- **Performance**: NoSQLBench integration + +## Monitoring & Observability + +- **Metrics**: Micrometer + Prometheus +- **Health**: SmallRye Health at `/stargate/health` +- **Logging**: JSON logging, command-level logging +- **API Docs**: Swagger UI at `/swagger-ui/` + +## Key Configuration Defaults + +- Default page size: 20 documents +- Max in-memory sort: 10,000 documents +- Session cache: 300s TTL, max 50 sessions +- LWT retries: 3 attempts +- Max vector dimensions: 4096 floats +- Max string length: 8000 bytes +- Max array length: 1000 elements + +## Notable Design Decisions + +1. **Non-REST Design**: Optimized for machine-generated queries from ODMs +2. **Failure Modes**: "Fail Fast" vs "Fail Silently" for multi-document operations +3. **Optimistic Locking**: Compare-and-set for concurrent updates +4. **Document Shredding**: Custom decomposition for Cassandra storage +5. 
**Server-Side Vectorization**: Optional embedding generation + +## Project Structure + +- `src/main/java/io/stargate/sgv2/jsonapi/` - Core application +- `src/test/java/` - Comprehensive test suite +- `docs/` - API specifications (dataapi-spec.md, dataapi-network-spec.md) +- `docker-compose/` - Local deployment scripts +- `helm/` - Kubernetes deployment +- `lib/` - Custom Java driver repository + +## Quick Start + +### Running with Docker Compose + +```bash +cd docker-compose +./start_hcd.sh # For HCD +# or +./start_dse69.sh # For DSE 6.9 +``` + +### Running in Development Mode + +```bash +# Start backend database first +cd docker-compose +./start_hcd.sh -d + +# Then start Data API in dev mode +./mvnw compile quarkus:dev -Dstargate.jsonapi.operations.vectorize-enabled=true \ + -Dstargate.jsonapi.operations.database-config.local-datacenter=dc1 +``` + +### Building Docker Image + +```bash +./mvnw clean package -Dquarkus.container-image.build=true -DskipTests +``` + +### Running Tests + +```bash +./mvnw verify # All tests +./mvnw verify -DskipITs # Skip integration tests +./mvnw verify -DskipUnitTests # Only integration tests +``` + +## API Access + +Once running, access: +- **Swagger UI**: http://localhost:8181/swagger-ui/ +- **Health Check**: http://localhost:8181/stargate/health +- **Metrics**: http://localhost:8181/q/metrics + +## Authentication + +Token format for Cassandra backend: +``` +Token: Cassandra:Base64(username):Base64(password) +``` + +Example with default credentials (cassandra/cassandra): +``` +Token: Cassandra:Y2Fzc2FuZHJh:Y2Fzc2FuZHJh +``` + +## Additional Resources + +- [Configuration Guide](CONFIGURATION.md) +- [Data API Specification](docs/dataapi-spec.md) +- [Network Specification](docs/dataapi-network-spec.md) +- [Docker Compose README](docker-compose/README.md) + +--- + +This is a production-ready microservice bridging document-oriented applications with Cassandra's distributed capabilities, with strong support for modern AI/ML workloads 
through vector search and embedding integration. \ No newline at end of file From 519b77b334315a90684f08a22fbad16225ddaba1 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Wed, 19 Nov 2025 11:39:16 -0800 Subject: [PATCH 07/10] Update MCP module --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6998805391..a35c19b687 100644 --- a/pom.xml +++ b/pom.xml @@ -75,7 +75,7 @@ io.quarkiverse.mcp quarkus-mcp-server-bom - 1.7.1 + 1.7.2 pom import From 4e0332c360e59104b42a818cce1d685de0ece880 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Mon, 24 Nov 2025 17:04:35 -0800 Subject: [PATCH 08/10] Add basic nginx setup for local load-balancing testing --- nginx/nginx-quarkus.conf | 19 +++++++++++++++++++ nginx/start-quarkus-lb.sh | 8 ++++++++ 2 files changed, 27 insertions(+) create mode 100644 nginx/nginx-quarkus.conf create mode 100755 nginx/start-quarkus-lb.sh diff --git a/nginx/nginx-quarkus.conf b/nginx/nginx-quarkus.conf new file mode 100644 index 0000000000..e0ff185ad6 --- /dev/null +++ b/nginx/nginx-quarkus.conf @@ -0,0 +1,19 @@ +events { + worker_connections 256; +} +http { + upstream quarkus_backend { + server 127.0.0.1:8181; + server 127.0.0.1:8182; + } + + server { + listen 8180; + + location / { + proxy_pass http://quarkus_backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } +} diff --git a/nginx/start-quarkus-lb.sh b/nginx/start-quarkus-lb.sh new file mode 100755 index 0000000000..2a1c9453f5 --- /dev/null +++ b/nginx/start-quarkus-lb.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +CONFIG_FILE="$SCRIPT_DIR/nginx-quarkus.conf" + +echo "Starting Quarkus-LB with configs from '$CONFIG_FILE'..." 
+ +nginx -c "$CONFIG_FILE" From 3c258b1cc94a4a2134b20fbd06971ea00a7e6396 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Mon, 24 Nov 2025 17:12:56 -0800 Subject: [PATCH 09/10] Distribute/make-sticky requests --- nginx/nginx-quarkus.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nginx/nginx-quarkus.conf b/nginx/nginx-quarkus.conf index e0ff185ad6..d69007419e 100644 --- a/nginx/nginx-quarkus.conf +++ b/nginx/nginx-quarkus.conf @@ -3,6 +3,10 @@ events { } http { upstream quarkus_backend { + # ip_hash picks the backend from the client IP address, so a given client + # always reaches the same server (sticky). When every request comes from + # localhost they all hash to one backend, so no distribution is seen locally. + ip_hash; server 127.0.0.1:8181; server 127.0.0.1:8182; } From e90bce5b59496e7194f732e88fd6a7d41f2c7336 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 25 Nov 2025 16:41:07 -0800 Subject: [PATCH 10/10] tiny renaming --- .../sgv2/jsonapi/mcp/{PoCTools.java => MCPPocTools.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/main/java/io/stargate/sgv2/jsonapi/mcp/{PoCTools.java => MCPPocTools.java} (98%) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java b/src/main/java/io/stargate/sgv2/jsonapi/mcp/MCPPocTools.java similarity index 98% rename from src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java rename to src/main/java/io/stargate/sgv2/jsonapi/mcp/MCPPocTools.java index eacdc7f1fd..e5cc143baa 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/mcp/PoCTools.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/mcp/MCPPocTools.java @@ -15,7 +15,7 @@ import jakarta.inject.Inject; import java.util.Map; -public class PoCTools { +public class MCPPocTools { @Inject private ToolManager toolManager; @Inject private RequestContext requestContext; @@ -25,7 +25,7 @@ public class PoCTools { private final MeteredCommandProcessor meteredCommandProcessor; @Inject - public PoCTools( + public MCPPocTools( 
MeteredCommandProcessor meteredCommandProcessor, 
MeterRegistry meterRegistry, JsonProcessingMetricsReporter jsonProcessingMetricsReporter,