From 2f311d3461d1b7e1384b0c6514aae429b5010a82 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 11 Dec 2025 06:04:07 +0000 Subject: [PATCH] Python Examples for Context SDK --- examples/python-sdk/README.md | 18 + examples/python-sdk/context/README.md | 108 +++++ .../context/direct_context/README.md | 33 ++ .../context/direct_context/__init__.py | 2 + .../context/direct_context/__main__.py | 5 + .../python-sdk/context/direct_context/main.py | 133 ++++++ .../samples/src/api/http_client.py | 128 ++++++ .../samples/src/data/user_service.py | 128 ++++++ .../samples/src/utils/string_helpers.py | 103 +++++ .../context/direct_context/test_example.py | 61 +++ .../context/file_search_server/README.md | 36 ++ .../context/file_search_server/__init__.py | 2 + .../context/file_search_server/__main__.py | 4 + .../context/file_search_server/main.py | 205 +++++++++ .../file_search_server/test_example.py | 110 +++++ .../context/filesystem_context/README.md | 40 ++ .../context/filesystem_context/__init__.py | 2 + .../context/filesystem_context/__main__.py | 5 + .../context/filesystem_context/main.py | 102 +++++ .../filesystem_context/test_example.py | 61 +++ .../.github/workflows/augment-index.yml | 81 ++++ .../context/github_action_indexer/README.md | 248 +++++++++++ .../context/github_action_indexer/__init__.py | 9 + .../context/github_action_indexer/__main__.py | 59 +++ .../augment_indexer/__init__.py | 24 ++ .../augment_indexer/file_filter.py | 123 ++++++ .../augment_indexer/github_client.py | 307 ++++++++++++++ .../augment_indexer/index_manager.py | 395 ++++++++++++++++++ .../augment_indexer/main.py | 167 ++++++++ .../augment_indexer/models.py | 131 ++++++ .../augment_indexer/requirements.txt | 14 + .../augment_indexer/search.py | 132 ++++++ .../context/github_action_indexer/install.py | 192 +++++++++ .../github_action_indexer/test_example.py | 238 +++++++++++ .../context/prompt_enhancer_server/README.md | 45 ++ .../prompt_enhancer_server/__init__.py | 2 + 
.../prompt_enhancer_server/__main__.py | 4 + .../context/prompt_enhancer_server/main.py | 252 +++++++++++ .../prompt_enhancer_server/test_example.py | 125 ++++++ .../.github/workflows/index.yml | 1 + 40 files changed, 3835 insertions(+) create mode 100644 examples/python-sdk/context/README.md create mode 100644 examples/python-sdk/context/direct_context/README.md create mode 100644 examples/python-sdk/context/direct_context/__init__.py create mode 100644 examples/python-sdk/context/direct_context/__main__.py create mode 100644 examples/python-sdk/context/direct_context/main.py create mode 100644 examples/python-sdk/context/direct_context/samples/src/api/http_client.py create mode 100644 examples/python-sdk/context/direct_context/samples/src/data/user_service.py create mode 100644 examples/python-sdk/context/direct_context/samples/src/utils/string_helpers.py create mode 100644 examples/python-sdk/context/direct_context/test_example.py create mode 100644 examples/python-sdk/context/file_search_server/README.md create mode 100644 examples/python-sdk/context/file_search_server/__init__.py create mode 100644 examples/python-sdk/context/file_search_server/__main__.py create mode 100644 examples/python-sdk/context/file_search_server/main.py create mode 100644 examples/python-sdk/context/file_search_server/test_example.py create mode 100644 examples/python-sdk/context/filesystem_context/README.md create mode 100644 examples/python-sdk/context/filesystem_context/__init__.py create mode 100644 examples/python-sdk/context/filesystem_context/__main__.py create mode 100644 examples/python-sdk/context/filesystem_context/main.py create mode 100644 examples/python-sdk/context/filesystem_context/test_example.py create mode 100644 examples/python-sdk/context/github_action_indexer/.github/workflows/augment-index.yml create mode 100644 examples/python-sdk/context/github_action_indexer/README.md create mode 100644 examples/python-sdk/context/github_action_indexer/__init__.py create 
mode 100644 examples/python-sdk/context/github_action_indexer/__main__.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/__init__.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/file_filter.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/github_client.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/index_manager.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/main.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/models.py create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/requirements.txt create mode 100644 examples/python-sdk/context/github_action_indexer/augment_indexer/search.py create mode 100644 examples/python-sdk/context/github_action_indexer/install.py create mode 100644 examples/python-sdk/context/github_action_indexer/test_example.py create mode 100644 examples/python-sdk/context/prompt_enhancer_server/README.md create mode 100644 examples/python-sdk/context/prompt_enhancer_server/__init__.py create mode 100644 examples/python-sdk/context/prompt_enhancer_server/__main__.py create mode 100644 examples/python-sdk/context/prompt_enhancer_server/main.py create mode 100644 examples/python-sdk/context/prompt_enhancer_server/test_example.py diff --git a/examples/python-sdk/README.md b/examples/python-sdk/README.md index d87982c..d1d7aab 100644 --- a/examples/python-sdk/README.md +++ b/examples/python-sdk/README.md @@ -2,9 +2,15 @@ This directory contains examples demonstrating how to use the Augment Python SDK. +The SDK has two main components: + +1. **Auggie SDK** - An agent-based interface for AI-powered workflows with typed responses, sessions, and function calling +2. 
**Context SDK** - Semantic search and AI-powered code analysis via `FileSystemContext` and `DirectContext` + ## Quick Links - **[User Examples](user_examples/)** - Numbered tutorial examples (01-09) with a comprehensive [user guide](user_examples/user_guide.md) +- **[Context Examples](context/)** - Semantic search and AI-powered code analysis examples - **[Documentation](docs/)** - Detailed guides on specific features - **Basic Examples** - See below for standalone example scripts @@ -75,6 +81,18 @@ python acp_example_usage.py For ClaudeCodeACPClient documentation, see: - [Claude Code Client Guide](docs/CLAUDE_CODE_CLIENT.md) +## Context SDK Examples + +The **[context](context/)** directory contains examples demonstrating the Auggie SDK's context modes for semantic search and AI-powered code analysis: + +- **[Direct Context](context/direct_context/)** - API-based indexing with semantic search and AI Q&A +- **[FileSystem Context](context/filesystem_context/)** - Local directory search via MCP protocol +- **[File Search Server](context/file_search_server/)** - REST API for semantic file search with AI summarization +- **[Prompt Enhancer Server](context/prompt_enhancer_server/)** - HTTP server that enhances prompts with codebase context +- **[GitHub Action Indexer](context/github_action_indexer/)** - Index GitHub repositories for semantic search + +See the [context README](context/README.md) for prerequisites and detailed usage instructions. + ## Prompt-to-SDK Conversion ### `example_complex_prompt.txt` diff --git a/examples/python-sdk/context/README.md b/examples/python-sdk/context/README.md new file mode 100644 index 0000000..014886c --- /dev/null +++ b/examples/python-sdk/context/README.md @@ -0,0 +1,108 @@ +# Context Examples + +Examples demonstrating the Auggie SDK's context modes and AI-powered code analysis. + +## Prerequisites + +1. **Python 3.10+** - Required to run the examples +2. 
**Auggie CLI** - Required for FileSystem Context examples + ```bash + npm install -g @augmentcode/auggie@prerelease + ``` +3. **Authentication** - Required for all examples + ```bash + auggie login + ``` + This creates a session file at `~/.augment/session.json` with your API token. + + Alternatively, you can set environment variables: + ```bash + export AUGMENT_API_TOKEN=your_token_here + export AUGMENT_API_URL=https://staging-shard-0.api.augmentcode.com/ + ``` + +## Examples + +### [Direct Context](./direct_context/) +API-based indexing with semantic search and AI Q&A. + +**Run it:** +```bash +python -m direct_context +``` + +### [FileSystem Context](./filesystem_context/) +Local directory search via MCP protocol. + +**Important:** The FileSystem Context indexes all files in the workspace directory. To avoid timeouts when indexing large directories (like `node_modules/`), consider adding a `.gitignore` or `.augmentignore` file that excludes them. The auggie CLI respects both `.gitignore` and `.augmentignore` patterns during indexing. + +**Run it:** +```bash +python -m filesystem_context +``` + +### [File Search Server](./file_search_server/) +REST API for semantic file search with AI summarization. + +**Run it:** +```bash +python -m file_search_server . +``` + +Then query the API: +```bash +curl "http://localhost:3000/search?q=python" +``` + +### [Prompt Enhancer Server](./prompt_enhancer_server/) +HTTP server that enhances prompts with codebase context. + +**Run it:** +```bash +python -m prompt_enhancer_server . +``` + +Then enhance prompts: +```bash +curl -X POST http://localhost:3001/enhance \ + -H "Content-Type: application/json" \ + -d '{"prompt": "fix the login bug"}' +``` + +### [GitHub Action Indexer](./github_action_indexer/) +Index GitHub repositories with incremental updates via GitHub Actions. + +This is a more complex example that demonstrates production-ready repository indexing with GitHub Actions integration. 
It includes an install script for easy setup in your own repositories. + +See [github_action_indexer/README.md](./github_action_indexer/README.md) for setup and usage instructions. + +## Troubleshooting + +### MCP Timeout in FileSystem Context + +**Problem:** The FileSystem Context example times out during indexing. + +**Cause:** The workspace directory contains too many files (e.g., `node_modules/` with 45,000+ files). + +**Solution:** Create a `.gitignore` or `.augmentignore` file in the workspace directory to exclude large directories: + +```bash +# .gitignore or .augmentignore +node_modules/ +dist/ +*.log +.DS_Store +__pycache__/ +*.pyc +``` + +The auggie CLI respects both `.gitignore` and `.augmentignore` patterns and will skip excluded files during indexing. + +### Authentication Errors + +**Problem:** `Error: API key is required for search_and_ask()` + +**Cause:** The SDK cannot find your authentication credentials. + +**Solution:** Run `auggie login` to authenticate, or set the `AUGMENT_API_TOKEN` and `AUGMENT_API_URL` environment variables. + diff --git a/examples/python-sdk/context/direct_context/README.md b/examples/python-sdk/context/direct_context/README.md new file mode 100644 index 0000000..04535bb --- /dev/null +++ b/examples/python-sdk/context/direct_context/README.md @@ -0,0 +1,33 @@ +# Direct Context Example + +API-based indexing with semantic search, AI Q&A, and state persistence. 
+ +## Usage + +```bash +# Authenticate +auggie login + +# Run the example (from the context directory) +cd examples/python-sdk/context +python -m direct_context + +# Or run directly +python direct_context/main.py +``` + +## What It Does + +- Creates a Direct Context instance +- Adds sample files to the index +- Performs semantic searches +- Uses `search_and_ask()` for AI-powered Q&A +- Generates documentation +- Exports/imports context state + +## Key Features + +- **`search()`**: Semantic search returning formatted code snippets +- **`search_and_ask()`**: One-step AI Q&A about indexed code +- **State persistence**: Export/import index for reuse + diff --git a/examples/python-sdk/context/direct_context/__init__.py b/examples/python-sdk/context/direct_context/__init__.py new file mode 100644 index 0000000..d53e045 --- /dev/null +++ b/examples/python-sdk/context/direct_context/__init__.py @@ -0,0 +1,2 @@ +# direct_context package + diff --git a/examples/python-sdk/context/direct_context/__main__.py b/examples/python-sdk/context/direct_context/__main__.py new file mode 100644 index 0000000..9d3a32e --- /dev/null +++ b/examples/python-sdk/context/direct_context/__main__.py @@ -0,0 +1,5 @@ +"""Allow running as: python -m direct_context""" +from .main import main + +main() + diff --git a/examples/python-sdk/context/direct_context/main.py b/examples/python-sdk/context/direct_context/main.py new file mode 100644 index 0000000..7f34c98 --- /dev/null +++ b/examples/python-sdk/context/direct_context/main.py @@ -0,0 +1,133 @@ +""" +Sample: Direct Context - API-based indexing with import/export state + +This sample demonstrates: +- Creating a Direct Context instance +- Adding files to the index +- Searching the indexed files +- Using Generation API to ask questions about indexed code +- Generating documentation from indexed code +- Exporting state to a file +- Importing state from a file +""" + +import json +import sys +import tempfile +from pathlib import Path + +from 
auggie_sdk.context import DirectContext, File + +# Sample files are in the samples/ subdirectory +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def load_sample_files() -> list[File]: + """Load sample Python files from the samples directory.""" + files = [] + for file_path in SAMPLES_DIR.rglob("*.py"): + relative_path = file_path.relative_to(SAMPLES_DIR) + contents = file_path.read_text() + files.append(File(path=str(relative_path), contents=contents)) + return files + + +def main(): + print("=== Direct Context Sample ===\n") + + # Create a Direct Context instance + # Authentication is automatic via: + # 1. AUGMENT_API_TOKEN / AUGMENT_API_URL env vars, or + # 2. ~/.augment/session.json (created by `auggie login`) + print("Creating Direct Context...") + context = DirectContext.create(debug=True) + + # Load sample files from the samples/ directory + print("\nAdding files to index...") + file_objects = load_sample_files() + print(f" Found {len(file_objects)} sample files") + result = context.add_to_index(file_objects) + print("\nIndexing result:") + print(f" Newly uploaded: {result.newly_uploaded}") + print(f" Already uploaded: {result.already_uploaded}") + + # Search the codebase - returns formatted string ready for LLM use or display + # Using queries that work well with our realistic content + print("\n--- Search 1: Find string utility functions ---") + results1 = context.search("string utility functions for text formatting") + print("Search results:") + print(results1) + + print("\n--- Search 2: Find user management service ---") + results2 = context.search("user management service with CRUD operations") + print("Search results:") + print(results2) + + print("\n--- Search 3: Find HTTP client for API requests ---") + http_results = context.search("HTTP client for making API requests") + print("Search results:") + print(http_results) + + # Use search_and_ask to ask questions about the indexed code + print("\n--- search_and_ask Example 1: Ask questions about 
the code ---") + question = "How does the UserService class handle user creation and validation?" + print(f"Question: {question}") + + answer = context.search_and_ask( + "user creation and validation in UserService", + question, + ) + + print(f"\nAnswer: {answer}") + + # Use search_and_ask to generate documentation + print("\n--- search_and_ask Example 2: Generate documentation ---") + documentation = context.search_and_ask( + "string utility functions", + "Generate API documentation in markdown format for the string utility functions", + ) + + print("\nGenerated Documentation:") + print(documentation) + + # Use search_and_ask to explain code patterns + print("\n--- search_and_ask Example 3: Explain code patterns ---") + explanation = context.search_and_ask( + "utility functions", + "Explain what these utility functions do and when they would be useful", + ) + + print(f"\nExplanation: {explanation}") + + # Export state to a file + state_file = Path(tempfile.gettempdir()) / "direct-context-state.json" + print(f"\nExporting state to {state_file}...") + context.export_to_file(state_file) + print("State exported successfully") + + # Show the exported state + with open(state_file, "r") as f: + exported_state = json.load(f) + print("\nExported state:") + print(json.dumps(exported_state, indent=2)) + + # Import state in a new context + print("\n--- Testing state import ---") + context2 = DirectContext.import_from_file(state_file, debug=False) + print("State imported successfully") + + # Verify we can still search + results3 = context2.search("string utility functions") + print("\nSearch after importing state:") + print(results3) + + print("\n=== Sample Complete ===") + + +if __name__ == "__main__": + try: + main() + except Exception as error: + print(f"Error: {error}") + sys.exit(1) + diff --git a/examples/python-sdk/context/direct_context/samples/src/api/http_client.py b/examples/python-sdk/context/direct_context/samples/src/api/http_client.py new file mode 100644 index 
0000000..557ab09 --- /dev/null +++ b/examples/python-sdk/context/direct_context/samples/src/api/http_client.py @@ -0,0 +1,128 @@ +""" +HTTP client for making API requests with error handling and retries +""" + +from dataclasses import dataclass +from typing import Any, Dict, Literal, Optional, TypeVar, Generic +import json +import urllib.request +import urllib.error + + +@dataclass +class RequestConfig: + method: Literal["GET", "POST", "PUT", "DELETE"] + url: str + headers: Optional[Dict[str, str]] = None + body: Optional[Any] = None + timeout: Optional[int] = None + + +T = TypeVar("T") + + +@dataclass +class ApiResponse(Generic[T]): + data: T + status: int + headers: Dict[str, str] + + +class HttpClient: + """HTTP client class for making API requests""" + + def __init__(self, base_url: str, default_headers: Optional[Dict[str, str]] = None): + self.base_url = base_url.rstrip("/") + self.default_headers = default_headers or {} + + async def get(self, url: str, headers: Optional[Dict[str, str]] = None) -> ApiResponse: + """ + Make a GET request + + Args: + url: Request URL + headers: Optional headers + + Returns: + Promise with response data + """ + return await self._request(RequestConfig(method="GET", url=url, headers=headers)) + + async def post( + self, url: str, body: Optional[Any] = None, headers: Optional[Dict[str, str]] = None + ) -> ApiResponse: + """ + Make a POST request + + Args: + url: Request URL + body: Request body + headers: Optional headers + + Returns: + Promise with response data + """ + return await self._request(RequestConfig(method="POST", url=url, body=body, headers=headers)) + + async def put( + self, url: str, body: Optional[Any] = None, headers: Optional[Dict[str, str]] = None + ) -> ApiResponse: + """ + Make a PUT request + + Args: + url: Request URL + body: Request body + headers: Optional headers + + Returns: + Promise with response data + """ + return await self._request(RequestConfig(method="PUT", url=url, body=body, headers=headers)) 
+ + async def delete(self, url: str, headers: Optional[Dict[str, str]] = None) -> ApiResponse: + """ + Make a DELETE request + + Args: + url: Request URL + headers: Optional headers + + Returns: + Promise with response data + """ + return await self._request(RequestConfig(method="DELETE", url=url, headers=headers)) + + async def _request(self, config: RequestConfig) -> ApiResponse: + """ + Make a generic HTTP request + + Args: + config: Request configuration + + Returns: + Promise with response data + """ + url = config.url if config.url.startswith("http") else f"{self.base_url}{config.url}" + + headers = { + "Content-Type": "application/json", + **self.default_headers, + **(config.headers or {}), + } + + data = json.dumps(config.body).encode() if config.body else None + + req = urllib.request.Request(url, data=data, headers=headers, method=config.method) + + try: + with urllib.request.urlopen(req) as response: + response_data = json.loads(response.read().decode()) + return ApiResponse( + data=response_data, + status=response.status, + headers=dict(response.headers), + ) + except urllib.error.HTTPError as e: + raise Exception(f"HTTP {e.code}: {e.reason}") + diff --git a/examples/python-sdk/context/direct_context/samples/src/data/user_service.py b/examples/python-sdk/context/direct_context/samples/src/data/user_service.py new file mode 100644 index 0000000..e2d776e --- /dev/null +++ b/examples/python-sdk/context/direct_context/samples/src/data/user_service.py @@ -0,0 +1,128 @@ +""" +User service for managing user data and authentication +""" + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Dict, List, Literal, Optional +import random +import string + + +@dataclass +class User: + id: str + email: str + name: str + role: Literal["admin", "user", "guest"] + created_at: datetime + last_login_at: Optional[datetime] = None + + +@dataclass +class CreateUserRequest: + email: str + name: str + password: str + role: 
Literal["user", "guest"] = "user" + + +class UserService: + """Service class for user management operations""" + + def __init__(self): + self._users: Dict[str, User] = {} + + async def create_user(self, request: CreateUserRequest) -> User: + """ + Create a new user account + + Args: + request: User creation request + + Returns: + Created user (without password) + """ + user_id = self._generate_user_id() + user = User( + id=user_id, + email=request.email, + name=request.name, + role=request.role, + created_at=datetime.now(), + ) + + self._users[user_id] = user + return user + + async def find_user_by_id(self, user_id: str) -> Optional[User]: + """ + Find user by ID + + Args: + user_id: User ID + + Returns: + User if found, None otherwise + """ + return self._users.get(user_id) + + async def find_user_by_email(self, email: str) -> Optional[User]: + """ + Find user by email address + + Args: + email: Email address + + Returns: + User if found, None otherwise + """ + for user in self._users.values(): + if user.email == email: + return user + return None + + async def update_last_login(self, user_id: str) -> None: + """ + Update user's last login timestamp + + Args: + user_id: User ID + """ + user = self._users.get(user_id) + if user: + user.last_login_at = datetime.now() + + async def get_users(self, role: Optional[str] = None) -> List[User]: + """ + Get all users with optional role filter + + Args: + role: Optional role filter + + Returns: + List of users + """ + all_users = list(self._users.values()) + if role: + return [user for user in all_users if user.role == role] + return all_users + + async def delete_user(self, user_id: str) -> bool: + """ + Delete user by ID + + Args: + user_id: User ID + + Returns: + True if user was deleted, False if not found + """ + if user_id in self._users: + del self._users[user_id] + return True + return False + + def _generate_user_id(self) -> str: + return "user_" + "".join(random.choices(string.ascii_lowercase + string.digits, 
k=9)) + diff --git a/examples/python-sdk/context/direct_context/samples/src/utils/string_helpers.py b/examples/python-sdk/context/direct_context/samples/src/utils/string_helpers.py new file mode 100644 index 0000000..b07135f --- /dev/null +++ b/examples/python-sdk/context/direct_context/samples/src/utils/string_helpers.py @@ -0,0 +1,103 @@ +""" +String utility functions for text processing and formatting +""" + + +def format_number(num: float, locale: str = "en-US") -> str: + """ + Format a number with thousands separators + + Args: + num: Number to format + locale: Locale for formatting (default: 'en-US') + + Returns: + Formatted number string + """ + return f"{num:,.2f}" + + +def is_even(num: int) -> bool: + """ + Check if a number is even + + Args: + num: Number to check + + Returns: + True if number is even, false otherwise + """ + return num % 2 == 0 + + +def is_odd(num: int) -> bool: + """ + Check if a number is odd + + Args: + num: Number to check + + Returns: + True if number is odd, false otherwise + """ + return num % 2 != 0 + + +def clamp(value: float, min_val: float, max_val: float) -> float: + """ + Clamp a value between min and max bounds + + Args: + value: Value to clamp + min_val: Minimum allowed value + max_val: Maximum allowed value + + Returns: + Clamped value + """ + return min(max(value, min_val), max_val) + + +def capitalize(s: str) -> str: + """ + Capitalize the first letter of a string + + Args: + s: String to capitalize + + Returns: + String with first letter capitalized + """ + if not s: + return s + return s[0].upper() + s[1:].lower() + + +def to_title_case(s: str) -> str: + """ + Convert string to title case + + Args: + s: String to convert + + Returns: + String in title case + """ + return " ".join(capitalize(word) for word in s.split(" ")) + + +def truncate(s: str, max_length: int) -> str: + """ + Truncate string to specified length with ellipsis + + Args: + s: String to truncate + max_length: Maximum length + + Returns: + Truncated 
string + """ + if len(s) <= max_length: + return s + return s[: max_length - 3] + "..." + diff --git a/examples/python-sdk/context/direct_context/test_example.py b/examples/python-sdk/context/direct_context/test_example.py new file mode 100644 index 0000000..e9956a5 --- /dev/null +++ b/examples/python-sdk/context/direct_context/test_example.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +Test for the direct_context example. + +This example runs synchronously and tests the DirectContext API for: +- Adding files to an index +- Searching indexed files +- Using search_and_ask for Q&A +- Exporting and importing state +""" + +import subprocess +import sys +from pathlib import Path + + +def main(): + """Run the direct_context example and verify it completes successfully.""" + # Get the package directory and run from parent so module execution works + package_dir = Path(__file__).parent + context_dir = package_dir.parent + + print("Running: python -m direct_context") + print(f"Working directory: {context_dir}") + + result = subprocess.run( + [sys.executable, "-m", "direct_context"], + capture_output=True, + text=True, + timeout=120, # 2 minutes should be plenty + cwd=str(context_dir), + ) + + # Print output for debugging + if result.stdout: + print("=== stdout ===") + print(result.stdout) + if result.stderr: + print("=== stderr ===") + print(result.stderr) + + # Verify success + if result.returncode != 0: + print(f"❌ Example failed with exit code {result.returncode}") + sys.exit(1) + + # Verify expected output + if "=== Sample Complete ===" not in result.stdout: + print("❌ Example did not complete successfully (missing completion message)") + sys.exit(1) + + if "Search results:" not in result.stdout: + print("❌ Example did not produce search results") + sys.exit(1) + + print("✅ direct_context example passed") + + +if __name__ == "__main__": + main() + diff --git a/examples/python-sdk/context/file_search_server/README.md 
b/examples/python-sdk/context/file_search_server/README.md new file mode 100644 index 0000000..bf60bd6 --- /dev/null +++ b/examples/python-sdk/context/file_search_server/README.md @@ -0,0 +1,36 @@ +# File Search Server Example + +REST API for semantic file search with AI-powered summarization. + +## Prerequisites + +Install the `auggie` CLI and authenticate: +```bash +npm install -g @augmentcode/auggie@prerelease +auggie login +``` + +## Usage + +```bash +# From the context directory +cd examples/python-sdk/context +python -m file_search_server . + +# Or run directly +python file_search_server/main.py . +``` + +## API Endpoints + +### Search Files +```bash +curl "http://localhost:3000/search?q=python" +curl "http://localhost:3000/search?q=authentication+logic" +``` + +### Health Check +```bash +curl "http://localhost:3000/health" +``` + diff --git a/examples/python-sdk/context/file_search_server/__init__.py b/examples/python-sdk/context/file_search_server/__init__.py new file mode 100644 index 0000000..58268da --- /dev/null +++ b/examples/python-sdk/context/file_search_server/__init__.py @@ -0,0 +1,2 @@ +# file_search_server package + diff --git a/examples/python-sdk/context/file_search_server/__main__.py b/examples/python-sdk/context/file_search_server/__main__.py new file mode 100644 index 0000000..e5458b1 --- /dev/null +++ b/examples/python-sdk/context/file_search_server/__main__.py @@ -0,0 +1,4 @@ +"""Allow running as: python -m file_search_server""" +from .main import main + +main() diff --git a/examples/python-sdk/context/file_search_server/main.py b/examples/python-sdk/context/file_search_server/main.py new file mode 100644 index 0000000..f9ca374 --- /dev/null +++ b/examples/python-sdk/context/file_search_server/main.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +""" +File Search Server Example + +A simple HTTP server that provides AI-powered file search using FileSystem Context. +Search results are processed with AI to summarize only the relevant results. 
+ +Usage: + python main.py [workspace-directory] + python -m file_search_server [workspace-directory] + +Endpoints: + GET /search?q= - Search for files and get AI-summarized results + GET /health - Health check +""" + +import json +import sys +from datetime import datetime +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path +from typing import TypedDict +from urllib.parse import parse_qs, urlparse + +from auggie_sdk.context import FileSystemContext + +PORT = 3000 + + +# --- Search Handler --- + + +class SearchResponse(TypedDict): + """Response type for search requests""" + + query: str + summary: str + formattedResults: str + + +def handle_search(query: str, context: FileSystemContext) -> SearchResponse: + """ + Handle search request + + Args: + query: Search query string + context: FileSystemContext instance + + Returns: + SearchResponse with query, summary, and formatted results + """ + # Search for relevant code - returns formatted string ready for LLM use + formatted_results = context.search(query) + + if not formatted_results or formatted_results.strip() == "": + return { + "query": query, + "summary": "No relevant results found.", + "formattedResults": "", + } + + # Use search_and_ask to summarize the relevant results + summary = context.search_and_ask( + query, + f'Provide a concise summary of the relevant results for the query "{query}". ' + "Focus only on the most relevant information.", + ) + + return { + "query": query, + "summary": summary, + "formattedResults": formatted_results, + } + + +# --- HTTP Server --- + +# Global context +context: FileSystemContext | None = None +workspace_dir: str = "." 


def initialize_context():
    """Initialize the module-global FileSystemContext for workspace_dir."""
    global context
    print("Initializing FileSystem Context...")
    # debug=False keeps the underlying MCP subprocess quiet for server use.
    context = FileSystemContext.create(workspace_dir, debug=False)
    print("FileSystem Context initialized\n")


class RequestHandler(BaseHTTPRequestHandler):
    """HTTP request handler exposing /search and /health with CORS enabled."""

    def do_OPTIONS(self):
        """Handle OPTIONS (CORS preflight) requests."""
        self.send_response(200)
        self._send_cors_headers()
        self.end_headers()

    def _send_cors_headers(self):
        """Send permissive CORS headers (any origin)."""
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")

    def _send_json_response(self, status: int, data: dict):
        """Serialize data as pretty-printed JSON and send it with status."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(json.dumps(data, indent=2).encode())

    def do_GET(self):
        """Route GET requests: /search, /health, otherwise 404."""
        parsed_url = urlparse(self.path)
        path = parsed_url.path
        query_params = parse_qs(parsed_url.query)

        if path == "/search":
            self._handle_search(query_params)
        elif path == "/health":
            self._send_json_response(
                200,
                {
                    "status": "ok",
                    "workspace": workspace_dir,
                    "contextReady": context is not None,
                },
            )
        else:
            self._send_json_response(404, {"error": "Not found"})

    def _handle_search(self, query_params: dict):
        """Handle the /search endpoint: validate input, run the search, reply."""
        # parse_qs maps each key to a list of values; take the first one.
        query = query_params.get("q", [None])[0]

        if not query:
            self._send_json_response(400, {"error": "Missing query parameter 'q'"})
            return

        if context is None:
            self._send_json_response(503, {"error": "Context not initialized yet"})
            return

        try:
            print(f'[{datetime.now().isoformat()}] Search request: "{query}"')
            result = handle_search(query, context)
            self._send_json_response(200, result)
        except Exception as error:
            print(f"Search error: {error}")
            self._send_json_response(500, {"error": str(error)})

    def log_message(self, format, *args):
        """Override to suppress the default per-request stderr logging."""
        pass


def main():
    """Parse the workspace argument, build the context, and serve until Ctrl+C."""
    global workspace_dir

    # Get workspace directory from command line, default to current directory
    workspace_dir_arg = sys.argv[1] if len(sys.argv) > 1 else "."
    # Resolve to absolute path to handle relative paths correctly
    workspace_dir = str(Path(workspace_dir_arg).resolve())

    print("=== File Search Server ===\n")
    print(f"Workspace directory: {workspace_dir}")
    print(f"Starting server on port {PORT}...\n")

    server = None
    try:
        initialize_context()

        server = HTTPServer(("", PORT), RequestHandler)
        print(f"✅ Server running at http://localhost:{PORT}/")
        print("\nExample requests:")
        print(" # Search with AI-summarized results")
        print(f' curl "http://localhost:{PORT}/search?q=python"')
        print(f' curl "http://localhost:{PORT}/search?q=authentication+logic"')
        print("\n # Health check")
        print(f' curl "http://localhost:{PORT}/health"')
        print("\nPress Ctrl+C to stop\n")

        server.serve_forever()
    except KeyboardInterrupt:
        # Normal shutdown path: release the MCP context and the socket.
        print("\n\nShutting down...")
        if context:
            context.close()
        if server:
            server.server_close()
        print("Server stopped")
        sys.exit(0)
    except Exception as error:
        print(f"Failed to initialize: {error}")
        if context:
            context.close()
        if server:
            server.server_close()
        sys.exit(1)


if __name__ == "__main__":
    main()
diff --git a/examples/python-sdk/context/file_search_server/test_example.py b/examples/python-sdk/context/file_search_server/test_example.py
new file mode 100644
index 0000000..218acf1
--- /dev/null
+++ b/examples/python-sdk/context/file_search_server/test_example.py
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
"""
Test for the file_search_server example.

This example starts an HTTP server on port 3000 that provides:
- GET /health - Health check endpoint
- GET /search?q= - Search endpoint with AI-summarized results

The test starts the server, verifies the endpoints work, then shuts it down.
"""

import json
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path

# Server endpoint configuration; must match file_search_server/main.py.
PORT = 3000
BASE_URL = f"http://localhost:{PORT}"
STARTUP_TIMEOUT = 30  # seconds to wait for server to start
REQUEST_TIMEOUT = 60  # seconds for each request


def wait_for_server(url: str, timeout: int = STARTUP_TIMEOUT) -> bool:
    """Poll url once per second until it returns HTTP 200 or timeout expires."""
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            req = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(req, timeout=5) as response:
                if response.status == 200:
                    return True
        except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError):
            # Server not up yet (connection refused / timed out); retry.
            pass
        time.sleep(1)
    return False


def make_request(url: str, timeout: int = REQUEST_TIMEOUT) -> dict:
    """Make a GET request and return the decoded JSON response body."""
    req = urllib.request.Request(url, method="GET")
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return json.loads(response.read().decode())


def main():
    """Run the file_search_server example and verify it works."""
    # Get the package directory and workspace
    package_dir = Path(__file__).parent
    workspace_dir = str(package_dir)
    # Run from the parent directory so module execution works
    context_dir = package_dir.parent

    print(f"Starting server with: python -m file_search_server {workspace_dir}")
    print(f"Working directory: {context_dir}")

    # Start the server as a subprocess using module execution
    server_process = subprocess.Popen(
        [sys.executable, "-m", "file_search_server", workspace_dir],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=str(context_dir),
    )

    try:
        # Wait for server to be ready
        print(f"Waiting for server at {BASE_URL}/health...")
        if not wait_for_server(f"{BASE_URL}/health"):
            # Collect the server's output to aid debugging a failed start.
            stdout, stderr = server_process.communicate(timeout=5)
            print(f"❌ Server failed to start within {STARTUP_TIMEOUT}s")
            print(f"stdout: {stdout}")
            print(f"stderr: {stderr}")
            sys.exit(1)

        print("✓ Server is ready")

        # Test health endpoint
        print("\nTesting /health endpoint...")
        health = make_request(f"{BASE_URL}/health")
        assert health.get("status") == "ok", f"Expected status 'ok', got: {health}"
        assert health.get("contextReady") is True, f"Context not ready: {health}"
        print(f"✓ Health check passed: {health}")

        # Test search endpoint
        print("\nTesting /search endpoint...")
        search_result = make_request(f"{BASE_URL}/search?q=python")
        # The search should return some result (structure may vary)
        assert isinstance(search_result, dict), f"Expected dict, got: {type(search_result)}"
        print(f"✓ Search returned result with keys: {list(search_result.keys())}")

        print("\n✅ file_search_server example passed")

    finally:
        # Shutdown the server
        print("\nShutting down server...")
        server_process.terminate()
        try:
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Graceful terminate failed; force-kill and reap the process.
            server_process.kill()
            server_process.wait()
        print("Server stopped")


if __name__ == "__main__":
    main()
diff --git a/examples/python-sdk/context/filesystem_context/README.md b/examples/python-sdk/context/filesystem_context/README.md
new file mode 100644
index 0000000..c652c26
--- /dev/null
+++ b/examples/python-sdk/context/filesystem_context/README.md
@@ -0,0 +1,40 @@
# FileSystem Context Example

Local directory search via MCP protocol with AI-powered Q&A and code review.
+ +## Prerequisites + +Install the `auggie` CLI: +```bash +auggie --version +``` + +## Usage + +```bash +# Authenticate (for AI features) +auggie login + +# Run the example (from the context directory) +cd examples/python-sdk/context +python -m filesystem_context + +# Or run directly +python filesystem_context/main.py +``` + +## What It Does + +- Spawns `auggie --mcp` process for file system operations +- Searches local directories without explicit indexing +- Uses `search_and_ask()` for AI Q&A about the workspace +- Performs AI-powered code review +- Explains code patterns + +## Key Features + +- **`search()`**: Semantic search over local files +- **`search_and_ask()`**: One-step AI Q&A about workspace +- **MCP Protocol**: Standardized context access +- **Auto-indexing**: Files indexed on-the-fly + diff --git a/examples/python-sdk/context/filesystem_context/__init__.py b/examples/python-sdk/context/filesystem_context/__init__.py new file mode 100644 index 0000000..31ba7a8 --- /dev/null +++ b/examples/python-sdk/context/filesystem_context/__init__.py @@ -0,0 +1,2 @@ +# filesystem_context package + diff --git a/examples/python-sdk/context/filesystem_context/__main__.py b/examples/python-sdk/context/filesystem_context/__main__.py new file mode 100644 index 0000000..f7b77b6 --- /dev/null +++ b/examples/python-sdk/context/filesystem_context/__main__.py @@ -0,0 +1,5 @@ +"""Allow running as: python -m filesystem_context""" +from .main import main + +main() + diff --git a/examples/python-sdk/context/filesystem_context/main.py b/examples/python-sdk/context/filesystem_context/main.py new file mode 100644 index 0000000..3c8c81e --- /dev/null +++ b/examples/python-sdk/context/filesystem_context/main.py @@ -0,0 +1,102 @@ +""" +Sample: FileSystem Context - Local directory retrieval via MCP + +This sample demonstrates: +- Creating a FileSystem Context instance +- Searching a local directory using MCP protocol +- Getting formatted search results +- Interactive Q&A about the 
workspace using AI
- Code review suggestions using AI
- Properly closing the MCP connection
"""

import sys
from pathlib import Path

from auggie_sdk.context import FileSystemContext


def main():
    """Demonstrate FileSystemContext: search, Q&A, code review, and cleanup."""
    print("=== FileSystem Context Sample ===\n")

    # Use the current SDK directory as the workspace
    workspace_dir = str(Path.cwd())
    print(f"Workspace directory: {workspace_dir}")

    # Create a FileSystem Context instance
    # Authentication is handled automatically by the auggie CLI via:
    # 1. AUGMENT_API_TOKEN / AUGMENT_API_URL env vars, or
    # 2. ~/.augment/session.json (created by `auggie login`)
    print("\nCreating FileSystem Context (spawning auggie --mcp)...")
    context = FileSystemContext.create(
        workspace_dir,
        auggie_path="auggie",  # or specify full path to auggie binary
        debug=True,
    )

    try:
        # Search 1: Find Python SDK implementation
        print("\n--- Search 1: Python SDK implementation ---")
        results1 = context.search("Python SDK implementation")
        print("Search results:")
        print(results1[:500])  # Show first 500 chars
        if len(results1) > 500:
            print(f"... ({len(results1) - 500} more characters)")

        # Search 2: Find context modes
        print("\n--- Search 2: Context modes implementation ---")
        results2 = context.search("context modes implementation")
        print("Search results:")
        print(results2[:500])  # Show first 500 chars
        if len(results2) > 500:
            print(f"... ({len(results2) - 500} more characters)")

        # search_and_ask Example 1: Ask questions about the workspace
        print("\n--- search_and_ask Example 1: Ask about context modes ---")
        question1 = "What context modes are available in this SDK?"
        print(f"Question: {question1}")
        answer1 = context.search_and_ask("context modes", question1)
        print(f"\nAnswer: {answer1}")

        # search_and_ask Example 2: Ask about implementation
        print("\n--- search_and_ask Example 2: Ask about generation API ---")
        question2 = "How is the generation API implemented?"
        print(f"Question: {question2}")
        answer2 = context.search_and_ask("generation API implementation", question2)
        print(f"\nAnswer: {answer2}")

        # search_and_ask Example 3: Code review
        print("\n--- search_and_ask Example 3: Code review ---")
        review_file = "auggie_sdk/context/__init__.py"
        print(f"Reviewing: {review_file}")
        review = context.search_and_ask(
            f"file:{review_file}",
            "Review this code for potential issues, bugs, and improvements. Provide specific, actionable feedback.",
        )
        print(f"\nReview:\n{review}")

        # search_and_ask Example 4: Explain patterns
        print("\n--- search_and_ask Example 4: Explain code patterns ---")
        pattern = "error handling"
        print(f"Pattern: {pattern}")
        pattern_explanation = context.search_and_ask(
            pattern,
            f'Explain this code pattern: "{pattern}". What does it do, why is it used, and what are the best practices?',
        )
        print(f"\nExplanation:\n{pattern_explanation}")
    finally:
        # Always close the MCP connection
        print("\nClosing MCP connection...")
        context.close()
        print("MCP connection closed")

    print("\n=== Sample Complete ===")


if __name__ == "__main__":
    try:
        main()
    except Exception as error:
        print(f"Error: {error}")
        sys.exit(1)
diff --git a/examples/python-sdk/context/filesystem_context/test_example.py b/examples/python-sdk/context/filesystem_context/test_example.py
new file mode 100644
index 0000000..f345f5a
--- /dev/null
+++ b/examples/python-sdk/context/filesystem_context/test_example.py
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""
Test for the filesystem_context example.

This example runs synchronously and tests the FileSystemContext API for:
- Creating an MCP-based context
- Searching a local directory
- Using search_and_ask for Q&A and code review
- Properly closing the MCP connection
"""

import subprocess
import sys
from pathlib import Path


def main():
    """Run the filesystem_context example and verify it completes successfully."""
    # Get the package directory and run from parent so module execution works
    package_dir = Path(__file__).parent
    context_dir = package_dir.parent

    print("Running: python -m filesystem_context")
    print(f"Working directory: {context_dir}")

    result = subprocess.run(
        [sys.executable, "-m", "filesystem_context"],
        capture_output=True,
        text=True,
        timeout=120,  # 2 minutes should be plenty
        cwd=str(context_dir),
    )

    # Print output for debugging
    if result.stdout:
        print("=== stdout ===")
        print(result.stdout)
    if result.stderr:
        print("=== stderr ===")
        print(result.stderr)

    # Verify success
    if result.returncode != 0:
        print(f"❌ Example failed with exit code {result.returncode}")
        sys.exit(1)

    # Verify expected output: completion banner printed by the example.
    if "=== Sample Complete ===" not in result.stdout:
        print("❌ Example did not complete successfully (missing completion message)")
        sys.exit(1)

    # The example's finally-block must have closed the MCP connection.
    if "MCP connection closed" not in result.stdout:
        print("❌ Example did not properly close MCP connection")
        sys.exit(1)

    print("✅ filesystem_context example passed")


if __name__ == "__main__":
    main()
diff --git a/examples/python-sdk/context/github_action_indexer/.github/workflows/augment-index.yml b/examples/python-sdk/context/github_action_indexer/.github/workflows/augment-index.yml
new file mode 100644
index 0000000..bdb544c
--- /dev/null
+++ b/examples/python-sdk/context/github_action_indexer/.github/workflows/augment-index.yml
@@ -0,0 +1,81 @@
name: Index Repository

on:
  push:
    branches:
      - main
      - develop
      - 'feature/**' # Index feature branches
      - 'release/**' # 
Index release branches + workflow_dispatch: + inputs: + branch: + description: 'Branch to index (leave empty for current branch)' + required: false + type: string + force_full_reindex: + description: 'Force full re-index' + required: false + type: boolean + default: false + +jobs: + index: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for comparison + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: pip install -r augment_indexer/requirements.txt + + - name: Restore index state + uses: actions/cache@v4 + with: + path: .augment-index-state + # Use branch-specific cache key + key: augment-index-${{ github.ref_name }}-${{ github.sha }} + restore-keys: | + augment-index-${{ github.ref_name }}- + + - name: Index repository + id: index + run: python -m augment_indexer.main + env: + AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }} + AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + STORAGE_TYPE: file + # Branch-specific state path (automatically determined from GITHUB_REF) + # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json + MAX_COMMITS: 100 + MAX_FILES: 500 + + - name: Print results + if: always() + run: | + echo "Success: ${{ steps.index.outputs.success }}" + echo "Type: ${{ steps.index.outputs.type }}" + echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}" + echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}" + echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}" + echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}" + + - name: Upload state artifact + if: success() + uses: actions/upload-artifact@v4 + with: + name: index-state + path: .augment-index-state/ + retention-days: 30 + include-hidden-files: true + diff --git a/examples/python-sdk/context/github_action_indexer/README.md 
b/examples/python-sdk/context/github_action_indexer/README.md new file mode 100644 index 0000000..f36a26f --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/README.md @@ -0,0 +1,248 @@ +# GitHub Action Repository Indexer + +A Python example showing how to index a GitHub repository using the Augment SDK Direct Mode with incremental updates. + +## Overview + +This example demonstrates: +- Incremental indexing using GitHub's Compare API +- State persistence using file system storage +- Automatic fallback to full re-index when needed +- GitHub Actions integration + +## Prerequisites + +### Getting Your API Credentials + +1. **Authenticate to get your credentials if you have not already:** + ```bash + auggie login + ``` + This opens a browser for authentication and stores your credentials at `~/.augment/session.json`. + +2. **Extract the values for environment variables:** + ```bash + # View your session file to get the values + cat ~/.augment/session.json + ``` + - `AUGMENT_API_TOKEN`: Use the `accessToken` value from the session file + - `AUGMENT_API_URL`: Use the `baseUrl` value from the session file + +### GitHub Token + +For the `GITHUB_TOKEN`, you need a GitHub Personal Access Token with `repo` scope (for private repos) or `public_repo` scope (for public repos only). [Create one here](https://github.com/settings/tokens). + +## Two Ways to Use This Example + +There are two ways to use this indexer: + +| Mode | Description | Best For | +|------|-------------|----------| +| **Local Testing** | Run from this examples directory to index any GitHub repo | Trying out the indexer, testing on existing repos | +| **GitHub Actions** | Copy the code into your own repo for automatic indexing | Production use, CI/CD integration | + +--- + +## Option 1: Local Testing (Quick Start) + +Test the indexer on any GitHub repository without copying any files. This downloads the repo via the GitHub API and indexes it. 
+ +### Example: Index the facebook/react Repository + +```bash +# Navigate to the context examples directory +cd examples/python-sdk/context + +# Install dependencies +pip install -r github_action_indexer/augment_indexer/requirements.txt + +# Set your credentials (see Prerequisites section above) +export AUGMENT_API_TOKEN="your-token" # From ~/.augment/session.json +export AUGMENT_API_URL="your-url" # From ~/.augment/session.json +export GITHUB_TOKEN="your-github-token" # Your GitHub PAT + +# Index a public repository (e.g., facebook/react) +export GITHUB_REPOSITORY="facebook/react" +export GITHUB_SHA="main" + +# Run the indexer +python -m github_action_indexer index + +# Search the indexed repository +python -m github_action_indexer search "hooks implementation" +python -m github_action_indexer search "reconciler algorithm" +``` + +### Index Your Own Repository + +```bash +# Set to any repo you have access to +export GITHUB_REPOSITORY="your-username/your-repo" +export GITHUB_SHA="main" # or a specific commit SHA + +python -m github_action_indexer index +python -m github_action_indexer search "your search query" +``` + +The index state is saved to `.augment-index-state/{branch}/state.json`, so subsequent runs perform incremental updates. + +--- + +## Option 2: GitHub Actions Setup (Production Use) + +For automatic indexing of your repository on every push, install the indexer into your repository. + +### Quick Install + +```bash +# From the auggie repo, install into your target repository +cd examples/python-sdk/context +python -m github_action_indexer install /path/to/your/repo +``` + +This will: +- Copy the `augment_indexer/` directory (includes `requirements.txt`) +- Create `.github/workflows/augment-index.yml` +- Update `.gitignore` + +### Manual Installation + +If you prefer to install manually: + +1. **Copy the indexer code** to your repository root. 
The workflow expects the following structure: + ``` + your-repo/ + ├── .github/workflows/augment-index.yml + └── augment_indexer/ + ├── main.py (and other indexer source files) + └── requirements.txt + ``` + +2. **Copy the workflow file** from [`.github/workflows/augment-index.yml`](.github/workflows/augment-index.yml) to your repository's `.github/workflows/` directory. + +### Configure Secrets + +After installing (either method), add your API credentials to repository secrets: + +1. Go to Settings → Secrets and variables → Actions +2. Add `AUGMENT_API_TOKEN` with your Augment API token +3. Add `AUGMENT_API_URL` with your tenant-specific URL (e.g., `https://your-tenant.api.augmentcode.com/`) + +> **Note:** Do not include quotes around the secret values. Enter the raw values directly (e.g., `https://...` not `"https://..."`). + +### Push to trigger indexing + +The workflow will run automatically on every push to `main` + +The GitHub Actions cache will persist the index state between runs, enabling incremental updates. + +### Downloading and Using Artifacts + +After the workflow runs, you can download the index state artifact and use it for local searching: + +1. Go to the workflow run page (Actions tab → select the run) +2. Scroll to "Artifacts" section at the bottom +3. Download `index-state` +4. Extract and use it: + +```bash +# Navigate to the context examples directory +cd examples/python-sdk/context + +# Extract the downloaded artifact +# It will contain a {branch}/ directory with state.json (e.g., main/state.json) +unzip ~/Downloads/index-state.zip -d .augment-index-state + +# Set your credentials +export AUGMENT_API_TOKEN="your-token" +export AUGMENT_API_URL="https://your-tenant.api.augmentcode.com/" +export BRANCH="main" # or whatever branch you indexed + +# Now you can search! +python -m github_action_indexer search "authentication functions" +``` + +This allows you to search the indexed repository locally without re-indexing. 
+ +## Configuration + +Key environment variables: + +| Variable | Description | Required | Default | +|----------|-------------|----------|---------| +| `AUGMENT_API_TOKEN` | Augment API token | Yes | - | +| `AUGMENT_API_URL` | Augment API URL (e.g., `https://your-tenant.api.augmentcode.com/`) | Yes | - | +| `GITHUB_TOKEN` | GitHub token for API access | Yes | Auto-provided in GitHub Actions | +| `GITHUB_REPOSITORY` | Repository in `owner/repo` format | Yes | Auto-provided in GitHub Actions | +| `GITHUB_SHA` | Commit SHA to index | Yes | Auto-provided in GitHub Actions | +| `STATE_PATH` | File path for state storage | No | `.augment-index-state/{branch}/state.json` | +| `MAX_COMMITS` | Max commits before full re-index | No | `100` | +| `MAX_FILES` | Max file changes before full re-index | No | `500` | + +## How It Works + +1. **Load previous state** from storage (if exists) +2. **Check if full re-index is needed**: + - First run (no previous state) + - Force push detected + - Too many commits or file changes + - Ignore files changed +3. **If full re-index**: Download tarball and index all files +4. **If incremental**: Use Compare API to index only changed files +5. **Save new state** to storage + +## Storage Backends + +The index state is stored as a JSON file on the file system by default (`.augment-index-state/{branch}/state.json`). In GitHub Actions, the state is persisted between runs using GitHub Actions cache for efficient incremental updates. + +The indexer can be adapted to use other storage backends like Redis, S3, or databases. The state save/load operations in `augment_indexer/index_manager.py` can be modified to work with any storage system that can persist JSON data. 
+ +## Searching the Index + +After indexing, you can search the repository using the CLI tool: + +```bash +# From the context examples directory +cd examples/python-sdk/context + +# Search for specific functionality +python -m github_action_indexer search "authentication functions" + +# Search for error handling patterns +python -m github_action_indexer search "error handling" + +# Search for specific implementations +python -m github_action_indexer search "database queries" +``` + +The search tool will: +1. Load the index state from storage +2. Perform semantic search using the Augment SDK +3. Display matching code chunks with file paths and line numbers + +Example output: +``` +Searching for: "authentication functions" + +Loading index state... +Loaded index: 42 files indexed +Last indexed commit: abc123def456 +Branch: main + +Found 3 result(s): + +📄 src/auth/login.py + Lines 15-28 + ──────────────────────────────────────────────────────────── + 15 │ async def authenticate_user( + 16 │ username: str, + 17 │ password: str + 18 │ ) -> User: + 19 │ # Authentication logic... + 20 │ ... +``` + +## License + +MIT + diff --git a/examples/python-sdk/context/github_action_indexer/__init__.py b/examples/python-sdk/context/github_action_indexer/__init__.py new file mode 100644 index 0000000..7c0a683 --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/__init__.py @@ -0,0 +1,9 @@ +""" +GitHub Action Indexer - Index GitHub repositories with incremental updates. 

Usage:
    cd examples/python-sdk/context
    python -m github_action_indexer index
    python -m github_action_indexer search "query"
"""
diff --git a/examples/python-sdk/context/github_action_indexer/__main__.py b/examples/python-sdk/context/github_action_indexer/__main__.py
new file mode 100644
index 0000000..057eb57
--- /dev/null
+++ b/examples/python-sdk/context/github_action_indexer/__main__.py
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""
GitHub Action Indexer - CLI entry point

Usage:
    python -m github_action_indexer install /path/to/repo
    python -m github_action_indexer index
    python -m github_action_indexer search "query"
    python -m github_action_indexer search "query" --max-chars 5000
"""

import sys


def main() -> None:
    """CLI dispatcher for github_action_indexer commands."""
    if len(sys.argv) < 2:
        # No command given: print usage help and exit non-zero.
        print("GitHub Action Indexer - Index GitHub repositories with incremental updates")
        print()
        print("Usage:")
        print(" python -m github_action_indexer [args]")
        print()
        print("Commands:")
        print(" install [target_dir] Install the indexer into a repository")
        print(" index Index the repository (uses environment variables)")
        print(" search Search the indexed repository")
        print()
        print("Examples:")
        print(" python -m github_action_indexer install /path/to/your/repo")
        print(" python -m github_action_indexer index")
        print(' python -m github_action_indexer search "authentication functions"')
        print(' python -m github_action_indexer search "error handling" --max-chars 5000')
        sys.exit(1)

    command = sys.argv[1]
    # Remove the command from argv so subcommands see correct args
    sys.argv = [sys.argv[0]] + sys.argv[2:]

    # Lazy imports: only pull in the subcommand module actually requested.
    if command == "install":
        from .install import main as install_main

        install_main()
    elif command == "index":
        from .augment_indexer.main import main as index_main

        index_main()
    elif command == "search":
        from .augment_indexer.search import main as search_main

        search_main()
    else:
        print(f"Unknown command: {command}")
        print("Available commands: install, index, search")
        sys.exit(1)


if __name__ == "__main__":
    main()
diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/__init__.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/__init__.py
new file mode 100644
index 0000000..499dfe6
--- /dev/null
+++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/__init__.py
@@ -0,0 +1,24 @@
"""
GitHub Action Repository Indexer

A Python example showing how to index a GitHub repository using the Augment SDK
Direct Mode with incremental updates.

See README.md for usage instructions.
"""

from .models import FileChange, IndexConfig, IndexResult, IndexState
from .file_filter import should_filter_file
from .github_client import GitHubClient
from .index_manager import IndexManager

__all__ = [
    "FileChange",
    "IndexConfig",
    "IndexResult",
    "IndexState",
    "should_filter_file",
    "GitHubClient",
    "IndexManager",
]
diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/file_filter.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/file_filter.py
new file mode 100644
index 0000000..88ab035
--- /dev/null
+++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/file_filter.py
@@ -0,0 +1,123 @@
"""
File filtering logic for GitHub repository indexing.
"""

import re
from pathlib import Path
from typing import Optional

# Keyish pattern regex - matches files that likely contain secrets/keys
KEYISH_PATTERN = re.compile(
    r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$'
)

# Default max file size in bytes (1 MB)
DEFAULT_MAX_FILE_SIZE = 1024 * 1024  # 1 MB


def always_ignore_path(path: str) -> bool:
    """
    Check if a path should always be ignored (security measure).

    Args:
        path: The file path to check.

    Returns:
        True if the path contains ".." and should be ignored.
    """
    return ".." in path


def is_keyish_path(path: str) -> bool:
    """
    Check if a path matches the keyish pattern (secrets/keys).

    Args:
        path: The file path to check.

    Returns:
        True if the filename matches patterns for secret/key files.
    """
    # Extract filename from path; only the basename is matched against the pattern.
    filename = Path(path).name
    return bool(KEYISH_PATTERN.match(filename))


def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool:
    """
    Check if file size is valid for upload.

    Args:
        size_bytes: The size of the file in bytes.
        max_file_size: Maximum allowed file size in bytes. Defaults to 1 MB.

    Returns:
        True if the file size is within the allowed limit.
    """
    return size_bytes <= max_file_size


def is_valid_utf8(content: bytes) -> bool:
    """
    Check if file content is valid UTF-8 (not binary).

    Args:
        content: The file content as bytes.

    Returns:
        True if the content is valid UTF-8, False if it's binary or invalid.
    """
    try:
        content.decode("utf-8")
        return True
    except UnicodeDecodeError:
        return False


def should_filter_file(
    path: str,
    content: bytes,
    max_file_size: Optional[int] = None,
) -> dict:
    """
    Check if a file should be filtered out.

    Returns {"filtered": True, "reason": "..."} if file should be skipped.
    Returns {"filtered": False} if file should be included.

    Priority order (from file-filtering.md):
    1. Path validation (contains "..")
    2. File size check
    3. .augmentignore rules (checked by caller)
    4. Keyish patterns
    5. .gitignore rules (checked by caller)
    6. UTF-8 validation

    Args:
        path: The file path to check.
        content: The file content as bytes.
        max_file_size: Maximum allowed file size in bytes. Defaults to DEFAULT_MAX_FILE_SIZE.

    Returns:
        A dict with "filtered" (bool) and optionally "reason" (str) keys.
    """
    effective_max_size = max_file_size if max_file_size is not None else DEFAULT_MAX_FILE_SIZE

    # 1. Check for ".." in path (security)
    if always_ignore_path(path):
        return {"filtered": True, "reason": "path_contains_dotdot"}

    # 2. Check file size
    if not is_valid_file_size(len(content), effective_max_size):
        return {"filtered": True, "reason": f"file_too_large ({len(content)} bytes)"}

    # 3. Check keyish patterns (secrets/keys)
    if is_keyish_path(path):
        return {"filtered": True, "reason": "keyish_pattern"}

    # 4. Check UTF-8 validity (binary detection)
    if not is_valid_utf8(content):
        return {"filtered": True, "reason": "binary_file"}

    return {"filtered": False}
diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/github_client.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/github_client.py
new file mode 100644
index 0000000..f69bd62
--- /dev/null
+++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/github_client.py
@@ -0,0 +1,307 @@
"""
GitHub API client for fetching repository data.
"""

import io
import tarfile

import pathspec
import requests
from github import Github
from github.GithubException import GithubException

from .file_filter import should_filter_file
from .models import FileChange


class GitHubClient:
    """GitHub API client for fetching repository data."""

    def __init__(self, token: str) -> None:
        """
        Initialize the GitHub client with an authentication token.

        Args:
            token: GitHub personal access token or GitHub App token.
        """
        self._github = Github(token)
        self._token = token

    def resolve_ref(self, owner: str, repo: str, ref: str) -> str:
        """
        Resolve a ref (like "HEAD", "main", or a commit SHA) to a commit SHA.

        Args:
            owner: Repository owner.
            repo: Repository name.
            ref: Git ref to resolve.

        Returns:
            The full 40-character commit SHA.

        Raises:
            Exception: If the ref cannot be resolved.
+ """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + commit = repository.get_commit(ref) + return commit.sha + except GithubException as error: + raise Exception( + f'Failed to resolve ref "{ref}" for {owner}/{repo}: {error}' + ) from error + + def download_tarball(self, owner: str, repo: str, ref: str) -> dict[str, str]: + """ + Download repository as tarball and extract files. + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to download. + + Returns: + Dictionary mapping file paths to their contents. + """ + print(f"Downloading tarball for {owner}/{repo}@{ref}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + tarball_url = repository.get_archive_link("tarball", ref) + + # Download tarball (10 minute timeout to handle large repositories) + # Include auth header for private repos + headers = {"Authorization": f"Bearer {self._token}"} + response = requests.get(tarball_url, headers=headers, stream=True, timeout=600) + if not response.ok: + raise Exception(f"Failed to download tarball: {response.reason}") + + # Load ignore patterns + augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref) + + # Track filtering statistics + files: dict[str, str] = {} + total_files = 0 + filtered_files = 0 + filter_reasons: dict[str, int] = {} + + # Extract files from tarball + tarball_data = io.BytesIO(response.content) + with tarfile.open(fileobj=tarball_data, mode="r:gz") as tar: + for member in tar.getmembers(): + # Skip directories and symlinks + if not member.isfile(): + continue + + total_files += 1 + + # Remove the root directory prefix (e.g., "owner-repo-sha/") + path_parts = member.name.split("/") + path_parts.pop(0) # Remove first component + file_path = "/".join(path_parts) + + if not file_path: + continue + + # Read file contents + file_obj = tar.extractfile(member) + if file_obj is None: + continue + content_bytes = file_obj.read() + + # Apply filtering in priority order: + # 1. 
.augmentignore + if augmentignore and augmentignore.match_file(file_path): + filtered_files += 1 + filter_reasons["augmentignore"] = filter_reasons.get("augmentignore", 0) + 1 + continue + + # 2. Path validation, file size, keyish patterns, UTF-8 validation + filter_result = should_filter_file(path=file_path, content=content_bytes) + + if filter_result["filtered"]: + filtered_files += 1 + reason = filter_result.get("reason", "unknown") + filter_reasons[reason] = filter_reasons.get(reason, 0) + 1 + continue + + # 3. .gitignore (checked last) + if gitignore and gitignore.match_file(file_path): + filtered_files += 1 + filter_reasons["gitignore"] = filter_reasons.get("gitignore", 0) + 1 + continue + + # File passed all filters + try: + contents = content_bytes.decode("utf-8") + files[file_path] = contents + except UnicodeDecodeError: + # This should not happen if is_valid_utf8() is working correctly + filtered_files += 1 + filter_reasons["decode_error"] = filter_reasons.get("decode_error", 0) + 1 + print(f"Warning: File {file_path} passed UTF-8 validation but failed to decode") + + print(f"Extracted {len(files)} files from tarball") + print(f"Filtered {filtered_files} of {total_files} files. Reasons: {filter_reasons}") + return files + + def compare_commits( + self, owner: str, repo: str, base: str, head: str + ) -> dict: + """ + Compare two commits and get file changes. 
+ """ + print(f"Comparing {base}...{head}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + files: list[FileChange] = [] + + for file in comparison.files: + change = FileChange( + path=file.filename, + status=self._map_github_status(file.status), + previousFilename=file.previous_filename, + ) + + # Download file contents for added/modified files + if change.status in ("added", "modified"): + try: + contents = self.get_file_contents(owner, repo, file.filename, head) + change.contents = contents + except Exception as error: + print(f"Warning: Failed to download {file.filename}: {error}") + + files.append(change) + + return { + "files": files, + "commits": comparison.total_commits, + "totalChanges": len(comparison.files), + } + + def get_file_contents( + self, owner: str, repo: str, path: str, ref: str + ) -> str: + """ + Get file contents at a specific ref. + + Args: + owner: Repository owner. + repo: Repository name. + path: File path within the repository. + ref: Git ref to get contents at. + + Returns: + The file contents as a string. + + Raises: + Exception: If the path is not a file. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + content = repository.get_contents(path, ref) + + if isinstance(content, list): + raise Exception(f"{path} is not a file") + + return content.decoded_content.decode("utf-8") + + def _load_ignore_patterns( + self, owner: str, repo: str, ref: str + ) -> tuple[pathspec.PathSpec | None, pathspec.PathSpec | None]: + """ + Load .gitignore and .augmentignore patterns separately. + + Returns both filters to maintain proper priority order: + .augmentignore → keyish → .gitignore + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to load patterns from. + + Returns: + Tuple of (augmentignore, gitignore) PathSpec objects, or None if not found. 
+ """ + augmentignore: pathspec.PathSpec | None = None + gitignore: pathspec.PathSpec | None = None + + # Try to load .gitignore + try: + gitignore_content = self.get_file_contents(owner, repo, ".gitignore", ref) + gitignore = pathspec.PathSpec.from_lines("gitwildmatch", gitignore_content.splitlines()) + except Exception: + # .gitignore doesn't exist + pass + + # Try to load .augmentignore + try: + augmentignore_content = self.get_file_contents(owner, repo, ".augmentignore", ref) + augmentignore = pathspec.PathSpec.from_lines("gitwildmatch", augmentignore_content.splitlines()) + except Exception: + # .augmentignore doesn't exist + pass + + return augmentignore, gitignore + + def _map_github_status(self, status: str) -> str: + """ + Map GitHub file status to our FileChange status. + + Args: + status: GitHub file status string. + + Returns: + Normalized status string. + """ + status_map = { + "added": "added", + "modified": "modified", + "removed": "removed", + "renamed": "renamed", + } + return status_map.get(status, "modified") + + def ignore_files_changed( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if ignore files changed between commits. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if .gitignore or .augmentignore changed, False otherwise. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + ignore_files = [".gitignore", ".augmentignore"] + return any(file.filename in ignore_files for file in comparison.files) + + def is_force_push( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if the push was a force push. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if the push was a force push, False otherwise. 
+ """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + repository.compare(base, head) + return False + except GithubException: + # If comparison fails, it's likely a force push + return True diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/index_manager.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/index_manager.py new file mode 100644 index 0000000..c2bf48f --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/index_manager.py @@ -0,0 +1,395 @@ +""" +Index Manager - Core indexing logic +""" + +import json +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext, File + +from .github_client import GitHubClient +from .models import FileChange, IndexConfig, IndexResult, IndexState, RepositoryInfo + +DEFAULT_MAX_COMMITS = 100 +DEFAULT_MAX_FILES = 500 + + +class IndexManager: + """Index Manager - Core indexing logic for GitHub repositories.""" + + def __init__( + self, context: DirectContext, config: IndexConfig, state_path: str + ) -> None: + """ + Initialize the IndexManager. + + Args: + context: DirectContext instance for indexing operations. + config: Configuration for the indexing operation. + state_path: Path to the state file for persistence. + """ + self._context = context + self._config = config + self._state_path = state_path + self._github = GitHubClient(config.githubToken) + + def resolve_commit_sha(self) -> None: + """ + Resolve the current commit ref to an actual commit SHA. + + This handles cases where GITHUB_SHA might be "HEAD" or a branch name. + Updates the config.currentCommit with the resolved SHA. + """ + resolved_sha = self._github.resolve_ref( + self._config.owner, self._config.repo, self._config.currentCommit + ) + self._config.currentCommit = resolved_sha + + def _load_state(self) -> Optional[IndexState]: + """ + Load index state from file system. 
+ + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to load state from your preferred storage: + - Redis: Use redis-py client to GET the state JSON + - S3: Use boto3 to get_object from S3 bucket + - Database: Query your database for the state record + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + data = r.get(state_key) + return json.loads(data) if data else None + + Example for S3: + import boto3 + s3 = boto3.client('s3') + response = s3.get_object(Bucket=bucket, Key=key) + data = response['Body'].read().decode('utf-8') + return json.loads(data) + + Returns: + The loaded IndexState or None if the file doesn't exist. + """ + try: + with open(self._state_path, "r", encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + return None + + def _save_state(self, state: IndexState) -> None: + """ + Save index state to file system. + + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to save state to your preferred storage: + - Redis: Use redis-py client to SET the state JSON + - S3: Use boto3 to put_object to S3 bucket + - Database: Insert or update the state record in your database + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + r.set(state_key, json.dumps(state)) + + Example for S3: + import boto3 + s3 = boto3.client('s3') + s3.put_object( + Bucket=bucket, + Key=key, + Body=json.dumps(state), + ContentType='application/json' + ) + + Note: The state is just a JSON object (IndexState type) that can be + serialized and stored anywhere. For distributed systems, consider using + Redis or a database for shared state across multiple workers. + + Args: + state: The IndexState to save. + """ + # Ensure directory exists + Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) + + # Write state to file + with open(self._state_path, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + + def index(self) -> IndexResult: + """ + Main indexing entry point. 
+ + Returns: + IndexResult with success status and indexing details. + """ + print( + f"Starting index for {self._config.owner}/{self._config.repo}" + f"@{self._config.branch}" + ) + + try: + # Load previous state + previous_state = self._load_state() + + # If we have previous state, we'll need to create a new context with the imported state + # For now, we'll handle this in the incremental update logic + + # Determine if we need full re-index + should_reindex, reason = self._should_full_reindex(previous_state) + + if should_reindex: + return self._full_reindex(reason) + + # Perform incremental update + # previous_state is guaranteed to be non-null here + if not previous_state: + raise RuntimeError("previous_state should not be None at this point") + return self._incremental_update(previous_state) + except Exception as error: + print(f"Indexing failed: {error}") + return IndexResult( + success=False, + type="full", + filesIndexed=0, + filesDeleted=0, + checkpointId="", + commitSha=self._config.currentCommit, + error=str(error), + ) + + def _should_full_reindex( + self, previous_state: Optional[IndexState] + ) -> tuple[bool, Optional[str]]: + """ + Determine if full re-index is needed. + + Args: + previous_state: The previous index state, or None if first run. + + Returns: + Tuple of (should_reindex, reason). 
+ """ + # No previous state - first run + if not previous_state: + return (True, "first_run") + + # Different repository + if ( + previous_state["repository"]["owner"] != self._config.owner + or previous_state["repository"]["name"] != self._config.repo + ): + return (True, "different_repository") + + # Same commit - no changes + if previous_state["lastCommitSha"] == self._config.currentCommit: + print("No changes detected") + return (False, None) + + # Check for force push + is_force_push = self._github.is_force_push( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if is_force_push: + return (True, "force_push") + + # Get comparison + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Too many commits + max_commits = self._config.maxCommits or DEFAULT_MAX_COMMITS + if comparison["commits"] > max_commits: + return ( + True, + f"too_many_commits ({comparison['commits']} > {max_commits})", + ) + + # Too many file changes + max_files = self._config.maxFiles or DEFAULT_MAX_FILES + if comparison["totalChanges"] > max_files: + return ( + True, + f"too_many_files ({comparison['totalChanges']} > {max_files})", + ) + + # Check if ignore files changed + ignore_changed = self._github.ignore_files_changed( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if ignore_changed: + return (True, "ignore_files_changed") + + return (False, None) + + def _full_reindex(self, reason: Optional[str]) -> IndexResult: + """ + Perform full repository re-index. + + Args: + reason: The reason for the full re-index. + + Returns: + IndexResult with the result of the full re-index. 
+ """ + print(f"Performing full re-index (reason: {reason or 'unknown'})") + + # Download entire repository as tarball + files = self._github.download_tarball( + self._config.owner, self._config.repo, self._config.currentCommit + ) + + # Add all files to index + files_to_index = [ + File(path=path, contents=contents) for path, contents in files.items() + ] + + print(f"Adding {len(files_to_index)} files to index...") + self._context.add_to_index(files_to_index) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": RepositoryInfo( + owner=self._config.owner, + name=self._config.repo, + ), + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="full", + filesIndexed=len(files_to_index), + filesDeleted=0, + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + reindexReason=reason, + ) + + def _incremental_update(self, previous_state: IndexState) -> IndexResult: + """ + Perform incremental update. + + Args: + previous_state: The previous index state. + + Returns: + IndexResult with the result of the incremental update. 
+ """ + print("Performing incremental update...") + + # Create a temporary file with the previous context state + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-incremental-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(previous_state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Create a new context from the previous state + self._context = DirectContext.import_from_file( + str(temp_path), + api_key=self._config.apiToken, + api_url=self._config.apiUrl, + ) + finally: + temp_path.unlink(missing_ok=True) + + # Get file changes + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Process changes + files_to_add, files_to_delete = self._process_file_changes(comparison["files"]) + + print(f"Adding {len(files_to_add)} files, deleting {len(files_to_delete)} files") + + # Update index + if files_to_add: + self._context.add_to_index(files_to_add) + + if files_to_delete: + self._context.remove_from_index(files_to_delete) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": previous_state["repository"], + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="incremental", + filesIndexed=len(files_to_add), + filesDeleted=len(files_to_delete), + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + ) + + def _process_file_changes( + self, changes: list[FileChange] + ) -> tuple[list[File], list[str]]: + """ + Process file changes and categorize them for indexing. 
+ + Args: + changes: List of file changes from the comparison. + + Returns: + Tuple of (files_to_add, files_to_delete). + """ + files_to_add: list[File] = [] + files_to_delete: list[str] = [] + + for change in changes: + if change.status in ("added", "modified"): + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + elif change.status == "removed": + files_to_delete.append(change.path) + elif change.status == "renamed": + if change.previousFilename: + files_to_delete.append(change.previousFilename) + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + + return files_to_add, files_to_delete + diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/main.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/main.py new file mode 100644 index 0000000..fd10065 --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/main.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +""" +Main entry point for GitHub Action Indexer + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer index +""" + +import os +import re +import sys + +from auggie_sdk.context import DirectContext + +from .index_manager import IndexManager +from .models import IndexConfig + + +def get_api_credentials() -> tuple[str, str]: + """Get API credentials from environment variables.""" + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + raise ValueError("AUGMENT_API_TOKEN environment variable is required") + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + raise ValueError( + "AUGMENT_API_URL environment variable is required. Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')" + ) + + return api_token, api_url + + +def parse_repository_info() -> tuple[str, str, str, str]: + """ + Parse repository information from environment variables. 
+ Returns (owner, repo, branch, current_commit). + """ + repository = os.environ.get("GITHUB_REPOSITORY", "") + parts = repository.split("/") + + if len(parts) != 2 or not parts[0] or not parts[1]: + raise ValueError('GITHUB_REPOSITORY must be in format "owner/repo"') + + owner, repo = parts + + # Extract branch name from GitHub ref + github_ref = os.environ.get("GITHUB_REF", "") + github_ref_name = os.environ.get("GITHUB_REF_NAME", "") + + if github_ref.startswith("refs/heads/"): + branch = github_ref_name + elif github_ref.startswith("refs/tags/"): + branch = f"tag/{github_ref_name}" + elif github_ref_name: + branch = github_ref_name + else: + branch = os.environ.get("BRANCH", "main") + + current_commit = os.environ.get("GITHUB_SHA", "") + if not current_commit: + raise ValueError("GITHUB_SHA environment variable is required") + + return owner, repo, branch, current_commit + + +def load_config() -> IndexConfig: + """Load configuration from environment variables.""" + github_token = os.environ.get("GITHUB_TOKEN") + if not github_token: + raise ValueError("GITHUB_TOKEN environment variable is required") + + api_token, api_url = get_api_credentials() + owner, repo, branch, current_commit = parse_repository_info() + + max_commits = os.environ.get("MAX_COMMITS") + max_files = os.environ.get("MAX_FILES") + + return IndexConfig( + apiToken=api_token, + apiUrl=api_url, + githubToken=github_token, + owner=owner, + repo=repo, + branch=branch, + currentCommit=current_commit, + maxCommits=int(max_commits) if max_commits else None, + maxFiles=int(max_files) if max_files else None, + ) + + +def get_state_path(branch: str) -> str: + """Get the state file path for the current branch.""" + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def main() -> None: + """Main function.""" + print("GitHub Action Indexer - Starting...") + + try: + # Load configuration + config = 
load_config() + state_path = get_state_path(config.branch) + + print(f"Repository: {config.owner}/{config.repo}") + print(f"Branch: {config.branch}") + print(f"Commit ref: {config.currentCommit}") + print(f"State path: {state_path}") + + # Create DirectContext + context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl) + + # Create index manager and resolve commit SHA + manager = IndexManager(context, config, state_path) + manager.resolve_commit_sha() + + print(f"Resolved commit SHA: {config.currentCommit}") + + # Perform indexing + result = manager.index() + + # Print results + print("\n=== Indexing Results ===") + print(f"Success: {result.success}") + print(f"Type: {result.type}") + print(f"Files Indexed: {result.filesIndexed}") + print(f"Files Deleted: {result.filesDeleted}") + print(f"Checkpoint ID: {result.checkpointId}") + print(f"Commit SHA: {result.commitSha}") + + if result.reindexReason: + print(f"Re-index Reason: {result.reindexReason}") + + if result.error: + print(f"Error: {result.error}", file=sys.stderr) + sys.exit(1) + + # Set GitHub Actions output + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + output_lines = [ + f"success={result.success}", + f"type={result.type}", + f"files_indexed={result.filesIndexed}", + f"files_deleted={result.filesDeleted}", + f"checkpoint_id={result.checkpointId}", + f"commit_sha={result.commitSha}", + ] + with open(github_output, "a") as f: + f.write("\n".join(output_lines) + "\n") + + print("\nIndexing completed successfully!") + + except Exception as error: + print(f"Fatal error: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/models.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/models.py new file mode 100644 index 0000000..8b3dfc0 --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/models.py @@ -0,0 
+1,131 @@ +""" +Types for the GitHub Action Indexer + +This module defines the data types used by the GitHub Action Indexer +for tracking index state, file changes, configuration, and results. +""" + +from dataclasses import dataclass +from typing import Literal, Optional + +from typing_extensions import TypedDict + +from auggie_sdk.context.models import DirectContextState + + +class RepositoryInfo(TypedDict): + """Repository information for index state.""" + + owner: str # Repository owner + name: str # Repository name + + +class IndexState(TypedDict): + """ + Persistent state for the GitHub Action Indexer. + + This state is stored between indexing runs to enable incremental indexing. + """ + + contextState: DirectContextState + """DirectContext state (checkpoint, blobs, etc.)""" + + lastCommitSha: str + """Last indexed commit SHA (must be a full 40-character SHA, not a ref like 'HEAD')""" + + repository: RepositoryInfo + """Repository information - used to verify we're indexing the same repository""" + + +@dataclass +class FileChange: + """ + Represents a file change detected between commits. + + Used to track what files need to be indexed or removed from the index. + """ + + path: str + """File path""" + + status: Literal["added", "modified", "removed", "renamed"] + """Change status: added, modified, removed, renamed""" + + previousFilename: Optional[str] = None + """Previous filename (for renames)""" + + contents: Optional[str] = None + """File contents (for added/modified files)""" + + oldBlobName: Optional[str] = None + """Blob name from previous index (for modified/removed files)""" + + +@dataclass +class IndexConfig: + """ + Configuration for the GitHub Action Indexer. + + Contains all the settings needed to perform indexing of a GitHub repository. 
+ """ + + apiToken: str + """Augment API token""" + + apiUrl: str + """Augment API URL (provided via AUGMENT_API_URL env var)""" + + githubToken: str + """GitHub token""" + + owner: str + """Repository owner""" + + repo: str + """Repository name""" + + branch: str + """Branch to index""" + + currentCommit: str + """Current commit SHA""" + + maxCommits: Optional[int] = None + """Maximum commits before full re-index""" + + maxFiles: Optional[int] = None + """Maximum file changes before full re-index""" + + +@dataclass +class IndexResult: + """ + Result from an indexing operation. + + Contains information about what was indexed and whether it was successful. + """ + + success: bool + """Whether indexing was successful""" + + type: Literal["full", "incremental", "no-changes"] + """Type of indexing performed""" + + filesIndexed: int + """Number of files indexed""" + + filesDeleted: int + """Number of files deleted""" + + checkpointId: str + """New checkpoint ID""" + + commitSha: str + """Commit SHA that was indexed""" + + error: Optional[str] = None + """Error message if failed""" + + reindexReason: Optional[str] = None + """Reason for full re-index (if applicable)""" + diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/requirements.txt b/examples/python-sdk/context/github_action_indexer/augment_indexer/requirements.txt new file mode 100644 index 0000000..5552b4e --- /dev/null +++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/requirements.txt @@ -0,0 +1,14 @@ +# GitHub Action Indexer dependencies + +# Augment SDK for indexing and search +auggie-sdk>=0.1.0 + +# GitHub API client +PyGithub>=2.1.0 + +# HTTP requests (for tarball download) +requests>=2.25.0 + +# Gitignore-style pattern matching +pathspec>=0.11.0 + diff --git a/examples/python-sdk/context/github_action_indexer/augment_indexer/search.py b/examples/python-sdk/context/github_action_indexer/augment_indexer/search.py new file mode 100644 index 0000000..fdac426 --- 
/dev/null +++ b/examples/python-sdk/context/github_action_indexer/augment_indexer/search.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +CLI tool to search the indexed repository + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer search "your search query" + python -m github_action_indexer search "your search query" --max-chars 5000 +""" + +import argparse +import json +import os +import re +import sys +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext + +from .models import IndexState + + +def get_state_path() -> str: + """Get the state file path for the current branch.""" + branch = os.environ.get("BRANCH", "main") + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def load_state(state_path: str) -> Optional[IndexState]: + """Load index state from file system.""" + try: + with open(state_path, "r") as f: + data = f.read() + return json.loads(data) + except FileNotFoundError: + return None + + +def main() -> None: + """Main search function.""" + # Parse command line arguments + parser = argparse.ArgumentParser( + description="Search the indexed repository", + epilog='Example: python search.py "authentication functions"', + ) + parser.add_argument("query", help="Search query") + parser.add_argument( + "--max-chars", + type=int, + help="Maximum number of characters in output", + dest="max_chars", + ) + args = parser.parse_args() + + # Get API credentials + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + print("Error: AUGMENT_API_TOKEN environment variable is required", file=sys.stderr) + sys.exit(1) + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + print( + "Error: AUGMENT_API_URL environment variable is required. 
Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')", + file=sys.stderr, + ) + sys.exit(1) + + print(f'Searching for: "{args.query}"') + if args.max_chars is not None: + print(f"Limiting results to max {args.max_chars} characters\n") + else: + print() + + try: + # Load the index state first + state_path = get_state_path() + print(f"Loading index state from: {state_path}") + state = load_state(state_path) + + if not state: + print("Error: No index state found. Run indexing first.", file=sys.stderr) + print(" python -m github_action_indexer index", file=sys.stderr) + sys.exit(1) + + # Create a temporary file with the context state for import + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-state-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Import state using DirectContext.import_from_file + context = DirectContext.import_from_file( + str(temp_path), api_key=api_token, api_url=api_url + ) + finally: + temp_path.unlink(missing_ok=True) + + file_count = len(state["contextState"].get("blobs", [])) + + print(f"Loaded index: {file_count} files indexed") + print(f"Repository: {state['repository']['owner']}/{state['repository']['name']}") + print(f"Last indexed commit: {state['lastCommitSha']}\n") + + # Perform search with optional character limit + results = context.search(args.query, max_output_length=args.max_chars) + + if not results or results.strip() == "": + print("No results found.") + return + + print("Search results:\n") + print(results) + + except Exception as error: + print(f"Search failed: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + diff --git a/examples/python-sdk/context/github_action_indexer/install.py 
#!/usr/bin/env python3
"""
GitHub Action Indexer Installation Script

This script helps developers install the Augment GitHub Action Indexer
into their repositories with minimal setup.

Usage:
    python install.py /path/to/your/repository
    python install.py  # installs to current directory
"""

import argparse
import shutil
import sys
from pathlib import Path

# ANSI escape sequences for colored console output
COLORS = {
    "reset": "\033[0m",
    "bright": "\033[1m",
    "red": "\033[31m",
    "green": "\033[32m",
    "yellow": "\033[33m",
    "blue": "\033[34m",
    "cyan": "\033[36m",
}


def colorize(color: str, text: str) -> str:
    """Apply color to text. Unknown color names add no prefix, only the reset."""
    return f"{COLORS.get(color, '')}{text}{COLORS['reset']}"


def log(message: str, color: str = "reset") -> None:
    """Print a colored message."""
    print(colorize(color, message))


def log_step(step: str, message: str) -> None:
    """Print a step message."""
    log(f"[{step}] {message}", "cyan")


def log_success(message: str) -> None:
    """Print a success message."""
    log(f"✅ {message}", "green")


def log_warning(message: str) -> None:
    """Print a warning message."""
    log(f"⚠️  {message}", "yellow")


def log_error(message: str) -> None:
    """Print an error message."""
    log(f"❌ {message}", "red")


def _confirm(prompt: str) -> bool:
    """Ask a yes/no question on stdin; EOF (closed stdin) counts as "no"."""
    try:
        return input(prompt).lower().startswith("y")
    except EOFError:
        # Non-interactive invocation (e.g. piped stdin): default to "no".
        return False


def copy_directory(src: Path, dst: Path) -> None:
    """Copy a directory recursively, excluding __pycache__ dirs and .pyc files.

    Any existing destination is removed first so the copy is a clean replace.
    """
    if dst.exists():
        shutil.rmtree(dst)
    # shutil.ignore_patterns builds the same exclusion callable the stdlib way:
    # names matching "__pycache__" exactly or ending in ".pyc" are skipped.
    shutil.copytree(src, dst, ignore=shutil.ignore_patterns("__pycache__", "*.pyc"))


def update_gitignore(target_dir: Path) -> None:
    """Update .gitignore to include Augment indexer entries (idempotent)."""
    gitignore_path = target_dir / ".gitignore"
    augment_entry = ".augment-index-state/"

    existing_content = ""
    if gitignore_path.exists():
        existing_content = gitignore_path.read_text()

    if augment_entry not in existing_content:
        addition = "\n# Augment indexer files\n.augment-index-state/\n"
        # Keep the file newline-terminated before appending our section.
        if existing_content and not existing_content.endswith("\n"):
            addition = "\n" + addition
        gitignore_path.write_text(existing_content + addition)
        log_success("Updated .gitignore")
    else:
        log_warning(".gitignore already contains Augment indexer entries")


def display_next_steps(target_dir: Path) -> None:
    """Display next steps after installation."""
    log("\n" + colorize("bright", "🎉 Installation Complete!"))
    log("\nNext steps:\n")

    log(colorize("yellow", "1. Set up GitHub repository secrets:"))
    log("   Go to your repository Settings > Secrets and variables > Actions")
    log("   Add the following secrets:")
    log("   • AUGMENT_API_TOKEN - Your Augment API token")
    log("   • AUGMENT_API_URL - Your tenant-specific Augment API URL\n")

    log(colorize("yellow", "2. Push to trigger the workflow:"))
    log("   git add .")
    log('   git commit -m "Add Augment GitHub Action Indexer"')
    log("   git push\n")

    log(colorize("green", "The indexer will automatically run on pushes to main!"))

    log(colorize("yellow", "\n(Optional) Test locally first:"))
    log(f"   cd {target_dir}")
    log("   pip install -r augment_indexer/requirements.txt")
    log('   export AUGMENT_API_TOKEN="your-token"')
    log('   export AUGMENT_API_URL="https://your-tenant.api.augmentcode.com/"')
    log('   export GITHUB_TOKEN="your-github-token"')
    log('   export GITHUB_REPOSITORY="owner/repo"')
    log('   export GITHUB_SHA="$(git rev-parse HEAD)"')
    log("   python -m augment_indexer.main")
    log(colorize("blue", "\nFor more information, see the documentation at:"))
    log("https://github.com/augmentcode/auggie/tree/main/examples/python-sdk/context/github_action_indexer\n")


def main() -> None:
    """Main installation function: copy the indexer, workflow, and gitignore entries."""
    parser = argparse.ArgumentParser(
        description="Install the Augment GitHub Action Indexer into your repository"
    )
    parser.add_argument(
        "target_dir",
        nargs="?",
        default=".",
        help="Target directory to install into (default: current directory)",
    )
    args = parser.parse_args()

    # Resolve paths
    script_dir = Path(__file__).parent.resolve()
    target_dir = Path(args.target_dir).resolve()

    log(colorize("bright", "🚀 Augment GitHub Action Indexer Installation"))
    log("This script will set up the Augment GitHub Action Indexer in your repository.\n")

    log(colorize("bright", "📁 Target Directory"))
    log(f"Installing to: {colorize('cyan', str(target_dir))}\n")

    # Check if target directory exists
    if not target_dir.exists():
        if not _confirm(f"Directory {target_dir} doesn't exist. Create it? (y/N): "):
            log("Installation cancelled.")
            sys.exit(0)
        target_dir.mkdir(parents=True)
        log_success(f"Created directory {target_dir}")

    # Check if this looks like a git repository
    if not (target_dir / ".git").is_dir():
        log_warning("This doesn't appear to be a Git repository.")
        if not _confirm("Continue anyway? (y/N): "):
            log("Installation cancelled.")
            sys.exit(0)

    try:
        # Step 1: Copy augment_indexer directory (includes requirements.txt)
        log_step("1", "Copying augment_indexer directory...")
        src_indexer = script_dir / "augment_indexer"
        # Fail early with a clear message if the script was moved away from
        # its payload instead of surfacing a bare copytree error.
        if not src_indexer.is_dir():
            raise FileNotFoundError(f"Source directory not found: {src_indexer}")
        dst_indexer = target_dir / "augment_indexer"
        copy_directory(src_indexer, dst_indexer)
        log_success("Copied augment_indexer/ (includes requirements.txt)")

        # Step 2: Copy GitHub workflow
        log_step("2", "Creating GitHub workflow...")
        src_workflow = script_dir / ".github" / "workflows" / "augment-index.yml"
        dst_workflow_dir = target_dir / ".github" / "workflows"
        dst_workflow_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(src_workflow, dst_workflow_dir / "augment-index.yml")
        log_success("Created .github/workflows/augment-index.yml")

        # Step 3: Update .gitignore
        log_step("3", "Updating .gitignore...")
        update_gitignore(target_dir)

        # Display next steps
        display_next_steps(target_dir)

    except Exception as e:
        log_error(f"Installation failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Local tests for the github_action_indexer example.

This example requires GitHub credentials and environment variables to run
the full indexing flow. However, we can test:
1. Module imports work correctly
2. File filtering logic works correctly
3. Model definitions are valid

The full integration test would need:
- AUGMENT_API_TOKEN, AUGMENT_API_URL
- GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_SHA

Usage:
    cd examples/python-sdk/context
    python -m pytest github_action_indexer/test_example.py
    # or
    python github_action_indexer/test_example.py
"""

import subprocess
import sys
from pathlib import Path

# Python snippets executed in a child interpreter so the package import path
# matches real usage (cwd = the parent "context" directory).
_IMPORTS_SNIPPET = """
from github_action_indexer.augment_indexer.file_filter import (
    always_ignore_path,
    is_keyish_path,
    is_valid_file_size,
    is_valid_utf8,
    should_filter_file,
)
print("  ✓ file_filter")

from github_action_indexer.augment_indexer.models import (
    FileChange,
    IndexConfig,
    IndexResult,
    IndexState,
    RepositoryInfo,
)
print("  ✓ models")

from github_action_indexer.augment_indexer.github_client import GitHubClient
print("  ✓ github_client")

from github_action_indexer.augment_indexer.index_manager import IndexManager
print("  ✓ index_manager")

print("All imports successful!")
"""

_FILE_FILTER_SNIPPET = """
from github_action_indexer.augment_indexer.file_filter import (
    always_ignore_path,
    is_keyish_path,
    is_valid_file_size,
    is_valid_utf8,
    should_filter_file,
)

# Test always_ignore_path
assert always_ignore_path("../etc/passwd") is True
assert always_ignore_path("foo/../bar") is True
assert always_ignore_path("foo/bar/baz.py") is False
print("  ✓ always_ignore_path")

# Test is_keyish_path
assert is_keyish_path("secrets/private.key") is True
assert is_keyish_path("certs/server.pem") is True
assert is_keyish_path("keys/id_rsa") is True
assert is_keyish_path("src/main.py") is False
assert is_keyish_path("docs/readme.md") is False
print("  ✓ is_keyish_path")

# Test is_valid_file_size
assert is_valid_file_size(1000) is True
assert is_valid_file_size(1024 * 1024) is True  # Exactly 1 MB
assert is_valid_file_size(1024 * 1024 + 1) is False  # Over 1 MB
print("  ✓ is_valid_file_size")

# Test is_valid_utf8
assert is_valid_utf8(b"Hello, world!") is True
assert is_valid_utf8("Hello, 世界!".encode("utf-8")) is True
assert is_valid_utf8(b"\\xff\\xfe") is False  # Invalid UTF-8 sequence
assert is_valid_utf8(b"\\x80\\x81\\x82") is False  # Invalid UTF-8 (continuation bytes without start)
print("  ✓ is_valid_utf8")

# Test should_filter_file
result = should_filter_file("src/main.py", b"print('hello')")
assert result["filtered"] is False

result = should_filter_file("../etc/passwd", b"root:x:0:0")
assert result["filtered"] is True
assert result["reason"] == "path_contains_dotdot"

result = should_filter_file("secrets/key.pem", b"-----BEGIN RSA PRIVATE KEY-----")
assert result["filtered"] is True
assert result["reason"] == "keyish_pattern"

result = should_filter_file("image.png", b"\\x89PNG\\r\\n\\x1a\\n")
assert result["filtered"] is True
assert "binary" in result["reason"]

print("  ✓ should_filter_file")

print("All file_filter tests passed!")
"""

_MODELS_SNIPPET = """
from github_action_indexer.augment_indexer.models import FileChange, IndexConfig, IndexResult

# Test FileChange
change = FileChange(path="src/main.py", status="added", contents="print('hello')")
assert change.path == "src/main.py"
assert change.status == "added"
print("  ✓ FileChange")

# Test IndexConfig
config = IndexConfig(
    apiToken="token",
    apiUrl="https://api.example.com",
    githubToken="gh_token",
    owner="owner",
    repo="repo",
    branch="main",
    currentCommit="abc123",
)
assert config.owner == "owner"
assert config.repo == "repo"
print("  ✓ IndexConfig")

# Test IndexResult
result = IndexResult(
    success=True,
    type="full",
    filesIndexed=10,
    filesDeleted=0,
    checkpointId="cp_123",
    commitSha="abc123",
)
assert result.success is True
assert result.filesIndexed == 10
print("  ✓ IndexResult")

print("All model tests passed!")
"""


def _run_snippet(code: str, failure_message: str) -> None:
    """Run *code* in a child interpreter from the context directory.

    The subprocess is started with cwd set to the parent "context" directory
    so `github_action_indexer.*` imports resolve. The child's stdout/stderr
    is echoed; on a non-zero exit this process prints *failure_message* and
    exits with status 1 (mirroring the original per-test behavior).
    """
    context_dir = Path(__file__).parent.parent
    result = subprocess.run(
        [sys.executable, "-c", code],
        cwd=str(context_dir),
        capture_output=True,
        text=True,
    )

    print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    if result.returncode != 0:
        print(failure_message)
        sys.exit(1)


def test_imports():
    """Test that all modules can be imported via module execution."""
    print("Testing imports...")
    _run_snippet(_IMPORTS_SNIPPET, "❌ Import test failed")


def test_file_filter():
    """Test file filtering logic."""
    print("\nTesting file_filter logic...")
    _run_snippet(_FILE_FILTER_SNIPPET, "❌ File filter test failed")


def test_models():
    """Test model definitions."""
    print("\nTesting models...")
    _run_snippet(_MODELS_SNIPPET, "❌ Model test failed")


def main():
    """Run all tests."""
    print("=" * 50)
    print("GitHub Action Indexer - Local Tests")
    print("=" * 50)

    test_imports()
    test_file_filter()
    test_models()

    print("\n" + "=" * 50)
    print("✅ All tests passed!")
    print("=" * 50)


if __name__ == "__main__":
    main()
Enhancer Server Example + +HTTP server that enhances vague prompts using AI with codebase context. + +## Prerequisites + +Install the `auggie` CLI and authenticate: +```bash +npm install -g @augmentcode/auggie@prerelease +auggie login +``` + +## Usage + +```bash +# Start server with workspace directory (from the context directory) +cd examples/python-sdk/context +python -m prompt_enhancer_server . + +# Or run directly +python prompt_enhancer_server/main.py . +``` + +## API Endpoints + +### Enhance Prompt +```bash +curl -X POST http://localhost:3001/enhance \ + -H "Content-Type: application/json" \ + -d '{"prompt": "fix the bug"}' +``` + +Response: +```json +{ + "original": "fix the bug", + "enhanced": "Fix the bug in the authentication system. Specifically, investigate the login function..." +} +``` + +### Health Check +```bash +curl "http://localhost:3001/health" +``` + diff --git a/examples/python-sdk/context/prompt_enhancer_server/__init__.py b/examples/python-sdk/context/prompt_enhancer_server/__init__.py new file mode 100644 index 0000000..c520172 --- /dev/null +++ b/examples/python-sdk/context/prompt_enhancer_server/__init__.py @@ -0,0 +1,2 @@ +# prompt_enhancer_server package + diff --git a/examples/python-sdk/context/prompt_enhancer_server/__main__.py b/examples/python-sdk/context/prompt_enhancer_server/__main__.py new file mode 100644 index 0000000..378974c --- /dev/null +++ b/examples/python-sdk/context/prompt_enhancer_server/__main__.py @@ -0,0 +1,4 @@ +"""Allow running as: python -m prompt_enhancer_server""" +from .main import main + +main() diff --git a/examples/python-sdk/context/prompt_enhancer_server/main.py b/examples/python-sdk/context/prompt_enhancer_server/main.py new file mode 100644 index 0000000..0d427f1 --- /dev/null +++ b/examples/python-sdk/context/prompt_enhancer_server/main.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +""" +Prompt Enhancer Server Example + +An HTTP server that enhances user prompts using the Augment Generation API. 
#!/usr/bin/env python3
"""
Prompt Enhancer Server Example

An HTTP server that enhances user prompts using the Augment Generation API.

This demonstrates how to use the generation API to intelligently improve user prompts.

The prompt enhancer:
1. Takes a user's prompt
2. Uses the generation API to enhance the prompt
3. Parses the enhanced prompt from the AI response
4. Returns the improved, more specific prompt

Usage:
    python main.py [workspace-directory]
    python -m prompt_enhancer_server [workspace-directory]

Endpoints:
    POST /enhance - Enhance a prompt (body: {"prompt": "..."})
    GET /health - Health check
"""

import json
import re
import sys
from datetime import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, TypedDict
from urllib.parse import urlparse

from auggie_sdk.context import FileSystemContext

PORT = 3001

# The enhancement prompt instructs the model to wrap its answer between
# "### BEGIN RESPONSE ###" and "### END RESPONSE ###" markers; capture the
# text between them. (A bare lazy "(.*?)" would always match the empty
# string and never yield a result.)
ENHANCED_PROMPT_REGEX = re.compile(
    r"### BEGIN RESPONSE ###(.*?)### END RESPONSE ###", re.DOTALL
)

# Fixed preamble line the response template places before the enhanced prompt.
_RESPONSE_PREAMBLE = (
    "Here is an enhanced version of the original instruction that is more specific and clear:"
)


# --- Response Parser ---


def parse_enhanced_prompt(response: str) -> str | None:
    """
    Parse the enhanced prompt from the AI response.

    Extracts the text between the BEGIN/END RESPONSE markers that the
    enhancement prompt asks the model to emit, and strips the fixed
    template preamble if the model echoed it back.

    Args:
        response: The AI response containing the enhanced prompt

    Returns:
        The enhanced prompt text, or None if not found
    """
    match = ENHANCED_PROMPT_REGEX.search(response)
    if not match:
        return None
    enhanced = match.group(1).strip()
    # Drop the template's preamble line so only the enhanced prompt remains.
    if enhanced.startswith(_RESPONSE_PREAMBLE):
        enhanced = enhanced[len(_RESPONSE_PREAMBLE):].strip()
    return enhanced or None


# --- Enhance Handler ---


class EnhanceResponse(TypedDict):
    """Response type for enhance requests"""

    original: str
    enhanced: str


def handle_enhance(prompt: str, context: FileSystemContext) -> EnhanceResponse:
    """
    Handle prompt enhancement request using search_and_ask

    Args:
        prompt: The original prompt to enhance
        context: FileSystemContext instance

    Returns:
        EnhanceResponse with original and enhanced prompts

    Raises:
        ValueError: If the enhanced prompt cannot be parsed from the response
    """
    print(f'\n[{datetime.now().isoformat()}] Enhancing prompt: "{prompt}"')

    # Build the enhancement instruction
    enhancement_prompt = (
        "Here is an instruction that I'd like to give you, but it needs to be improved. "
        "Rewrite and enhance this instruction to make it clearer, more specific, "
        "less ambiguous, and correct any mistakes. "
        "If there is code in triple backticks (```) consider whether it is a code sample "
        "and should remain unchanged. "
        "Reply with the following format:\n\n"
        "### BEGIN RESPONSE ###\n"
        "Here is an enhanced version of the original instruction that is more specific and clear:\n"
        "enhanced prompt goes here\n\n"
        "### END RESPONSE ###\n\n"
        "Here is my original instruction:\n\n"
        f"{prompt}"
    )

    # Use search_and_ask to get the enhancement with relevant codebase context
    response = context.search_and_ask(prompt, enhancement_prompt)

    # Parse the enhanced prompt from the response
    enhanced = parse_enhanced_prompt(response)
    if not enhanced:
        raise ValueError("Failed to parse enhanced prompt from response")

    return {
        "original": prompt,
        "enhanced": enhanced,
    }


# --- HTTP Server ---

# Global context, created by initialize_context() at startup
context: FileSystemContext | None = None
workspace_dir: str = "."
def initialize_context():
    """Create the global FileSystemContext rooted at workspace_dir."""
    global context
    print("Initializing FileSystem Context...")
    context = FileSystemContext.create(workspace_dir, debug=False)
    print("FileSystem Context initialized\n")


class RequestHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the /enhance and /health endpoints."""

    def do_OPTIONS(self):
        """Handle OPTIONS requests for CORS preflight."""
        self.send_response(200)
        self._send_cors_headers()
        self.end_headers()

    def _send_cors_headers(self):
        """Send permissive CORS headers on every response."""
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")

    def _send_json_response(self, status: int, data: dict[str, Any]):
        """Serialize *data* as JSON and send it with the given status code."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(json.dumps(data, indent=2).encode())

    def do_POST(self):
        """Route POST requests: only /enhance is served."""
        parsed_url = urlparse(self.path)
        path = parsed_url.path

        if path == "/enhance":
            self._handle_enhance()
        else:
            self._send_json_response(404, {"error": "Not found"})

    def _handle_enhance(self):
        """Handle the /enhance endpoint: validate the body, run the enhancer."""
        if context is None:
            self._send_json_response(503, {"error": "Context not initialized yet"})
            return

        content_length = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(content_length).decode("utf-8")

        # Malformed or non-object JSON is a client error (400), not a server
        # error, so validate the body before invoking the enhancer.
        try:
            data = json.loads(body)
        except json.JSONDecodeError:
            self._send_json_response(400, {"error": "Invalid JSON body"})
            return
        if not isinstance(data, dict):
            self._send_json_response(400, {"error": "Missing or invalid 'prompt' field"})
            return

        prompt = data.get("prompt")
        if not prompt or not isinstance(prompt, str):
            self._send_json_response(400, {"error": "Missing or invalid 'prompt' field"})
            return

        try:
            result = handle_enhance(prompt, context)
            self._send_json_response(200, result)
        except Exception as error:
            print(f"Enhancement error: {error}")
            self._send_json_response(500, {"error": str(error)})

    def do_GET(self):
        """Route GET requests: only /health is served."""
        parsed_url = urlparse(self.path)
        path = parsed_url.path

        if path == "/health":
            self._send_json_response(
                200,
                {
                    "status": "ok",
                    "workspace": workspace_dir,
                    "contextReady": context is not None,
                },
            )
        else:
            self._send_json_response(404, {"error": "Not found"})

    def log_message(self, format, *args):
        """Override to suppress default logging"""
        pass


def _cleanup(server):
    """Close the global context and the HTTP server if they were created."""
    if context:
        context.close()
    if server:
        server.server_close()


def main():
    """Parse the workspace argument, start the context, and serve forever."""
    global workspace_dir

    # Get workspace directory from command line, default to current directory
    workspace_dir_arg = sys.argv[1] if len(sys.argv) > 1 else "."
    # Resolve to absolute path to handle relative paths correctly
    workspace_dir = str(Path(workspace_dir_arg).resolve())

    print("=== Prompt Enhancer Server ===\n")
    print(f"Workspace directory: {workspace_dir}")
    print(f"Starting server on port {PORT}...\n")

    server = None
    try:
        initialize_context()

        server = HTTPServer(("", PORT), RequestHandler)
        print(f"✅ Server running at http://localhost:{PORT}/")
        print("\nExample requests:")
        print(
            f'  curl -X POST http://localhost:{PORT}/enhance '
            f'-H "Content-Type: application/json" '
            f'-d \'{{"prompt": "fix the bug"}}\''
        )
        print(f'  curl "http://localhost:{PORT}/health"')
        print("\nPress Ctrl+C to stop\n")

        server.serve_forever()
    except KeyboardInterrupt:
        print("\n\nShutting down...")
        _cleanup(server)
        print("Server stopped")
        sys.exit(0)
    except Exception as error:
        print(f"Failed to initialize: {error}")
        _cleanup(server)
        sys.exit(1)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Test for the prompt_enhancer_server example.

This example starts an HTTP server on port 3001 that provides:
- GET /health - Health check endpoint
- POST /enhance - Prompt enhancement endpoint

The test starts the server, verifies the endpoints work, then shuts it down.
"""

import json
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path

PORT = 3001
BASE_URL = f"http://localhost:{PORT}"
STARTUP_TIMEOUT = 30  # seconds to wait for server to start
REQUEST_TIMEOUT = 60  # seconds for each request


def wait_for_server(url: str, timeout: int = STARTUP_TIMEOUT) -> bool:
    """Poll *url* once per second until it answers 200 or *timeout* elapses."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            probe = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(probe, timeout=5) as response:
                if response.status == 200:
                    return True
        except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError):
            pass
        time.sleep(1)
    return False


def _fetch_json(request: urllib.request.Request, timeout: int) -> dict:
    """Send *request* and decode the JSON payload of the response."""
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return json.loads(response.read().decode())


def make_get_request(url: str, timeout: int = REQUEST_TIMEOUT) -> dict:
    """Make a GET request and return JSON response."""
    return _fetch_json(urllib.request.Request(url, method="GET"), timeout)


def make_post_request(url: str, data: dict, timeout: int = REQUEST_TIMEOUT) -> dict:
    """Make a POST request with a JSON body and return JSON response."""
    payload = json.dumps(data).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=payload,
        method="POST",
        headers={"Content-Type": "application/json"},
    )
    return _fetch_json(request, timeout)


def main():
    """Run the prompt_enhancer_server example and verify it works."""
    # The package directory doubles as the workspace the server indexes.
    package_dir = Path(__file__).parent
    workspace_dir = str(package_dir)
    # Module execution (-m) must happen from the parent "context" directory.
    context_dir = package_dir.parent

    print(f"Starting server with: python -m prompt_enhancer_server {workspace_dir}")
    print(f"Working directory: {context_dir}")

    # Launch the server in a child process via module execution.
    proc = subprocess.Popen(
        [sys.executable, "-m", "prompt_enhancer_server", workspace_dir],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=str(context_dir),
    )

    try:
        print(f"Waiting for server at {BASE_URL}/health...")
        if not wait_for_server(f"{BASE_URL}/health"):
            stdout, stderr = proc.communicate(timeout=5)
            print(f"❌ Server failed to start within {STARTUP_TIMEOUT}s")
            print(f"stdout: {stdout}")
            print(f"stderr: {stderr}")
            sys.exit(1)
        print("✓ Server is ready")

        # Health check first — it is cheap and proves the server is serving.
        print("\nTesting /health endpoint...")
        health = make_get_request(f"{BASE_URL}/health")
        assert health.get("status") == "ok", f"Expected status 'ok', got: {health}"
        print(f"✓ Health check passed: {health}")

        # Then exercise the enhancement endpoint end to end.
        print("\nTesting /enhance endpoint...")
        enhance_result = make_post_request(f"{BASE_URL}/enhance", {"prompt": "fix the bug"})
        # The enhance should return some result (structure may vary)
        assert isinstance(enhance_result, dict), f"Expected dict, got: {type(enhance_result)}"
        print(f"✓ Enhance returned result with keys: {list(enhance_result.keys())}")

        print("\n✅ prompt_enhancer_server example passed")

    finally:
        # Always stop the child process, escalating terminate -> kill.
        print("\nShutting down server...")
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        print("Server stopped")


if __name__ == "__main__":
    main()
a/examples/typescript-sdk/context/github-action-indexer/.github/workflows/index.yml +++ b/examples/typescript-sdk/context/github-action-indexer/.github/workflows/index.yml @@ -77,4 +77,5 @@ jobs: name: index-state path: .augment-index-state/ retention-days: 30 + include-hidden-files: true