From f38294e729109744c956b6c42ca1a63d313aa4ce Mon Sep 17 00:00:00 2001
From: Rich Hankins
Date: Mon, 15 Dec 2025 22:11:05 +0000
Subject: [PATCH] Add GitHub workflow

---
 .github/workflows/index.yml |  80 ++++++++
 src/__init__.py             |  24 +++
 src/file_filter.py          | 123 +++++++++++
 src/github_client.py        | 307 ++++++++++++++++++++++++++++
 src/index_manager.py        | 395 ++++++++++++++++++++++++++++++++++++
 src/main.py                 | 167 +++++++++++++++
 src/models.py               | 131 ++++++++++++
 src/search.py               | 132 ++++++++++++
 8 files changed, 1359 insertions(+)
 create mode 100644 .github/workflows/index.yml
 create mode 100644 src/__init__.py
 create mode 100644 src/file_filter.py
 create mode 100644 src/github_client.py
 create mode 100644 src/index_manager.py
 create mode 100644 src/main.py
 create mode 100644 src/models.py
 create mode 100644 src/search.py

diff --git a/.github/workflows/index.yml b/.github/workflows/index.yml
new file mode 100644
index 0000000..43c349a
--- /dev/null
+++ b/.github/workflows/index.yml
@@ -0,0 +1,80 @@
+name: Index Repository
+
+on:
+  push:
+    branches:
+      - main
+      - develop
+      - 'feature/**'  # Index feature branches
+      - 'release/**'  # Index release branches
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: 'Branch to index (leave empty for current branch)'
+        required: false
+        type: string
+      force_full_reindex:
+        description: 'Force full re-index'
+        required: false
+        type: boolean
+        default: false
+
+jobs:
+  index:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Full history for comparison
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
+      - name: Restore index state
+        uses: actions/cache@v4
+        with:
+          path: .augment-index-state
+          # Use branch-specific cache key
+          key: augment-index-${{ github.ref_name }}-${{ github.sha }}
+          restore-keys: |
+            augment-index-${{ github.ref_name }}-
+
+      - name: Index repository
+        id: index
+        # Run as a module so main.py's relative imports resolve
+        run: python -m src.main
+        env:
+          AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }}
+          AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STORAGE_TYPE: file
+          # Branch-specific state path (automatically determined from GITHUB_REF)
+          # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json
+          MAX_COMMITS: 100
+          MAX_FILES: 500
+
+      - name: Print results
+        if: always()
+        run: |
+          echo "Success: ${{ steps.index.outputs.success }}"
+          echo "Type: ${{ steps.index.outputs.type }}"
+          echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}"
+          echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}"
+          echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}"
+          echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}"
+
+      - name: Upload state artifact
+        if: success()
+        uses: actions/upload-artifact@v4
+        with:
+          name: index-state
+          path: .augment-index-state/
+          retention-days: 30

diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..499dfe6
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,24 @@
+"""
+GitHub Action Repository Indexer
+
+A Python example showing how to index a GitHub repository using the Augment SDK
+Direct Mode with incremental updates.
+
+See README.md for usage instructions.
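+
+Quick start (a minimal sketch mirroring src/main.py; token values, repository,
+and state path are illustrative):
+
+    from auggie_sdk.context import DirectContext
+
+    from src import IndexManager
+    from src.models import IndexConfig
+
+    config = IndexConfig(
+        apiToken="...", apiUrl="https://your-tenant.api.augmentcode.com/",
+        githubToken="...", owner="octocat", repo="hello-world",
+        branch="main", currentCommit="HEAD",
+    )
+    context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl)
+    manager = IndexManager(context, config, ".augment-index-state/main/state.json")
+    manager.resolve_commit_sha()  # "HEAD" -> full 40-character SHA
+    result = manager.index()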
+""" + +from .models import FileChange, IndexConfig, IndexResult, IndexState +from .file_filter import should_filter_file +from .github_client import GitHubClient +from .index_manager import IndexManager + +__all__ = [ + "FileChange", + "IndexConfig", + "IndexResult", + "IndexState", + "should_filter_file", + "GitHubClient", + "IndexManager", +] + diff --git a/src/file_filter.py b/src/file_filter.py new file mode 100644 index 0000000..88ab035 --- /dev/null +++ b/src/file_filter.py @@ -0,0 +1,123 @@ +""" +File filtering logic for GitHub repository indexing. +""" + +import re +from pathlib import Path +from typing import Optional + +# Keyish pattern regex - matches files that likely contain secrets/keys +KEYISH_PATTERN = re.compile( + r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$' +) + +# Default max file size in bytes (1 MB) +DEFAULT_MAX_FILE_SIZE = 1024 * 1024 # 1 MB + + +def always_ignore_path(path: str) -> bool: + """ + Check if a path should always be ignored (security measure). + + Args: + path: The file path to check. + + Returns: + True if the path contains ".." and should be ignored. + """ + return ".." in path + + +def is_keyish_path(path: str) -> bool: + """ + Check if a path matches the keyish pattern (secrets/keys). + + Args: + path: The file path to check. + + Returns: + True if the filename matches patterns for secret/key files. + """ + # Extract filename from path + filename = Path(path).name + return bool(KEYISH_PATTERN.match(filename)) + + +def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool: + """ + Check if file size is valid for upload. + + Args: + size_bytes: The size of the file in bytes. + max_file_size: Maximum allowed file size in bytes. Defaults to 1 MB. + + Returns: + True if the file size is within the allowed limit. + """ + return size_bytes <= max_file_size + + +def is_valid_utf8(content: bytes) -> bool: + """ + Check if file content is valid UTF-8 (not binary). + + Args: + content: The file content as bytes. + + Returns: + True if the content is valid UTF-8, False if it's binary or invalid. + """ + try: + content.decode("utf-8") + return True + except UnicodeDecodeError: + return False + + +def should_filter_file( + path: str, + content: bytes, + max_file_size: Optional[int] = None, +) -> dict: + """ + Check if a file should be filtered out. + + Returns {"filtered": True, "reason": "..."} if file should be skipped. + Returns {"filtered": False} if file should be included. + + Priority order (from file-filtering.md): + 1. Path validation (contains "..") + 2. File size check + 3. .augmentignore rules (checked by caller) + 4. Keyish patterns + 5. .gitignore rules (checked by caller) + 6. UTF-8 validation + + Args: + path: The file path to check. + content: The file content as bytes. + max_file_size: Maximum allowed file size in bytes. Defaults to DEFAULT_MAX_FILE_SIZE. + + Returns: + A dict with "filtered" (bool) and optionally "reason" (str) keys. + """ + effective_max_size = max_file_size if max_file_size is not None else DEFAULT_MAX_FILE_SIZE + + # 1. Check for ".." in path (security) + if always_ignore_path(path): + return {"filtered": True, "reason": "path_contains_dotdot"} + + # 2. Check file size + if not is_valid_file_size(len(content), effective_max_size): + return {"filtered": True, "reason": f"file_too_large ({len(content)} bytes)"} + + # 3. 
Check keyish patterns (secrets/keys) + if is_keyish_path(path): + return {"filtered": True, "reason": "keyish_pattern"} + + # 4. Check UTF-8 validity (binary detection) + if not is_valid_utf8(content): + return {"filtered": True, "reason": "binary_file"} + + return {"filtered": False} + diff --git a/src/github_client.py b/src/github_client.py new file mode 100644 index 0000000..f69bd62 --- /dev/null +++ b/src/github_client.py @@ -0,0 +1,307 @@ +""" +GitHub API client for fetching repository data. +""" + +import io +import tarfile + +import pathspec +import requests +from github import Github +from github.GithubException import GithubException + +from .file_filter import should_filter_file +from .models import FileChange + + +class GitHubClient: + """GitHub API client for fetching repository data.""" + + def __init__(self, token: str) -> None: + """ + Initialize the GitHub client with an authentication token. + + Args: + token: GitHub personal access token or GitHub App token. + """ + self._github = Github(token) + self._token = token + + def resolve_ref(self, owner: str, repo: str, ref: str) -> str: + """ + Resolve a ref (like "HEAD", "main", or a commit SHA) to a commit SHA. + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to resolve. + + Returns: + The full 40-character commit SHA. + + Raises: + Exception: If the ref cannot be resolved. + """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + commit = repository.get_commit(ref) + return commit.sha + except GithubException as error: + raise Exception( + f'Failed to resolve ref "{ref}" for {owner}/{repo}: {error}' + ) from error + + def download_tarball(self, owner: str, repo: str, ref: str) -> dict[str, str]: + """ + Download repository as tarball and extract files. + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to download. + + Returns: + Dictionary mapping file paths to their contents. + """ + print(f"Downloading tarball for {owner}/{repo}@{ref}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + tarball_url = repository.get_archive_link("tarball", ref) + + # Download tarball (10 minute timeout to handle large repositories) + # Include auth header for private repos + headers = {"Authorization": f"Bearer {self._token}"} + response = requests.get(tarball_url, headers=headers, stream=True, timeout=600) + if not response.ok: + raise Exception(f"Failed to download tarball: {response.reason}") + + # Load ignore patterns + augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref) + + # Track filtering statistics + files: dict[str, str] = {} + total_files = 0 + filtered_files = 0 + filter_reasons: dict[str, int] = {} + + # Extract files from tarball + tarball_data = io.BytesIO(response.content) + with tarfile.open(fileobj=tarball_data, mode="r:gz") as tar: + for member in tar.getmembers(): + # Skip directories and symlinks + if not member.isfile(): + continue + + total_files += 1 + + # Remove the root directory prefix (e.g., "owner-repo-sha/") + path_parts = member.name.split("/") + path_parts.pop(0) # Remove first component + file_path = "/".join(path_parts) + + if not file_path: + continue + + # Read file contents + file_obj = tar.extractfile(member) + if file_obj is None: + continue + content_bytes = file_obj.read() + + # Apply filtering in priority order: + # 1. 
.augmentignore + if augmentignore and augmentignore.match_file(file_path): + filtered_files += 1 + filter_reasons["augmentignore"] = filter_reasons.get("augmentignore", 0) + 1 + continue + + # 2. Path validation, file size, keyish patterns, UTF-8 validation + filter_result = should_filter_file(path=file_path, content=content_bytes) + + if filter_result["filtered"]: + filtered_files += 1 + reason = filter_result.get("reason", "unknown") + filter_reasons[reason] = filter_reasons.get(reason, 0) + 1 + continue + + # 3. .gitignore (checked last) + if gitignore and gitignore.match_file(file_path): + filtered_files += 1 + filter_reasons["gitignore"] = filter_reasons.get("gitignore", 0) + 1 + continue + + # File passed all filters + try: + contents = content_bytes.decode("utf-8") + files[file_path] = contents + except UnicodeDecodeError: + # This should not happen if is_valid_utf8() is working correctly + filtered_files += 1 + filter_reasons["decode_error"] = filter_reasons.get("decode_error", 0) + 1 + print(f"Warning: File {file_path} passed UTF-8 validation but failed to decode") + + print(f"Extracted {len(files)} files from tarball") + print(f"Filtered {filtered_files} of {total_files} files. Reasons: {filter_reasons}") + return files + + def compare_commits( + self, owner: str, repo: str, base: str, head: str + ) -> dict: + """ + Compare two commits and get file changes. + """ + print(f"Comparing {base}...{head}...") + + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + files: list[FileChange] = [] + + for file in comparison.files: + change = FileChange( + path=file.filename, + status=self._map_github_status(file.status), + previousFilename=file.previous_filename, + ) + + # Download file contents for added/modified files + if change.status in ("added", "modified"): + try: + contents = self.get_file_contents(owner, repo, file.filename, head) + change.contents = contents + except Exception as error: + print(f"Warning: Failed to download {file.filename}: {error}") + + files.append(change) + + return { + "files": files, + "commits": comparison.total_commits, + "totalChanges": len(comparison.files), + } + + def get_file_contents( + self, owner: str, repo: str, path: str, ref: str + ) -> str: + """ + Get file contents at a specific ref. + + Args: + owner: Repository owner. + repo: Repository name. + path: File path within the repository. + ref: Git ref to get contents at. + + Returns: + The file contents as a string. + + Raises: + Exception: If the path is not a file. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + content = repository.get_contents(path, ref) + + if isinstance(content, list): + raise Exception(f"{path} is not a file") + + return content.decoded_content.decode("utf-8") + + def _load_ignore_patterns( + self, owner: str, repo: str, ref: str + ) -> tuple[pathspec.PathSpec | None, pathspec.PathSpec | None]: + """ + Load .gitignore and .augmentignore patterns separately. + + Returns both filters to maintain proper priority order: + .augmentignore → keyish → .gitignore + + Args: + owner: Repository owner. + repo: Repository name. + ref: Git ref to load patterns from. + + Returns: + Tuple of (augmentignore, gitignore) PathSpec objects, or None if not found. 
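+
+        Example (illustrative path) of how the returned specs are applied in
+        download_tarball(), preserving the priority order above:
+
+            augmentignore, gitignore = self._load_ignore_patterns(owner, repo, ref)
+            if augmentignore and augmentignore.match_file("dist/bundle.js"):
+                ...  # dropped first, before the keyish/size/UTF-8 checks
+            elif gitignore and gitignore.match_file("dist/bundle.js"):
+                ...  # dropped last, after all other filters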
+ """ + augmentignore: pathspec.PathSpec | None = None + gitignore: pathspec.PathSpec | None = None + + # Try to load .gitignore + try: + gitignore_content = self.get_file_contents(owner, repo, ".gitignore", ref) + gitignore = pathspec.PathSpec.from_lines("gitwildmatch", gitignore_content.splitlines()) + except Exception: + # .gitignore doesn't exist + pass + + # Try to load .augmentignore + try: + augmentignore_content = self.get_file_contents(owner, repo, ".augmentignore", ref) + augmentignore = pathspec.PathSpec.from_lines("gitwildmatch", augmentignore_content.splitlines()) + except Exception: + # .augmentignore doesn't exist + pass + + return augmentignore, gitignore + + def _map_github_status(self, status: str) -> str: + """ + Map GitHub file status to our FileChange status. + + Args: + status: GitHub file status string. + + Returns: + Normalized status string. + """ + status_map = { + "added": "added", + "modified": "modified", + "removed": "removed", + "renamed": "renamed", + } + return status_map.get(status, "modified") + + def ignore_files_changed( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if ignore files changed between commits. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if .gitignore or .augmentignore changed, False otherwise. + """ + repository = self._github.get_repo(f"{owner}/{repo}") + comparison = repository.compare(base, head) + + ignore_files = [".gitignore", ".augmentignore"] + return any(file.filename in ignore_files for file in comparison.files) + + def is_force_push( + self, owner: str, repo: str, base: str, head: str + ) -> bool: + """ + Check if the push was a force push. + + Args: + owner: Repository owner. + repo: Repository name. + base: Base commit SHA. + head: Head commit SHA. + + Returns: + True if the push was a force push, False otherwise. + """ + try: + repository = self._github.get_repo(f"{owner}/{repo}") + repository.compare(base, head) + return False + except GithubException: + # If comparison fails, it's likely a force push + return True diff --git a/src/index_manager.py b/src/index_manager.py new file mode 100644 index 0000000..c2bf48f --- /dev/null +++ b/src/index_manager.py @@ -0,0 +1,395 @@ +""" +Index Manager - Core indexing logic +""" + +import json +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext, File + +from .github_client import GitHubClient +from .models import FileChange, IndexConfig, IndexResult, IndexState, RepositoryInfo + +DEFAULT_MAX_COMMITS = 100 +DEFAULT_MAX_FILES = 500 + + +class IndexManager: + """Index Manager - Core indexing logic for GitHub repositories.""" + + def __init__( + self, context: DirectContext, config: IndexConfig, state_path: str + ) -> None: + """ + Initialize the IndexManager. + + Args: + context: DirectContext instance for indexing operations. + config: Configuration for the indexing operation. + state_path: Path to the state file for persistence. + """ + self._context = context + self._config = config + self._state_path = state_path + self._github = GitHubClient(config.githubToken) + + def resolve_commit_sha(self) -> None: + """ + Resolve the current commit ref to an actual commit SHA. + + This handles cases where GITHUB_SHA might be "HEAD" or a branch name. + Updates the config.currentCommit with the resolved SHA. 
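+
+        Example (illustrative; `manager` is an IndexManager instance):
+
+            config.currentCommit = "HEAD"
+            manager.resolve_commit_sha()
+            # config.currentCommit is now a full 40-character SHA, e.g.
+            # "f38294e729109744c956b6c42ca1a63d313aa4ce"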
+ """ + resolved_sha = self._github.resolve_ref( + self._config.owner, self._config.repo, self._config.currentCommit + ) + self._config.currentCommit = resolved_sha + + def _load_state(self) -> Optional[IndexState]: + """ + Load index state from file system. + + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to load state from your preferred storage: + - Redis: Use redis-py client to GET the state JSON + - S3: Use boto3 to get_object from S3 bucket + - Database: Query your database for the state record + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + data = r.get(state_key) + return json.loads(data) if data else None + + Example for S3: + import boto3 + s3 = boto3.client('s3') + response = s3.get_object(Bucket=bucket, Key=key) + data = response['Body'].read().decode('utf-8') + return json.loads(data) + + Returns: + The loaded IndexState or None if the file doesn't exist. + """ + try: + with open(self._state_path, "r", encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + return None + + def _save_state(self, state: IndexState) -> None: + """ + Save index state to file system. + + EXTENDING TO OTHER STORAGE BACKENDS: + Replace this method to save state to your preferred storage: + - Redis: Use redis-py client to SET the state JSON + - S3: Use boto3 to put_object to S3 bucket + - Database: Insert or update the state record in your database + + Example for Redis: + import redis + r = redis.Redis.from_url(redis_url) + r.set(state_key, json.dumps(state)) + + Example for S3: + import boto3 + s3 = boto3.client('s3') + s3.put_object( + Bucket=bucket, + Key=key, + Body=json.dumps(state), + ContentType='application/json' + ) + + Note: The state is just a JSON object (IndexState type) that can be + serialized and stored anywhere. For distributed systems, consider using + Redis or a database for shared state across multiple workers. + + Args: + state: The IndexState to save. + """ + # Ensure directory exists + Path(self._state_path).parent.mkdir(parents=True, exist_ok=True) + + # Write state to file + with open(self._state_path, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + + def index(self) -> IndexResult: + """ + Main indexing entry point. + + Returns: + IndexResult with success status and indexing details. + """ + print( + f"Starting index for {self._config.owner}/{self._config.repo}" + f"@{self._config.branch}" + ) + + try: + # Load previous state + previous_state = self._load_state() + + # If we have previous state, we'll need to create a new context with the imported state + # For now, we'll handle this in the incremental update logic + + # Determine if we need full re-index + should_reindex, reason = self._should_full_reindex(previous_state) + + if should_reindex: + return self._full_reindex(reason) + + # Perform incremental update + # previous_state is guaranteed to be non-null here + if not previous_state: + raise RuntimeError("previous_state should not be None at this point") + return self._incremental_update(previous_state) + except Exception as error: + print(f"Indexing failed: {error}") + return IndexResult( + success=False, + type="full", + filesIndexed=0, + filesDeleted=0, + checkpointId="", + commitSha=self._config.currentCommit, + error=str(error), + ) + + def _should_full_reindex( + self, previous_state: Optional[IndexState] + ) -> tuple[bool, Optional[str]]: + """ + Determine if full re-index is needed. + + Args: + previous_state: The previous index state, or None if first run. 
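+
+        Checks run in this order: first run, different repository, unchanged
+        commit (returns early with no re-index), force push, too many commits
+        (maxCommits), too many file changes (maxFiles), and changed
+        .gitignore/.augmentignore files.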
+ + Returns: + Tuple of (should_reindex, reason). + """ + # No previous state - first run + if not previous_state: + return (True, "first_run") + + # Different repository + if ( + previous_state["repository"]["owner"] != self._config.owner + or previous_state["repository"]["name"] != self._config.repo + ): + return (True, "different_repository") + + # Same commit - no changes + if previous_state["lastCommitSha"] == self._config.currentCommit: + print("No changes detected") + return (False, None) + + # Check for force push + is_force_push = self._github.is_force_push( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if is_force_push: + return (True, "force_push") + + # Get comparison + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Too many commits + max_commits = self._config.maxCommits or DEFAULT_MAX_COMMITS + if comparison["commits"] > max_commits: + return ( + True, + f"too_many_commits ({comparison['commits']} > {max_commits})", + ) + + # Too many file changes + max_files = self._config.maxFiles or DEFAULT_MAX_FILES + if comparison["totalChanges"] > max_files: + return ( + True, + f"too_many_files ({comparison['totalChanges']} > {max_files})", + ) + + # Check if ignore files changed + ignore_changed = self._github.ignore_files_changed( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + if ignore_changed: + return (True, "ignore_files_changed") + + return (False, None) + + def _full_reindex(self, reason: Optional[str]) -> IndexResult: + """ + Perform full repository re-index. + + Args: + reason: The reason for the full re-index. + + Returns: + IndexResult with the result of the full re-index. + """ + print(f"Performing full re-index (reason: {reason or 'unknown'})") + + # Download entire repository as tarball + files = self._github.download_tarball( + self._config.owner, self._config.repo, self._config.currentCommit + ) + + # Add all files to index + files_to_index = [ + File(path=path, contents=contents) for path, contents in files.items() + ] + + print(f"Adding {len(files_to_index)} files to index...") + self._context.add_to_index(files_to_index) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": RepositoryInfo( + owner=self._config.owner, + name=self._config.repo, + ), + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="full", + filesIndexed=len(files_to_index), + filesDeleted=0, + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + reindexReason=reason, + ) + + def _incremental_update(self, previous_state: IndexState) -> IndexResult: + """ + Perform incremental update. + + Args: + previous_state: The previous index state. + + Returns: + IndexResult with the result of the incremental update. 
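+
+        Note:
+            The previous run's exported context state is round-tripped through
+            a temporary JSON file because DirectContext.import_from_file()
+            reads from disk; only the files changed between the two commits
+            are then uploaded to or removed from the index.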
+ """ + print("Performing incremental update...") + + # Create a temporary file with the previous context state + # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open + temp_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".json", prefix="github-indexer-incremental-", delete=False + ) + temp_path = Path(temp_file.name) + try: + json.dump(previous_state["contextState"], temp_file, indent=2) + temp_file.close() # Close before reading on Windows + + # Create a new context from the previous state + self._context = DirectContext.import_from_file( + str(temp_path), + api_key=self._config.apiToken, + api_url=self._config.apiUrl, + ) + finally: + temp_path.unlink(missing_ok=True) + + # Get file changes + comparison = self._github.compare_commits( + self._config.owner, + self._config.repo, + previous_state["lastCommitSha"], + self._config.currentCommit, + ) + + # Process changes + files_to_add, files_to_delete = self._process_file_changes(comparison["files"]) + + print(f"Adding {len(files_to_add)} files, deleting {len(files_to_delete)} files") + + # Update index + if files_to_add: + self._context.add_to_index(files_to_add) + + if files_to_delete: + self._context.remove_from_index(files_to_delete) + + # Export DirectContext state + context_state = self._context.export() + context_state_dict = context_state.to_dict() + + new_state: IndexState = { + "contextState": context_state_dict, + "lastCommitSha": self._config.currentCommit, + "repository": previous_state["repository"], + } + + # Save state + self._save_state(new_state) + + return IndexResult( + success=True, + type="incremental", + filesIndexed=len(files_to_add), + filesDeleted=len(files_to_delete), + checkpointId=context_state.checkpoint_id or "", + commitSha=self._config.currentCommit, + ) + + def _process_file_changes( + self, changes: list[FileChange] + ) -> tuple[list[File], list[str]]: + """ + Process file changes and categorize them for indexing. + + Args: + changes: List of file changes from the comparison. + + Returns: + Tuple of (files_to_add, files_to_delete). + """ + files_to_add: list[File] = [] + files_to_delete: list[str] = [] + + for change in changes: + if change.status in ("added", "modified"): + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + elif change.status == "removed": + files_to_delete.append(change.path) + elif change.status == "renamed": + if change.previousFilename: + files_to_delete.append(change.previousFilename) + if change.contents: + files_to_add.append( + File(path=change.path, contents=change.contents) + ) + + return files_to_add, files_to_delete + diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..fd10065 --- /dev/null +++ b/src/main.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +""" +Main entry point for GitHub Action Indexer + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer index +""" + +import os +import re +import sys + +from auggie_sdk.context import DirectContext + +from .index_manager import IndexManager +from .models import IndexConfig + + +def get_api_credentials() -> tuple[str, str]: + """Get API credentials from environment variables.""" + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + raise ValueError("AUGMENT_API_TOKEN environment variable is required") + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + raise ValueError( + "AUGMENT_API_URL environment variable is required. 
Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')" + ) + + return api_token, api_url + + +def parse_repository_info() -> tuple[str, str, str, str]: + """ + Parse repository information from environment variables. + Returns (owner, repo, branch, current_commit). + """ + repository = os.environ.get("GITHUB_REPOSITORY", "") + parts = repository.split("/") + + if len(parts) != 2 or not parts[0] or not parts[1]: + raise ValueError('GITHUB_REPOSITORY must be in format "owner/repo"') + + owner, repo = parts + + # Extract branch name from GitHub ref + github_ref = os.environ.get("GITHUB_REF", "") + github_ref_name = os.environ.get("GITHUB_REF_NAME", "") + + if github_ref.startswith("refs/heads/"): + branch = github_ref_name + elif github_ref.startswith("refs/tags/"): + branch = f"tag/{github_ref_name}" + elif github_ref_name: + branch = github_ref_name + else: + branch = os.environ.get("BRANCH", "main") + + current_commit = os.environ.get("GITHUB_SHA", "") + if not current_commit: + raise ValueError("GITHUB_SHA environment variable is required") + + return owner, repo, branch, current_commit + + +def load_config() -> IndexConfig: + """Load configuration from environment variables.""" + github_token = os.environ.get("GITHUB_TOKEN") + if not github_token: + raise ValueError("GITHUB_TOKEN environment variable is required") + + api_token, api_url = get_api_credentials() + owner, repo, branch, current_commit = parse_repository_info() + + max_commits = os.environ.get("MAX_COMMITS") + max_files = os.environ.get("MAX_FILES") + + return IndexConfig( + apiToken=api_token, + apiUrl=api_url, + githubToken=github_token, + owner=owner, + repo=repo, + branch=branch, + currentCommit=current_commit, + maxCommits=int(max_commits) if max_commits else None, + maxFiles=int(max_files) if max_files else None, + ) + + +def get_state_path(branch: str) -> str: + """Get the state file path for the current branch.""" + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def main() -> None: + """Main function.""" + print("GitHub Action Indexer - Starting...") + + try: + # Load configuration + config = load_config() + state_path = get_state_path(config.branch) + + print(f"Repository: {config.owner}/{config.repo}") + print(f"Branch: {config.branch}") + print(f"Commit ref: {config.currentCommit}") + print(f"State path: {state_path}") + + # Create DirectContext + context = DirectContext.create(api_key=config.apiToken, api_url=config.apiUrl) + + # Create index manager and resolve commit SHA + manager = IndexManager(context, config, state_path) + manager.resolve_commit_sha() + + print(f"Resolved commit SHA: {config.currentCommit}") + + # Perform indexing + result = manager.index() + + # Print results + print("\n=== Indexing Results ===") + print(f"Success: {result.success}") + print(f"Type: {result.type}") + print(f"Files Indexed: {result.filesIndexed}") + print(f"Files Deleted: {result.filesDeleted}") + print(f"Checkpoint ID: {result.checkpointId}") + print(f"Commit SHA: {result.commitSha}") + + if result.reindexReason: + print(f"Re-index Reason: {result.reindexReason}") + + if result.error: + print(f"Error: {result.error}", file=sys.stderr) + sys.exit(1) + + # Set GitHub Actions output + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + output_lines = [ + f"success={result.success}", + f"type={result.type}", + f"files_indexed={result.filesIndexed}", + 
f"files_deleted={result.filesDeleted}", + f"checkpoint_id={result.checkpointId}", + f"commit_sha={result.commitSha}", + ] + with open(github_output, "a") as f: + f.write("\n".join(output_lines) + "\n") + + print("\nIndexing completed successfully!") + + except Exception as error: + print(f"Fatal error: {error}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..8b3dfc0 --- /dev/null +++ b/src/models.py @@ -0,0 +1,131 @@ +""" +Types for the GitHub Action Indexer + +This module defines the data types used by the GitHub Action Indexer +for tracking index state, file changes, configuration, and results. +""" + +from dataclasses import dataclass +from typing import Literal, Optional + +from typing_extensions import TypedDict + +from auggie_sdk.context.models import DirectContextState + + +class RepositoryInfo(TypedDict): + """Repository information for index state.""" + + owner: str # Repository owner + name: str # Repository name + + +class IndexState(TypedDict): + """ + Persistent state for the GitHub Action Indexer. + + This state is stored between indexing runs to enable incremental indexing. + """ + + contextState: DirectContextState + """DirectContext state (checkpoint, blobs, etc.)""" + + lastCommitSha: str + """Last indexed commit SHA (must be a full 40-character SHA, not a ref like 'HEAD')""" + + repository: RepositoryInfo + """Repository information - used to verify we're indexing the same repository""" + + +@dataclass +class FileChange: + """ + Represents a file change detected between commits. + + Used to track what files need to be indexed or removed from the index. + """ + + path: str + """File path""" + + status: Literal["added", "modified", "removed", "renamed"] + """Change status: added, modified, removed, renamed""" + + previousFilename: Optional[str] = None + """Previous filename (for renames)""" + + contents: Optional[str] = None + """File contents (for added/modified files)""" + + oldBlobName: Optional[str] = None + """Blob name from previous index (for modified/removed files)""" + + +@dataclass +class IndexConfig: + """ + Configuration for the GitHub Action Indexer. + + Contains all the settings needed to perform indexing of a GitHub repository. + """ + + apiToken: str + """Augment API token""" + + apiUrl: str + """Augment API URL (provided via AUGMENT_API_URL env var)""" + + githubToken: str + """GitHub token""" + + owner: str + """Repository owner""" + + repo: str + """Repository name""" + + branch: str + """Branch to index""" + + currentCommit: str + """Current commit SHA""" + + maxCommits: Optional[int] = None + """Maximum commits before full re-index""" + + maxFiles: Optional[int] = None + """Maximum file changes before full re-index""" + + +@dataclass +class IndexResult: + """ + Result from an indexing operation. + + Contains information about what was indexed and whether it was successful. 
+ """ + + success: bool + """Whether indexing was successful""" + + type: Literal["full", "incremental", "no-changes"] + """Type of indexing performed""" + + filesIndexed: int + """Number of files indexed""" + + filesDeleted: int + """Number of files deleted""" + + checkpointId: str + """New checkpoint ID""" + + commitSha: str + """Commit SHA that was indexed""" + + error: Optional[str] = None + """Error message if failed""" + + reindexReason: Optional[str] = None + """Reason for full re-index (if applicable)""" + diff --git a/src/search.py b/src/search.py new file mode 100644 index 0000000..fdac426 --- /dev/null +++ b/src/search.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +CLI tool to search the indexed repository + +Usage: + cd examples/python-sdk/context + python -m github_action_indexer search "your search query" + python -m github_action_indexer search "your search query" --max-chars 5000 +""" + +import argparse +import json +import os +import re +import sys +import tempfile +from pathlib import Path +from typing import Optional + +from auggie_sdk.context import DirectContext + +from .models import IndexState + + +def get_state_path() -> str: + """Get the state file path for the current branch.""" + branch = os.environ.get("BRANCH", "main") + sanitized_branch = re.sub(r"[^a-zA-Z0-9\-_]", "-", branch) + return os.environ.get( + "STATE_PATH", f".augment-index-state/{sanitized_branch}/state.json" + ) + + +def load_state(state_path: str) -> Optional[IndexState]: + """Load index state from file system.""" + try: + with open(state_path, "r") as f: + data = f.read() + return json.loads(data) + except FileNotFoundError: + return None + + +def main() -> None: + """Main search function.""" + # Parse command line arguments + parser = argparse.ArgumentParser( + description="Search the indexed repository", + epilog='Example: python search.py "authentication functions"', + ) + parser.add_argument("query", help="Search query") + parser.add_argument( + "--max-chars", + type=int, + help="Maximum number of characters in output", + dest="max_chars", + ) + args = parser.parse_args() + + # Get API credentials + api_token = os.environ.get("AUGMENT_API_TOKEN") + if not api_token: + print("Error: AUGMENT_API_TOKEN environment variable is required", file=sys.stderr) + sys.exit(1) + + api_url = os.environ.get("AUGMENT_API_URL") + if not api_url: + print( + "Error: AUGMENT_API_URL environment variable is required. Please set it to your " + "tenant-specific URL (e.g., 'https://your-tenant.api.augmentcode.com/')", + file=sys.stderr, + ) + sys.exit(1) + + print(f'Searching for: "{args.query}"') + if args.max_chars is not None: + print(f"Limiting results to max {args.max_chars} characters\n") + else: + print() + + try: + # Load the index state first + state_path = get_state_path() + print(f"Loading index state from: {state_path}") + state = load_state(state_path) + + if not state: + print("Error: No index state found. 
Run indexing first.", file=sys.stderr)
+            print("  python -m src.main", file=sys.stderr)
+            sys.exit(1)
+
+        # Create a temporary file with the context state for import
+        # Use delete=False because Windows can't reopen a NamedTemporaryFile while it's open
+        temp_file = tempfile.NamedTemporaryFile(
+            mode="w", suffix=".json", prefix="github-indexer-state-", delete=False
+        )
+        temp_path = Path(temp_file.name)
+        try:
+            json.dump(state["contextState"], temp_file, indent=2)
+            temp_file.close()  # Close before reading on Windows
+
+            # Import state using DirectContext.import_from_file
+            context = DirectContext.import_from_file(
+                str(temp_path), api_key=api_token, api_url=api_url
+            )
+        finally:
+            temp_path.unlink(missing_ok=True)
+
+        file_count = len(state["contextState"].get("blobs", []))
+
+        print(f"Loaded index: {file_count} files indexed")
+        print(f"Repository: {state['repository']['owner']}/{state['repository']['name']}")
+        print(f"Last indexed commit: {state['lastCommitSha']}\n")
+
+        # Perform search with optional character limit
+        results = context.search(args.query, max_output_length=args.max_chars)
+
+        if not results or results.strip() == "":
+            print("No results found.")
+            return
+
+        print("Search results:\n")
+        print(results)
+
+    except Exception as error:
+        print(f"Search failed: {error}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
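
The filtering pipeline in src/file_filter.py is self-contained, so its behavior
can be checked directly. A minimal sketch (paths and byte contents are
illustrative):

    from src.file_filter import should_filter_file

    # Safe path, small, not keyish, valid UTF-8 -> included
    print(should_filter_file("src/app.py", b"print('hello')"))
    # {'filtered': False}

    # Filename matches the keyish pattern -> skipped before UTF-8 validation
    print(should_filter_file("deploy/id_rsa", b"-----BEGIN RSA PRIVATE KEY-----"))
    # {'filtered': True, 'reason': 'keyish_pattern'}

    # Invalid UTF-8 -> treated as binary and skipped
    print(should_filter_file("logo.png", b"\x89PNG\r\n"))
    # {'filtered': True, 'reason': 'binary_file'}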
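
For a local dry run outside Actions, the two entry points can be driven with
the same environment variables the workflow sets. A sketch under assumed
values (tokens, repository, and query are placeholders); run it from the
repository root so the src package is importable:

    import os
    import subprocess

    env = {
        **os.environ,
        "AUGMENT_API_TOKEN": "...",
        "AUGMENT_API_URL": "https://your-tenant.api.augmentcode.com/",
        "GITHUB_TOKEN": "...",
        "GITHUB_REPOSITORY": "octocat/hello-world",
        "GITHUB_SHA": "HEAD",  # resolved to a full SHA by resolve_commit_sha()
        "GITHUB_REF": "refs/heads/main",
        "GITHUB_REF_NAME": "main",
    }
    subprocess.run(["python", "-m", "src.main"], env=env, check=True)
    subprocess.run(
        ["python", "-m", "src.search", "authentication functions",
         "--max-chars", "5000"],
        env=env,
        check=True,
    )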