diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..ba3e206 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,39 @@ +name: Tests + +on: + push + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Format check with black + run: | + black --check src tests + + - name: Type check with mypy + run: | + mypy src + + - name: Run unit tests + run: | + pytest tests/test_client.py -v --cov=semcache --cov-report=xml + + + # todo run integration tests pointing at real docker image of semcache \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77cbcd7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,140 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ 
+ipython_config.py + + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# macOS +.DS_Store + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini + +# Project specific +*.log +.cache/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..411e98d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Sensoris + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1864d41 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include LICENSE +include README.md +include pyproject.toml +include src/semcache/py.typed +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ac57f5c --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +.PHONY: install install-dev test test-integration test-all coverage format type-check clean build upload upload-test + +install: + pip install -e . + +install-dev: + pip install -e ".[dev]" + +test: + PYTHONPATH=src pytest tests/test_client.py -v + +test-integration: + PYTHONPATH=src pytest tests/test_integration.py + +format: + black src tests + +type-check: + mypy src + +clean: + rm -rf build/ + rm -rf dist/ + rm -rf *.egg-info + rm -rf src/*.egg-info + find . -type d -name __pycache__ -exec rm -rf {} + + find . -type f -name "*.pyc" -delete + +build: clean + python -m build + +upload-test: build + twine upload --repository testpypi dist/* + +upload: build + twine upload dist/* \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..dad23cf --- /dev/null +++ b/README.md @@ -0,0 +1,148 @@ +# Semcache Python SDK + +A Python client library for [Semcache](https://github.com/sensoris/semcache) + +## Installation + +```bash +pip install semcache +``` + +## Quick Start + +```python +from semcache import Semcache + +# Initialize the client +client = Semcache(base_url="http://localhost:8080") + +# Store a key-data pair +client.put("What is the capital of France?", "Paris") + +# Retrieve data by semantic similarity +response = client.get("What's the capital city of France?") +print(response) # "Paris" +``` + +The above snippet requires a running Semcache server. 
You can start one using Docker: + +```bash +docker run -p 8080:8080 ghcr.io/sensoris/semcache:latest +``` + +## Configuration + +```python +client = Semcache( + base_url="http://localhost:8080", # Semcache server URL + timeout=30, # Request timeout in seconds +) +``` + +## Usage Examples + +### Basic Usage + +```python +from semcache import Semcache + +# Create a client instance +client = Semcache() + +# Store some key-data pairs +client.put("What is Python?", "Python is a high-level programming language") +client.put("What is machine learning?", "Machine learning is a subset of AI that enables systems to learn from data") + +# Retrieve data - exact match not required +response = client.get("Tell me about Python") +print(response) # "Python is a high-level programming language" +``` + +### Error Handling + +```python +from semcache import Semcache, SemcacheConnectionError, SemcacheTimeoutError + +client = Semcache(base_url="http://localhost:8080", timeout=5) + +try: + client.put("test query", "test response") +except SemcacheConnectionError: + print("Failed to connect to Semcache server") +except SemcacheTimeoutError: + print("Request timed out") +``` + +## API Reference + +### `Semcache(base_url="http://localhost:8080", timeout=30)` + +Initialize a new Semcache client. + +**Parameters:** +- `base_url` (str): The base URL of the Semcache server +- `timeout` (int): Request timeout in seconds + +### `put(key: str, data: str) -> None` + +Store a key-data pair in the cache. + +**Parameters:** +- `key` (str): The key/query to cache +- `data` (str): The data/response to cache + +**Raises:** +- `SemcacheError`: If the request fails + +### `get(key: str) -> Optional[str]` + +Retrieve cached data for a key using semantic similarity. 
+ +**Parameters:** +- `key` (str): The key/query to look up + +**Returns:** +- `Optional[str]`: The cached data if found, None otherwise + +**Raises:** +- `SemcacheError`: If the request fails + +## Exceptions + +- `SemcacheError`: Base exception for all Semcache errors +- `SemcacheConnectionError`: Raised when unable to connect to the server +- `SemcacheTimeoutError`: Raised when a request times out +- `SemcacheAPIError`: Raised when the API returns an error response + +## Development + +### Setup Development Environment + +```bash +# Create a virtual environment +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install development dependencies +pip install -e ".[dev]" +``` + +### Run Tests + +```bash +pytest +``` + +### Format Code + +```bash +black src tests +``` + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## Contributing + +Contributions are welcome! Please feel free to submit a pull request. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index eec738b..06618a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,19 +6,19 @@ build-backend = "setuptools.build_meta" name = "semcache" version = "0.1.0" authors = [ - {name = "Sensoris", email = "contact@semcache.io"}, + {name = "Louis Cameron Booth", email = "contact@semcache.io"}, + {name = "Jacob Hedén Malm", email = "contact@semcache.io"}, ] -description = "A Python library for the Semcache API " +description = "A Python library for the Semcache API" readme = "README.md" license = {text = "MIT"} -requires-python = ">=3.7" +requires-python = ">=3.8" classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -57,7 +57,7 @@ where = ["src"] semcache = ["py.typed"] [tool.mypy] -python_version = "3.7" +python_version = "3.8" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true @@ -73,7 +73,7 @@ strict_equality = true [tool.black] line-length = 88 -target-version = ['py37'] +target-version = ['py38'] include = '\.pyi?$' extend-exclude = ''' /( diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..d6d5fc3 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,13 @@ +# Development dependencies +pytest>=7.0.0 +pytest-cov>=4.0.0 +black>=23.0.0 +mypy>=1.0.0 +types-requests>=2.28.0 +types-urllib3>=1.26.0 +build>=0.10.0 +twine>=4.0.0 + +# Runtime dependencies (for development) +requests>=2.28.0 +urllib3>=1.26.0 \ No newline at end of file diff --git a/src/semcache/__init__.py b/src/semcache/__init__.py new file mode 100644 index 0000000..13a168e --- /dev/null +++ b/src/semcache/__init__.py @@ -0,0 +1,22 @@ +""" 
class SemcacheError(Exception):
    """Base exception for all Semcache errors."""


class SemcacheConnectionError(SemcacheError):
    """Raised when unable to connect to the Semcache server."""


class SemcacheTimeoutError(SemcacheError):
    """Raised when a request to the Semcache server times out."""


class SemcacheAPIError(SemcacheError):
    """Raised when the Semcache API returns an error response."""


class Semcache:
    """
    Client for interacting with the Semcache server.

    Args:
        base_url: The base URL of the Semcache server (default: "http://localhost:8080").
            Trailing slashes are stripped.
        timeout: Request timeout in seconds, passed to every request (default: 30)

    Example:
        >>> from semcache import Semcache
        >>> client = Semcache()
        >>> client.put("What is the capital of France?", "Paris")
        >>> result = client.get("What is the capital of France?")
        >>> print(result)
        Paris
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        timeout: float = 30,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

    def _send(
        self, send: Any, url: str, payload: Any, none_on_404: bool = False
    ) -> Any:
        """
        Issue one JSON request and translate failures into Semcache exceptions.

        Args:
            send: Bound session method to call (``self.session.put`` or
                ``self.session.post``)
            url: Full endpoint URL
            payload: Object sent as the JSON request body
            none_on_404: When True, a 404 status yields ``None`` instead of
                an error

        Returns:
            The response object, or ``None`` for a tolerated 404

        Raises:
            SemcacheTimeoutError: If the request times out
            SemcacheConnectionError: If the server cannot be reached
            SemcacheAPIError: If the server answers with an HTTP error status
            SemcacheError: For any other failure
        """
        try:
            response = send(url, json=payload, timeout=self.timeout)
            if none_on_404 and response.status_code == 404:
                return None
            response.raise_for_status()
            return response
        # Timeout is handled before ConnectionError, so an exception matching
        # both is reported as a timeout. Every handler chains the original
        # exception so the underlying cause stays visible in tracebacks.
        except requests.exceptions.Timeout as exc:
            raise SemcacheTimeoutError(
                f"Request timed out after {self.timeout} seconds"
            ) from exc
        except requests.exceptions.ConnectionError as exc:
            raise SemcacheConnectionError(
                f"Failed to connect to Semcache server at {url}"
            ) from exc
        except requests.exceptions.HTTPError as exc:
            # HTTPError comes from raise_for_status(), so `response` is bound.
            raise SemcacheAPIError(
                f"HTTP {response.status_code}: {response.text}"
            ) from exc
        except Exception as exc:
            raise SemcacheError(f"Unexpected error: {exc}") from exc

    def put(self, key: str, data: str) -> None:
        """
        Store a key-data pair in the cache.

        Args:
            key: The key/prompt to cache
            data: The data/response to cache

        Raises:
            SemcacheError: If the request fails (see the subclasses for
                timeout, connection and HTTP-status failures)
        """
        url = f"{self.base_url}/semcache/v1/put"
        self._send(self.session.put, url, {"key": key, "data": data})

    def get(self, key: str) -> Optional[str]:
        """
        Retrieve a cached value for a key.

        Args:
            key: The key/prompt to look up

        Returns:
            The response body text if found, None if the server answers 404

        Raises:
            SemcacheError: If the request fails (see the subclasses for
                timeout, connection and HTTP-status failures)
        """
        url = f"{self.base_url}/semcache/v1/get"
        response = self._send(
            self.session.post, url, {"key": key}, none_on_404=True
        )
        return None if response is None else response.text

    def __enter__(self) -> "Semcache":
        """Context manager support."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Close session on context exit."""
        self.close()

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()
class TestSemcache:
    """Unit tests for the Semcache client, run against a mocked session."""

    @pytest.fixture
    def session(self):
        """Patch ``requests.Session`` in ``semcache.client`` and yield the fake session."""
        with patch("semcache.client.requests.Session") as session_cls:
            fake = Mock()
            session_cls.return_value = fake
            yield fake

    @staticmethod
    def _response(status_code, text=None, error=None):
        """Build a mock response with the given status, body text and raise_for_status outcome."""
        reply = Mock()
        reply.status_code = status_code
        if text is not None:
            reply.text = text
        if error is None:
            reply.raise_for_status.return_value = None
        else:
            reply.raise_for_status.side_effect = error
        return reply

    def test_initialization_default(self):
        """Defaults are the local server URL and a 30 second timeout."""
        client = Semcache()
        assert client.base_url == "http://localhost:8080"
        assert client.timeout == 30

    def test_initialization_custom(self):
        """Explicit constructor arguments are stored on the client."""
        client = Semcache(base_url="http://example.com:9090", timeout=60)
        assert client.base_url == "http://example.com:9090"
        assert client.timeout == 60

    def test_base_url_trailing_slash(self):
        """A trailing slash on the base URL is stripped."""
        assert Semcache(base_url="http://localhost:8080/").base_url == (
            "http://localhost:8080"
        )

    def test_put_success(self, session):
        """put() sends key and data as JSON to the put endpoint."""
        session.put.return_value = self._response(200)

        Semcache().put("test key", "test data")

        session.put.assert_called_once_with(
            "http://localhost:8080/semcache/v1/put",
            json={"key": "test key", "data": "test data"},
            timeout=30,
        )

    def test_get_success(self, session):
        """get() posts the key and returns the response text."""
        session.post.return_value = self._response(200, text="test data")

        result = Semcache().get("test key")

        assert result == "test data"
        session.post.assert_called_once_with(
            "http://localhost:8080/semcache/v1/get",
            json={"key": "test key"},
            timeout=30,
        )

    def test_get_not_found(self, session):
        """get() returns None when the server answers 404."""
        session.post.return_value = self._response(404)

        assert Semcache().get("test key") is None

    def test_connection_error(self, session):
        """A requests ConnectionError surfaces as SemcacheConnectionError."""
        session.put.side_effect = requests.exceptions.ConnectionError()

        with pytest.raises(SemcacheConnectionError) as exc_info:
            Semcache().put("test", "test")

        assert "Failed to connect to Semcache server" in str(exc_info.value)

    def test_timeout_error(self, session):
        """A requests Timeout surfaces as SemcacheTimeoutError."""
        session.post.side_effect = requests.exceptions.Timeout()

        with pytest.raises(SemcacheTimeoutError) as exc_info:
            Semcache().get("test")

        assert "Request timed out after 30 seconds" in str(exc_info.value)

    def test_http_error(self, session):
        """An HTTP error status surfaces as SemcacheAPIError with status and body."""
        session.put.return_value = self._response(
            400, text="Bad Request", error=requests.exceptions.HTTPError()
        )

        with pytest.raises(SemcacheAPIError) as exc_info:
            Semcache().put("test", "test")

        assert "HTTP 400: Bad Request" in str(exc_info.value)

    def test_context_manager(self, session):
        """Leaving the with-block closes the session exactly once."""
        with Semcache() as client:
            assert isinstance(client, Semcache)

        session.close.assert_called_once()

    def test_close_method(self, session):
        """close() closes the session exactly once."""
        Semcache().close()

        session.close.assert_called_once()
class TestSemcacheIntegration:
    """End-to-end tests that talk to a Semcache server at http://localhost:8080."""

    @pytest.fixture
    def client(self):
        """Provide a client for the local server and close it after the test."""
        with Semcache(base_url="http://localhost:8080") as live:
            yield live

    @pytest.fixture(autouse=True)
    def check_server_available(self, client):
        """Skip tests if Semcache server is not available."""
        try:
            # Any lookup will do: the result is ignored, only a connection
            # failure matters here.
            client.get("test")
        except SemcacheConnectionError:
            pytest.skip("Semcache server not available at http://localhost:8080")

    @staticmethod
    def _store(client, key, data):
        """Put one entry, then pause briefly before the caller reads it back."""
        client.put(key, data)
        time.sleep(0.1)

    def test_put_and_get(self, client):
        """A stored value is returned for the identical key."""
        self._store(client, "What is Python?", "Python is a programming language")

        assert client.get("What is Python?") == "Python is a programming language"

    def test_semantic_similarity(self, client):
        """Rephrased keys return the value stored under the original key."""
        expected = "Paris is the capital of France"
        self._store(client, "What is the capital of France?", expected)

        for key in (
            "What's the capital city of France?",
            "Tell me the capital of France",
            "France's capital is?",
        ):
            result = client.get(key)
            assert result == "Paris is the capital of France", f"Failed for key: {key}"

    def test_get_nonexistent(self, client):
        """An unknown key yields None."""
        assert client.get("This key definitely doesn't exist in the cache") is None

    def test_overwrite_value(self, client):
        """A second put for the same key replaces the first value."""
        self._store(client, "test key", "initial value")
        self._store(client, "test key", "updated value")

        assert client.get("test key") == "updated value"

    def test_unicode_content(self, client):
        """Non-ASCII keys and values round-trip unchanged."""
        unicode_key = "What is café?"
        unicode_data = "Café is coffee in French ☕"
        self._store(client, unicode_key, unicode_data)

        assert client.get(unicode_key) == unicode_data

    def test_large_content(self, client):
        """A 10,000-character value round-trips unchanged."""
        large_data = "x" * 10000
        self._store(client, "large content test", large_data)

        result = client.get("large content test")
        assert result == large_data
        assert len(result) == 10000