diff --git a/.gitignore b/.gitignore index 51de6b6..9ed5f99 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +hyperdb/templates/data.js diff --git a/README.md b/README.md index c9a21df..10b6b63 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@
Hyper DB -   + 
@@ -30,9 +30,9 @@ - @@ -48,7 +48,7 @@
-## :dart: About +## :dart: About Hypergraph-DB is a lightweight, flexible, and Python-based database designed to model and manage **hypergraphs**—a generalized graph structure where edges (hyperedges) can connect any number of vertices. This makes Hypergraph-DB an ideal solution for representing complex relationships between entities in various domains, such as knowledge graphs, social networks, and scientific data modeling. @@ -73,15 +73,15 @@ This example demonstrates the efficiency of Hypergraph-DB, even when working wit The following table shows the results of stress tests performed on Hypergraph-DB with varying scales. The tests measure the time taken to add vertices, add hyperedges, and query vertices and hyperedges. | **Number of Vertices** | **Number of Hyperedges** | **Add Vertices (s)** | **Add Edges (s)** | **Query Vertices (s/queries)** | **Query Edges (s/queries)** | **Total Time (s)** | -|-------------------------|--------------------------|-----------------------|-------------------|-------------------------------|----------------------------|--------------------| -| 5,000 | 1,000 | 0.01 | 0.01 | 0.00/2,000 | 0.01/2,000 | 0.02 | -| 10,000 | 2,000 | 0.01 | 0.01 | 0.00/4,000 | 0.02/4,000 | 0.05 | -| 25,000 | 5,000 | 0.03 | 0.04 | 0.01/10,000 | 0.05/10,000 | 0.13 | -| 50,000 | 10,000 | 0.06 | 0.07 | 0.02/20,000 | 0.12/20,000 | 0.26 | -| 100,000 | 20,000 | 0.12 | 0.17 | 0.04/40,000 | 0.24/40,000 | 0.58 | -| 250,000 | 50,000 | 0.35 | 0.40 | 0.11/100,000 | 0.61/100,000 | 1.47 | -| 500,000 | 100,000 | 0.85 | 1.07 | 0.22/200,000 | 1.20/200,000 | 3.34 | -| 1,000,000 | 200,000 | 1.75 | 1.82 | 0.51/400,000 | 2.52/400,000 | 6.60 | +| ---------------------- | ------------------------ | -------------------- | ----------------- | ------------------------------ | --------------------------- | ------------------ | +| 5,000 | 1,000 | 0.01 | 0.01 | 0.00/2,000 | 0.01/2,000 | 0.02 | +| 10,000 | 2,000 | 0.01 | 0.01 | 0.00/4,000 | 0.02/4,000 | 0.05 | +| 25,000 | 5,000 | 0.03 | 0.04 | 0.01/10,000 | 0.05/10,000 | 0.13 | +| 50,000 | 10,000 | 0.06 | 0.07 | 0.02/20,000 | 0.12/20,000 | 0.26 | +| 100,000 | 20,000 | 0.12 | 0.17 | 0.04/40,000 | 0.24/40,000 | 0.58 | +| 250,000 | 50,000 | 0.35 | 0.40 | 0.11/100,000 | 0.61/100,000 | 1.47 | +| 500,000 | 100,000 | 0.85 | 1.07 | 0.22/200,000 | 1.20/200,000 | 3.34 | +| 1,000,000 | 200,000 | 1.75 | 1.82 | 0.51/400,000 | 2.52/400,000 | 6.60 | --- @@ -92,6 +92,7 @@ The following table shows the results of stress tests performed on Hypergraph-DB 2. **Query Performance**: Querying vertices and hyperedges remains fast, even for large-scale hypergraphs. For instance: + - Querying **200,000 vertices** takes only **0.22 seconds**. - Querying **200,000 hyperedges** takes only **1.20 seconds**. @@ -102,30 +103,40 @@ This performance makes **Hypergraph-DB** a great choice for applications requiri --- -## :sparkles: Features +## :sparkles: Features -:heavy_check_mark: **Flexible Hypergraph Representation** - - Supports vertices (`v`) and hyperedges (`e`), where hyperedges can connect any number of vertices. - - Hyperedges are represented as sorted tuples of vertex IDs, ensuring consistency and efficient operations. +:heavy_check_mark: **Flexible Hypergraph Representation** -:heavy_check_mark: **Vertex and Hyperedge Management** - - Add, update, delete, and query vertices and hyperedges with ease. - - Built-in methods to retrieve neighbors, incident edges, and other relationships. +- Supports vertices (`v`) and hyperedges (`e`), where hyperedges can connect any number of vertices. +- Hyperedges are represented as sorted tuples of vertex IDs, ensuring consistency and efficient operations. -:heavy_check_mark: **Neighbor Queries** - - Get neighboring vertices or hyperedges for a given vertex or hyperedge. +:heavy_check_mark: **Vertex and Hyperedge Management** -:heavy_check_mark: **Persistence** - - Save and load hypergraphs to/from disk using efficient serialization (`pickle`). - - Ensures data integrity and supports large-scale data storage. +- Add, update, delete, and query vertices and hyperedges with ease. +- Built-in methods to retrieve neighbors, incident edges, and other relationships. -:heavy_check_mark: **Customizable and Extensible** - - Built on Python’s `dataclasses`, making it easy to extend and customize for specific use cases. +:heavy_check_mark: **Neighbor Queries** ---- +- Get neighboring vertices or hyperedges for a given vertex or hyperedge. + +:heavy_check_mark: **Persistence** + +- Save and load hypergraphs to/from disk using efficient serialization (`pickle`). +- Ensures data integrity and supports large-scale data storage. + +:heavy_check_mark: **Interactive Visualization** -## :rocket: Installation +- Built-in web-based visualization tool with `draw()` method. +- Interactive exploration of hypergraph structure, vertex properties, and relationships. +- Real-time visualization that reflects the current state of your hypergraph. +:heavy_check_mark: **Customizable and Extensible** + +- Built on Python's `dataclasses`, making it easy to extend and customize for specific use cases. + +--- + +## :rocket: Installation Hypergraph-DB is a Python library. You can install it directly from PyPI using `pip`. @@ -146,7 +157,7 @@ pip install -r requirements.txt --- -## :checkered_flag: Starting +## :checkered_flag: Starting This section provides a quick guide to get started with Hypergraph-DB, including iusage, and running basic operations. Below is an example of how to use Hypergraph-DB, based on the provided test cases. @@ -246,26 +257,45 @@ print(hg2.all_v) # Output: {1, 2, 4, 5, 6, 7, 8, 9, 10} print(hg2.all_e) # Output: {(4, 5, 7, 9), (9, 10), (1, 2, 7), (1, 2), (2, 6, 9), (1, 4, 6), (2, 5, 6)} ``` +#### **7. Visualization with draw()** + +Hypergraph-DB includes a built-in visualization tool that allows you to interactively explore your hypergraph data through a web-based interface. + +```python +# Visualize the hypergraph in your browser +hg.draw() +``` + +The `draw()` function will: ---- +- Start a local web server (default port: 8080) +- Automatically open your default web browser +- Display an interactive visualization of your hypergraph +- Show vertex degrees, relationships, and hyperedge information +- Allow you to explore the graph structure visually +**Features of the visualization:** -## :memo: License +- **Interactive HyperGraph View**: Navigate through vertices and hyperedges +- **Vertex Information**: View vertex properties, degrees, and descriptions +- **Hyperedge Details**: Explore hyperedge relationships and metadata +- **Degree-based Sorting**: Vertices are sorted by degree for easy identification of important nodes +- **Real-time Data**: The visualization reflects the current state of your hypergraph -Hypergraph-DB is open-source and licensed under the [Apache License 2.0](LICENSE). Feel free to use, modify, and distribute it as per the license terms. +--- +## :memo: License + +Hypergraph-DB is open-source and licensed under the [Apache License 2.0](LICENSE). Feel free to use, modify, and distribute it as per the license terms. --- -## :email: Contact +## :email: Contact Hypergraph-DB is maintained by [iMoon-Lab](http://moon-lab.tech/), Tsinghua University. If you have any questions, please feel free to contact us via email: [Yifan Feng](mailto:evanfeng97@gmail.com). - Made with :heart: by Yifan Feng   Back to top - - diff --git a/hyperdb/base.py b/hyperdb/base.py index 6ee2a7d..9f7a6c5 100644 --- a/hyperdb/base.py +++ b/hyperdb/base.py @@ -283,3 +283,20 @@ def stats(self) -> dict: Return basic statistics of the hypergraph. """ raise NotImplementedError + + def draw(self, port: int = 8080, open_browser: bool = True): + """ + Draw the hypergraph data of the current HyperDB instance + + Args: + ``port``: Server port number, defaults to 8080 + ``open_browser``: Whether to automatically open the browser, defaults to True + + """ + from .draw import draw_hypergraph + + return draw_hypergraph( + hypergraph_db=self, + port=port, + open_browser=open_browser + ) diff --git a/hyperdb/draw.py b/hyperdb/draw.py new file mode 100644 index 0000000..34bb815 --- /dev/null +++ b/hyperdb/draw.py @@ -0,0 +1,204 @@ +import os +import webbrowser +import threading +import json +from pathlib import Path +from typing import Dict, Any +import http.server +import socketserver +from urllib.parse import urlparse + +from .hypergraph import HypergraphDB + + +class HypergraphViewer: + """Hypergraph visualization tool""" + + def __init__(self, hypergraph_db: HypergraphDB, port: int = 8080): + self.hypergraph_db = hypergraph_db + self.port = port + self.html_content = self._generate_html_with_data() + + def _generate_html_with_data(self): + """Generate HTML content with embedded data""" + # Get all data + database_info = { + "name": "current_hypergraph", + "vertices": self.hypergraph_db.num_v, + "edges": self.hypergraph_db.num_e + } + + # Get vertex list + vertices = list(self.hypergraph_db.all_v)[:100] + vertex_data = [] + + for v_id in vertices: + v_data = self.hypergraph_db.v(v_id, {}) + vertex_data.append({ + "id": v_id, + "degree": self.hypergraph_db.degree_v(v_id), + "entity_type": v_data.get("entity_type", ""), + "description": v_data.get("description", "")[:100] + "..." if len(v_data.get("description", "")) > 100 else v_data.get("description", "") + }) + + # Sort by degree + vertex_data.sort(key=lambda x: x["degree"], reverse=True) + + # Get graph data for all vertices + graph_data = {} + for vertex in vertex_data: + vertex_id = vertex["id"] + graph_data[vertex_id] = self._get_vertex_neighbor_data(self.hypergraph_db, vertex_id) + + # Embed data into HTML + return self._get_html_template(database_info, vertex_data, graph_data) + + + def _get_vertex_neighbor_data(self, hypergraph_db: HypergraphDB, vertex_id: str) -> Dict[str, Any]: + """Get vertex neighbor data""" + hg = hypergraph_db + + if not hg.has_v(vertex_id): + raise ValueError(f"Vertex {vertex_id} not found") + + # Get all neighbor hyperedges of the vertex + neighbor_edges = hg.nbr_e_of_v(vertex_id) + + # Collect all related vertices + all_vertices = {vertex_id} + edges_data = {} + + for edge_tuple in neighbor_edges: + # Add all vertices in the hyperedge + all_vertices.update(edge_tuple) + + # Get hyperedge data + edge_data = hg.e(edge_tuple, {}) + edge_key = "|#|".join(str(item) for item in edge_tuple) + edges_data[edge_key] = { + "keywords": edge_data.get("keywords", ""), + "summary": edge_data.get("summary", ""), + "weight": len(edge_tuple) # Hyperedge weight equals the number of vertices it contains + } + + # Get data for all vertices + vertices_data = {} + for v_id in all_vertices: + v_data = hg.v(v_id, {}) + vertices_data[v_id] = { + "entity_name": v_data.get("entity_name", v_id), + "entity_type": v_data.get("entity_type", ""), + "description": v_data.get("description", ""), + "additional_properties": v_data.get("additional_properties", "") + } + + return { + "vertices": vertices_data, + "edges": edges_data + } + + def _get_html_template(self, database_info: Dict, vertex_data: list, graph_data: Dict) -> str: + """Get HTML template with embedded data""" + # Serialize data to JSON string + embedded_data = { + "database": database_info, + "vertices": vertex_data, + "graphs": graph_data + } + data_json = json.dumps(embedded_data, ensure_ascii=False) + + # Read HTML template file + template_path = Path(__file__).parent / "templates" / "hypergraph_viewer.html" + + try: + with open(template_path, 'r', encoding='utf-8') as f: + html_template = f.read() + except FileNotFoundError: + raise FileNotFoundError(f"HTML template file not found: {template_path}") + + # Replace placeholders in template + html_content = html_template.replace("{{DATA_JSON}}", data_json) + + return html_content + + def start_server(self, open_browser: bool = True): + """Start simple HTTP server""" + + class CustomHTTPRequestHandler(http.server.BaseHTTPRequestHandler): + def __init__(self, html_content, *args, **kwargs): + self.html_content = html_content + super().__init__(*args, **kwargs) + + def do_GET(self): + self.send_response(200) + self.send_header('Content-type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(self.html_content.encode('utf-8')) + + def log_message(self, format, *args): + # Disable log output + pass + + def run_server(): + handler = lambda *args, **kwargs: CustomHTTPRequestHandler(self.html_content, *args, **kwargs) + with socketserver.TCPServer(("127.0.0.1", self.port), handler) as httpd: + httpd.serve_forever() + + # Start server in new thread + server_thread = threading.Thread(target=run_server, daemon=True) + server_thread.start() + + if open_browser: + # Wait for server to start + import time + time.sleep(1) + + # Open browser + url = f"http://127.0.0.1:{self.port}" + print(f"🚀 Hypergraph visualization server started: {url}") + webbrowser.open(url) + + return server_thread + + +def draw_hypergraph(hypergraph_db: HypergraphDB, port: int = 8080, open_browser: bool = True): + """ + Main function to draw hypergraph + + Args: + hypergraph_db: HypergraphDB instance + port: Server port + open_browser: Whether to automatically open browser + + Returns: + HypergraphViewer instance + """ + print("🎨 Starting hypergraph visualization...") + print(f"📁 Vertices: {hypergraph_db.num_v}, Hyperedges: {hypergraph_db.num_e}") + + viewer = HypergraphViewer(hypergraph_db=hypergraph_db, port=port) + + # Start server + server_thread = viewer.start_server(open_browser=open_browser) + + try: + print("⌨️ Press Ctrl+C to stop server") + # Keep main thread running + server_thread.join() + except KeyboardInterrupt: + print("\n🛑 Server stopped") + + return viewer + + +# Convenience function +def draw(hypergraph_db: HypergraphDB, port: int = 8899): + """ + Convenient hypergraph drawing function + + Args: + hypergraph_db: HypergraphDB instance + port: Server port + """ + return draw_hypergraph(hypergraph_db, port, True) + diff --git a/hyperdb/templates/hypergraph_viewer.html b/hyperdb/templates/hypergraph_viewer.html new file mode 100644 index 0000000..c2e6edb --- /dev/null +++ b/hyperdb/templates/hypergraph_viewer.html @@ -0,0 +1,625 @@ + + + + + + Hypergraph Visualization + + + + + + + + + +
+ + + +