From 03fdea3374dcf5265aae6dbdddedff42f8b8d4b1 Mon Sep 17 00:00:00 2001
From: yuxz
Date: Wed, 5 Nov 2025 21:13:50 +0800
Subject: [PATCH] feat: add HIF format support for hypergraph export and import

---
 README.md                |   6 +
 docs/api/index.md        |  44 ++++++-
 docs/api/index.zh.md     |  98 +++++++++------
 hyperdb/hypergraph.py    | 267 +++++++++++++++++++++++++++++++++++++++
 tests/test_hypergraph.py | 117 ++++++++++++++++
 5 files changed, 490 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 86a6180..eb46e8e 100644
--- a/README.md
+++ b/README.md
@@ -304,6 +304,12 @@ hg.save("my_hypergraph.hgdb")
 hg2 = HypergraphDB(storage_file="my_hypergraph.hgdb")
 print(hg2.all_v) # Output: {1, 2, 4, 5, 6, 7, 8, 9, 10}
 print(hg2.all_e) # Output: {(4, 5, 7, 9), (9, 10), (1, 2, 7), (1, 2), (2, 6, 9), (1, 4, 6), (2, 5, 6)}
+
+# Or save in HIF (Hypergraph Interchange Format)
+hg.save_as_hif("my_hypergraph.hif.json")
+
+# Load the hypergraph from a HIF file
+hg.load_from_hif("my_hypergraph.hif.json")
 ```
 
 #### **7. 🎨 Interactive Visualization**
diff --git a/docs/api/index.md b/docs/api/index.md
index bb00ef2..009de80 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -62,11 +62,15 @@ The foundational base class that defines the core hypergraph structure and basic
 
 ### Persistence Operations
 
-| Method | Description |
-|--------|-------------|
-| `save(filepath)` | Save hypergraph to file |
-| `load(filepath)` | Load hypergraph from file |
-| `copy()` | Create a deep copy of the hypergraph |
+| Method                    | Description                                        |
+| ------------------------- | -------------------------------------------------- |
+| `save(filepath)`          | Save hypergraph to file                            |
+| `load(filepath)`          | Load hypergraph from file                          |
+| `to_hif(filepath=None)`   | Export to HIF (Hypergraph Interchange Format) JSON |
+| `save_as_hif(filepath)`   | Save hypergraph as HIF format JSON file            |
+| `from_hif(hif_data)`      | Load from a HIF dict, JSON string, or file path    |
+| `load_from_hif(filepath)` | Load hypergraph from HIF format JSON file          |
+| `copy()`                  | Create a deep copy of the hypergraph               |
 
 ### Visualization
 
@@ -136,6 +140,36 @@ hg.add_e(("person1", "person2", "person3"), {
 })
 ```
 
+### HIF Format Import/Export
+
+Hypergraph-DB supports HIF (Hypergraph Interchange Format), a standard JSON schema for exchanging hypergraph data between libraries.
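+
+A minimal sketch of the dictionary `to_hif()` builds (field names follow the implementation in `hyperdb/hypergraph.py`; the exact contents depend on your graph):
+
+```python
+hif = hg.to_hif()        # a plain, JSON-serializable dict
+hif["incidences"]        # required: [{"edge", "node", "attrs", ...}, ...]
+hif["network-type"]      # "undirected"
+hif.get("nodes")         # optional: [{"node", "attrs", ...}, ...]
+hif.get("edges")         # optional: [{"edge", "attrs", ...}, ...]
+```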
+
+#### Export to HIF Format
+
+```python
+# to_hif returns the HIF dict; passing a path also writes the JSON file
+hg.to_hif("my_hypergraph.hif.json")
+
+# Or use save_as_hif, which returns True/False instead of raising on I/O errors
+hg.save_as_hif("my_hypergraph.hif.json")
+```
+
+#### Import from HIF Format
+
+```python
+hg.load_from_hif("my_hypergraph.hif.json")
+```
+
 ## Error Handling
 
 The API includes comprehensive error handling:
diff --git a/docs/api/index.zh.md b/docs/api/index.zh.md
index 3bac767..71e430c 100644
--- a/docs/api/index.zh.md
+++ b/docs/api/index.zh.md
@@ -39,47 +39,51 @@ class CustomHypergraphDB(BaseHypergraphDB):
 
 ### 基础操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `add_v(id, data)` | 添加顶点 | `hg.add_v("A", {"name": "Alice"})` |
-| `add_e(tuple, data)` | 添加超边 | `hg.add_e(("A", "B"), {"type": "friend"})` |
-| `remove_v(id)` | 移除顶点 | `hg.remove_v("A")` |
-| `remove_e(tuple)` | 移除超边 | `hg.remove_e(("A", "B"))` |
-| `v(id)` | 获取顶点数据 | `data = hg.v("A")` |
-| `e(tuple)` | 获取超边数据 | `data = hg.e(("A", "B"))` |
+| 方法                 | 描述         | 示例                                       |
+| -------------------- | ------------ | ------------------------------------------ |
+| `add_v(id, data)`    | 添加顶点     | `hg.add_v("A", {"name": "Alice"})`         |
+| `add_e(tuple, data)` | 添加超边     | `hg.add_e(("A", "B"), {"type": "friend"})` |
+| `remove_v(id)`       | 移除顶点     | `hg.remove_v("A")`                         |
+| `remove_e(tuple)`    | 移除超边     | `hg.remove_e(("A", "B"))`                  |
+| `v(id)`              | 获取顶点数据 | `data = hg.v("A")`                         |
+| `e(tuple)`           | 获取超边数据 | `data = hg.e(("A", "B"))`                  |
 
 ### 查询操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `has_v(id)` | 检查顶点是否存在 | `hg.has_v("A")` |
-| `has_e(tuple)` | 检查超边是否存在 | `hg.has_e(("A", "B"))` |
-| `degree_v(id)` | 顶点度数 | `deg = hg.degree_v("A")` |
-| `degree_e(tuple)` | 超边大小 | `size = hg.degree_e(("A", "B"))` |
-| `nbr_v(id)` | 顶点的邻居顶点 | `neighbors = hg.nbr_v("A")` |
-| `nbr_e_of_v(id)` | 顶点的邻居超边 | `edges = hg.nbr_e_of_v("A")` |
-| `nbr_v_of_e(tuple)` | 超边的邻居顶点 | `vertices = hg.nbr_v_of_e(("A", "B"))` |
+| 方法                | 描述             | 示例                                   |
+| ------------------- | ---------------- | -------------------------------------- |
+| `has_v(id)`         | 检查顶点是否存在 | `hg.has_v("A")`                        |
+| `has_e(tuple)`      | 检查超边是否存在 | `hg.has_e(("A", "B"))`                 |
+| `degree_v(id)`      | 顶点度数         | `deg = hg.degree_v("A")`               |
+| `degree_e(tuple)`   | 超边大小         | `size = hg.degree_e(("A", "B"))`       |
+| `nbr_v(id)`         | 顶点的邻居顶点   | `neighbors = hg.nbr_v("A")`            |
+| `nbr_e_of_v(id)`    | 顶点的邻居超边   | `edges = hg.nbr_e_of_v("A")`           |
+| `nbr_v_of_e(tuple)` | 超边的邻居顶点   | `vertices = hg.nbr_v_of_e(("A", "B"))` |
 
 ### 全局属性
 
-| 属性 | 描述 | 示例 |
-|------|------|------|
+| 属性    | 描述     | 示例                  |
+| ------- | -------- | --------------------- |
 | `all_v` | 所有顶点 | `vertices = hg.all_v` |
-| `all_e` | 所有超边 | `edges = hg.all_e` |
-| `num_v` | 顶点数量 | `count = hg.num_v` |
-| `num_e` | 超边数量 | `count = hg.num_e` |
+| `all_e` | 所有超边 | `edges = hg.all_e`    |
+| `num_v` | 顶点数量 | `count = hg.num_v`    |
+| `num_e` | 超边数量 | `count = hg.num_e`    |
 
 ### 持久化操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `save(path)` | 保存到文件 | `hg.save("graph.hgdb")` |
-| `load(path)` | 从文件加载 | `hg.load("graph.hgdb")` |
+| 方法                      | 描述                | 示例                                                      |
+| ------------------------- | ------------------- | --------------------------------------------------------- |
+| `save(path)`              | 保存到文件          | `hg.save("graph.hgdb")`                                   |
+| `load(path)`              | 从文件加载          | `hg.load("graph.hgdb")`                                   |
+| `to_hif(filepath=None)`   | 导出为 HIF 格式     | `hif_data = hg.to_hif()` 或 `hg.to_hif("graph.hif.json")` |
+| `save_as_hif(filepath)`   | 保存为 HIF 格式文件 | `hg.save_as_hif("graph.hif.json")`                        |
+| `from_hif(hif_data)`      | 从 HIF 格式数据加载 | `hg.from_hif(hif_data)` 或 `hg.from_hif(json_string)`     |
+| `load_from_hif(filepath)` | 从 HIF 格式文件加载 | `hg.load_from_hif("graph.hif.json")`                      |
 
 ### 可视化
-| 方法 | 描述 | 示例 |
-|------|------|------|
+| 方法                       | 描述       | 示例                 |
+| -------------------------- | ---------- | -------------------- |
 | `draw(port, open_browser)` | 启动可视化 | `hg.draw(port=8080)` |
 
 [查看完整可视化 API →](visualization.zh.md)
@@ -144,6 +148,26 @@ updated_user = hg.v("user1")
 updated_edge = hg.e(("user1", "user2"))
 ```
 
+### HIF 格式导入导出
+
+Hypergraph-DB 支持 HIF (Hypergraph Interchange Format) 格式,用于标准化的超图数据交换。
+
+#### 导出到 HIF 格式
+
+```python
+# 导出并保存到文件
+hg.to_hif("my_hypergraph.hif.json")
+
+# 或者使用 save_as_hif 方法
+hg.save_as_hif("my_hypergraph.hif.json")
+```
+
+#### 从 HIF 格式导入
+
+```python
+hg.load_from_hif("my_hypergraph.hif.json")
+```
+
 ## 错误处理
 
 ### 常见异常
@@ -152,16 +176,16 @@ updated_edge = hg.e(("user1", "user2"))
 ```python
 try:
     # 尝试添加顶点
     hg.add_v("user1", {"name": "张三"})
-    
+
     # 尝试添加超边(顶点必须已存在)
     hg.add_e(("user1", "user999"), {"type": "朋友"})
-    
+
 except AssertionError as e:
     print(f"断言错误: {e}")
-    
+
 except KeyError as e:
     print(f"键错误: {e}")
-    
+
 except Exception as e:
     print(f"其他错误: {e}")
@@ -199,24 +223,24 @@ from hyperdb import HypergraphDB
 
 class AnalyticsHypergraphDB(HypergraphDB):
     """扩展了分析功能的超图数据库"""
-    
+
     def clustering_coefficient(self, vertex_id: str) -> float:
         """计算顶点的聚类系数"""
         neighbors = self.nbr_v(vertex_id)
         if len(neighbors) < 2:
             return 0.0
-    
+
         # 计算邻居之间的连接
         connections = 0
         total_possible = len(neighbors) * (len(neighbors) - 1) // 2
-    
+
         for edge in self.all_e:
             edge_vertices = self.nbr_v_of_e(edge)
             if len(edge_vertices.intersection(neighbors)) >= 2:
                 connections += 1
-    
+
         return connections / total_possible if total_possible > 0 else 0.0
-    
+
     def k_core_decomposition(self, k: int) -> Set[str]:
         """k-核分解:找出度数至少为k的顶点"""
         return {v for v in self.all_v if self.degree_v(v) >= k}
diff --git a/hyperdb/hypergraph.py b/hyperdb/hypergraph.py
index 8b85791..999bb07 100644
--- a/hyperdb/hypergraph.py
+++ b/hyperdb/hypergraph.py
@@ -1,3 +1,4 @@
+import json
 import pickle as pkl
 from collections import defaultdict
 from collections.abc import Hashable
@@ -336,3 +337,269 @@ def nbr_v(self, v_id: Any, exclude_self=True) -> set:
         if exclude_self:
             nbrs.remove(v_id)
         return nbrs
+
+    def to_hif(self, file_path: Optional[Union[str, Path]] = None) -> Dict[str, Any]:
+        r"""
+        Export the hypergraph to HIF (Hypergraph Interchange Format).
+
+        Args:
+            ``file_path`` (``Union[str, Path]``, optional): If provided, the HIF dictionary is also written to this file as JSON.
+
+        Returns:
+            ``Dict[str, Any]``: The hypergraph as a HIF dictionary.
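+
+        Example (usage sketch)::
+
+            hif = hg.to_hif()              # build the HIF dict in memory
+            hg.to_hif("graph.hif.json")    # same, and also write the JSON file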
+        """
+        # Build incidences array (required)
+        incidences = []
+        edge_to_id = {}
+
+        # First pass: assign edge IDs
+        for e_tuple in self._e_data.keys():
+            e_data = self._e_data[e_tuple]
+            # Check if edge has an ID/name attribute
+            if "id" in e_data:
+                edge_id = e_data["id"]
+            elif "name" in e_data:
+                edge_id = e_data["name"]
+            else:
+                # No explicit ID: derive one from the sorted vertex IDs,
+                # e.g. (1, 2, 7) -> "1_2_7"
+                edge_id = "_".join(str(v) for v in sorted(e_tuple))
+            edge_to_id[e_tuple] = edge_id
+
+        # Second pass: build incidences
+        for e_tuple in self._e_data.keys():
+            edge_id = edge_to_id[e_tuple]
+
+            for v_id in e_tuple:
+                v_data = self._v_data.get(v_id, {})
+                # Mirror the node's attrs on each incidence (all fields except weight)
+                v_attrs = {k: v for k, v in v_data.items() if k != "weight"}
+
+                incidence = {
+                    "edge": edge_id,
+                    "node": v_id,
+                    "attrs": v_attrs,
+                }
+                # Only include weight if it exists
+                if "weight" in v_data:
+                    incidence["weight"] = v_data["weight"]
+                incidences.append(incidence)
+
+        # Build nodes array (optional)
+        nodes = []
+        for v_id in self._v_data.keys():
+            v_data = self._v_data[v_id]
+            v_attrs = {k: v for k, v in v_data.items() if k != "weight"}
+
+            node = {"node": v_id, "attrs": v_attrs}
+            # Only include weight if it exists
+            if "weight" in v_data:
+                node["weight"] = v_data["weight"]
+            nodes.append(node)
+
+        # Build edges array (optional)
+        edges = []
+        for e_tuple in self._e_data.keys():
+            e_data = self._e_data[e_tuple]
+            edge_id = edge_to_id[e_tuple]
+            e_attrs = {k: v for k, v in e_data.items() if k != "weight"}
+
+            edge = {"edge": edge_id, "attrs": e_attrs}
+            # Only include weight if it exists
+            if "weight" in e_data:
+                edge["weight"] = e_data["weight"]
+            edges.append(edge)
+
+        # Build HIF structure
+        hif_data = {
+            "incidences": incidences,
+            "network-type": "undirected",  # Hypergraph-DB edges are undirected
+        }
+
+        if nodes:
+            hif_data["nodes"] = nodes
+        if edges:
+            hif_data["edges"] = edges
+
+        # Save to file if path provided
+        if file_path is not None:
+            if isinstance(file_path, str):
+                file_path = Path(file_path)
+            try:
+                with open(file_path, "w", encoding="utf-8") as f:
+                    json.dump(hif_data, f, ensure_ascii=False, indent=2)
+            except Exception as e:
+                raise IOError(f"Failed to save HIF file: {e}") from e
+
+        return hif_data
+
+    def save_as_hif(self, file_path: Union[str, Path]) -> bool:
+        r"""
+        Save the hypergraph to a HIF-format JSON file.
+
+        Args:
+            ``file_path`` (``Union[str, Path]``): The file path to save the HIF file.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
+        """
+        try:
+            self.to_hif(file_path)
+            return True
+        except Exception:
+            return False
+
+    def from_hif(self, hif_data: Union[str, Path, Dict]) -> bool:
+        r"""
+        Load hypergraph from HIF format data.
+
+        Args:
+            ``hif_data`` (``Union[str, Path, Dict]``): HIF data as a dict, file path, or JSON string.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
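+
+        Example (usage sketch)::
+
+            hg.from_hif("graph.hif.json")        # from a file path
+            hg.from_hif('{"incidences": []}')    # from a JSON string
+            hg.from_hif({"incidences": []})      # from an already-parsed dict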
+        """
+        try:
+            # Load data if it's a file path or JSON string
+            if isinstance(hif_data, (str, Path)):
+                if isinstance(hif_data, str):
+                    # Distinguish a JSON string from a file path
+                    if hif_data.strip().startswith("{"):
+                        data = json.loads(hif_data)
+                    else:
+                        file_path = Path(hif_data)
+                        with open(file_path, "r", encoding="utf-8") as f:
+                            data = json.load(f)
+                else:
+                    with open(hif_data, "r", encoding="utf-8") as f:
+                        data = json.load(f)
+            elif isinstance(hif_data, dict):
+                data = hif_data
+            else:
+                return False
+
+            # Validate required field
+            if "incidences" not in data:
+                return False
+
+            # Clear existing data
+            self._v_data = {}
+            self._e_data = {}
+            self._v_inci = defaultdict(set)
+            self._clear_cache()
+
+            # Build edge mapping from incidences
+            edge_nodes_map: Dict[Any, Set[Any]] = {}  # edge_id -> set of nodes
+            edge_attrs_map: Dict[Any, Dict[str, Any]] = {}  # edge_id -> attrs
+            node_attrs_map: Dict[Any, Dict[str, Any]] = {}  # node_id -> attrs
+
+            # Process incidences
+            for incidence in data["incidences"]:
+                edge_id = incidence["edge"]
+                node_id = incidence["node"]
+
+                # Initialize edge if not seen
+                if edge_id not in edge_nodes_map:
+                    edge_nodes_map[edge_id] = set()
+
+                edge_nodes_map[edge_id].add(node_id)
+
+                # Store node attributes from incidence
+                if "attrs" in incidence:
+                    if node_id not in node_attrs_map:
+                        node_attrs_map[node_id] = {}
+                    node_attrs_map[node_id].update(incidence["attrs"])
+
+            # Process nodes array if present
+            if "nodes" in data:
+                for node in data["nodes"]:
+                    node_id = node["node"]
+                    if node_id not in node_attrs_map:
+                        node_attrs_map[node_id] = {}
+                    if "attrs" in node:
+                        node_attrs_map[node_id].update(node["attrs"])
+                    if "weight" in node:
+                        node_attrs_map[node_id]["weight"] = node["weight"]
+
+            # Process edges array if present
+            if "edges" in data:
+                for edge in data["edges"]:
+                    edge_id = edge["edge"]
+                    if edge_id not in edge_attrs_map:
+                        edge_attrs_map[edge_id] = {}
+                    if "attrs" in edge:
+                        edge_attrs_map[edge_id].update(edge["attrs"])
+                    if "weight" in edge:
+                        edge_attrs_map[edge_id]["weight"] = edge["weight"]
+
+            # Build hypergraph from edge mappings
+            for edge_id, node_set in edge_nodes_map.items():
+                if len(node_set) < 2:
+                    continue  # Hypergraph-DB requires at least two vertices per edge
+
+                # Convert to sorted tuple for edge key
+                e_tuple = tuple(sorted(node_set))
+
+                # Add nodes
+                for node_id in node_set:
+                    if node_id not in self._v_data:
+                        node_data = node_attrs_map.get(node_id, {}).copy()
+                        self._v_data[node_id] = node_data
+                        self._v_inci[node_id] = set()
+
+                # Add edge
+                e_data = edge_attrs_map.get(edge_id, {}).copy()
+                # Keep a meaningful HIF edge ID (e.g. a name or title) as 'id',
+                # skipping IDs that look auto-generated by to_hif ("1_2_7"-style)
+                edge_id_str = str(edge_id)
+                is_generated = edge_id_str.startswith("_") or (
+                    "_" in edge_id_str
+                    and all(c.isdigit() for c in edge_id_str.replace("_", ""))
+                )
+                if not is_generated and "id" not in e_data and "name" not in e_data:
+                    e_data["id"] = edge_id
+                self._e_data[e_tuple] = e_data
+
+                # Update incidence mapping
+                for node_id in node_set:
+                    self._v_inci[node_id].add(e_tuple)
+
+            # Restore isolated vertices that appear only in the nodes array
+            for node_id, node_attrs in node_attrs_map.items():
+                if node_id not in self._v_data:
+                    self._v_data[node_id] = node_attrs.copy()
+                    self._v_inci[node_id] = set()
+
+            self._clear_cache()
+            return True
+
+        except Exception:
+            return False
+
+    def load_from_hif(self, file_path: Union[str, Path]) -> bool:
+        r"""
+        Load hypergraph from a HIF-format JSON file.
+
+        Args:
+            ``file_path`` (``Union[str, Path]``): The file path to load the HIF file from.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
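+
+        Example (usage sketch)::
+
+            hg = HypergraphDB()
+            hg.load_from_hif("graph.hif.json")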
+        """
+        return self.from_hif(file_path)
diff --git a/tests/test_hypergraph.py b/tests/test_hypergraph.py
index d21498d..22ebb82 100644
--- a/tests/test_hypergraph.py
+++ b/tests/test_hypergraph.py
@@ -232,3 +232,120 @@ def test_save_and_load(hg, tmpdir):
     hg3 = HypergraphDB()
     hg3.load(file_path)
     assert hg == hg3
+
+
+def test_hif_export_and_import(hg, tmpdir):
+    """Test HIF format export and import"""
+    file_path = str(tmpdir.join("test.hif.json"))
+
+    # Export to HIF format
+    hif_data = hg.to_hif(file_path)
+
+    # Verify HIF structure
+    assert "incidences" in hif_data
+    assert "network-type" in hif_data
+    assert hif_data["network-type"] == "undirected"
+    assert len(hif_data["incidences"]) > 0
+
+    # Load from HIF format
+    hg2 = HypergraphDB()
+    assert hg2.load_from_hif(file_path) is True
+
+    # Verify data integrity
+    assert hg2.num_v == hg.num_v
+    assert hg2.num_e == hg.num_e
+    assert hg2.all_v == hg.all_v
+    assert hg2.all_e == hg.all_e
+
+    # Verify node data
+    for v_id in hg.all_v:
+        assert hg2.has_v(v_id)
+        # Compare node data (excluding potential HIF-specific fields)
+        v1_data = hg.v(v_id, {})
+        v2_data = hg2.v(v_id, {})
+        assert v1_data == v2_data
+
+    # Verify edge data
+    for e_tuple in hg.all_e:
+        assert hg2.has_e(e_tuple)
+        e1_data = hg.e(e_tuple, {})
+        e2_data = hg2.e(e_tuple, {})
+        assert e1_data == e2_data
+
+
+def test_hif_roundtrip(hg, tmpdir):
+    """Test HIF format roundtrip conversion"""
+    file_path = str(tmpdir.join("test_roundtrip.hif.json"))
+
+    # Export to HIF
+    hg.to_hif(file_path)
+
+    # Import from HIF
+    hg2 = HypergraphDB()
+    hg2.load_from_hif(file_path)
+
+    # Export again
+    file_path2 = str(tmpdir.join("test_roundtrip2.hif.json"))
+    hg2.to_hif(file_path2)
+
+    import json
+
+    # Verify both exports are identical
+    with open(file_path, "r") as f1, open(file_path2, "r") as f2:
+        data1 = json.load(f1)
+        data2 = json.load(f2)
+
+    # Compare structures (order may differ in arrays)
+    assert len(data1["incidences"]) == len(data2["incidences"])
+    if "nodes" in data1:
+        assert len(data1["nodes"]) == len(data2["nodes"])
+    if "edges" in data1:
+        assert len(data1["edges"]) == len(data2["edges"])
+
+
+def test_hif_export_with_attributes(hg, tmpdir):
+    """Test HIF export preserves node and edge attributes"""
+    # Add some attributes
+    hg.update_v(1, {"name": "Alice", "age": 30, "weight": 2.0})
+    hg.update_e((1, 2), {"relation": "knows", "strength": "high", "weight": 1.5})
+
+    file_path = str(tmpdir.join("test_attrs.hif.json"))
+    hif_data = hg.to_hif(file_path)
+
+    # Verify attributes are in nodes/edges arrays
+    assert "nodes" in hif_data
+    assert "edges" in hif_data
+
+    # Find node 1 in nodes array
+    node_1 = next((n for n in hif_data["nodes"] if n["node"] == 1), None)
+    assert node_1 is not None
+    assert "name" in node_1["attrs"]
+    assert node_1["attrs"]["name"] == "Alice"
+    assert node_1["weight"] == 2.0
+
+    # Find the (1, 2) edge in the edges array; to_hif generates the ID "1_2"
+    # for an edge without an explicit "id"/"name" attribute
+    edge_12 = next((e for e in hif_data["edges"] if e["edge"] == "1_2"), None)
+    assert edge_12 is not None
+    assert edge_12["attrs"]["relation"] == "knows"
+    assert edge_12["attrs"]["strength"] == "high"
+    assert edge_12["weight"] == 1.5
+
+    # Load and verify
+    hg2 = HypergraphDB()
+    hg2.load_from_hif(file_path)
+    assert hg2.v(1)["name"] == "Alice"
+    assert hg2.v(1).get("weight") == 2.0
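+
+
+def test_hif_from_json_string(hg):
+    """from_hif also accepts HIF data as a JSON string (sketch of the string path)."""
+    import json
+
+    hif_json = json.dumps(hg.to_hif())
+
+    hg2 = HypergraphDB()
+    assert hg2.from_hif(hif_json) is True
+    assert hg2.num_v == hg.num_v
+    assert hg2.num_e == hg.num_e
+    assert hg2.all_e == hg.all_e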