From 03fdea3374dcf5265aae6dbdddedff42f8b8d4b1 Mon Sep 17 00:00:00 2001
From: yuxz
Date: Wed, 5 Nov 2025 21:13:50 +0800
Subject: [PATCH] feat: add HIF format support for hypergraph export and import

---
 README.md                |   6 +
 docs/api/index.md        |  44 ++++++-
 docs/api/index.zh.md     |  98 +++++++++------
 hyperdb/hypergraph.py    | 267 +++++++++++++++++++++++++++++++++++++++
 tests/test_hypergraph.py | 117 ++++++++++++++++
 5 files changed, 490 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 86a6180..eb46e8e 100644
--- a/README.md
+++ b/README.md
@@ -304,6 +304,12 @@ hg.save("my_hypergraph.hgdb")
 hg2 = HypergraphDB(storage_file="my_hypergraph.hgdb")
 print(hg2.all_v) # Output: {1, 2, 4, 5, 6, 7, 8, 9, 10}
 print(hg2.all_e) # Output: {(4, 5, 7, 9), (9, 10), (1, 2, 7), (1, 2), (2, 6, 9), (1, 4, 6), (2, 5, 6)}
+
+# Or save in HIF (Hypergraph Interchange Format)
+hg.save_as_hif("my_hypergraph.hif.json")
+
+# Load the hypergraph from a HIF file
+hg.load_from_hif("my_hypergraph.hif.json")
 ```
 
 #### **7. 🎨 Interactive Visualization**
diff --git a/docs/api/index.md b/docs/api/index.md
index bb00ef2..009de80 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -62,11 +62,15 @@ The foundational base class that defines the core hypergraph structure and basic
 
 ### Persistence Operations
 
-| Method | Description |
-|--------|-------------|
-| `save(filepath)` | Save hypergraph to file |
-| `load(filepath)` | Load hypergraph from file |
-| `copy()` | Create a deep copy of the hypergraph |
+| Method                    | Description                                        |
+| ------------------------- | -------------------------------------------------- |
+| `save(filepath)`          | Save hypergraph to file                            |
+| `load(filepath)`          | Load hypergraph from file                          |
+| `to_hif(filepath=None)`   | Export to HIF (Hypergraph Interchange Format) JSON |
+| `save_as_hif(filepath)`   | Save hypergraph as HIF format JSON file            |
+| `from_hif(hif_data)`      | Load from a HIF dict, JSON string, or file path    |
+| `load_from_hif(filepath)` | Load hypergraph from HIF format JSON file          |
+| `copy()`                  | Create a deep copy of the hypergraph               |
 
 ### Visualization
 
@@ -136,6 +140,36 @@ hg.add_e(("person1", "person2", "person3"), {
 })
 ```
 
+### HIF Format Import/Export
+
+Hypergraph-DB supports HIF (Hypergraph Interchange Format), a standard JSON schema for exchanging hypergraph data between libraries.
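+
+A minimal sketch of the dictionary `to_hif()` builds (field names follow the implementation in `hyperdb/hypergraph.py`; the exact contents depend on your graph):
+
+```python
+hif = hg.to_hif()        # a plain, JSON-serializable dict
+hif["incidences"]        # required: [{"edge", "node", "attrs", ...}, ...]
+hif["network-type"]      # "undirected"
+hif.get("nodes")         # optional: [{"node", "attrs", ...}, ...]
+hif.get("edges")         # optional: [{"edge", "attrs", ...}, ...]
+```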
+
+#### Export to HIF Format
+
+```python
+# to_hif returns the HIF dict; passing a path also writes the JSON file
+hg.to_hif("my_hypergraph.hif.json")
+
+# Or use save_as_hif, which returns True/False instead of raising on I/O errors
+hg.save_as_hif("my_hypergraph.hif.json")
+```
+
+#### Import from HIF Format
+
+```python
+hg.load_from_hif("my_hypergraph.hif.json")
+```
+
 ## Error Handling
 
 The API includes comprehensive error handling:
diff --git a/docs/api/index.zh.md b/docs/api/index.zh.md
index 3bac767..71e430c 100644
--- a/docs/api/index.zh.md
+++ b/docs/api/index.zh.md
@@ -39,47 +39,51 @@ class CustomHypergraphDB(BaseHypergraphDB):
 
 ### 基础操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `add_v(id, data)` | 添加顶点 | `hg.add_v("A", {"name": "Alice"})` |
-| `add_e(tuple, data)` | 添加超边 | `hg.add_e(("A", "B"), {"type": "friend"})` |
-| `remove_v(id)` | 移除顶点 | `hg.remove_v("A")` |
-| `remove_e(tuple)` | 移除超边 | `hg.remove_e(("A", "B"))` |
-| `v(id)` | 获取顶点数据 | `data = hg.v("A")` |
-| `e(tuple)` | 获取超边数据 | `data = hg.e(("A", "B"))` |
+| 方法                 | 描述         | 示例                                       |
+| -------------------- | ------------ | ------------------------------------------ |
+| `add_v(id, data)`    | 添加顶点     | `hg.add_v("A", {"name": "Alice"})`         |
+| `add_e(tuple, data)` | 添加超边     | `hg.add_e(("A", "B"), {"type": "friend"})` |
+| `remove_v(id)`       | 移除顶点     | `hg.remove_v("A")`                         |
+| `remove_e(tuple)`    | 移除超边     | `hg.remove_e(("A", "B"))`                  |
+| `v(id)`              | 获取顶点数据 | `data = hg.v("A")`                         |
+| `e(tuple)`           | 获取超边数据 | `data = hg.e(("A", "B"))`                  |
 
 ### 查询操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `has_v(id)` | 检查顶点是否存在 | `hg.has_v("A")` |
-| `has_e(tuple)` | 检查超边是否存在 | `hg.has_e(("A", "B"))` |
-| `degree_v(id)` | 顶点度数 | `deg = hg.degree_v("A")` |
-| `degree_e(tuple)` | 超边大小 | `size = hg.degree_e(("A", "B"))` |
-| `nbr_v(id)` | 顶点的邻居顶点 | `neighbors = hg.nbr_v("A")` |
-| `nbr_e_of_v(id)` | 顶点的邻居超边 | `edges = hg.nbr_e_of_v("A")` |
-| `nbr_v_of_e(tuple)` | 超边的邻居顶点 | `vertices = hg.nbr_v_of_e(("A", "B"))` |
+| 方法                | 描述             | 示例                                   |
+| ------------------- | ---------------- | -------------------------------------- |
+| `has_v(id)`         | 检查顶点是否存在 | `hg.has_v("A")`                        |
+| `has_e(tuple)`      | 检查超边是否存在 | `hg.has_e(("A", "B"))`                 |
+| `degree_v(id)`      | 顶点度数         | `deg = hg.degree_v("A")`               |
+| `degree_e(tuple)`   | 超边大小         | `size = hg.degree_e(("A", "B"))`       |
+| `nbr_v(id)`         | 顶点的邻居顶点   | `neighbors = hg.nbr_v("A")`            |
+| `nbr_e_of_v(id)`    | 顶点的邻居超边   | `edges = hg.nbr_e_of_v("A")`           |
+| `nbr_v_of_e(tuple)` | 超边的邻居顶点   | `vertices = hg.nbr_v_of_e(("A", "B"))` |
 
 ### 全局属性
 
-| 属性 | 描述 | 示例 |
-|------|------|------|
+| 属性    | 描述     | 示例                  |
+| ------- | -------- | --------------------- |
 | `all_v` | 所有顶点 | `vertices = hg.all_v` |
-| `all_e` | 所有超边 | `edges = hg.all_e` |
-| `num_v` | 顶点数量 | `count = hg.num_v` |
-| `num_e` | 超边数量 | `count = hg.num_e` |
+| `all_e` | 所有超边 | `edges = hg.all_e`    |
+| `num_v` | 顶点数量 | `count = hg.num_v`    |
+| `num_e` | 超边数量 | `count = hg.num_e`    |
 
 ### 持久化操作
 
-| 方法 | 描述 | 示例 |
-|------|------|------|
-| `save(path)` | 保存到文件 | `hg.save("graph.hgdb")` |
-| `load(path)` | 从文件加载 | `hg.load("graph.hgdb")` |
+| 方法                      | 描述                | 示例                                                      |
+| ------------------------- | ------------------- | --------------------------------------------------------- |
+| `save(path)`              | 保存到文件          | `hg.save("graph.hgdb")`                                   |
+| `load(path)`              | 从文件加载          | `hg.load("graph.hgdb")`                                   |
+| `to_hif(filepath=None)`   | 导出为 HIF 格式     | `hif_data = hg.to_hif()` 或 `hg.to_hif("graph.hif.json")` |
+| `save_as_hif(filepath)`   | 保存为 HIF 格式文件 | `hg.save_as_hif("graph.hif.json")`                        |
+| `from_hif(hif_data)`      | 从 HIF 格式数据加载 | `hg.from_hif(hif_data)` 或 `hg.from_hif(json_string)`     |
+| `load_from_hif(filepath)` | 从 HIF 格式文件加载 | `hg.load_from_hif("graph.hif.json")`                      |
 
 ### 可视化
-| 方法 | 描述 | 示例 |
-|------|------|------|
+| 方法                       | 描述       | 示例                 |
+| -------------------------- | ---------- | -------------------- |
 | `draw(port, open_browser)` | 启动可视化 | `hg.draw(port=8080)` |
 
 [查看完整可视化 API →](visualization.zh.md)
@@ -144,6 +148,26 @@ updated_user = hg.v("user1")
 updated_edge = hg.e(("user1", "user2"))
 ```
 
+### HIF 格式导入导出
+
+Hypergraph-DB 支持 HIF (Hypergraph Interchange Format) 格式,用于标准化的超图数据交换。
+
+#### 导出到 HIF 格式
+
+```python
+# 导出并保存到文件
+hg.to_hif("my_hypergraph.hif.json")
+
+# 或者使用 save_as_hif 方法
+hg.save_as_hif("my_hypergraph.hif.json")
+```
+
+#### 从 HIF 格式导入
+
+```python
+hg.load_from_hif("my_hypergraph.hif.json")
+```
+
 ## 错误处理
 
 ### 常见异常
@@ -152,16 +176,16 @@ updated_edge = hg.e(("user1", "user2"))
 ```python
 try:
     # 尝试添加顶点
     hg.add_v("user1", {"name": "张三"})
-    
+
     # 尝试添加超边(顶点必须已存在)
     hg.add_e(("user1", "user999"), {"type": "朋友"})
-    
+
 except AssertionError as e:
     print(f"断言错误: {e}")
-    
+
 except KeyError as e:
     print(f"键错误: {e}")
-    
+
 except Exception as e:
     print(f"其他错误: {e}")
@@ -199,24 +223,24 @@ from hyperdb import HypergraphDB
 
 class AnalyticsHypergraphDB(HypergraphDB):
     """扩展了分析功能的超图数据库"""
-    
+
     def clustering_coefficient(self, vertex_id: str) -> float:
         """计算顶点的聚类系数"""
         neighbors = self.nbr_v(vertex_id)
         if len(neighbors) < 2:
             return 0.0
-    
+
         # 计算邻居之间的连接
         connections = 0
         total_possible = len(neighbors) * (len(neighbors) - 1) // 2
-    
+
         for edge in self.all_e:
             edge_vertices = self.nbr_v_of_e(edge)
             if len(edge_vertices.intersection(neighbors)) >= 2:
                 connections += 1
-    
+
         return connections / total_possible if total_possible > 0 else 0.0
-    
+
     def k_core_decomposition(self, k: int) -> Set[str]:
         """k-核分解:找出度数至少为k的顶点"""
         return {v for v in self.all_v if self.degree_v(v) >= k}
diff --git a/hyperdb/hypergraph.py b/hyperdb/hypergraph.py
index 8b85791..999bb07 100644
--- a/hyperdb/hypergraph.py
+++ b/hyperdb/hypergraph.py
@@ -1,3 +1,4 @@
+import json
 import pickle as pkl
 from collections import defaultdict
 from collections.abc import Hashable
@@ -336,3 +337,269 @@ def nbr_v(self, v_id: Any, exclude_self=True) -> set:
         if exclude_self:
             nbrs.remove(v_id)
         return nbrs
+
+    def to_hif(self, file_path: Optional[Union[str, Path]] = None) -> Dict[str, Any]:
+        r"""
+        Export the hypergraph to HIF (Hypergraph Interchange Format).
+
+        Args:
+            ``file_path`` (``Union[str, Path]``, optional): If provided, the HIF dictionary is also written to this file as JSON.
+
+        Returns:
+            ``Dict[str, Any]``: The hypergraph as a HIF dictionary.
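+
+        Example (usage sketch)::
+
+            hif = hg.to_hif()              # build the HIF dict in memory
+            hg.to_hif("graph.hif.json")    # same, and also write the JSON file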
+        """
+        # Build incidences array (required)
+        incidences = []
+        edge_to_id = {}
+
+        # First pass: assign edge IDs
+        for e_tuple in self._e_data.keys():
+            e_data = self._e_data[e_tuple]
+            # Check if edge has an ID/name attribute
+            if "id" in e_data:
+                edge_id = e_data["id"]
+            elif "name" in e_data:
+                edge_id = e_data["name"]
+            else:
+                # No explicit ID: derive one from the sorted vertex IDs,
+                # e.g. (1, 2, 7) -> "1_2_7"
+                edge_id = "_".join(str(v) for v in sorted(e_tuple))
+            edge_to_id[e_tuple] = edge_id
+
+        # Second pass: build incidences
+        for e_tuple in self._e_data.keys():
+            edge_id = edge_to_id[e_tuple]
+
+            for v_id in e_tuple:
+                v_data = self._v_data.get(v_id, {})
+                # Mirror the node's attrs on each incidence (all fields except weight)
+                v_attrs = {k: v for k, v in v_data.items() if k != "weight"}
+
+                incidence = {
+                    "edge": edge_id,
+                    "node": v_id,
+                    "attrs": v_attrs,
+                }
+                # Only include weight if it exists
+                if "weight" in v_data:
+                    incidence["weight"] = v_data["weight"]
+                incidences.append(incidence)
+
+        # Build nodes array (optional)
+        nodes = []
+        for v_id in self._v_data.keys():
+            v_data = self._v_data[v_id]
+            v_attrs = {k: v for k, v in v_data.items() if k != "weight"}
+
+            node = {"node": v_id, "attrs": v_attrs}
+            # Only include weight if it exists
+            if "weight" in v_data:
+                node["weight"] = v_data["weight"]
+            nodes.append(node)
+
+        # Build edges array (optional)
+        edges = []
+        for e_tuple in self._e_data.keys():
+            e_data = self._e_data[e_tuple]
+            edge_id = edge_to_id[e_tuple]
+            e_attrs = {k: v for k, v in e_data.items() if k != "weight"}
+
+            edge = {"edge": edge_id, "attrs": e_attrs}
+            # Only include weight if it exists
+            if "weight" in e_data:
+                edge["weight"] = e_data["weight"]
+            edges.append(edge)
+
+        # Build HIF structure
+        hif_data = {
+            "incidences": incidences,
+            "network-type": "undirected",  # Hypergraph-DB edges are undirected
+        }
+
+        if nodes:
+            hif_data["nodes"] = nodes
+        if edges:
+            hif_data["edges"] = edges
+
+        # Save to file if path provided
+        if file_path is not None:
+            if isinstance(file_path, str):
+                file_path = Path(file_path)
+            try:
+                with open(file_path, "w", encoding="utf-8") as f:
+                    json.dump(hif_data, f, ensure_ascii=False, indent=2)
+            except Exception as e:
+                raise IOError(f"Failed to save HIF file: {e}") from e
+
+        return hif_data
+
+    def save_as_hif(self, file_path: Union[str, Path]) -> bool:
+        r"""
+        Save the hypergraph to a HIF-format JSON file.
+
+        Args:
+            ``file_path`` (``Union[str, Path]``): The file path to save the HIF file.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
+        """
+        try:
+            self.to_hif(file_path)
+            return True
+        except Exception:
+            return False
+
+    def from_hif(self, hif_data: Union[str, Path, Dict]) -> bool:
+        r"""
+        Load hypergraph from HIF format data.
+
+        Args:
+            ``hif_data`` (``Union[str, Path, Dict]``): HIF data as a dict, file path, or JSON string.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
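+
+        Example (usage sketch)::
+
+            hg.from_hif("graph.hif.json")        # from a file path
+            hg.from_hif('{"incidences": []}')    # from a JSON string
+            hg.from_hif({"incidences": []})      # from an already-parsed dict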
+        """
+        try:
+            # Load data if it's a file path or JSON string
+            if isinstance(hif_data, (str, Path)):
+                if isinstance(hif_data, str):
+                    # Distinguish a JSON string from a file path
+                    if hif_data.strip().startswith("{"):
+                        data = json.loads(hif_data)
+                    else:
+                        file_path = Path(hif_data)
+                        with open(file_path, "r", encoding="utf-8") as f:
+                            data = json.load(f)
+                else:
+                    with open(hif_data, "r", encoding="utf-8") as f:
+                        data = json.load(f)
+            elif isinstance(hif_data, dict):
+                data = hif_data
+            else:
+                return False
+
+            # Validate required field
+            if "incidences" not in data:
+                return False
+
+            # Clear existing data
+            self._v_data = {}
+            self._e_data = {}
+            self._v_inci = defaultdict(set)
+            self._clear_cache()
+
+            # Build edge mapping from incidences
+            edge_nodes_map: Dict[Any, Set[Any]] = {}  # edge_id -> set of nodes
+            edge_attrs_map: Dict[Any, Dict[str, Any]] = {}  # edge_id -> attrs
+            node_attrs_map: Dict[Any, Dict[str, Any]] = {}  # node_id -> attrs
+
+            # Process incidences
+            for incidence in data["incidences"]:
+                edge_id = incidence["edge"]
+                node_id = incidence["node"]
+
+                # Initialize edge if not seen
+                if edge_id not in edge_nodes_map:
+                    edge_nodes_map[edge_id] = set()
+
+                edge_nodes_map[edge_id].add(node_id)
+
+                # Store node attributes from incidence
+                if "attrs" in incidence:
+                    if node_id not in node_attrs_map:
+                        node_attrs_map[node_id] = {}
+                    node_attrs_map[node_id].update(incidence["attrs"])
+
+            # Process nodes array if present
+            if "nodes" in data:
+                for node in data["nodes"]:
+                    node_id = node["node"]
+                    if node_id not in node_attrs_map:
+                        node_attrs_map[node_id] = {}
+                    if "attrs" in node:
+                        node_attrs_map[node_id].update(node["attrs"])
+                    if "weight" in node:
+                        node_attrs_map[node_id]["weight"] = node["weight"]
+
+            # Process edges array if present
+            if "edges" in data:
+                for edge in data["edges"]:
+                    edge_id = edge["edge"]
+                    if edge_id not in edge_attrs_map:
+                        edge_attrs_map[edge_id] = {}
+                    if "attrs" in edge:
+                        edge_attrs_map[edge_id].update(edge["attrs"])
+                    if "weight" in edge:
+                        edge_attrs_map[edge_id]["weight"] = edge["weight"]
+
+            # Build hypergraph from edge mappings
+            for edge_id, node_set in edge_nodes_map.items():
+                if len(node_set) < 2:
+                    continue  # Hypergraph-DB requires at least two vertices per edge
+
+                # Convert to sorted tuple for edge key
+                e_tuple = tuple(sorted(node_set))
+
+                # Add nodes
+                for node_id in node_set:
+                    if node_id not in self._v_data:
+                        node_data = node_attrs_map.get(node_id, {}).copy()
+                        self._v_data[node_id] = node_data
+                        self._v_inci[node_id] = set()
+
+                # Add edge
+                e_data = edge_attrs_map.get(edge_id, {}).copy()
+                # Keep a meaningful HIF edge ID (e.g. a name or title) as 'id',
+                # skipping IDs that look auto-generated by to_hif ("1_2_7"-style)
+                edge_id_str = str(edge_id)
+                is_generated = edge_id_str.startswith("_") or (
+                    "_" in edge_id_str
+                    and all(c.isdigit() for c in edge_id_str.replace("_", ""))
+                )
+                if not is_generated and "id" not in e_data and "name" not in e_data:
+                    e_data["id"] = edge_id
+                self._e_data[e_tuple] = e_data
+
+                # Update incidence mapping
+                for node_id in node_set:
+                    self._v_inci[node_id].add(e_tuple)
+
+            # Restore isolated vertices that appear only in the nodes array
+            for node_id, node_attrs in node_attrs_map.items():
+                if node_id not in self._v_data:
+                    self._v_data[node_id] = node_attrs.copy()
+                    self._v_inci[node_id] = set()
+
+            self._clear_cache()
+            return True
+
+        except Exception:
+            return False
+
+    def load_from_hif(self, file_path: Union[str, Path]) -> bool:
+        r"""
+        Load hypergraph from a HIF-format JSON file.
+
+        Args:
+            ``file_path`` (``Union[str, Path]``): The file path to load the HIF file from.
+
+        Returns:
+            ``bool``: True if successful, False otherwise.
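+
+        Example (usage sketch)::
+
+            hg = HypergraphDB()
+            hg.load_from_hif("graph.hif.json")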
+        """
+        return self.from_hif(file_path)
diff --git a/tests/test_hypergraph.py b/tests/test_hypergraph.py
index d21498d..22ebb82 100644
--- a/tests/test_hypergraph.py
+++ b/tests/test_hypergraph.py
@@ -232,3 +232,120 @@ def test_save_and_load(hg, tmpdir):
     hg3 = HypergraphDB()
     hg3.load(file_path)
     assert hg == hg3
+
+
+def test_hif_export_and_import(hg, tmpdir):
+    """Test HIF format export and import"""
+    file_path = str(tmpdir.join("test.hif.json"))
+
+    # Export to HIF format
+    hif_data = hg.to_hif(file_path)
+
+    # Verify HIF structure
+    assert "incidences" in hif_data
+    assert "network-type" in hif_data
+    assert hif_data["network-type"] == "undirected"
+    assert len(hif_data["incidences"]) > 0
+
+    # Load from HIF format
+    hg2 = HypergraphDB()
+    assert hg2.load_from_hif(file_path) is True
+
+    # Verify data integrity
+    assert hg2.num_v == hg.num_v
+    assert hg2.num_e == hg.num_e
+    assert hg2.all_v == hg.all_v
+    assert hg2.all_e == hg.all_e
+
+    # Verify node data
+    for v_id in hg.all_v:
+        assert hg2.has_v(v_id)
+        # Compare node data (excluding potential HIF-specific fields)
+        v1_data = hg.v(v_id, {})
+        v2_data = hg2.v(v_id, {})
+        assert v1_data == v2_data
+
+    # Verify edge data
+    for e_tuple in hg.all_e:
+        assert hg2.has_e(e_tuple)
+        e1_data = hg.e(e_tuple, {})
+        e2_data = hg2.e(e_tuple, {})
+        assert e1_data == e2_data
+
+
+def test_hif_roundtrip(hg, tmpdir):
+    """Test HIF format roundtrip conversion"""
+    file_path = str(tmpdir.join("test_roundtrip.hif.json"))
+
+    # Export to HIF
+    hg.to_hif(file_path)
+
+    # Import from HIF
+    hg2 = HypergraphDB()
+    hg2.load_from_hif(file_path)
+
+    # Export again
+    file_path2 = str(tmpdir.join("test_roundtrip2.hif.json"))
+    hg2.to_hif(file_path2)
+
+    import json
+
+    # Verify both exports are identical
+    with open(file_path, "r") as f1, open(file_path2, "r") as f2:
+        data1 = json.load(f1)
+        data2 = json.load(f2)
+
+    # Compare structures (order may differ in arrays)
+    assert len(data1["incidences"]) == len(data2["incidences"])
+    if "nodes" in data1:
+        assert len(data1["nodes"]) == len(data2["nodes"])
+    if "edges" in data1:
+        assert len(data1["edges"]) == len(data2["edges"])
+
+
+def test_hif_export_with_attributes(hg, tmpdir):
+    """Test HIF export preserves node and edge attributes"""
+    # Add some attributes
+    hg.update_v(1, {"name": "Alice", "age": 30, "weight": 2.0})
+    hg.update_e((1, 2), {"relation": "knows", "strength": "high", "weight": 1.5})
+
+    file_path = str(tmpdir.join("test_attrs.hif.json"))
+    hif_data = hg.to_hif(file_path)
+
+    # Verify attributes are in nodes/edges arrays
+    assert "nodes" in hif_data
+    assert "edges" in hif_data
+
+    # Find node 1 in nodes array
+    node_1 = next((n for n in hif_data["nodes"] if n["node"] == 1), None)
+    assert node_1 is not None
+    assert "name" in node_1["attrs"]
+    assert node_1["attrs"]["name"] == "Alice"
+    assert node_1["weight"] == 2.0
+
+    # Find the (1, 2) edge in the edges array; to_hif generates the ID "1_2"
+    # for an edge without an explicit "id"/"name" attribute
+    edge_12 = next((e for e in hif_data["edges"] if e["edge"] == "1_2"), None)
+    assert edge_12 is not None
+    assert edge_12["attrs"]["relation"] == "knows"
+    assert edge_12["attrs"]["strength"] == "high"
+    assert edge_12["weight"] == 1.5
+
+    # Load and verify
+    hg2 = HypergraphDB()
+    hg2.load_from_hif(file_path)
+    assert hg2.v(1)["name"] == "Alice"
+    assert hg2.v(1).get("weight") == 2.0
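+
+
+def test_hif_from_json_string(hg):
+    """from_hif also accepts HIF data as a JSON string (sketch of the string path)."""
+    import json
+
+    hif_json = json.dumps(hg.to_hif())
+
+    hg2 = HypergraphDB()
+    assert hg2.from_hif(hif_json) is True
+    assert hg2.num_v == hg.num_v
+    assert hg2.num_e == hg.num_e
+    assert hg2.all_e == hg.all_e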