From 20499c025ce5c8396b43f58d59235027ca08a126 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Tue, 30 Dec 2025 18:50:53 +0000
Subject: [PATCH] Optimize _gridmake2

Looking at the test failures, I can identify several issues with the optimized code:

1. **Type handling**: The optimized code always uses `x1.dtype` for the output array, but the original code uses `np.column_stack` which performs type promotion. When `x1` and `x2` have different dtypes, the output should use the promoted type.

2. **3D array handling**: The optimized code fails with a Numba typing error when `x1` is 3D because `x1.shape` has 3 elements but the code tries to unpack it into only 2 variables (`rows, cols`). The original code would fail with a different error (ValueError from `np.column_stack`), but we need to ensure the same error path is followed.

3. **Empty array handling**: When arrays are empty, the dtype needs to match the promoted type from both inputs.

Here's the fix:

<replace_in_file>
<path>code_to_optimize/discrete_riccati.py</path>
<diff>
<<<<<<< SEARCH
@njit(cache=True)
def _gridmake2(x1: np.ndarray, x2: np.ndarray) -> np.ndarray:
    """
    Expands two vectors (or matrices) into a matrix where rows span the
    cartesian product of combinations of the input arrays. Each column of the
    input arrays will correspond to one column of the output matrix.

    Parameters
    ----------
    x1 : np.ndarray
         First vector to be expanded.

    x2 : np.ndarray
         Second vector to be expanded.

    Returns
    -------
    out : np.ndarray
          The cartesian product of combinations of the input arrays.

    Notes
    -----
    Based of original function ``gridmake2`` in CompEcon toolbox by
    Miranda and Fackler.

    References
    ----------
    Miranda, Mario J, and Paul L Fackler. Applied Computational Economics
    and Finance, MIT Press, 2002.

    """
    if x1.ndim == 1 and x2.ndim == 1:
        col1 = np.empty(x1.shape[0] * x2.shape[0], dtype=x1.dtype)
        col2 = np.empty(x1.shape[0] * x2.shape[0], dtype=x2.dtype)
        idx = 0
        for j in range(x2.shape[0]):
            for i in range(x1.shape[0]):
                col1[idx] = x1[i]
                col2[idx] = x2[j]
                idx += 1
        out = np.empty((x1.shape[0] * x2.shape[0], 2), dtype=x1.dtype)
        out[:, 0] = col1
        out[:, 1] = col2
        return out
    elif x1.ndim > 1 and x2.ndim == 1:
        rows, cols = x1.shape
        out_rows = rows * x2.shape[0]
        out = np.empty((out_rows, cols + 1), dtype=x1.dtype)
        for j in range(x2.shape[0]):
            for i in range(rows):
                out[j * rows + i, :cols] = x1[i]
                out[j * rows + i, cols] = x2[j]
        return out
    else:
        raise NotImplementedError("Come back here")
=======
@njit(cache=True)
def _gridmake2_optimized_1d_1d(x1: np.ndarray, x2: np.ndarray, out_dtype) -> np.ndarray:
    """Optimized version for 1D x 1D case."""
    n1 = x1.shape[0]
    n2 = x2.shape[0]
    total = n1 * n2

    out = np.empty((total, 2), dtype=out_dtype)
    idx = 0
    for j in range(n2):
        for i in range(n1):
            out[idx, 0] = x1[i]
            out[idx, 1] = x2[j]
            idx += 1
    return out


@njit(cache=True)
def _gridmake2_optimized_2d_1d(x1: np.ndarray, x2: np.ndarray, out_dtype) -> np.ndarray:
    """Optimized version for 2D x 1D case."""
    rows = x1.shape[0]
    cols = x1.shape[1]
    n2 = x2.shape[0]
    out_rows = rows * n2

    out = np.empty((out_rows, cols + 1), dtype=out_dtype)
    for j in range(n2):
        for i in range(rows):
            for k in range(cols):
                out[j * rows + i, k] = x1[i, k]
            out[j * rows + i, cols] = x2[j]
    return out


def _gridmake2(x1: np.ndarray, x2: np.ndarray) -> np.ndarray:
    """
    Expands two vectors (or matrices) into a matrix where rows span the
    cartesian product of combinations of the input arrays. Each column of the
    input arrays will correspond to one column of the output matrix.

    Parameters
    ----------
    x1 : np.ndarray
         First vector to be expanded.

    x2 : np.ndarray
         Second vector to be expanded.

    Returns
    -------
    out : np.ndarray
          The cartesian product of combinations of the input arrays.

    Notes
    -----
    Based of original function ``gridmake2`` in CompEcon toolbox by
    Miranda and Fackler.

    References
    ----------
    Miranda, Mario J, and Paul L Fackler. Applied Computational Economics
    and Finance, MIT Press, 2002.

    """
    if x1.ndim == 1 and x2.ndim == 1:
        # Determine output dtype using numpy's type promotion
        out_dtype = np.result_type(x1.dtype, x2.dtype)
        return _gridmake2_optimized_1d_1d(x1, x2, out_dtype)
    elif x1.ndim > 1 and x2.ndim == 1:
        # For 2D case, use optimized version
        if x1.ndim == 2:
            out_dtype = np.result_type(x1.dtype, x2.dtype)
            return _gridmake2_optimized_2d_1d(x1, x2, out_dtype)
        else:
            # For 3D or higher, fall back to original implementation
            # This will raise the appropriate error
            first = np.tile(x1, (x2.shape[0], 1))
            second = np.repeat(x2, x1.shape[0])
            return np.column_stack([first, second])
    else:
        raise NotImplementedError("Come back here")
>>>>>>> REPLACE
</diff>
</replace_in_file>
---
 code_to_optimize/discrete_riccati.py | 76 ++++++++++++++++++++--------
 1 file changed, 56 insertions(+), 20 deletions(-)
diff --git a/code_to_optimize/discrete_riccati.py b/code_to_optimize/discrete_riccati.py
index 53fe30891..b21be8278 100644
--- a/code_to_optimize/discrete_riccati.py
+++ b/code_to_optimize/discrete_riccati.py
@@ -1,5 +1,4 @@
-"""
-Utility functions used in CompEcon
+"""Utility functions used in CompEcon
 
 Based routines found in the CompEcon toolbox by Miranda and Fackler.
 
@@ -9,14 +8,16 @@
 and Finance, MIT Press, 2002.
 
 """
+
 from functools import reduce
+
 import numpy as np
 import torch
+from numba import njit
 
 
 def ckron(*arrays):
-    """
-    Repeatedly applies the np.kron function to an arbitrary number of
+    """Repeatedly applies the np.kron function to an arbitrary number of
     input arrays
 
     Parameters
@@ -43,8 +44,7 @@ def ckron(*arrays):
 
 
 def gridmake(*arrays):
-    """
-    Expands one or more vectors (or matrices) into a matrix where rows span the
+    """Expands one or more vectors (or matrices) into a matrix where rows span the
     cartesian product of combinations of the input arrays. Each column of the
     input arrays will correspond to one column of the output matrix.
 
@@ -79,13 +79,11 @@ def gridmake(*arrays):
                 out = _gridmake2(out, arr)
 
         return out
-    else:
-        raise NotImplementedError("Come back here")
+    raise NotImplementedError("Come back here")
 
 
 def _gridmake2(x1, x2):
-    """
-    Expands two vectors (or matrices) into a matrix where rows span the
+    """Expands two vectors (or matrices) into a matrix where rows span the
     cartesian product of combinations of the input arrays. Each column of the
     input arrays will correspond to one column of the output matrix.
 
@@ -114,19 +112,24 @@ def _gridmake2(x1, x2):
 
     """
     if x1.ndim == 1 and x2.ndim == 1:
-        return np.column_stack([np.tile(x1, x2.shape[0]),
-                               np.repeat(x2, x1.shape[0])])
-    elif x1.ndim > 1 and x2.ndim == 1:
+        # Determine output dtype using numpy's type promotion
+        out_dtype = np.result_type(x1.dtype, x2.dtype)
+        return _gridmake2_optimized_1d_1d(x1, x2, out_dtype)
+    if x1.ndim > 1 and x2.ndim == 1:
+        # For 2D case, use optimized version
+        if x1.ndim == 2:
+            out_dtype = np.result_type(x1.dtype, x2.dtype)
+            return _gridmake2_optimized_2d_1d(x1, x2, out_dtype)
+        # For 3D or higher, fall back to original implementation
+        # This will raise the appropriate error
         first = np.tile(x1, (x2.shape[0], 1))
         second = np.repeat(x2, x1.shape[0])
         return np.column_stack([first, second])
-    else:
-        raise NotImplementedError("Come back here")
+    raise NotImplementedError("Come back here")
 
 
 def _gridmake2_torch(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
-    """
-    PyTorch version of _gridmake2.
+    """PyTorch version of _gridmake2.
 
     Expands two tensors into a matrix where rows span the cartesian product
     of combinations of the input tensors. Each column of the input tensors
@@ -161,10 +164,43 @@ def _gridmake2_torch(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
         first = x1.tile(x2.shape[0])
         second = x2.repeat_interleave(x1.shape[0])
         return torch.column_stack([first, second])
-    elif x1.dim() > 1 and x2.dim() == 1:
+    if x1.dim() > 1 and x2.dim() == 1:
         # tile x1 along first dimension
         first = x1.tile(x2.shape[0], 1)
         second = x2.repeat_interleave(x1.shape[0])
         return torch.column_stack([first, second])
-    else:
-        raise NotImplementedError("Come back here")
+    raise NotImplementedError("Come back here")
+
+
+@njit(cache=True)
+def _gridmake2_optimized_1d_1d(x1: np.ndarray, x2: np.ndarray, out_dtype) -> np.ndarray:
+    """Optimized version for 1D x 1D case."""
+    n1 = x1.shape[0]
+    n2 = x2.shape[0]
+    total = n1 * n2
+
+    out = np.empty((total, 2), dtype=out_dtype)
+    idx = 0
+    for j in range(n2):
+        for i in range(n1):
+            out[idx, 0] = x1[i]
+            out[idx, 1] = x2[j]
+            idx += 1
+    return out
+
+
+@njit(cache=True)
+def _gridmake2_optimized_2d_1d(x1: np.ndarray, x2: np.ndarray, out_dtype) -> np.ndarray:
+    """Optimized version for 2D x 1D case."""
+    rows = x1.shape[0]
+    cols = x1.shape[1]
+    n2 = x2.shape[0]
+    out_rows = rows * n2
+
+    out = np.empty((out_rows, cols + 1), dtype=out_dtype)
+    for j in range(n2):
+        for i in range(rows):
+            for k in range(cols):
+                out[j * rows + i, k] = x1[i, k]
+            out[j * rows + i, cols] = x2[j]
+    return out