FEAT: Added support for float coefficients #13
@@ -12,8 +12,8 @@ try:
 except ImportError:
     _CUPY_AVAILABLE = False
 
-# The CUDA kernel for the fitness function
-_FITNESS_KERNEL = """
+# The CUDA kernels for the fitness function
+_FITNESS_KERNEL_FLOAT = """
 extern "C" __global__ void fitness_kernel(
     const double* coefficients,
     int num_coefficients,
@@ -37,6 +37,31 @@ extern "C" __global__ void fitness_kernel(
     }
 }
 """
+_FITNESS_KERNEL_INT = """
+extern "C" __global__ void fitness_kernel(
+    const long long* coefficients,
+    int num_coefficients,
+    const double* x_vals,
+    double* ranks,
+    int size,
+    double y_val)
+{
+    int idx = threadIdx.x + blockIdx.x * blockDim.x;
+    if (idx < size)
+    {
+        double ans = 0;
+        int lrgst_expo = num_coefficients - 1;
+        for (int i = 0; i < num_coefficients; ++i)
+        {
+            ans += coefficients[i] * pow(x_vals[idx], (double)(lrgst_expo - i));
+        }
+
+        ans -= y_val;
+        ranks[idx] = (ans == 0) ? 1.7976931348623157e+308 : fabs(1.0 / ans);
+    }
+}
+"""
+
 
 @dataclass
 class GA_Options:
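Note: both kernels rank a candidate x by how close the polynomial gets to y_val; the closer the difference is to zero, the larger the rank, and an exact root is pinned to DBL_MAX. A rough CPU-side equivalent in NumPy, for illustration only (fitness_cpu is not part of this change):

import numpy as np

def fitness_cpu(coefficients, x_vals, y_val):
    """CPU-side sketch of what fitness_kernel computes for each candidate x."""
    # coefficients[0] belongs to the largest exponent, matching the kernel loop
    ans = np.polyval(coefficients.astype(np.float64), x_vals) - y_val
    # Closer to a root means a larger rank; exact hits get DBL_MAX
    with np.errstate(divide="ignore"):
        return np.where(ans == 0.0, np.finfo(np.float64).max, np.abs(1.0 / ans))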
@@ -96,8 +121,14 @@ class Function:
         )
         if coefficients[0] == 0 and self._largest_exponent > 0:
             raise ValueError("The first constant (for the largest exponent) cannot be 0.")
 
-        self.coefficients = np.array(coefficients, dtype=np.float64)
+        # Check if any coefficient is a float
+        is_float = any(isinstance(c, float) for c in coefficients)
+
+        # Choose the dtype based on the input
+        target_dtype = np.float64 if is_float else np.int64
+
+        self.coefficients = np.array(coefficients, dtype=target_dtype)
         self._initialized = True
 
     def _check_initialized(self):
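With this change, an all-integer coefficient list now stays in int64, and only a list containing at least one float is promoted to float64. A standalone sketch of the same check (pick_dtype is illustrative, not code from this PR):

import numpy as np

def pick_dtype(coefficients):
    # Mirrors the new __init__ logic: any float promotes the whole array
    is_float = any(isinstance(c, float) for c in coefficients)
    return np.float64 if is_float else np.int64

print(pick_dtype([3, -2, 1]))    # <class 'numpy.int64'>
print(pick_dtype([3, -2.5, 1]))  # <class 'numpy.float64'>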
@@ -276,11 +307,16 @@ class Function:
 
     def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
         """Genetic algorithm implementation using CuPy (GPU/CUDA)."""
-        # Load the raw CUDA kernel
-        fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL, 'fitness_kernel')
 
-        # Move coefficients to GPU
-        d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64)
+        # Check the dtype of our coefficients array
+        if self.coefficients.dtype == np.float64:
+            fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_FLOAT, 'fitness_kernel')
+            d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64)
+        elif self.coefficients.dtype == np.int64:
+            fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_INT, 'fitness_kernel')
+            d_coefficients = cupy.array(self.coefficients, dtype=cupy.int64)
+        else:
+            raise TypeError(f"Unsupported dtype for CUDA solver: {self.coefficients.dtype}")
 
         # Create initial random solutions on the GPU
         d_solutions = cupy.random.uniform(
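One reason the dispatch needs a dedicated long long kernel rather than reusing the double kernel (my reading, not stated in the PR): cupy.RawKernel hands the device buffer to the kernel as a raw pointer, so an int64 array read through const double* would have its bytes reinterpreted rather than its values converted. A quick NumPy illustration of that difference (not code from this PR):

import numpy as np

coeffs = np.array([3, -2, 1], dtype=np.int64)
print(coeffs.view(np.float64))    # bytes reinterpreted: tiny denormals/NaN, not [ 3. -2.  1.]
print(coeffs.astype(np.float64))  # [ 3. -2.  1.] -- an actual value conversion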