From 434f8f0d69212f7a7826d1f69bf702ab587e1145 Mon Sep 17 00:00:00 2001 From: Jonathan Rampersad Date: Wed, 18 Jun 2025 09:19:30 -0400 Subject: [PATCH] FEAT: Dynamically select coefficient dtype and CUDA kernel --- src/polysolve/__init__.py | 50 +++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/polysolve/__init__.py b/src/polysolve/__init__.py index 931b2b1..436f56c 100644 --- a/src/polysolve/__init__.py +++ b/src/polysolve/__init__.py @@ -12,8 +12,8 @@ try: except ImportError: _CUPY_AVAILABLE = False -# The CUDA kernel for the fitness function -_FITNESS_KERNEL = """ +# The CUDA kernels for the fitness function +_FITNESS_KERNEL_FLOAT = """ extern "C" __global__ void fitness_kernel( const double* coefficients, int num_coefficients, @@ -37,6 +37,31 @@ extern "C" __global__ void fitness_kernel( } } """ +_FITNESS_KERNEL_INT = """ +extern "C" __global__ void fitness_kernel( + const long long* coefficients, + int num_coefficients, + const double* x_vals, + double* ranks, + int size, + double y_val) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < size) + { + double ans = 0; + int lrgst_expo = num_coefficients - 1; + for (int i = 0; i < num_coefficients; ++i) + { + ans += coefficients[i] * pow(x_vals[idx], (double)(lrgst_expo - i)); + } + + ans -= y_val; + ranks[idx] = (ans == 0) ? 1.7976931348623157e+308 : fabs(1.0 / ans); + } +} +""" + @dataclass class GA_Options: @@ -96,8 +121,14 @@ class Function: ) if coefficients[0] == 0 and self._largest_exponent > 0: raise ValueError("The first constant (for the largest exponent) cannot be 0.") + + # Check if any coefficient is a float + is_float = any(isinstance(c, float) for c in coefficients) - self.coefficients = np.array(coefficients, dtype=np.float64) + # Choose the dtype based on the input + target_dtype = np.float64 if is_float else np.int64 + + self.coefficients = np.array(coefficients, dtype=target_dtype) self._initialized = True def _check_initialized(self): @@ -276,11 +307,16 @@ class Function: def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray: """Genetic algorithm implementation using CuPy (GPU/CUDA).""" - # Load the raw CUDA kernel - fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL, 'fitness_kernel') - # Move coefficients to GPU - d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64) + # Check the dtype of our coefficients array + if self.coefficients.dtype == np.float64: + fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_FLOAT, 'fitness_kernel') + d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64) + elif self.coefficients.dtype == np.int64: + fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_INT, 'fitness_kernel') + d_coefficients = cupy.array(self.coefficients, dtype=cupy.int64) + else: + raise TypeError(f"Unsupported dtype for CUDA solver: {self.coefficients.dtype}") # Create initial random solutions on the GPU d_solutions = cupy.random.uniform(