FEAT: Added support for float coefficients #13
@ -12,8 +12,8 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
_CUPY_AVAILABLE = False
|
_CUPY_AVAILABLE = False
|
||||||
|
|
||||||
# The CUDA kernel for the fitness function
|
# The CUDA kernels for the fitness function
|
||||||
_FITNESS_KERNEL = """
|
_FITNESS_KERNEL_FLOAT = """
|
||||||
extern "C" __global__ void fitness_kernel(
|
extern "C" __global__ void fitness_kernel(
|
||||||
const double* coefficients,
|
const double* coefficients,
|
||||||
int num_coefficients,
|
int num_coefficients,
|
||||||
@ -37,6 +37,31 @@ extern "C" __global__ void fitness_kernel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
_FITNESS_KERNEL_INT = """
|
||||||
|
extern "C" __global__ void fitness_kernel(
|
||||||
|
const long long* coefficients,
|
||||||
|
int num_coefficients,
|
||||||
|
const double* x_vals,
|
||||||
|
double* ranks,
|
||||||
|
int size,
|
||||||
|
double y_val)
|
||||||
|
{
|
||||||
|
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
|
if (idx < size)
|
||||||
|
{
|
||||||
|
double ans = 0;
|
||||||
|
int lrgst_expo = num_coefficients - 1;
|
||||||
|
for (int i = 0; i < num_coefficients; ++i)
|
||||||
|
{
|
||||||
|
ans += coefficients[i] * pow(x_vals[idx], (double)(lrgst_expo - i));
|
||||||
|
}
|
||||||
|
|
||||||
|
ans -= y_val;
|
||||||
|
ranks[idx] = (ans == 0) ? 1.7976931348623157e+308 : fabs(1.0 / ans);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class GA_Options:
|
class GA_Options:
|
||||||
@ -96,8 +121,14 @@ class Function:
|
|||||||
)
|
)
|
||||||
if coefficients[0] == 0 and self._largest_exponent > 0:
|
if coefficients[0] == 0 and self._largest_exponent > 0:
|
||||||
raise ValueError("The first constant (for the largest exponent) cannot be 0.")
|
raise ValueError("The first constant (for the largest exponent) cannot be 0.")
|
||||||
|
|
||||||
|
# Check if any coefficient is a float
|
||||||
|
is_float = any(isinstance(c, float) for c in coefficients)
|
||||||
|
|
||||||
self.coefficients = np.array(coefficients, dtype=np.float64)
|
# Choose the dtype based on the input
|
||||||
|
target_dtype = np.float64 if is_float else np.int64
|
||||||
|
|
||||||
|
self.coefficients = np.array(coefficients, dtype=target_dtype)
|
||||||
self._initialized = True
|
self._initialized = True
|
||||||
|
|
||||||
def _check_initialized(self):
|
def _check_initialized(self):
|
||||||
@ -276,11 +307,16 @@ class Function:
|
|||||||
|
|
||||||
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
|
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
|
||||||
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
|
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
|
||||||
# Load the raw CUDA kernel
|
|
||||||
fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL, 'fitness_kernel')
|
|
||||||
|
|
||||||
# Move coefficients to GPU
|
# Check the dtype of our coefficients array
|
||||||
d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64)
|
if self.coefficients.dtype == np.float64:
|
||||||
|
fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_FLOAT, 'fitness_kernel')
|
||||||
|
d_coefficients = cupy.array(self.coefficients, dtype=cupy.float64)
|
||||||
|
elif self.coefficients.dtype == np.int64:
|
||||||
|
fitness_gpu = cupy.RawKernel(_FITNESS_KERNEL_INT, 'fitness_kernel')
|
||||||
|
d_coefficients = cupy.array(self.coefficients, dtype=cupy.int64)
|
||||||
|
else:
|
||||||
|
raise TypeError(f"Unsupported dtype for CUDA solver: {self.coefficients.dtype}")
|
||||||
|
|
||||||
# Create initial random solutions on the GPU
|
# Create initial random solutions on the GPU
|
||||||
d_solutions = cupy.random.uniform(
|
d_solutions = cupy.random.uniform(
|
||||||
|
Reference in New Issue
Block a user