Compare commits

..

2 Commits

Author SHA1 Message Date
b415df2983 feat: add complex root finding and dynamic CUDA shared memory optimization
Major update extending the library to solve for complex roots and optimizing GPU performance using Shared Memory.

Complex Number Support:
- Implemented `_solve_complex_cuda` and `_solve_complex_numpy` to find roots in the complex plane.
- Added specialized CUDA kernels (`_FITNESS_KERNEL_COMPLEX`, `_FITNESS_KERNEL_COMPLEX_DYNAMIC`) handling complex arithmetic (multiplication/addition) directly on the GPU.
- Updated `Function` class and `set_coeffs` to handle `np.complex128` data types.
- Updated `quadratic_solve` to return complex roots using `cmath`.

CUDA Performance & Optimization:
- Implemented Dynamic Shared Memory kernels (`extern __shared__`) to cache polynomial coefficients on the GPU block, significantly reducing global memory latency.
- Added intelligent fallback logic: The solver checks `MaxSharedMemoryPerBlock`. If the polynomial is too large for Shared Memory, it falls back to the standard Global Memory kernel to prevent crashes.
- Split complex coefficients into separate Real and Imaginary arrays for CUDA kernel efficiency.

Polynomial Logic:
- Added `_strip_leading_zeros` helper to ensure polynomial degree is correctly maintained after arithmetic operations (e.g., preventing `0x^2 + x` from being treated as degree 2).
- Updated `__init__` to allow direct coefficient injection.

GA Algorithm:
- Updated crossover logic to support 2D search space (Real + Imaginary) for complex solutions.
- Refined fitness function to explicitly handle `isinf`/`isnan` for numerical stability.
2025-12-05 13:47:29 -04:00
602269889b Got rid of min/max_range to exclusively use Cauchy's Bound. Updated quadratic solve to handle complex roots. 2025-11-24 15:05:13 -04:00
3 changed files with 14 additions and 37 deletions

View File

@@ -12,7 +12,6 @@ A Python library for representing, manipulating, and solving polynomial equation
## Key Features
* **Numerically Stable Solver**: Makes complex calculations **practical**. Leverage your GPU to power the robust genetic algorithm, solving high-degree polynomials accurately in a reasonable timeframe.
* **Complex Number Support**: Fully supports complex coefficients and finding roots in the complex plane (e.g., $x^2 + 1 = 0 \to \pm i$).
* **Numba Accelerated CPU Solver**: The default genetic algorithm is JIT-compiled with Numba for high-speed CPU performance, right out of the box.
* **CUDA Accelerated**: Leverage NVIDIA GPUs for a massive performance boost when finding roots in large solution spaces.
* **Create and Manipulate Polynomials**: Easily define polynomials of any degree using integer or float coefficients, and perform arithmetic operations like addition, subtraction, multiplication, and scaling.
@@ -76,19 +75,12 @@ roots_analytic = f1.quadratic_solve()
print(f"Analytic roots: {sorted(roots_analytic)}")
# > Analytic roots: [-1.0, 2.5]
# 6. Find REAL roots with the genetic algorithm (Numba CPU)
# This is the default, JIT-compiled CPU solver.
# 6. Find roots with the genetic algorithm (Numba CPU)
#    This is the default, JIT-compiled CPU solver.
ga_opts = GA_Options(num_of_generations=20)
roots_ga = f1.get_real_roots(ga_opts, use_cuda=False)
print(f"Approximate real roots: {roots_ga[:2]}")
# > Approximate real roots: [-1.000..., 2.500...]
# 7. Find ALL roots (Real + Complex)
# Use get_roots() to search the complex plane.
f_complex = Function(2, [1, 0, 1]) # x^2 + 1
roots_all = f_complex.get_roots(ga_opts)
print(f"Approximate complex roots: {roots_all}")
# > Approximate complex roots: [-1.00...j, 1.00...j]
print(f"Approximate roots from GA: {roots_ga[:2]}")
# > Approximate roots from GA: [-1.000..., 2.500...]
# If you installed a CUDA extra, you can run it on the GPU:
# roots_ga_gpu = f1.get_real_roots(ga_opts, use_cuda=True)
@@ -122,10 +114,7 @@ ga_robust_search = GA_Options(
# Increase the crossover blend factor to 0.75.
# This allows new solutions to be created further
# away from their parents, increasing exploration.
blend_alpha=0.75,
# Enable complex root finding (default is True)
find_complex=True
blend_alpha=0.75
)
# Pass the custom options to the solver

View File

@@ -343,12 +343,7 @@ class GA_Options:
stacklevel=2
)
if self.min_range != 0.0 or self.max_range != 0.0:
warnings.warn(
"The 'min_range' and 'max_range' parameters are deprecated and will be ignored. "
"Search bounds are now automatically calculated using Cauchy's bound.",
DeprecationWarning,
stacklevel=2
)
warnings.warn("min_range and max_range are no longer used, instead cauchy's bound is used to find these values")
def _get_cauchy_bound(coeffs: np.ndarray) -> float:
"""
@@ -693,7 +688,7 @@ class Function:
# Use mutation_strength
noise = np.random.normal(0, options.mutation_strength, mutation_size)
dst_solutions[idx_cross_end:idx_mut_end] = mutation_candidates * (1.0 + noise)
dst_solutions[idx_cross_end:idx_mut_end] = mutation_candidates + noise
# 4. New Randoms: Add new blood to prevent getting stuck
dst_solutions[idx_mut_end:] = np.random.uniform(min_r, max_r, random_size)
@@ -805,7 +800,7 @@ class Function:
noise_real = np.random.normal(0, options.mutation_strength, mutation_size)
noise_imag = np.random.normal(0, options.mutation_strength, mutation_size)
dst_solutions[idx_cross_end:idx_mut_end] = (mut_candidates.real * (1.0 + noise_real)) + 1j * (mut_candidates.imag * (1.0 + noise_imag))
dst_solutions[idx_cross_end:idx_mut_end] = (mut_candidates.real + noise_real) + 1j * (mut_candidates.imag + noise_imag)
# 4. New Randoms: Add new blood to prevent getting stuck
rand_real = np.random.uniform(min_r, max_r, random_size)
@@ -935,7 +930,7 @@ class Function:
# Use mutation_strength
noise = cupy.random.normal(0, options.mutation_strength, mutation_size)
d_dst_solutions[idx_cross_end:idx_mut_end] = d_mutation_candidates * (1.0 + noise)
d_dst_solutions[idx_cross_end:idx_mut_end] = d_mutation_candidates + noise
# 4. New Randoms
d_dst_solutions[idx_mut_end:] = cupy.random.uniform(
@@ -1100,7 +1095,7 @@ class Function:
noise_imag = cupy.random.normal(0, options.mutation_strength, mutation_size)
# Apply Mutation: Scale Real/Imag independently to allow "rotation" off the line
d_dst_solutions[idx_cross_end:idx_mut_end] = (mut_candidates.real * (1.0 + noise_real)) + 1j * (mut_candidates.imag * (1.0 + noise_imag))
d_dst_solutions[idx_cross_end:idx_mut_end] = (mut_candidates.real + noise_real) + 1j * (mut_candidates.imag + noise_imag)
# 4. Random Injection: Fresh genetic material
rand_real = cupy.random.uniform(min_r, max_r, random_size, dtype=cupy.float64)
@@ -1132,9 +1127,7 @@ class Function:
d_unique = cupy.unique(rounded_real + 1j * rounded_imag)
# Sort the unique roots and copy back to CPU
final_solutions_gpu = cupy.sort(d_unique)
return final_solutions_gpu.get()
return cupy.asnumpy(d_unique)
def __str__(self) -> str:
@@ -1306,7 +1299,7 @@ class Function:
return np.allclose(c1, c2)
def quadratic_solve(self) -> Optional[List[Union[complex, float]]]:
def quadratic_solve(self) -> Optional[List[complex]]:
"""
Calculates the roots (real or complex) of a quadratic function.
@@ -1335,14 +1328,9 @@ class Function:
else:
# Standard case: Use Vieta's formula
root2 = (c / a) / root1
roots = np.array([root1, root2])
roots.sort()
if np.all(np.abs(roots.imag) < 1e-15):
return roots.real.astype(np.float64)
return roots
# Return roots in a consistent order
return [root1, root2]
# Example Usage
if __name__ == '__main__':