From 9d967210fac859dc92b886fd2890d0e696de4345 Mon Sep 17 00:00:00 2001 From: Jonathan Rampersad Date: Thu, 30 Oct 2025 11:31:00 -0400 Subject: [PATCH] feat(ga): Overhaul GA for multi-root robustness and CPU performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 🚀 Performance (CPU) * Replaces `np.polyval` with a parallel Numba JIT function (`_calculate_ranks_numba`). * Replaces $O(N \log N)$ `np.argsort` with $O(N)$ `np.argpartition` in the GA loop. * Adds `numba` as a core dependency. ### 🧠 Robustness (Algorithm) * Implements Blend Crossover (BLX-$\alpha$) for better, extrapolative exploration. * Uses a hybrid selection model (top X% for crossover, 100% for mutation) to preserve root niches. * Adds `selection_percentile` and `blend_alpha` to `GA_Options` for tuning. --- README.md | 26 ++++--- pyproject.toml | 5 +- src/polysolve/__init__.py | 138 +++++++++++++++++++++++++++++--------- tests/test_polysolve.py | 14 ++-- 4 files changed, 132 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 01352a3..4f9fb05 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,14 @@ [![PyPI version](https://img.shields.io/pypi/v/polysolve.svg)](https://pypi.org/project/polysolve/) [![PyPI pyversions](https://img.shields.io/pypi/pyversions/polysolve.svg)](https://pypi.org/project/polysolve/) -A Python library for representing, manipulating, and solving polynomial equations using a high-performance genetic algorithm, with optional CUDA/GPU acceleration. +A Python library for representing, manipulating, and solving polynomial equations. Features a high-performance, Numba-accelerated genetic algorithm for CPU, with an optional CUDA/GPU backend for massive-scale parallel solving. --- ## Key Features * **Numerically Stable Solver**: Makes complex calculations **practical**. Leverage your GPU to power the robust genetic algorithm, solving high-degree polynomials accurately in a reasonable timeframe. +* **Numba Accelerated CPU Solver**: The default genetic algorithm is JIT-compiled with Numba for high-speed CPU performance, right out of the box. * **CUDA Accelerated**: Leverage NVIDIA GPUs for a massive performance boost when finding roots in large solution spaces. * **Create and Manipulate Polynomials**: Easily define polynomials of any degree using integer or float coefficients, and perform arithmetic operations like addition, subtraction, multiplication, and scaling. * **Analytical Solvers**: Includes standard, exact solvers for simple cases (e.g., `quadratic_solve`). @@ -74,8 +75,8 @@ roots_analytic = f1.quadratic_solve() print(f"Analytic roots: {sorted(roots_analytic)}") # > Analytic roots: [-1.0, 2.5] -# 6. Find roots with the genetic algorithm (CPU) -# This can solve polynomials of any degree. +# 6. Find roots with the genetic algorithm (Numba CPU) +#    This is the default, JIT-compiled CPU solver. ga_opts = GA_Options(num_of_generations=20) roots_ga = f1.get_real_roots(ga_opts, use_cuda=False) print(f"Approximate roots from GA: {roots_ga[:2]}") @@ -98,13 +99,22 @@ The default options are balanced, but for very complex polynomials, you may want ```python from polysolve import GA_Options -# Create a config for a much deeper, more accurate search -# (slower, but better for high-degree, complex functions) -ga_accurate = GA_Options( +# Create a config for a deep search, optimized for finding +# *all* real roots (even if they are far apart). +ga_robust_search = GA_Options( num_of_generations=50, # Run for more generations data_size=500000, # Use a larger population - elite_ratio=0.1, # Keep the top 10% - mutation_ratio=0.5 # Mutate 50% + + # --- Key Tuning Parameters for Multi-Root Finding --- + + # Widen the parent pool to 75% to keep more "niches" + # (solution-clouds around different roots) alive. + selection_percentile=0.75, + + # Increase the crossover blend factor to 0.75. + # This allows new solutions to be created further + # away from their parents, increasing exploration. + blend_alpha=0.75 ) # Pass the custom options to the solver diff --git a/pyproject.toml b/pyproject.toml index e4b1379..a0db21b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] # --- Core Metadata --- name = "polysolve" -version = "0.5.1" +version = "0.6.0" authors = [ { name="Jonathan Rampersad", email="jonathan@jono-rams.work" }, ] @@ -33,7 +33,8 @@ classifiers = [ # --- Dependencies --- dependencies = [ - "numpy>=1.21" + "numpy>=1.21", + "numba" ] # --- Optional Dependencies (Extras) --- diff --git a/src/polysolve/__init__.py b/src/polysolve/__init__.py index e1ddd05..19b7233 100644 --- a/src/polysolve/__init__.py +++ b/src/polysolve/__init__.py @@ -1,5 +1,6 @@ import math import numpy as np +import numba from dataclasses import dataclass from typing import List, Optional, Union import warnings @@ -37,6 +38,31 @@ extern "C" __global__ void fitness_kernel( } """ +@numba.jit(nopython=True, fastmath=True, parallel=True) +def _calculate_ranks_numba(solutions, coefficients, y_val, ranks): + """ + A Numba-jitted, parallel function to calculate fitness. + This replaces np.polyval and the rank calculation. + """ + num_coefficients = coefficients.shape[0] + data_size = solutions.shape[0] + + # This prange will be run in parallel on all your CPU cores + for idx in numba.prange(data_size): + x_val = solutions[idx] + + # Horner's method (same as np.polyval) + ans = coefficients[0] + for i in range(1, num_coefficients): + ans = ans * x_val + coefficients[i] + + ans -= y_val + + if ans == 0.0: + ranks[idx] = 1.7976931348623157e+308 # np.finfo(float).max + else: + ranks[idx] = abs(1.0 / ans) + @dataclass class GA_Options: """ @@ -62,6 +88,16 @@ class GA_Options: mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next generation to be created by mutating solutions from the parent pool. Default: 0.40 + selection_percentile (float): The top percentage (e.g., 0.66 for 66%) + of solutions to use as the parent pool + for crossover. A smaller value speeds + up single-root convergence; a larger + value helps find multiple roots. + Default: 0.66 + blend_alpha (float): The expansion factor for Blend Crossover (BLX-alpha). + 0.0 = average crossover (no expansion). + 0.5 = 50% expansion beyond the parent range. + Default: 0.5 root_precision (int): The number of decimal places to round roots to when clustering. A smaller number (e.g., 3) groups roots more aggressively. A larger number @@ -76,6 +112,8 @@ class GA_Options: elite_ratio: float = 0.05 crossover_ratio: float = 0.45 mutation_ratio: float = 0.40 + selection_percentile: float = 0.66 + blend_alpha: float = 0.5 root_precision: int = 5 def __post_init__(self): @@ -87,6 +125,14 @@ class GA_Options: ) if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]): raise ValueError("GA ratios cannot be negative.") + if not (0 < self.selection_percentile <= 1.0): + raise ValueError( + f"selection_percentile must be between 0 (exclusive) and 1.0 (inclusive), but got {self.selection_percentile}" + ) + if self.blend_alpha < 0: + raise ValueError( + f"blend_alpha cannot be negative, but got {self.blend_alpha}" + ) def _get_cauchy_bound(coeffs: np.ndarray) -> float: """ @@ -320,40 +366,57 @@ class Function: # Create initial random solutions solutions = np.random.uniform(min_r, max_r, data_size) + # Pre-allocate ranks array + ranks = np.empty(data_size, dtype=np.float64) + for _ in range(options.num_of_generations): # Calculate fitness for all solutions (vectorized) - y_calculated = np.polyval(self.coefficients, solutions) - error = y_calculated - y_val + _calculate_ranks_numba(solutions, self.coefficients, y_val, ranks) + + parent_pool_size = int(data_size * options.selection_percentile) + + # 1. Get indices for the elite solutions (O(N) operation) + # We find the 'elite_size'-th largest element. + elite_indices = np.argpartition(-ranks, elite_size)[:elite_size] - with np.errstate(divide='ignore'): - ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error)) - - # Sort solutions by fitness (descending) - sorted_indices = np.argsort(-ranks) - solutions = solutions[sorted_indices] + # 2. Get indices for the parent pool (O(N) operation) + # We find the 'parent_pool_size'-th largest element. + parent_pool_indices = np.argpartition(-ranks, parent_pool_size)[:parent_pool_size] # --- Create the next generation --- # 1. Elitism: Keep the best solutions as-is - elite_solutions = solutions[:elite_size] - - # Define a "parent pool" of the top 50% of solutions to breed from - parent_pool = solutions[:data_size // 2] + elite_solutions = solutions[elite_indices] # 2. Crossover: Breed two parents to create a child - # Select from the full list (indices 0 to data_size-1) - parent1_indices = np.random.randint(0, data_size, crossover_size) - parent2_indices = np.random.randint(0, data_size, crossover_size) - parents1 = solutions[parent1_indices] - parents2 = solutions[parent2_indices] - # Simple "average" crossover - crossover_solutions = (parents1 + parents2) / 2.0 + # Select from the fitter PARENT POOL + parents1_idx = np.random.choice(parent_pool_indices, crossover_size) + parents2_idx = np.random.choice(parent_pool_indices, crossover_size) + + parents1 = solutions[parents1_idx] + parents2 = solutions[parents2_idx] + # Blend Crossover (BLX-alpha) + alpha = options.blend_alpha + + # Find min/max for all parent pairs + p_min = np.minimum(parents1, parents2) + p_max = np.maximum(parents1, parents2) + + # Calculate range (I) + parent_range = p_max - p_min + + # Calculate new min/max for the expanded range + new_min = p_min - (alpha * parent_range) + new_max = p_max + (alpha * parent_range) + + # Create a new random child within the expanded range + crossover_solutions = np.random.uniform(new_min, new_max, crossover_size) # 3. Mutation: # Select from the full list (indices 0 to data_size-1) mutation_candidates = solutions[np.random.randint(0, data_size, mutation_size)] - # Use mutation_strength (the new name) + # Use mutation_strength mutation_factors = np.random.uniform( 1 - options.mutation_strength, 1 + options.mutation_strength, @@ -374,10 +437,7 @@ class Function: # --- Final Step: Return the best results --- # After all generations, do one last ranking to find the best solutions - y_calculated = np.polyval(self.coefficients, solutions) - error = y_calculated - y_val - with np.errstate(divide='ignore'): - ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error)) + _calculate_ranks_numba(solutions, self.coefficients, y_val, ranks) # 1. Define quality based on the user's desired precision # (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9) @@ -458,17 +518,31 @@ class Function: # 1. Elitism d_elite_solutions = d_solutions[:elite_size] - # Define a "parent pool" of the top 50% of solutions to breed from - d_parent_pool = d_solutions[:data_size // 2] - parent_pool_size = d_parent_pool.size - # 2. Crossover - # Select from the full list (indices 0 to data_size-1) - parent1_indices = cupy.random.randint(0, data_size, crossover_size) - parent2_indices = cupy.random.randint(0, data_size, crossover_size) + parent_pool_size = int(data_size * options.selection_percentile) + # Select from the fitter PARENT POOL + parent1_indices = cupy.random.randint(0, parent_pool_size, crossover_size) + parent2_indices = cupy.random.randint(0, parent_pool_size, crossover_size) + # Get parents directly from the sorted solutions array using the pool-sized indices d_parents1 = d_solutions[parent1_indices] d_parents2 = d_solutions[parent2_indices] - d_crossover_solutions = (d_parents1 + d_parents2) / 2.0 + + # Blend Crossover (BLX-alpha) + alpha = options.blend_alpha + + # Find min/max for all parent pairs + d_p_min = cupy.minimum(d_parents1, d_parents2) + d_p_max = cupy.maximum(d_parents1, d_parents2) + + # Calculate range (I) + d_parent_range = d_p_max - d_p_min + + # Calculate new min/max for the expanded range + d_new_min = d_p_min - (alpha * d_parent_range) + d_new_max = d_p_max + (alpha * d_parent_range) + + # Create a new random child within the expanded range + d_crossover_solutions = cupy.random.uniform(d_new_min, d_new_max, crossover_size) # 3. Mutation # Select from the full list (indices 0 to data_size-1) diff --git a/tests/test_polysolve.py b/tests/test_polysolve.py index 1f9a8cb..00bdb37 100644 --- a/tests/test_polysolve.py +++ b/tests/test_polysolve.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import numpy.testing as npt # Try to import cupy to check for CUDA availability try: @@ -101,7 +102,7 @@ def test_get_real_roots_numpy(quadratic_func): Tests that the NumPy-based genetic algorithm approximates the roots correctly. """ # Using more generations for higher accuracy in testing - ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3) + ga_opts = GA_Options(num_of_generations=50, data_size=200000, selection_percentile=0.66, root_precision=3) roots = quadratic_func.get_real_roots(ga_opts, use_cuda=False) @@ -109,11 +110,7 @@ def test_get_real_roots_numpy(quadratic_func): # We don't know which order they'll be in, so we check for presence. expected_roots = np.array([-1.0, 2.5]) - # Check that at least one found root is close to -1.0 - assert np.any(np.isclose(roots, expected_roots[0], atol=1e-2)) - - # Check that at least one found root is close to 2.5 - assert np.any(np.isclose(roots, expected_roots[1], atol=1e-2)) + npt.assert_allclose(np.sort(roots), np.sort(expected_roots), atol=1e-2) @pytest.mark.skipif(not _CUPY_AVAILABLE, reason="CuPy is not installed, skipping CUDA test.") @@ -124,13 +121,12 @@ def test_get_real_roots_cuda(quadratic_func): It will be skipped automatically if CuPy is not available. """ - ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3) + ga_opts = GA_Options(num_of_generations=50, data_size=200000, selection_percentile=0.66, root_precision=3) roots = quadratic_func.get_real_roots(ga_opts, use_cuda=True) expected_roots = np.array([-1.0, 2.5]) # Verify that the CUDA implementation also finds the correct roots within tolerance. - assert np.any(np.isclose(roots, expected_roots[0], atol=1e-2)) - assert np.any(np.isclose(roots, expected_roots[1], atol=1e-2)) + npt.assert_allclose(np.sort(roots), np.sort(expected_roots), atol=1e-2)