feat(ga): Implement quality filtering and precision-based clustering (#19)
All checks were successful
Publish Python Package to PyPI / deploy (push) Successful in 12s

The previous GA logic returned the "top N" solutions by rank. When the algorithm correctly converged on only one of the possible roots, every returned value was a near-duplicate of that single root (e.g., 1000 variations of -1.0), which caused tests that expect all roots to fail.

This commit fixes the root-finding logic to correctly identify and return *all* unique, high-quality roots:

1.  **feat(api):** Adds `root_precision` to `GA_Options`. This new parameter (default: 5) allows the user to control the number of decimal places for clustering unique roots.

2.  **fix(ga):** Replaces the flawed "top N" logic in both `_solve_x_numpy` and `_solve_x_cuda`. The new process is:
    * Dynamically sets a `quality_threshold` of `10**(root_precision + 1)` on each solution's rank, where rank is `1/|error|` (e.g., `root_precision=5` requires a rank > `1e6`, i.e., `|error| < 1e-6`).
    * Filters the *entire* final population for all solutions that meet this quality threshold.
    * Rounds these high-quality solutions to `root_precision`.
    * Returns only the unique values (`np.unique()` on the CPU path, `cupy.unique()` on the CUDA path), sorted in ascending order.

This ensures the solver returns all distinct roots that meet the accuracy requirements, rather than just the top N variations of a single root.

Reviewed-on: #19
Co-authored-by: Jonathan Rampersad <rampersad.jonathan@gmail.com>
Co-committed-by: Jonathan Rampersad <rampersad.jonathan@gmail.com>
This commit was merged in pull request #19.
This commit is contained in:
2025-10-27 19:26:50 +00:00
committed by Jonathan Rampersad
parent 962eab5af7
commit 4e46c11f83
3 changed files with 46 additions and 21 deletions

View File

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
# --- Core Metadata ---
name = "polysolve"
version = "0.4.2"
version = "0.5.0"
authors = [
{ name="Jonathan Rampersad", email="jonathan@jono-rams.work" },
]

View File

@@ -49,8 +49,6 @@ class GA_Options:
Default: 100.0
num_of_generations (int): The number of iterations the algorithm will run.
Default: 10
sample_size (int): The number of top solutions to *return* at the end.
Default: 1000
data_size (int): The total number of solutions (population size)
generated in each generation. Default: 100000
mutation_strength (float): The percentage (e.g., 0.01 for 1%) by which
@@ -64,16 +62,21 @@ class GA_Options:
mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next
generation to be created by mutating solutions
from the parent pool. Default: 0.40
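root_precision (int): The number of decimal places to round roots to
when clustering. A smaller number (e.g., 3)
groups roots more aggressively. A larger number
(e.g., 7) is more precise but may return
multiple near-identical roots. It also sets the
quality threshold: only solutions whose rank
(1/|error|) exceeds 10**(root_precision + 1) are
kept, so a larger value may return fewer (or no)
roots. Default: 5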
"""
min_range: float = -100.0
max_range: float = 100.0
num_of_generations: int = 10
sample_size: int = 1000
data_size: int = 100000
mutation_strength: float = 0.01
elite_ratio: float = 0.05
crossover_ratio: float = 0.45
mutation_ratio: float = 0.40
root_precision: int = 5
def __post_init__(self):
"""Validates the GA options after initialization."""
@@ -84,11 +87,6 @@ class GA_Options:
)
if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]):
raise ValueError("GA ratios cannot be negative.")
if self.data_size < self.sample_size:
warnings.warn(
f"data_size ({self.data_size}) is less than sample_size ({self.sample_size}). "
"The number of returned solutions will be limited to data_size."
)
def _get_cauchy_bound(coeffs: np.ndarray) -> float:
"""
@@ -380,12 +378,26 @@ class Function:
error = y_calculated - y_val
with np.errstate(divide='ignore'):
ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error))
sorted_indices = np.argsort(-ranks)
# Get the top 'sample_size' solutions the user asked for
best_solutions = solutions[sorted_indices][:options.sample_size]
# 1. Define quality based on the user's desired precision
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
# We add +1 for a buffer, ensuring we only get high-quality roots.
quality_threshold = 10**(options.root_precision + 1)
# 2. Get all solutions that meet this quality threshold
high_quality_solutions = solutions[ranks > quality_threshold]
if high_quality_solutions.size == 0:
# No roots found that meet the quality, return empty
return np.array([])
return np.sort(best_solutions)
# 3. Cluster these high-quality solutions by rounding
rounded_solutions = np.round(high_quality_solutions, options.root_precision)
# 4. Return only the unique roots
unique_roots = np.unique(rounded_solutions)
return np.sort(unique_roots)
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
@@ -490,13 +502,26 @@ class Function:
(blocks_per_grid,), (threads_per_block,),
(d_coefficients, d_coefficients.size, d_solutions, d_ranks, d_solutions.size, y_val)
)
sorted_indices = cupy.argsort(-d_ranks)
# Get the top 'sample_size' solutions
d_best_solutions = d_solutions[sorted_indices][:options.sample_size]
# 1. Define quality based on the user's desired precision
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
# We add +1 for a buffer, ensuring we only get high-quality roots.
quality_threshold = 10**(options.root_precision + 1)
# 2. Get all solutions that meet this quality threshold
d_high_quality_solutions = d_solutions[d_ranks > quality_threshold]
# Get the final sample, sort it, and copy back to CPU
final_solutions_gpu = cupy.sort(d_best_solutions)
if d_high_quality_solutions.size == 0:
return np.array([])
# 3. Cluster these high-quality solutions on the GPU by rounding
d_rounded_solutions = cupy.round(d_high_quality_solutions, options.root_precision)
# 4. Get only the unique roots
d_unique_roots = cupy.unique(d_rounded_solutions)
# Sort the unique roots and copy back to CPU
final_solutions_gpu = cupy.sort(d_unique_roots)
return final_solutions_gpu.get()
@@ -692,7 +717,7 @@ if __name__ == '__main__':
print(f"Analytic roots of f1: {roots_analytic}") # Expected: -1, 2.5
# 2. Genetic algorithm solution
ga_opts = GA_Options(num_of_generations=20, data_size=50000, sample_size=10)
ga_opts = GA_Options(num_of_generations=20, data_size=50000)
print("\nFinding roots with Genetic Algorithm (CPU)...")
roots_ga_cpu = f1.get_real_roots(ga_opts)
print(f"Approximate roots from GA (CPU): {roots_ga_cpu}")

View File

@@ -101,7 +101,7 @@ def test_get_real_roots_numpy(quadratic_func):
Tests that the NumPy-based genetic algorithm approximates the roots correctly.
"""
# Using more generations for higher accuracy in testing
ga_opts = GA_Options(num_of_generations=25, data_size=50000)
ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=False)
@@ -124,7 +124,7 @@ def test_get_real_roots_cuda(quadratic_func):
It will be skipped automatically if CuPy is not available.
"""
ga_opts = GA_Options(num_of_generations=25, data_size=50000)
ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=True)