feat(ga): Implement quality filtering and precision-based clustering (#19)
All checks were successful
Publish Python Package to PyPI / deploy (push) Successful in 12s
All checks were successful
Publish Python Package to PyPI / deploy (push) Successful in 12s
The previous GA logic returned the top `sample_size` solutions by rank (default: 1000). When the population converged on only one of the possible roots, the result was many near-duplicates of that root (e.g., 1000 variations of -1.0) and none of the others, which caused test failures.
This commit fixes the root-finding logic to correctly identify and return *all* unique, high-quality roots:
1. **feat(api):** Adds `root_precision` to `GA_Options`. This new parameter (default: 5) allows the user to control the number of decimal places for clustering unique roots.
2. **fix(ga):** Replaces the flawed "top N" logic in both `_solve_x_numpy` and `_solve_x_cuda`. The new process is:
* Dynamically sets `quality_threshold = 10**(root_precision + 1)`, where a solution's rank is `|1 / error|` (e.g., `root_precision=5` requires a rank > `1e6`, i.e. `|error| < 1e-6`).
* Filters the *entire* final population for all solutions that meet this quality threshold.
* Rounds these high-quality solutions to `root_precision`.
* Returns only the `np.unique()` results.
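This ensures the solver returns all distinct roots that meet the accuracy requirements, rather than just the top N variations of a single root. As part of this change, the `sample_size` option (and its `data_size < sample_size` warning) is removed from `GA_Options`, so callers that pass `sample_size` must drop it; the package version is bumped from 0.4.2 to 0.5.0.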
Reviewed-on: #19
Co-authored-by: Jonathan Rampersad <rampersad.jonathan@gmail.com>
Co-committed-by: Jonathan Rampersad <rampersad.jonathan@gmail.com>
This commit was merged in pull request #19.
This commit is contained in:
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
[project]
|
[project]
|
||||||
# --- Core Metadata ---
|
# --- Core Metadata ---
|
||||||
name = "polysolve"
|
name = "polysolve"
|
||||||
version = "0.4.2"
|
version = "0.5.0"
|
||||||
authors = [
|
authors = [
|
||||||
{ name="Jonathan Rampersad", email="jonathan@jono-rams.work" },
|
{ name="Jonathan Rampersad", email="jonathan@jono-rams.work" },
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -49,8 +49,6 @@ class GA_Options:
|
|||||||
Default: 100.0
|
Default: 100.0
|
||||||
num_of_generations (int): The number of iterations the algorithm will run.
|
num_of_generations (int): The number of iterations the algorithm will run.
|
||||||
Default: 10
|
Default: 10
|
||||||
sample_size (int): The number of top solutions to *return* at the end.
|
|
||||||
Default: 1000
|
|
||||||
data_size (int): The total number of solutions (population size)
|
data_size (int): The total number of solutions (population size)
|
||||||
generated in each generation. Default: 100000
|
generated in each generation. Default: 100000
|
||||||
mutation_strength (float): The percentage (e.g., 0.01 for 1%) by which
|
mutation_strength (float): The percentage (e.g., 0.01 for 1%) by which
|
||||||
@@ -64,16 +62,21 @@ class GA_Options:
|
|||||||
mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next
|
mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next
|
||||||
generation to be created by mutating solutions
|
generation to be created by mutating solutions
|
||||||
from the parent pool. Default: 0.40
|
from the parent pool. Default: 0.40
|
||||||
|
root_precision (int): The number of decimal places to round roots to
|
||||||
|
when clustering. A smaller number (e.g., 3)
|
||||||
|
groups roots more aggressively. A larger number
|
||||||
|
(e.g., 7) is more precise but may return
|
||||||
|
multiple near-identical roots. Default: 5
|
||||||
"""
|
"""
|
||||||
min_range: float = -100.0
|
min_range: float = -100.0
|
||||||
max_range: float = 100.0
|
max_range: float = 100.0
|
||||||
num_of_generations: int = 10
|
num_of_generations: int = 10
|
||||||
sample_size: int = 1000
|
|
||||||
data_size: int = 100000
|
data_size: int = 100000
|
||||||
mutation_strength: float = 0.01
|
mutation_strength: float = 0.01
|
||||||
elite_ratio: float = 0.05
|
elite_ratio: float = 0.05
|
||||||
crossover_ratio: float = 0.45
|
crossover_ratio: float = 0.45
|
||||||
mutation_ratio: float = 0.40
|
mutation_ratio: float = 0.40
|
||||||
|
root_precision: int = 5
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""Validates the GA options after initialization."""
|
"""Validates the GA options after initialization."""
|
||||||
@@ -84,11 +87,6 @@ class GA_Options:
|
|||||||
)
|
)
|
||||||
if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]):
|
if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]):
|
||||||
raise ValueError("GA ratios cannot be negative.")
|
raise ValueError("GA ratios cannot be negative.")
|
||||||
if self.data_size < self.sample_size:
|
|
||||||
warnings.warn(
|
|
||||||
f"data_size ({self.data_size}) is less than sample_size ({self.sample_size}). "
|
|
||||||
"The number of returned solutions will be limited to data_size."
|
|
||||||
)
|
|
||||||
|
|
||||||
def _get_cauchy_bound(coeffs: np.ndarray) -> float:
|
def _get_cauchy_bound(coeffs: np.ndarray) -> float:
|
||||||
"""
|
"""
|
||||||
@@ -380,12 +378,26 @@ class Function:
|
|||||||
error = y_calculated - y_val
|
error = y_calculated - y_val
|
||||||
with np.errstate(divide='ignore'):
|
with np.errstate(divide='ignore'):
|
||||||
ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error))
|
ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error))
|
||||||
sorted_indices = np.argsort(-ranks)
|
|
||||||
|
|
||||||
# Get the top 'sample_size' solutions the user asked for
|
# 1. Define quality based on the user's desired precision
|
||||||
best_solutions = solutions[sorted_indices][:options.sample_size]
|
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
|
||||||
|
# We add +1 for a buffer, ensuring we only get high-quality roots.
|
||||||
|
quality_threshold = 10**(options.root_precision + 1)
|
||||||
|
|
||||||
|
# 2. Get all solutions that meet this quality threshold
|
||||||
|
high_quality_solutions = solutions[ranks > quality_threshold]
|
||||||
|
|
||||||
|
if high_quality_solutions.size == 0:
|
||||||
|
# No roots found that meet the quality, return empty
|
||||||
|
return np.array([])
|
||||||
|
|
||||||
return np.sort(best_solutions)
|
# 3. Cluster these high-quality solutions by rounding
|
||||||
|
rounded_solutions = np.round(high_quality_solutions, options.root_precision)
|
||||||
|
|
||||||
|
# 4. Return only the unique roots
|
||||||
|
unique_roots = np.unique(rounded_solutions)
|
||||||
|
|
||||||
|
return np.sort(unique_roots)
|
||||||
|
|
||||||
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
|
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
|
||||||
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
|
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
|
||||||
@@ -490,13 +502,26 @@ class Function:
|
|||||||
(blocks_per_grid,), (threads_per_block,),
|
(blocks_per_grid,), (threads_per_block,),
|
||||||
(d_coefficients, d_coefficients.size, d_solutions, d_ranks, d_solutions.size, y_val)
|
(d_coefficients, d_coefficients.size, d_solutions, d_ranks, d_solutions.size, y_val)
|
||||||
)
|
)
|
||||||
sorted_indices = cupy.argsort(-d_ranks)
|
|
||||||
|
|
||||||
# Get the top 'sample_size' solutions
|
# 1. Define quality based on the user's desired precision
|
||||||
d_best_solutions = d_solutions[sorted_indices][:options.sample_size]
|
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
|
||||||
|
# We add +1 for a buffer, ensuring we only get high-quality roots.
|
||||||
|
quality_threshold = 10**(options.root_precision + 1)
|
||||||
|
|
||||||
|
# 2. Get all solutions that meet this quality threshold
|
||||||
|
d_high_quality_solutions = d_solutions[d_ranks > quality_threshold]
|
||||||
|
|
||||||
# Get the final sample, sort it, and copy back to CPU
|
if d_high_quality_solutions.size == 0:
|
||||||
final_solutions_gpu = cupy.sort(d_best_solutions)
|
return np.array([])
|
||||||
|
|
||||||
|
# 3. Cluster these high-quality solutions on the GPU by rounding
|
||||||
|
d_rounded_solutions = cupy.round(d_high_quality_solutions, options.root_precision)
|
||||||
|
|
||||||
|
# 4. Get only the unique roots
|
||||||
|
d_unique_roots = cupy.unique(d_rounded_solutions)
|
||||||
|
|
||||||
|
# Sort the unique roots and copy back to CPU
|
||||||
|
final_solutions_gpu = cupy.sort(d_unique_roots)
|
||||||
return final_solutions_gpu.get()
|
return final_solutions_gpu.get()
|
||||||
|
|
||||||
|
|
||||||
@@ -692,7 +717,7 @@ if __name__ == '__main__':
|
|||||||
print(f"Analytic roots of f1: {roots_analytic}") # Expected: -1, 2.5
|
print(f"Analytic roots of f1: {roots_analytic}") # Expected: -1, 2.5
|
||||||
|
|
||||||
# 2. Genetic algorithm solution
|
# 2. Genetic algorithm solution
|
||||||
ga_opts = GA_Options(num_of_generations=20, data_size=50000, sample_size=10)
|
ga_opts = GA_Options(num_of_generations=20, data_size=50000)
|
||||||
print("\nFinding roots with Genetic Algorithm (CPU)...")
|
print("\nFinding roots with Genetic Algorithm (CPU)...")
|
||||||
roots_ga_cpu = f1.get_real_roots(ga_opts)
|
roots_ga_cpu = f1.get_real_roots(ga_opts)
|
||||||
print(f"Approximate roots from GA (CPU): {roots_ga_cpu}")
|
print(f"Approximate roots from GA (CPU): {roots_ga_cpu}")
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ def test_get_real_roots_numpy(quadratic_func):
|
|||||||
Tests that the NumPy-based genetic algorithm approximates the roots correctly.
|
Tests that the NumPy-based genetic algorithm approximates the roots correctly.
|
||||||
"""
|
"""
|
||||||
# Using more generations for higher accuracy in testing
|
# Using more generations for higher accuracy in testing
|
||||||
ga_opts = GA_Options(num_of_generations=25, data_size=50000)
|
ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
|
||||||
|
|
||||||
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=False)
|
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=False)
|
||||||
|
|
||||||
@@ -124,7 +124,7 @@ def test_get_real_roots_cuda(quadratic_func):
|
|||||||
It will be skipped automatically if CuPy is not available.
|
It will be skipped automatically if CuPy is not available.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ga_opts = GA_Options(num_of_generations=25, data_size=50000)
|
ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
|
||||||
|
|
||||||
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=True)
|
roots = quadratic_func.get_real_roots(ga_opts, use_cuda=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user