feat(ga): Implement quality filtering and precision-based clustering
The previous GA logic was returning the "top N" solutions, which led to test failures when the algorithm correctly converged on only one of all possible roots (e.g., returning 1000 variations of -1.0).
This commit fixes the root-finding logic to correctly identify and return *all* unique, high-quality roots:
1. **feat(api):** Adds `root_precision` to `GA_Options`. This new parameter (default: 5) allows the user to control the number of decimal places for clustering unique roots.
2. **fix(ga):** Replaces the flawed "top N" logic in both `_solve_x_numpy` and `_solve_x_cuda`. The new process is:
* Dynamically sets a `quality_threshold` based on the user's `root_precision` (e.g., `precision=5` requires a rank > `1e6`).
* Filters the *entire* final population for all solutions that meet this quality threshold.
* Rounds these high-quality solutions to `root_precision`.
* Returns only the `np.unique()` results.
This ensures the solver returns all distinct roots that meet the accuracy requirements, rather than just the top N variations of a single root.
This commit is contained in:
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
||||
[project]
|
||||
# --- Core Metadata ---
|
||||
name = "polysolve"
|
||||
version = "0.4.2"
|
||||
version = "0.5.0"
|
||||
authors = [
|
||||
{ name="Jonathan Rampersad", email="jonathan@jono-rams.work" },
|
||||
]
|
||||
|
||||
@@ -49,8 +49,6 @@ class GA_Options:
|
||||
Default: 100.0
|
||||
num_of_generations (int): The number of iterations the algorithm will run.
|
||||
Default: 10
|
||||
sample_size (int): The number of top solutions to *return* at the end.
|
||||
Default: 1000
|
||||
data_size (int): The total number of solutions (population size)
|
||||
generated in each generation. Default: 100000
|
||||
mutation_strength (float): The percentage (e.g., 0.01 for 1%) by which
|
||||
@@ -64,16 +62,21 @@ class GA_Options:
|
||||
mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next
|
||||
generation to be created by mutating solutions
|
||||
from the parent pool. Default: 0.40
|
||||
root_precision (int): The number of decimal places to round roots to
|
||||
when clustering. A smaller number (e.g., 3)
|
||||
groups roots more aggressively. A larger number
|
||||
(e.g., 7) is more precise but may return
|
||||
multiple near-identical roots. Default: 5
|
||||
"""
|
||||
min_range: float = -100.0
|
||||
max_range: float = 100.0
|
||||
num_of_generations: int = 10
|
||||
sample_size: int = 1000
|
||||
data_size: int = 100000
|
||||
mutation_strength: float = 0.01
|
||||
elite_ratio: float = 0.05
|
||||
crossover_ratio: float = 0.45
|
||||
mutation_ratio: float = 0.40
|
||||
root_precision: int = 5
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validates the GA options after initialization."""
|
||||
@@ -84,11 +87,6 @@ class GA_Options:
|
||||
)
|
||||
if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]):
|
||||
raise ValueError("GA ratios cannot be negative.")
|
||||
if self.data_size < self.sample_size:
|
||||
warnings.warn(
|
||||
f"data_size ({self.data_size}) is less than sample_size ({self.sample_size}). "
|
||||
"The number of returned solutions will be limited to data_size."
|
||||
)
|
||||
|
||||
def _get_cauchy_bound(coeffs: np.ndarray) -> float:
|
||||
"""
|
||||
@@ -380,12 +378,26 @@ class Function:
|
||||
error = y_calculated - y_val
|
||||
with np.errstate(divide='ignore'):
|
||||
ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error))
|
||||
sorted_indices = np.argsort(-ranks)
|
||||
|
||||
# Get the top 'sample_size' solutions the user asked for
|
||||
best_solutions = solutions[sorted_indices][:options.sample_size]
|
||||
# 1. Define quality based on the user's desired precision
|
||||
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
|
||||
# We add +1 for a buffer, ensuring we only get high-quality roots.
|
||||
quality_threshold = 10**(options.root_precision + 1)
|
||||
|
||||
# 2. Get all solutions that meet this quality threshold
|
||||
high_quality_solutions = solutions[ranks > quality_threshold]
|
||||
|
||||
if high_quality_solutions.size == 0:
|
||||
# No roots found that meet the quality, return empty
|
||||
return np.array([])
|
||||
|
||||
return np.sort(best_solutions)
|
||||
# 3. Cluster these high-quality solutions by rounding
|
||||
rounded_solutions = np.round(high_quality_solutions, options.root_precision)
|
||||
|
||||
# 4. Return only the unique roots
|
||||
unique_roots = np.unique(rounded_solutions)
|
||||
|
||||
return np.sort(unique_roots)
|
||||
|
||||
def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
|
||||
"""Genetic algorithm implementation using CuPy (GPU/CUDA)."""
|
||||
@@ -490,13 +502,26 @@ class Function:
|
||||
(blocks_per_grid,), (threads_per_block,),
|
||||
(d_coefficients, d_coefficients.size, d_solutions, d_ranks, d_solutions.size, y_val)
|
||||
)
|
||||
sorted_indices = cupy.argsort(-d_ranks)
|
||||
|
||||
# Get the top 'sample_size' solutions
|
||||
d_best_solutions = d_solutions[sorted_indices][:options.sample_size]
|
||||
# 1. Define quality based on the user's desired precision
|
||||
# (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
|
||||
# We add +1 for a buffer, ensuring we only get high-quality roots.
|
||||
quality_threshold = 10**(options.root_precision + 1)
|
||||
|
||||
# 2. Get all solutions that meet this quality threshold
|
||||
d_high_quality_solutions = d_solutions[d_ranks > quality_threshold]
|
||||
|
||||
# Get the final sample, sort it, and copy back to CPU
|
||||
final_solutions_gpu = cupy.sort(d_best_solutions)
|
||||
if d_high_quality_solutions.size == 0:
|
||||
return np.array([])
|
||||
|
||||
# 3. Cluster these high-quality solutions on the GPU by rounding
|
||||
d_rounded_solutions = cupy.round(d_high_quality_solutions, options.root_precision)
|
||||
|
||||
# 4. Get only the unique roots
|
||||
d_unique_roots = cupy.unique(d_rounded_solutions)
|
||||
|
||||
# Sort the unique roots and copy back to CPU
|
||||
final_solutions_gpu = cupy.sort(d_unique_roots)
|
||||
return final_solutions_gpu.get()
|
||||
|
||||
|
||||
@@ -692,7 +717,7 @@ if __name__ == '__main__':
|
||||
print(f"Analytic roots of f1: {roots_analytic}") # Expected: -1, 2.5
|
||||
|
||||
# 2. Genetic algorithm solution
|
||||
ga_opts = GA_Options(num_of_generations=20, data_size=50000, sample_size=10)
|
||||
ga_opts = GA_Options(num_of_generations=20, data_size=50000)
|
||||
print("\nFinding roots with Genetic Algorithm (CPU)...")
|
||||
roots_ga_cpu = f1.get_real_roots(ga_opts)
|
||||
print(f"Approximate roots from GA (CPU): {roots_ga_cpu}")
|
||||
|
||||
Reference in New Issue
Block a user