2025-10-27 19:26:50 +00:00
3 changed files with 46 additions and 21 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 # --- Core Metadata ---
 name = "polysolve"
-version = "0.4.2"
+version = "0.5.0"
 authors = [
  { name="Jonathan Rampersad", email="jonathan@jono-rams.work" },
 ]
--- a/src/polysolve/init.py
+++ b/src/polysolve/init.py
@@ -49,8 +49,6 @@ class GA_Options:
                           Default: 100.0
        num_of_generations (int): The number of iterations the algorithm will run.
                                  Default: 10
-        sample_size (int): The number of top solutions to *return* at the end.
-                           Default: 1000
        data_size (int): The total number of solutions (population size)
                         generated in each generation. Default: 100000
        mutation_strength (float): The percentage (e.g., 0.01 for 1%) by which
@@ -64,16 +62,21 @@ class GA_Options:
        mutation_ratio (float): The percentage (e.g., 0.40 for 40%) of the next
                                generation to be created by mutating solutions
                                from the parent pool. Default: 0.40
+        root_precision (int): The number of decimal places to round roots to
+                              when clustering. A smaller number (e.g., 3)
+                              groups roots more aggressively. A larger number
+                              (e.g., 7) is more precise but may return
+                              multiple near-identical roots. Default: 5
    """
    min_range: float = -100.0
    max_range: float = 100.0
    num_of_generations: int = 10
-    sample_size: int = 1000
    data_size: int = 100000
    mutation_strength: float = 0.01
    elite_ratio: float = 0.05
    crossover_ratio: float = 0.45
    mutation_ratio: float = 0.40
+    root_precision: int = 5

    def __post_init__(self):
        """Validates the GA options after initialization."""
@@ -84,11 +87,6 @@ class GA_Options:
            )
        if any(r < 0 for r in [self.elite_ratio, self.crossover_ratio, self.mutation_ratio]):
            raise ValueError("GA ratios cannot be negative.")
-        if self.data_size < self.sample_size:
-            warnings.warn(
-                f"data_size ({self.data_size}) is less than sample_size ({self.sample_size}). "
-                "The number of returned solutions will be limited to data_size."
-            )

 def _get_cauchy_bound(coeffs: np.ndarray) -> float:
    """
@@ -380,12 +378,26 @@ class Function:
        error = y_calculated - y_val
        with np.errstate(divide='ignore'):
            ranks = np.where(error == 0, np.finfo(float).max, np.abs(1.0 / error))
-        sorted_indices = np.argsort(-ranks)
        
-        # Get the top 'sample_size' solutions the user asked for
-        best_solutions = solutions[sorted_indices][:options.sample_size]
+        # 1. Define quality based on the user's desired precision
+        #    (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
+        #    We add +1 for a buffer, ensuring we only get high-quality roots.
+        quality_threshold = 10**(options.root_precision + 1)
+
+        # 2. Get all solutions that meet this quality threshold
+        high_quality_solutions = solutions[ranks > quality_threshold]
+
+        if high_quality_solutions.size == 0:
+            # No roots found that meet the quality, return empty
+            return np.array([])
        
-        return np.sort(best_solutions)
+        # 3. Cluster these high-quality solutions by rounding
+        rounded_solutions = np.round(high_quality_solutions, options.root_precision)
+
+        # 4. Return only the unique roots
+        unique_roots = np.unique(rounded_solutions)
+        
+        return np.sort(unique_roots)

    def _solve_x_cuda(self, y_val: float, options: GA_Options) -> np.ndarray:
        """Genetic algorithm implementation using CuPy (GPU/CUDA)."""
@@ -490,13 +502,26 @@ class Function:
            (blocks_per_grid,), (threads_per_block,),
            (d_coefficients, d_coefficients.size, d_solutions, d_ranks, d_solutions.size, y_val)
        )
-        sorted_indices = cupy.argsort(-d_ranks)
        
-        # Get the top 'sample_size' solutions
-        d_best_solutions = d_solutions[sorted_indices][:options.sample_size]
+        # 1. Define quality based on the user's desired precision
+        #    (e.g., precision=5 -> rank > 1e6, precision=8 -> rank > 1e9)
+        #    We add +1 for a buffer, ensuring we only get high-quality roots.
+        quality_threshold = 10**(options.root_precision + 1)
+        
+        # 2. Get all solutions that meet this quality threshold
+        d_high_quality_solutions = d_solutions[d_ranks > quality_threshold]

-        # Get the final sample, sort it, and copy back to CPU
-        final_solutions_gpu = cupy.sort(d_best_solutions)
+        if d_high_quality_solutions.size == 0:
+            return np.array([])
+            
+        # 3. Cluster these high-quality solutions on the GPU by rounding
+        d_rounded_solutions = cupy.round(d_high_quality_solutions, options.root_precision)
+        
+        # 4. Get only the unique roots
+        d_unique_roots = cupy.unique(d_rounded_solutions)
+
+        # Sort the unique roots and copy back to CPU
+        final_solutions_gpu = cupy.sort(d_unique_roots)
        return final_solutions_gpu.get()


@@ -692,7 +717,7 @@ if __name__ == '__main__':
    print(f"Analytic roots of f1: {roots_analytic}") # Expected: -1, 2.5

    # 2. Genetic algorithm solution
-    ga_opts = GA_Options(num_of_generations=20, data_size=50000, sample_size=10)
+    ga_opts = GA_Options(num_of_generations=20, data_size=50000)
    print("\nFinding roots with Genetic Algorithm (CPU)...")
    roots_ga_cpu = f1.get_real_roots(ga_opts)
    print(f"Approximate roots from GA (CPU): {roots_ga_cpu}")
--- a/tests/test_polysolve.py
+++ b/tests/test_polysolve.py
@@ -101,7 +101,7 @@ def test_get_real_roots_numpy(quadratic_func):
    Tests that the NumPy-based genetic algorithm approximates the roots correctly.
    """
    # Using more generations for higher accuracy in testing
-    ga_opts = GA_Options(num_of_generations=25, data_size=50000)
+    ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
    
    roots = quadratic_func.get_real_roots(ga_opts, use_cuda=False)
    
@@ -124,7 +124,7 @@ def test_get_real_roots_cuda(quadratic_func):
    It will be skipped automatically if CuPy is not available.
    """
    
-    ga_opts = GA_Options(num_of_generations=25, data_size=50000)
+    ga_opts = GA_Options(num_of_generations=50, data_size=200000, root_precision=3)
    
    roots = quadratic_func.get_real_roots(ga_opts, use_cuda=True)