Added CUDA Acceleration with thrust library

This commit is contained in:
2025-04-09 18:31:27 -04:00
parent 6a6c9c9131
commit 9dc1151385
7 changed files with 138 additions and 284 deletions

View File

@ -68,7 +68,7 @@ PROJECT_LOGO =
# entered, it will be relative to the location where doxygen was started. If
# left blank the current directory will be used.
OUTPUT_DIRECTORY = G:/Dev/Exponential/docs
OUTPUT_DIRECTORY = G:/repos/Exponential/docs
# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096
# sub-directories (in 2 levels) under the output directory of each output format

View File

@ -1,31 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.3.32929.385
VisualStudioVersion = 17.13.35919.96 d17.13
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Exponential", "Exponential\Exponential.vcxproj", "{74C04891-9509-4EA4-BC52-6D86492B203A}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Exponential", "Exponential\Exponential.vcxproj", "{2830B1BE-546C-E378-6C77-93E4E7132BE8}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{74C04891-9509-4EA4-BC52-6D86492B203A}.Debug|x64.ActiveCfg = Debug|x64
{74C04891-9509-4EA4-BC52-6D86492B203A}.Debug|x64.Build.0 = Debug|x64
{74C04891-9509-4EA4-BC52-6D86492B203A}.Debug|x86.ActiveCfg = Debug|Win32
{74C04891-9509-4EA4-BC52-6D86492B203A}.Debug|x86.Build.0 = Debug|Win32
{74C04891-9509-4EA4-BC52-6D86492B203A}.Release|x64.ActiveCfg = Release|x64
{74C04891-9509-4EA4-BC52-6D86492B203A}.Release|x64.Build.0 = Release|x64
{74C04891-9509-4EA4-BC52-6D86492B203A}.Release|x86.ActiveCfg = Release|Win32
{74C04891-9509-4EA4-BC52-6D86492B203A}.Release|x86.Build.0 = Release|Win32
{2830B1BE-546C-E378-6C77-93E4E7132BE8}.Debug|x64.ActiveCfg = Debug|x64
{2830B1BE-546C-E378-6C77-93E4E7132BE8}.Debug|x64.Build.0 = Debug|x64
{2830B1BE-546C-E378-6C77-93E4E7132BE8}.Release|x64.ActiveCfg = Release|x64
{2830B1BE-546C-E378-6C77-93E4E7132BE8}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {C33279A4-898B-46DE-9C0E-87EE72B702AF}
SolutionGuid = {7D6F270A-75F4-460B-A7D5-E224EBA791ED}
EndGlobalSection
EndGlobal

View File

@ -11,6 +11,33 @@
#include <exception>
#include <type_traits>
#ifdef USE_CUDA_ACCELERATION
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reverse.h>
#include <thrust/execution_policy.h>
/**
 * CUDA kernel that scores candidate x values for the genetic-algorithm search.
 *
 * For every candidate x = x_vals[idx] the kernel evaluates
 *     p(x) = sum over i of constants[i] * x^(lrgst_expo - i)
 * (so constants[0] multiplies the highest power), subtracts y_val and stores
 *     ranks[idx] = |1 / (p(x) - y_val)|
 * or DBL_MAX when the difference is exactly zero. A larger rank therefore
 * means a candidate closer to the target.
 *
 * Launch layout: any 1D grid/block configuration is valid. Each thread walks
 * the candidates with a stride equal to the total number of launched threads,
 * so every index in [0, sizeOfSols) is written even when the grid is smaller
 * than the population. No shared memory is used.
 *
 * @param lrgst_expo  Highest exponent of the polynomial.
 * @param constants   Device pointer to the coefficients.
 * @param sizeOfCons  Number of entries readable through `constants`; terms
 *                    whose coefficient index would fall outside the buffer
 *                    are skipped instead of being read out of bounds.
 * @param x_vals      Device pointer to the candidate x values (sizeOfSols entries).
 * @param ranks       Device pointer receiving one score per candidate.
 * @param sizeOfSols  Number of candidates; values <= 0 make the kernel a no-op.
 * @param y_val       Target y value the candidates are scored against.
 */
__global__ void Fitness(int lrgst_expo, int64_t* constants, int sizeOfCons, double* x_vals, double* ranks, int sizeOfSols, double y_val)
{
    const size_t total = (sizeOfSols > 0) ? static_cast<size_t>(sizeOfSols) : 0;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    // Highest coefficient index that is safe to read. Equal to lrgst_expo
    // whenever the buffer holds the expected lrgst_expo + 1 coefficients.
    const int first = (lrgst_expo < sizeOfCons) ? lrgst_expo : sizeOfCons - 1;

    // 64-bit index so blockIdx.x * blockDim.x cannot overflow on large grids.
    for (size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; idx < total; idx += stride)
    {
        const double x = x_vals[idx];
        double ans = 0;
        for (int i = first; i >= 0; i--)
            ans += constants[i] * pow(x, (lrgst_expo - i));
        ans -= y_val;
        // An exact hit would divide by zero; give it the best possible score.
        ranks[idx] = (ans == 0) ? DBL_MAX : fabs(1 / ans);
    }
}
#endif
namespace JRAMPERSAD
{
namespace EXPONENTIAL
@ -66,6 +93,8 @@ namespace JRAMPERSAD
return res;
}
#ifndef USE_CUDA_ACCELERATION
// Genetic Algorithm helper struct
struct GA_Solution
{
@ -86,6 +115,7 @@ namespace JRAMPERSAD
rank = (ans == 0) ? DBL_MAX : ABS(1 / ans);
}
};
#endif
}
using namespace detail;
@ -504,6 +534,7 @@ namespace JRAMPERSAD
return ans;
}
#ifndef USE_CUDA_ACCELERATION
inline std::vector<double> Function::solve_x(const double& y_val, const GA_Options& options) const
{
try
@ -522,12 +553,12 @@ namespace JRAMPERSAD
solutions.resize(options.data_size);
for (unsigned int i = 0; i < options.sample_size; i++)
solutions[i] = (GA_Solution{lrgst_expo, 0, unif(device), y_val});
solutions[i] = (GA_Solution{ lrgst_expo, 0, unif(device), y_val });
for (unsigned int count = 0; count < options.num_of_generations; count++)
{
std::generate(std::execution::par, solutions.begin() + options.sample_size, solutions.end(), [this, &unif, &device, &y_val]() {
return GA_Solution{lrgst_expo, 0, unif(device), y_val};
return GA_Solution{ lrgst_expo, 0, unif(device), y_val };
});
@ -589,6 +620,67 @@ namespace JRAMPERSAD
}
return ans;
}
#else
/**
 * CUDA-accelerated genetic-algorithm search for x values where f(x) ~= y_val.
 *
 * Each generation:
 *   1. slots [sample_size, data_size) of the population are refilled with
 *      fresh random candidates drawn from [min_range, max_range);
 *   2. the whole population is copied to the device and scored by the
 *      `Fitness` kernel (one launched thread per candidate);
 *   3. candidates are sorted so the highest score comes first;
 *   4. unless this was the last generation, the leading `sample_size`
 *      candidates are each multiplied by a random factor in
 *      [1 - mutation_percentage, 1 + mutation_percentage).
 *
 * @param y_val    Target y value.
 * @param options  GA settings (ranges, population sizes, generation count,
 *                 mutation percentage).
 * @return The leading `sample_size` candidates after the final generation.
 *         If `sample_size` exceeds `data_size` it is clamped to `data_size`;
 *         an empty population yields an empty result.
 * @throws std::runtime_error if the kernel launch is rejected by the CUDA
 *         runtime. Thrust containers/algorithms may throw their own
 *         exceptions on allocation or execution failure.
 *
 * NOTE(review): the population size is handed to the kernel as `int`, so
 * populations above INT_MAX are not supported.
 */
inline std::vector<double> Function::solve_x(const double& y_val, const GA_Options& options) const
{
    const size_t population = static_cast<size_t>(options.data_size);
    // Never keep more survivors than there are candidates.
    const size_t survivors = (static_cast<size_t>(options.sample_size) < population)
        ? static_cast<size_t>(options.sample_size)
        : population;

    std::vector<double> ans;
    if (population == 0)
        return ans;

    // Create initial random solutions
    std::random_device device;
    std::uniform_real_distribution<double> unif(options.min_range, options.max_range);

    // Stage the coefficients on the host, then let thrust own the device copy
    // so nothing leaks if a later step throws.
    thrust::host_vector<int64_t> h_cons(constants.size());
    for (size_t i = 0; i < h_cons.size(); i++)
        h_cons[i] = constants[i];
    thrust::device_vector<int64_t> d_cons = h_cons;

    thrust::host_vector<double> xVals(population);
    thrust::device_vector<double> d_xVals(population);
    thrust::device_vector<double> d_ranks(population);

    for (size_t i = 0; i < survivors; i++)
        xVals[i] = unif(device);

    // One thread per candidate: ceil-div so the grid always covers the population.
    const int threadsPerBlock = 256;
    const int blocks = static_cast<int>((population + threadsPerBlock - 1) / threadsPerBlock);

    for (unsigned int count = 0; count < options.num_of_generations; count++)
    {
        // Refill everything behind the survivors with new random candidates.
        for (size_t i = survivors; i < population; i++)
            xVals[i] = unif(device);

        d_xVals = xVals;

        // Kernels take raw device pointers, not thrust containers.
        Fitness<<<blocks, threadsPerBlock>>>(
            lrgst_expo,
            thrust::raw_pointer_cast(d_cons.data()),
            static_cast<int>(d_cons.size()),
            thrust::raw_pointer_cast(d_xVals.data()),
            thrust::raw_pointer_cast(d_ranks.data()),
            static_cast<int>(population),
            y_val);

        // Launch-configuration errors are only reported through cudaGetLastError().
        const cudaError_t launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess)
            throw std::runtime_error(cudaGetErrorString(launchStatus));

        // Ascending sort by score followed by a reverse puts the best candidate first.
        thrust::sort_by_key(thrust::device, d_ranks.begin(), d_ranks.end(), d_xVals.begin());
        thrust::reverse(d_xVals.begin(), d_xVals.end());

        xVals = d_xVals;

        // The final generation is returned unmutated.
        if (count + 1 == options.num_of_generations)
        {
            break;
        }

        // Mutate every survivor, including the last one.
        std::uniform_real_distribution<double> m((1 - options.mutation_percentage), (1 + options.mutation_percentage));
        for (size_t i = 0; i < survivors; i++)
            xVals[i] *= m(device);
    }

    ans.reserve(survivors);
    for (size_t i = 0; i < survivors; i++)
        ans.push_back(xVals[i]);
    return ans;
}
#endif
}
}

View File

@ -1,14 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
@ -18,50 +10,31 @@
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Exponential.cuh" />
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{74c04891-9509-4ea4-bc52-6d86492b203a}</ProjectGuid>
<ProjectGuid>{2830B1BE-546C-E378-6C77-93E4E7132BE8}</ProjectGuid>
<RootNamespace>Exponential</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v143</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.6.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -70,78 +43,47 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<GenerateXMLDocumentationFiles>false</GenerateXMLDocumentationFiles>
<XMLDocumentationFileName>$(SolutionDir)</XMLDocumentationFileName>
<AssemblerOutput>NoListing</AssemblerOutput>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp20</LanguageStandard>
<GenerateXMLDocumentationFiles>false</GenerateXMLDocumentationFiles>
<XMLDocumentationFileName>$(SolutionDir)</XMLDocumentationFileName>
<AssemblerOutput>NoListing</AssemblerOutput>
<PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
<CodeGeneration>compute_89,sm_89</CodeGeneration>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="Exponential.h" />
<ClInclude Include="Timer.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="Source.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.6.targets" />
</ImportGroup>
</Project>

View File

@ -1,30 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Exponential.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Timer.h">
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Source.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -1,102 +0,0 @@
#include <iostream>
#include <chrono>
#include <thread>
#include <mutex>
#include "Exponential.h"
#include "Timer.h"
using namespace JRAMPERSAD;
using EXPONENTIAL::Function;
typedef TIMER::Timer timer;
/**
 * Times func.get_real_roots(options) and prints the elapsed time and the
 * returned values to std::cout.
 *
 * @param m        Mutex serialising access to std::cout between callers.
 * @param func     Function whose roots are approximated.
 * @param options  GA settings forwarded to get_real_roots (taken by value).
 *
 * The mutex is held through scoped guards so it is released even if a stream
 * operation throws; it is NOT held while the roots are being computed.
 */
void CalcRoots(std::mutex& m, const Function& func, EXPONENTIAL::GA_Options options)
{
    {
        std::lock_guard<std::mutex> guard(m);
        std::cout << "Starting calculation...\n";
    }

    timer t;
    auto gr = func.get_real_roots(options);
    t.SetEnd();

    std::lock_guard<std::mutex> guard(m);
    std::cout << "Time took to calculate approx root values: " << t.GetTimeInS() << "s\n";
    std::cout << "Approximate values of x where y = 0 are: \n";
    for (const auto& val : gr)
        std::cout << "x:" << val << '\n';
}
/**
 * Times func.solve_x(y, options) and prints the elapsed time and the
 * returned x values to std::cout.
 *
 * @param m        Mutex serialising access to std::cout between callers.
 * @param func     Function being solved.
 * @param options  GA settings forwarded to solve_x (taken by value).
 * @param y        Target y value.
 *
 * The mutex is held through a scoped guard only while printing, so it is
 * released even if a stream operation throws.
 */
void SolveX(std::mutex& m, const Function& func, EXPONENTIAL::GA_Options options, const double& y)
{
    timer t;
    auto res = func.solve_x(y, options);
    t.SetEnd();

    std::lock_guard<std::mutex> guard(m);
    std::cout << "Time took to calculate approx x values: " << t.GetTimeInS() << "s\n";
    std::cout << "Approximate values of x where y = " << y << " are: \n";
    for (const auto& val : res)
        std::cout << "x:" << val << '\n';
}
// Demo driver: builds two functions, prints the approximate real roots of
// their sum using a deliberately tiny GA configuration, then prints the sum
// and its value at x = 0.13056.
int main()
{
    // Tiny search window and population: this run is a smoke test, not a
    // serious root search.
    EXPONENTIAL::GA_Options options;
    options.min_range = 0.13;
    options.max_range = 0.14;
    options.sample_size = 1;
    options.data_size = 2;
    options.num_of_generations = 1;
    options.mutation_percentage = 0.005;

    std::vector<int64_t> vec{ 1, 5, 4 };
    Function f{2};
    INITIALIZE_EXPO_FUNCTION(f, vec);

    Function g{3};
    INITIALIZE_EXPO_FUNCTION(g, { 1, -6, 11, -6 });

    auto res = (f + g).get_real_roots(options);
    for (const auto& val : res)
        std::cout << "x:" << val << '\n';

    std::cout << (f + g) << " when x = 0.13056\n";
    std::cout << (f + g).solve_y(0.13056);

    std::mutex m;

    // Disabled threaded experiments, kept for reference:
    //std::thread th(CalcRoots, std::ref(m), std::cref(g), options);
    //std::thread th1(SolveX, std::ref(m), std::cref(g), options, 5);
    //std::thread th2(SolveX, std::ref(m), std::cref(g), options, 23);

    //CalcRoots(m, g);

    {
        // The mutex is taken and released around the (currently disabled)
        // console output below.
        std::lock_guard<std::mutex> guard(m);
        //std::cout << g << " when x = 4.961015\n" << "y = " << g.solve_y(4.961015) << "\n\n";
        //std::cout << g << " when x = 4.30891\n" << "y = " << g.solve_y(4.30891) << "\n\n";
        //std::cout << g << " when x = 2\n" << "y = " << g.solve_y(2) << "\n\n";
        //std::cout << g << " when x = 3\n" << "y = " << g.solve_y(3) << "\n\n";
        //std::cout << "Median: " << MATH::MEDIAN(gr) << '\n';
        //std::cout << "Mean: " << MATH::MEAN(gr) << '\n';
        //std::cout << "Calculating Roots for function f(x) = " << g << '\n';
        //std::cout << "The y-intercept of the function f(x) is " << g.solve_y(0) << '\n';
        //std::cout << "dy/dx of f(x) is " << g.differential() << '\n';
        //std::cout << "f(x) = " << f << std::endl;
        //std::cout << "g(x) = " << g << std::endl;
        //std::cout << "f(x) + g(x) = " << f + g << std::endl;
    }

    //th.join();
    //th1.join();
    //th2.join();

    return 0;
}

View File

@ -1,42 +0,0 @@
#pragma once
#include <ostream>
#include <chrono>
namespace TIMER{
    /**
     * Minimal stopwatch built on std::chrono::steady_clock.
     *
     * Construction (or Reset()) records the start point; SetEnd() records the
     * end point and stores the elapsed interval, which the GetTimeIn*()
     * accessors then report. Until SetEnd() has been called the accessors
     * report 0.
     */
    struct Timer
    {
        std::chrono::time_point<std::chrono::steady_clock> start, end;
        // Zero-initialised so reading the elapsed time before SetEnd() is well defined.
        std::chrono::duration<float> duration{ 0.f };

        /// Starts timing immediately.
        Timer()
        {
            Reset();
        }

        ~Timer() = default;

        /// Restarts the stopwatch. The stored duration is left untouched until the next SetEnd().
        inline void Reset() noexcept
        {
            // steady_clock matches the declared time_point type on every
            // standard library (high_resolution_clock may alias system_clock).
            start = std::chrono::steady_clock::now();
        }

        /// Records the end point and updates the stored duration (end - start).
        void SetEnd() noexcept
        {
            end = std::chrono::steady_clock::now();
            duration = end - start;
        }

        /// @return The stored duration in milliseconds.
        inline float GetTimeInMS() const noexcept
        {
            return float(duration.count() * 1000.f);
        }

        /// @return The stored duration in seconds.
        inline float GetTimeInS() const noexcept
        {
            return float(duration.count());
        }
    };
}