diff --git a/.github/workflows/publish_site.yml b/.github/workflows/publish_site.yml
index 21b8cc44..2bc3b185 100644
--- a/.github/workflows/publish_site.yml
+++ b/.github/workflows/publish_site.yml
@@ -12,6 +12,14 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Setup .NET
+ uses: actions/setup-dotnet@v3
+ with:
+ dotnet-version: 8.0.x
+
+ - name: Build Templates
+ run: make build
+
- name: Install Doxygen
run: sudo apt-get install doxygen graphviz -y
shell: bash
@@ -28,4 +36,4 @@ jobs:
uses: JamesIves/github-pages-deploy-action@v4
with:
token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
- folder: docs/html
\ No newline at end of file
+ folder: docs/html
diff --git a/.gitignore b/.gitignore
index 25fb8c99..ac1e66dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@ docs/*
.vscode
*.opencover.xml
*.sln
+AcceleratorHandler.cs
+Gpu.cs
ProcessedREADME.md
# User-specific files
diff --git a/DotMP-Tests/ParallelTests.cs b/DotMP-Tests/CPUTests.cs
similarity index 97%
rename from DotMP-Tests/ParallelTests.cs
rename to DotMP-Tests/CPUTests.cs
index 4f38d367..038ff4ba 100644
--- a/DotMP-Tests/ParallelTests.cs
+++ b/DotMP-Tests/CPUTests.cs
@@ -28,9 +28,9 @@
namespace DotMPTests
{
///
- /// Tests for the DotMP library.
+ /// CPU tests for the DotMP library.
///
- public class ParallelTests
+ public class CPUTests
{
private readonly ITestOutputHelper output;
@@ -38,7 +38,7 @@ public class ParallelTests
/// Constructor to write output.
///
/// Output object.
- public ParallelTests(ITestOutputHelper output)
+ public CPUTests(ITestOutputHelper output)
{
this.output = output;
}
@@ -522,7 +522,7 @@ public void Critical_works()
DotMP.Parallel.ParallelRegion(num_threads: threads, action: () =>
{
for (int i = 0; i < iters; i++)
- DotMP.Parallel.Critical(0, () => ++total);
+ DotMP.Parallel.Critical(() => ++total);
});
total.Should().Be((int)threads * iters);
@@ -531,14 +531,13 @@ public void Critical_works()
DotMP.Parallel.ParallelRegion(num_threads: 4, action: () =>
{
- if (DotMP.Parallel.GetThreadNum() == 0) DotMP.Parallel.Critical(0, () => Thread.Sleep(1000));
- if (DotMP.Parallel.GetThreadNum() == 1) DotMP.Parallel.Critical(1, () => Thread.Sleep(1000));
- if (DotMP.Parallel.GetThreadNum() == 2) DotMP.Parallel.Critical(0, () => Thread.Sleep(1000));
- if (DotMP.Parallel.GetThreadNum() == 3) DotMP.Parallel.Critical(1, () => Thread.Sleep(1000));
+ if (DotMP.Parallel.GetThreadNum() % 2 == 0) DotMP.Parallel.Critical(() => Thread.Sleep(1000));
+ if (DotMP.Parallel.GetThreadNum() % 2 == 1) DotMP.Parallel.Critical(() => Thread.Sleep(1000));
});
double elapsed = DotMP.Parallel.GetWTime() - start;
- elapsed.Should().BeLessThan(2200);
+ elapsed.Should().BeLessThan(2.2);
+ elapsed.Should().BeGreaterThan(2.0);
}
///
@@ -571,7 +570,7 @@ public void Single_works()
{
for (int i = 0; i < 10; i++)
{
- DotMP.Parallel.Single(0, () => DotMP.Atomic.Inc(ref total));
+ DotMP.Parallel.Single(() => DotMP.Atomic.Inc(ref total));
}
});
@@ -583,7 +582,7 @@ public void Single_works()
{
for (int i = 0; i < 10; i++)
{
- DotMP.Parallel.Single(0, () => DotMP.Atomic.Inc(ref total));
+ DotMP.Parallel.Single(() => DotMP.Atomic.Inc(ref total));
}
});
@@ -749,7 +748,7 @@ public void Ordered_works()
DotMP.Parallel.ParallelFor(0, 1024, schedule: DotMP.Schedule.Static,
num_threads: threads, action: i =>
{
- DotMP.Parallel.Ordered(0, () =>
+ DotMP.Parallel.Ordered(() =>
{
incrementing[i] = ctr++;
});
@@ -1111,7 +1110,7 @@ public void Tasking_works()
DotMP.Parallel.ParallelRegion(num_threads: threads, action: () =>
{
- DotMP.Parallel.Single(0, () =>
+ DotMP.Parallel.Single(() =>
{
for (int i = 0; i < threads * 2; i++)
{
@@ -1139,7 +1138,7 @@ public void Tasking_works()
DotMP.Parallel.ParallelRegion(num_threads: threads, action: () =>
{
- DotMP.Parallel.Single(0, () =>
+ DotMP.Parallel.Single(() =>
{
for (int i = 0; i < tasks_to_spawn; i++)
{
@@ -1199,7 +1198,7 @@ public void Nested_tasks_work()
DotMP.Parallel.ParallelRegion(num_threads: threads, action: () =>
{
- DotMP.Parallel.Single(0, () =>
+ DotMP.Parallel.Single(() =>
{
DotMP.Parallel.Task(() =>
{
@@ -1369,7 +1368,7 @@ public void Non_parallel_single_should_except()
{
Assert.Throws(() =>
{
- DotMP.Parallel.Single(0, () => { });
+ DotMP.Parallel.Single(() => { });
});
}
@@ -1381,7 +1380,7 @@ public void Non_parallel_critical_should_except()
{
Assert.Throws(() =>
{
- DotMP.Parallel.Critical(0, () => { });
+ DotMP.Parallel.Critical(() => { });
});
}
@@ -1395,7 +1394,7 @@ public void Nested_worksharing_should_except()
{
DotMP.Parallel.ParallelFor(0, 10, num_threads: 4, action: i =>
{
- DotMP.Parallel.Single(0, () => { });
+ DotMP.Parallel.Single(() => { });
});
});
@@ -1403,7 +1402,7 @@ public void Nested_worksharing_should_except()
{
DotMP.Parallel.ParallelRegion(num_threads: 4, action: () =>
{
- DotMP.Parallel.Single(0, () =>
+ DotMP.Parallel.Single(() =>
{
DotMP.Parallel.For(0, 10, action: i => { });
});
@@ -1427,7 +1426,7 @@ public void Non_for_ordered_should_except()
{
Assert.Throws(() =>
{
- DotMP.Parallel.Ordered(0, () => { });
+ DotMP.Parallel.Ordered(() => { });
});
}
diff --git a/DotMP-Tests/GPUTests.cs b/DotMP-Tests/GPUTests.cs
new file mode 100644
index 00000000..7da2b446
--- /dev/null
+++ b/DotMP-Tests/GPUTests.cs
@@ -0,0 +1,136 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text.Json.Serialization;
+using System.Threading;
+using DotMP;
+using DotMP.GPU;
+using FluentAssertions;
+using Xunit;
+using Xunit.Abstractions;
+
+
+namespace DotMPTests
+{
+ ///
+ /// GPU tests for the DotMP library.
+ ///
+ public class GPUTests
+ {
+ ///
+ /// Tests to make sure that for loops work in GPU kernels.
+ ///
+ [Fact]
+ public void GPU_for_works()
+ {
+ double[] a = new double[50000];
+ double[] x = new double[50000];
+ double[] y = new double[50000];
+ float[] res = new float[50000];
+ float[] res_cpu = new float[50000];
+
+ random_init(a);
+ random_init(x);
+ random_init(y);
+
+ {
+ using var a_gpu = new DotMP.GPU.Buffer<double>(a, DotMP.GPU.Buffer.Behavior.To);
+ using var x_gpu = new DotMP.GPU.Buffer<double>(x, DotMP.GPU.Buffer.Behavior.To);
+ using var y_gpu = new DotMP.GPU.Buffer<double>(y, DotMP.GPU.Buffer.Behavior.To);
+ using var res_gpu = new DotMP.GPU.Buffer<float>(res, DotMP.GPU.Buffer.Behavior.From);
+
+ DotMP.GPU.Parallel.ParallelFor(0, a.Length, a_gpu, x_gpu, y_gpu, res_gpu,
+ (i, a, x, y, res) =>
+ {
+ res[i] = (float)(a[i] * x[i] + y[i]);
+ });
+ }
+
+ for (int i = 0; i < a.Length; i++)
+ {
+ res_cpu[i] = (float)(a[i] * x[i] + y[i]);
+ }
+
+ Assert.Equal(res_cpu, res);
+
+ double[] a_old = a.Select(a => a).ToArray();
+
+ using (var a_gpu = new DotMP.GPU.Buffer<double>(a, DotMP.GPU.Buffer.Behavior.ToFrom))
+ {
+ DotMP.GPU.Parallel.ParallelFor(0, a.Length, a_gpu, (i, a) =>
+ {
+ a[i]++;
+ });
+ }
+
+ for (int i = 0; i < a.Length; i++)
+ {
+ a_old[i]++;
+ }
+
+ Assert.Equal(a, a_old);
+ }
+
+ ///
+ /// Tests to make sure that DotMP.GPU.Parallel.ForCollapse produces correct results.
+ ///
+ [Fact]
+ public void Collapse_works()
+ {
+ int[,] iters_hit = new int[1024, 1024];
+
+ using (var buf = new Buffer<int>(iters_hit, DotMP.GPU.Buffer.Behavior.ToFrom))
+ {
+ DotMP.GPU.Parallel.ParallelForCollapse((258, 512), (512, 600), buf, (i, j, iters_hit) =>
+ {
+ iters_hit[i, j]++;
+ });
+ }
+
+ for (int i = 0; i < 1024; i++)
+ for (int j = 0; j < 1024; j++)
+ if (i >= 258 && i < 512 && j >= 512 && j < 600)
+ iters_hit[i, j].Should().Be(1);
+ else
+ iters_hit[i, j].Should().Be(0);
+
+ iters_hit = null;
+
+ int[,,] iters_hit_3 = new int[128, 128, 64];
+
+ using (var buf = new Buffer<int>(iters_hit_3, DotMP.GPU.Buffer.Behavior.ToFrom))
+ {
+ DotMP.GPU.Parallel.ParallelForCollapse((35, 64), (16, 100), (10, 62), buf, action: (i, j, k, iters_hit_3) =>
+ {
+ iters_hit_3[i, j, k]++;
+ });
+ }
+
+ for (int i = 0; i < 128; i++)
+ for (int j = 0; j < 128; j++)
+ for (int k = 0; k < 64; k++)
+ if (i >= 35 && i < 64 && j >= 16 && j < 100 && k >= 10 && k < 62)
+ iters_hit_3[i, j, k].Should().Be(1);
+ else
+ iters_hit_3[i, j, k].Should().Be(0);
+
+ iters_hit_3 = null;
+ }
+
+ ///
+ /// Randomly initialize an array of type T.
+ ///
+ /// The type to initialize to.
+ /// The allocated array to store values into.
+ private void random_init<T>(T[] arr)
+ {
+ Random r = new Random();
+
+ for (int i = 0; i < arr.Length; i++)
+ {
+ arr[i] = (T)Convert.ChangeType(r.NextDouble() * 128, typeof(T));
+ }
+ }
+ }
+}
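The tests above double as the canonical usage pattern for the new GPU API. As a condensed reference, here is a minimal sketch of that pattern (assuming the `Buffer<T>` and `DotMP.GPU.Parallel.ParallelFor` signatures introduced in this PR; this is an illustration, not additional test code):

```csharp
using System;

class GpuForSketch
{
    static void Main()
    {
        double[] data = new double[50000];

        // Behavior.ToFrom: the array is copied to the accelerator before the kernel
        // runs and copied back into `data` when the buffer is disposed.
        using (var buf = new DotMP.GPU.Buffer<double>(data, DotMP.GPU.Buffer.Behavior.ToFrom))
        {
            DotMP.GPU.Parallel.ParallelFor(0, data.Length, buf, (i, arr) =>
            {
                arr[i] += 1.0;
            });
        }

        Console.WriteLine(data[0]); // prints 1 after the copy-back
    }
}
```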
diff --git a/DotMP/DotMP.csproj b/DotMP/DotMP.csproj
index e8cc69f0..d045a50c 100644
--- a/DotMP/DotMP.csproj
+++ b/DotMP/DotMP.csproj
@@ -4,7 +4,7 @@
net6.0;net7.0;net8.0
DotMP
DotMP
- 1.6.0
+ 2.0-pre1
Phillip Allen Lane,et al.
A library for fork-join parallelism in .NET, with an OpenMP-like API.
https://github.com/computablee/DotMP
@@ -23,4 +23,21 @@
+
+
+
+
+
+ True
+ True
+ GPU/AcceleratorHandler.tt
+
+
+
+ True
+ True
+ GPU/Gpu.tt
+
+
+
diff --git a/DotMP/GPU/AcceleratorHandler.tt b/DotMP/GPU/AcceleratorHandler.tt
new file mode 100644
index 00000000..2527d1ba
--- /dev/null
+++ b/DotMP/GPU/AcceleratorHandler.tt
@@ -0,0 +1,488 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ output extension=".cs" #>
+<# var letters = new char[] { 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'A', 'B', 'C', 'D', 'E', 'F' };
+ int max = 13; #>
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using ILGPU;
+using ILGPU.Runtime;
+
+namespace DotMP.GPU
+{
+ ///
+ /// The handler class managing GPU acceleration.
+ ///
+ internal sealed class AcceleratorHandler
+ {
+ ///
+ /// Determines if a GPU context has been initialized yet.
+ ///
+ private static bool initialized = false;
+ ///
+ /// The GPU context.
+ ///
+ private static Context context;
+ ///
+ /// The accelerator object.
+ ///
+ internal static Accelerator accelerator;
+ ///
+ /// Block size to use for kernels.
+ ///
+ private static int block_size;
+ ///
+ /// Kernel cache.
+ ///
+ private static Dictionary<string, object> kernels = new Dictionary<string, object>();
+ ///
+ /// Index cache for 1D kernels.
+ ///
+ private static Dictionary<string, ValueTuple<int, int, Buffer<int>>> indices1d = new Dictionary<string, ValueTuple<int, int, Buffer<int>>>();
+ ///
+ /// Index cache for 2D kernels.
+ ///
+ private static Dictionary<string, ValueTuple<int, int, int, int, Buffer<int>, Buffer<int>>> indices2d =
+ new Dictionary<string, ValueTuple<int, int, int, int, Buffer<int>, Buffer<int>>>();
+ ///
+ /// Index cache for 3D kernels.
+ ///
+ private static Dictionary<string, ValueTuple<ValueTuple<int, int>, ValueTuple<int, int>, ValueTuple<int, int>, Buffer<int>, Buffer<int>, Buffer<int>>> indices3d =
+ new Dictionary<string, ValueTuple<ValueTuple<int, int>, ValueTuple<int, int>, ValueTuple<int, int>, Buffer<int>, Buffer<int>, Buffer<int>>>();
+
+ ///
+ /// Default constructor. If this is the first time it's called, it initializes all relevant singleton data.
+ ///
+ internal AcceleratorHandler()
+ {
+ if (initialized) return;
+
+ context = Context.Create()
+ .Optimize(OptimizationLevel.O2)
+ .Inlining(InliningMode.Aggressive)
+ .AllAccelerators()
+ //.Math(MathMode.Fast32BitOnly)
+ .ToContext();
+ var selectedDevice = context.Devices[0];
+
+ foreach (var d in context.Devices)
+ {
+ Console.WriteLine("Detected {0} accelerator.", d.ToString());
+
+ if (selectedDevice.AcceleratorType == AcceleratorType.CPU && d.AcceleratorType == AcceleratorType.OpenCL)
+ selectedDevice = d;
+ if (selectedDevice.AcceleratorType != AcceleratorType.Cuda && d.AcceleratorType == AcceleratorType.Cuda)
+ selectedDevice = d;
+ }
+
+ accelerator = selectedDevice.CreateAccelerator(context);
+ //accelerator = context.Devices[0].CreateAccelerator(context);
+
+ Console.WriteLine("Using {0} accelerator.", accelerator.AcceleratorType.ToString());
+ initialized = true;
+ block_size = accelerator.AcceleratorType == AcceleratorType.CPU ? 16 : 256;
+ }
+
+ ///
+ /// Synchronize pending operations.
+ ///
+ private void Synchronize() => accelerator.Synchronize();
+
+<# for (int c = 1; c <= max; c++) { #>
+ ///
+ /// Get the kernel associated with this lambda.
+ ///
+ /// The action provided on the CPU.
+ /// The calling location.
+ /// The GPU kernel.
+ private Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > GetKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >(Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? ", " : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ if (!kernels.ContainsKey(src))
+ kernels.Add(src, accelerator.LoadStreamKernel(action));
+
+ return (Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ >) kernels[src];
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 1; c++) { #>
+ ///
+ /// Get the kernel associated with this lambda.
+ ///
+ /// The action provided on the CPU.
+ /// The calling location.
+ /// The GPU kernel.
+ private Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > GetKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >(Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? ", " : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ if (!kernels.ContainsKey(src))
+ kernels.Add(src, accelerator.LoadStreamKernel(action));
+
+ return (Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ >) kernels[src];
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 2; c++) { #>
+ ///
+ /// Get the kernel associated with this lambda.
+ ///
+ /// The action provided on the CPU.
+ /// The calling location.
+ /// The GPU kernel.
+ private Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > GetKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >(Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? ", " : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ if (!kernels.ContainsKey(src))
+ kernels.Add(src, accelerator.LoadStreamKernel(action));
+
+ return (Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ >) kernels[src];
+ }
+<# } #>
+
+ ///
+ /// Precomputes and caches the indices for a 1D for loop.
+ ///
+ /// The range of the for loop.
+ /// The calling location in the source code.
+ /// The calculated index.
+ internal Index Get1DIdx((int, int) range, string src)
+ {
+ if (indices1d.ContainsKey(src))
+ {
+ var data = indices1d[src];
+ if (data.Item1 == range.Item1 && data.Item2 == range.Item2)
+ return new Index(data.Item3);
+ else
+ data.Item3.Dispose();
+ }
+
+ int[] indices = new int[range.Item2 - range.Item1];
+
+ for (int i = 0; i < indices.Length; i++)
+ indices[i] = i + range.Item1;
+
+ var buf = new Buffer<int>(indices, Buffer.Behavior.To);
+ indices1d[src] = (range.Item1, range.Item2, buf);
+ return new Index(buf);
+ }
+
+ ///
+ /// Precomputes and caches the indices for a 2D for loop.
+ ///
+ /// The outer range of the for loop.
+ /// The inner range of the for loop.
+ /// The calling location in the source code.
+ /// A tuple of calculated indices.
+ internal ValueTuple<Index, Index> Get2DIdx((int, int) range1, (int, int) range2, string src)
+ {
+ if (indices2d.ContainsKey(src))
+ {
+ var data = indices2d[src];
+ if (data.Item1 == range1.Item1 && data.Item2 == range1.Item2 &&
+ data.Item3 == range2.Item1 && data.Item4 == range2.Item2)
+ return (new Index(data.Item5), new Index(data.Item6));
+ else
+ {
+ data.Item5.Dispose();
+ data.Item6.Dispose();
+ }
+ }
+
+ int[] indi = new int[(range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1)];
+ int[] indj = new int[(range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1)];
+
+ int ci = range1.Item1, cj = range2.Item1;
+
+ for (int i = 0; i < indi.Length; i++)
+ {
+ indi[i] = ci;
+ indj[i] = cj;
+
+ if (++cj == range2.Item2)
+ {
+ cj = range2.Item1;
+ ++ci;
+ }
+ }
+
+ var b1 = new Buffer<int>(indi, Buffer.Behavior.To);
+ var b2 = new Buffer<int>(indj, Buffer.Behavior.To);
+ indices2d[src] = (range1.Item1, range1.Item2, range2.Item1, range2.Item2, b1, b2);
+
+ return (new Index(b1), new Index(b2));
+ }
+
+ ///
+ /// Precomputes and caches the indices for a 3D for loop.
+ ///
+ /// The outer range of the for loop.
+ /// The middle range of the for loop.
+ /// The inner range of the for loop.
+ /// The calling location in the source code.
+ /// A tuple of calculated indices.
+ internal ValueTuple<Index, Index, Index> Get3DIdx((int, int) range1, (int, int) range2, (int, int) range3, string src)
+ {
+ if (indices3d.ContainsKey(src))
+ {
+ var data = indices3d[src];
+ if (data.Item1.Item1 == range1.Item1 && data.Item1.Item2 == range1.Item2 &&
+ data.Item2.Item1 == range2.Item1 && data.Item2.Item2 == range2.Item2 &&
+ data.Item3.Item1 == range3.Item1 && data.Item3.Item2 == range3.Item2)
+ return (new Index(data.Item4), new Index(data.Item5), new Index(data.Item6));
+ else
+ {
+ data.Item4.Dispose();
+ data.Item5.Dispose();
+ data.Item6.Dispose();
+ }
+ }
+
+ int[] indi = new int[(range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1) * (range3.Item2 - range3.Item1)];
+ int[] indj = new int[(range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1) * (range3.Item2 - range3.Item1)];
+ int[] indk = new int[(range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1) * (range3.Item2 - range3.Item1)];
+
+ int ci = range1.Item1, cj = range2.Item1, ck = range3.Item1;
+
+ for (int i = 0; i < indi.Length; i++)
+ {
+ indi[i] = ci;
+ indj[i] = cj;
+ indk[i] = ck;
+
+ if (++ck == range3.Item2)
+ {
+ ck = range3.Item1;
+
+ if (++cj == range2.Item2)
+ {
+ cj = range2.Item1;
+ ++ci;
+ }
+ }
+ }
+
+ var b1 = new Buffer<int>(indi, Buffer.Behavior.To);
+ var b2 = new Buffer<int>(indj, Buffer.Behavior.To);
+ var b3 = new Buffer<int>(indk, Buffer.Behavior.To);
+ indices3d[src] = ((range1.Item1, range1.Item2), (range2.Item1, range2.Item2), (range3.Item1, range3.Item2), b1, b2, b3);
+
+ return (new Index(b1), new Index(b2), new Index(b3));
+ }
+
+
+<# for (int c = 1; c <= max; c++) { #>
+ ///
+ /// Dispatches a linear kernel with the given number of parameters.
+ ///
+ /// The range of the for loop.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The originating caller location.
+ internal void DispatchKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >((int, int) range1,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ var idx = Get1DIdx(range1, src);
+ var len = range1.Item2 - range1.Item1;
+
+ var kernel = GetKernel(action, src);
+
+<# for (int i = 0; i < c; i++) { #>
+ var gpu<#= i + 1 #> = new GPUArray<<#= letters[i] #>>(buf<#= i + 1 #>);
+<# } #>
+
+ kernel((len / block_size, block_size), idx
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+
+ int not_done = len % block_size;
+
+ if (not_done > 0)
+ {
+ int offset = len - not_done;
+ idx.AddOffset(offset);
+
+ kernel((1, not_done), idx
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+ }
+
+ Synchronize();
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 1; c++) { #>
+ ///
+ /// Dispatches a 2D kernel with the given number of parameters.
+ ///
+ /// The outer range of the for loop.
+ /// The inner range of the for loop.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The originating caller location.
+ internal void DispatchKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >((int, int) range1, (int, int) range2,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ var len = (range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1);
+ (var i, var j) = Get2DIdx(range1, range2, src);
+
+ var kernel = GetKernel(action, src);
+
+<# for (int i = 0; i < c; i++) { #>
+ var gpu<#= i + 1 #> = new GPUArray<<#= letters[i] #>>(buf<#= i + 1 #>);
+<# } #>
+
+ kernel((len / block_size, block_size), i, j
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+
+ int not_done = len % block_size;
+
+ if (not_done > 0)
+ {
+ int offset = len - not_done;
+ i.AddOffset(offset);
+ j.AddOffset(offset);
+
+ kernel((1, not_done), i, j
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+ }
+
+ Synchronize();
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 2; c++) { #>
+ ///
+ /// Dispatches a 3D kernel with the given number of parameters.
+ ///
+ /// The outer range of the for loop.
+ /// The middle range of the for loop.
+ /// The inner range of the for loop.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The originating caller location.
+ internal void DispatchKernel<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >((int, int) range1, (int, int) range2, (int, int) range3,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, string src)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ var len = (range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1) * (range3.Item2 - range3.Item1);
+ (var i, var j, var k) = Get3DIdx(range1, range2, range3, src);
+
+ var kernel = GetKernel(action, src);
+
+<# for (int i = 0; i < c; i++) { #>
+ var gpu<#= i + 1 #> = new GPUArray<<#= letters[i] #>>(buf<#= i + 1 #>);
+<# } #>
+
+ kernel((len / block_size, block_size), i, j, k
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+
+ int not_done = len % block_size;
+
+ if (not_done > 0)
+ {
+ int offset = len - not_done;
+ i.AddOffset(offset);
+ j.AddOffset(offset);
+ k.AddOffset(offset);
+
+ kernel((1, not_done), i, j, k
+<# for (int i = 0; i < c; i++) { #>
+ , gpu<#= i + 1 #>
+<# } #>
+ );
+ }
+
+ Synchronize();
+ }
+<# } #>
+ }
+}
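A note on the design above: rather than deriving loop indices with division and modulo inside the kernel, Get2DIdx/Get3DIdx precompute the collapsed iteration space into flat lookup buffers that the `Index` struct reads using the global thread ID. A standalone CPU-side sketch of the same flattening (illustrative only, not part of the template):

```csharp
// Mirrors the loop in Get2DIdx: element t of indi/indj is the (i, j) pair that
// the GPU thread with global linear index t will execute.
(int, int) range1 = (258, 512), range2 = (512, 600);
int len = (range1.Item2 - range1.Item1) * (range2.Item2 - range2.Item1);
int[] indi = new int[len], indj = new int[len];

int ci = range1.Item1, cj = range2.Item1;
for (int t = 0; t < len; t++)
{
    indi[t] = ci;
    indj[t] = cj;

    // Advance row-major: wrap the inner index, then bump the outer one.
    if (++cj == range2.Item2)
    {
        cj = range2.Item1;
        ++ci;
    }
}

// Thread 0 maps to (258, 512), thread 88 maps to (259, 512), and the last thread maps to (511, 599).
```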
diff --git a/DotMP/GPU/AssemblyAttributes.cs b/DotMP/GPU/AssemblyAttributes.cs
new file mode 100644
index 00000000..7077a588
--- /dev/null
+++ b/DotMP/GPU/AssemblyAttributes.cs
@@ -0,0 +1,19 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using System.Runtime.CompilerServices;
+
+[assembly: InternalsVisibleTo("ILGPURuntime")]
\ No newline at end of file
diff --git a/DotMP/GPU/Buffer.cs b/DotMP/GPU/Buffer.cs
new file mode 100644
index 00000000..26832163
--- /dev/null
+++ b/DotMP/GPU/Buffer.cs
@@ -0,0 +1,226 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using System;
+using System.Runtime.CompilerServices;
+using ILGPU;
+using ILGPU.Runtime;
+
+namespace DotMP.GPU
+{
+ namespace Buffer
+ {
+ ///
+ /// Specifies the behavior of the buffer.
+ ///
+ public enum Behavior
+ {
+ ///
+ /// Specifies that data should be transferred to the GPU, but not from it.
+ ///
+ To,
+ ///
+ /// Specifies that data should be transferred from the GPU, but not to it.
+ ///
+ From,
+ ///
+ /// Specifies that data should be transferred both to and from the GPU.
+ ///
+ ToFrom,
+ ///
+ /// Specifies that the data shouldn't be transferred to or from the GPU. For internal use.
+ ///
+ NoCopy
+ }
+ }
+
+ ///
+ /// Buffer to manage GPU memory. Should only be created on the CPU.
+ ///
+ public sealed class Buffer<T> : IDisposable
+ where T : unmanaged
+ {
+ ///
+ /// The ILGPU buffer for 1D arrays.
+ ///
+ private MemoryBuffer1D<T, Stride1D.Dense> buf1d;
+
+ ///
+ /// The ILGPU buffer for 2D arrays.
+ ///
+ private MemoryBuffer2D<T, Stride2D.DenseY> buf2d;
+
+ ///
+ /// The ILGPU buffer for 3D arrays.
+ ///
+ private MemoryBuffer3D<T, Stride3D.DenseXY> buf3d;
+
+ ///
+ /// Behavior of the data, as specified by Behavior.
+ ///
+ private Buffer.Behavior behavior;
+
+ ///
+ /// The CPU 1D array, so that we can copy the data back.
+ ///
+ private T[] data1d;
+
+ ///
+ /// The CPU 2D array, so that we can copy the data back.
+ ///
+ private T[,] data2d;
+
+ ///
+ /// The CPU 3D array, so that we can copy the data back.
+ ///
+ private T[,,] data3d;
+
+ ///
+ /// The number of dimensions in the array.
+ ///
+ internal int Dimensions { get; private set; }
+
+ ///
+ /// Constructor for buffer object. Allocates a 1D array on the GPU and makes it available for the next GPU kernel.
+ ///
+ /// The data to allocate on the GPU.
+ /// The behavior of the data, see Behavior.
+ public Buffer(T[] data, Buffer.Behavior behavior)
+ {
+ new AcceleratorHandler();
+
+ this.behavior = behavior;
+ this.data1d = data;
+
+ switch (behavior)
+ {
+ case Buffer.Behavior.To:
+ case Buffer.Behavior.ToFrom:
+ buf1d = AcceleratorHandler.accelerator.Allocate1D(data);
+ break;
+ case Buffer.Behavior.From:
+ case Buffer.Behavior.NoCopy:
+ buf1d = AcceleratorHandler.accelerator.Allocate1D<T>(data.Length);
+ break;
+ }
+
+ Dimensions = 1;
+ }
+
+ ///
+ /// Constructor for buffer object. Allocates a 2D array on the GPU and makes it available for the next GPU kernel.
+ ///
+ /// The data to allocate on the GPU.
+ /// The behavior of the data, see Behavior.
+ public Buffer(T[,] data, Buffer.Behavior behavior)
+ {
+ new AcceleratorHandler();
+
+ this.behavior = behavior;
+ this.data2d = data;
+
+ switch (behavior)
+ {
+ case Buffer.Behavior.To:
+ case Buffer.Behavior.ToFrom:
+ buf2d = AcceleratorHandler.accelerator.Allocate2DDenseY(data);
+ break;
+ case Buffer.Behavior.From:
+ case Buffer.Behavior.NoCopy:
+ buf2d = AcceleratorHandler.accelerator.Allocate2DDenseY<T>((data.GetLength(0), data.GetLength(1)));
+ break;
+ }
+
+ Dimensions = 2;
+ }
+
+ ///
+ /// Constructor for buffer object. Allocates a 3D array on the GPU and makes it available for the next GPU kernel.
+ ///
+ /// The data to allocate on the GPU.
+ /// The behavior of the data, see Behavior.
+ public Buffer(T[,,] data, Buffer.Behavior behavior)
+ {
+ new AcceleratorHandler();
+
+ this.behavior = behavior;
+ this.data3d = data;
+
+ switch (behavior)
+ {
+ case Buffer.Behavior.To:
+ case Buffer.Behavior.ToFrom:
+ buf3d = AcceleratorHandler.accelerator.Allocate3DDenseXY(data);
+ break;
+ case Buffer.Behavior.From:
+ case Buffer.Behavior.NoCopy:
+ buf3d = AcceleratorHandler.accelerator.Allocate3DDenseXY<T>((data.GetLength(0), data.GetLength(1), data.GetLength(2)));
+ break;
+ }
+
+ Dimensions = 3;
+ }
+
+ ///
+ /// Dispose of the buffer, freeing GPU memory and copying any relevant data back to the CPU.
+ ///
+ public void Dispose()
+ {
+ if (Dimensions == 1)
+ {
+ if (behavior == Buffer.Behavior.From || behavior == Buffer.Behavior.ToFrom)
+ {
+ buf1d.GetAsArray1D().CopyTo(data1d, 0);
+ }
+
+ buf1d.Dispose();
+ }
+ else if (Dimensions == 2)
+ {
+ if (behavior == Buffer.Behavior.From || behavior == Buffer.Behavior.ToFrom)
+ {
+ System.Buffer.BlockCopy(buf2d.GetAsArray2D(), 0, data2d, 0, Unsafe.SizeOf<T>() * data2d.Length);
+ }
+
+ buf2d.Dispose();
+ }
+ else if (Dimensions == 3)
+ {
+ if (behavior == Buffer.Behavior.From || behavior == Buffer.Behavior.ToFrom)
+ {
+ System.Buffer.BlockCopy(buf3d.GetAsArray3D(), 0, data3d, 0, Unsafe.SizeOf<T>() * data3d.Length);
+ }
+
+ buf3d.Dispose();
+ }
+ }
+
+ ///
+ /// Get the view of the memory for the GPU.
+ ///
+ internal ArrayView1D<T, Stride1D.Dense> View1D { get => buf1d.View; }
+
+ ///
+ /// Get the view of the memory for the GPU.
+ ///
+ internal ArrayView2D<T, Stride2D.DenseY> View2D { get => buf2d.View; }
+
+ ///
+ /// Get the view of the memory for the GPU.
+ ///
+ internal ArrayView3D<T, Stride3D.DenseXY> View3D { get => buf3d.View; }
+ }
+}
\ No newline at end of file
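The Behavior flag controls the two copy directions independently, and the copy-back for From/ToFrom only happens in Dispose above. A short sketch of the resulting semantics (a usage illustration, assuming the `Buffer<T>` API declared in this file):

```csharp
int[] input = { 1, 2, 3 };
int[] output = new int[3];

// To: `input` is uploaded before the kernel runs; nothing is copied back.
// From: `output` is only allocated on the device; Dispose copies the device
//       contents back into the CPU array.
using (var in_buf = new DotMP.GPU.Buffer<int>(input, DotMP.GPU.Buffer.Behavior.To))
using (var out_buf = new DotMP.GPU.Buffer<int>(output, DotMP.GPU.Buffer.Behavior.From))
{
    DotMP.GPU.Parallel.ParallelFor(0, input.Length, in_buf, out_buf,
        (i, src, dst) => dst[i] = src[i] * 2);
}

// Only now, after both buffers are disposed, does `output` hold { 2, 4, 6 }.
```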
diff --git a/DotMP/GPU/Exceptions.cs b/DotMP/GPU/Exceptions.cs
new file mode 100644
index 00000000..4705041b
--- /dev/null
+++ b/DotMP/GPU/Exceptions.cs
@@ -0,0 +1,21 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using System;
+
+namespace DotMP.GPU
+{
+}
\ No newline at end of file
diff --git a/DotMP/GPU/Gpu.tt b/DotMP/GPU/Gpu.tt
new file mode 100644
index 00000000..6cf2d841
--- /dev/null
+++ b/DotMP/GPU/Gpu.tt
@@ -0,0 +1,140 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ output extension=".cs" #>
+<# var letters = new char[] { 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'A', 'B', 'C', 'D', 'E', 'F' };
+ int max = 13; #>
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace DotMP.GPU
+{
+ ///
+ /// The main class of DotMP's GPU API, powered by the ILGPU project.
+ /// Contains all the main methods for constructing and running GPU kernels.
+ /// The GPU API is not thread-safe at the current moment, so its methods should not be called from within a Parallel.ParallelRegion!
+ ///
+ public static class Parallel
+ {
+ ///
+ /// Formats the caller information for determining uniqueness of a call.
+ ///
+ /// The calling file.
+ /// The calling line number.
+ /// A formatted string representing "{filename}:{linenum}"
+ private static string FormatCaller(string filename, int linenum)
+ {
+ return string.Format("{0}:{1}", filename, linenum);
+ }
+
+<# for (int c = 1; c <= max; c++) { #>
+ ///
+ /// Creates a GPU parallel for loop.
+ /// The body of the kernel is run on a GPU target.
+ ///
+ /// The start of the loop, inclusive.
+ /// The end of the loop, exclusive.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ public static void ParallelFor<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >(int start, int end,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+<# for (int i = 0; i < c; i++) { #> where <#= letters[i] #> : unmanaged <# } #>
+ {
+ var handler = new AcceleratorHandler();
+ string src = FormatCaller(path, line);
+ handler.DispatchKernel((start, end),
+<# for (int i = 0; i < c; i++) { #> buf<#= i + 1 #>, <# } #>
+ action, src);
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 1; c++) { #>
+ ///
+ /// Creates a collapsed GPU parallel for loop.
+ /// The body of the kernel is run on a GPU target.
+ ///
+ /// The range of the outer for loop.
+ /// The range of the inner for loop.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ public static void ParallelForCollapse<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >((int, int) range1, (int, int) range2,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ var handler = new AcceleratorHandler();
+ string src = FormatCaller(path, line);
+ handler.DispatchKernel(range1, range2,
+<# for (int i = 0; i < c; i++) { #> buf<#= i + 1 #>, <# } #>
+ action, src);
+ }
+<# } #>
+
+<# for (int c = 1; c <= max - 2; c++) { #>
+ ///
+ /// Creates a collapsed GPU parallel for loop.
+ /// The body of the kernel is run on a GPU target.
+ ///
+ /// The range of the outer for loop.
+ /// The range of the middle for loop.
+ /// The range of the inner for loop.
+<# for (int i = 0; i < c; i++) { #>
+ /// Buffer #<#= i + 1 #> to run the kernel with.
+<# } #>
+ /// The kernel to run on the GPU.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ public static void ParallelForCollapse<
+<# for (int i = 0; i < c; i++) { #> <#= letters[i] + ((i != c - 1) ? "," : "") #> <# } #>
+ >((int, int) range1, (int, int) range2, (int, int) range3,
+<# for (int i = 0; i < c; i++) { #> Buffer<<#= letters[i] #>> buf<#= i + 1 #>, <# } #>
+ Action GPUArray<<#= letters[i] #>><#= (i != c - 1) ? "," : "" #> <# } #>
+ > action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+<# for (int i = 0; i < c; i++) { #>
+ where <#= letters[i] #> : unmanaged
+<# } #>
+ {
+ var handler = new AcceleratorHandler();
+ string src = FormatCaller(path, line);
+ handler.DispatchKernel(range1, range2, range3,
+<# for (int i = 0; i < c; i++) { #> buf<#= i + 1 #>, <# } #>
+ action, src);
+ }
+<# } #>
+ }
+}
diff --git a/DotMP/GPU/GpuArray.cs b/DotMP/GPU/GpuArray.cs
new file mode 100644
index 00000000..036fe1a4
--- /dev/null
+++ b/DotMP/GPU/GpuArray.cs
@@ -0,0 +1,141 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using ILGPU;
+using ILGPU.IR.Values;
+using ILGPU.Runtime;
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace DotMP.GPU
+{
+ ///
+ /// Wrapper object for representing arrays on the GPU.
+ ///
+ ///
+ [ExcludeFromCodeCoverage]
+ public struct GPUArray<T>
+ where T : unmanaged
+ {
+ ///
+ /// The ILGPU view for 1D arrays.
+ ///
+ private ArrayView1D<T, Stride1D.Dense> view1d;
+
+ ///
+ /// The ILGPU view for 2D arrays.
+ ///
+ private ArrayView2D<T, Stride2D.DenseY> view2d;
+
+ ///
+ /// The ILGPU view for 3D arrays.
+ ///
+ private ArrayView3D<T, Stride3D.DenseXY> view3d;
+
+ ///
+ /// Number of dimensions.
+ ///
+ private int dims;
+
+ ///
+ /// Constructor.
+ ///
+ /// The Buffer to create an array from.
+ internal GPUArray(Buffer<T> buf)
+ {
+ switch (buf.Dimensions)
+ {
+ default:
+ case 1:
+ view1d = buf.View1D;
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view2d = new Buffer<T>(new T[1, 1], Buffer.Behavior.NoCopy).View2D;
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view3d = new Buffer<T>(new T[1, 1, 1], Buffer.Behavior.NoCopy).View3D;
+ break;
+ case 2:
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view1d = new Buffer<T>(new T[1], Buffer.Behavior.NoCopy).View1D;
+ view2d = buf.View2D;
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view3d = new Buffer<T>(new T[1, 1, 1], Buffer.Behavior.NoCopy).View3D;
+ break;
+ case 3:
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view1d = new Buffer<T>(new T[1], Buffer.Behavior.NoCopy).View1D;
+ // BAND-AID FIX: Cannot use empty ArrayViews on OpenCL devices.
+ view2d = new Buffer<T>(new T[1, 1], Buffer.Behavior.NoCopy).View2D;
+ view3d = buf.View3D;
+ break;
+ }
+
+ dims = buf.Dimensions;
+ }
+
+ ///
+ /// Overload for [] operator.
+ ///
+ /// The ID to index into.
+ /// The data at that ID.
+ public ref T this[int idx]
+ {
+ get => ref view1d[idx];
+ }
+
+ ///
+ /// Overload for [,] operator.
+ ///
+ /// The first ID to index into.
+ /// The second ID to index into.
+ /// The data at that ID.
+ public ref T this[int i, int j]
+ {
+ get => ref view2d[i, j];
+ }
+
+ ///
+ /// Overload for [,,] operator.
+ ///
+ /// The first ID to index into.
+ /// The second ID to index into.
+ /// The third ID to index into.
+ /// The data at that ID.
+ public ref T this[int i, int j, int k]
+ {
+ get => ref view3d[i, j, k];
+ }
+
+ ///
+ /// Gets the length of the array.
+ ///
+ public int Length
+ {
+ get
+ {
+ switch (dims)
+ {
+ case 1:
+ default:
+ return view1d.IntLength;
+ case 2:
+ return view2d.IntLength;
+ case 3:
+ return view3d.IntLength;
+ }
+ }
+ }
+ }
+}
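The multidimensional indexers above are what let collapsed kernels address 2D and 3D buffers directly; the heat-transfer benchmark added later in this PR relies on exactly this. A trimmed sketch of the pattern (dimensions chosen arbitrarily for illustration):

```csharp
double[,] grid = new double[512, 512];
double[,] scratch = new double[512, 512];

using (var g = new DotMP.GPU.Buffer<double>(grid, DotMP.GPU.Buffer.Behavior.ToFrom))
using (var s = new DotMP.GPU.Buffer<double>(scratch, DotMP.GPU.Buffer.Behavior.NoCopy))
{
    // Each (i, j) instance averages its four neighbours through the [,] indexer.
    DotMP.GPU.Parallel.ParallelForCollapse((1, 511), (1, 511), g, s,
        (i, j, grid_d, scratch_d) =>
        {
            scratch_d[i, j] = 0.25 * (grid_d[i - 1, j] + grid_d[i + 1, j]
                                    + grid_d[i, j - 1] + grid_d[i, j + 1]);
        });
}
```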
diff --git a/DotMP/GPU/Index.cs b/DotMP/GPU/Index.cs
new file mode 100644
index 00000000..8de4dc3e
--- /dev/null
+++ b/DotMP/GPU/Index.cs
@@ -0,0 +1,78 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using ILGPU;
+using ILGPU.Runtime;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Xml;
+
+namespace DotMP.GPU
+{
+ ///
+ /// Represents an index passed as the first index argument.
+ ///
+ [ExcludeFromCodeCoverage]
+ public struct Index
+ {
+ ///
+ /// Lookup table for indices.
+ ///
+ private ArrayView1D<int, Stride1D.Dense> lookup;
+ ///
+ /// Offset for followup kernels.
+ ///
+ private int offset;
+ ///
+ /// Cached index.
+ ///
+ private int idx;
+
+ ///
+ /// Constructor.
+ ///
+ /// Buffer representing the indices.
+ internal Index(Buffer<int> buf)
+ {
+ this.lookup = buf.View1D;
+ offset = 0;
+ idx = -1;
+ }
+
+ ///
+ /// Adds an offset in preparation for a follow-up kernel.
+ ///
+ /// The offset to set.
+ internal void AddOffset(int offset)
+ {
+ this.offset = offset;
+ }
+
+ ///
+ /// Calculates the index and caches for future use.
+ ///
+ /// The Index object to cast to int.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static implicit operator int(Index i)
+ {
+ if (i.idx == -1)
+ i.idx = i.lookup[Grid.GlobalLinearIndex + i.offset];
+
+ return i.idx;
+ }
+ }
+}
diff --git a/DotMP/Parallel.cs b/DotMP/Parallel.cs
index a7f4f8c9..a64ae99f 100644
--- a/DotMP/Parallel.cs
+++ b/DotMP/Parallel.cs
@@ -16,31 +16,34 @@
using System;
using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.ComponentModel;
using System.Threading;
using DotMP.Exceptions;
using DotMP.Schedulers;
+using System.Diagnostics.CodeAnalysis;
namespace DotMP
{
///
/// The main class of DotMP.
/// Contains all the main methods for parallelism.
- /// For users, this is the main class you want to worry about, along with Lock, Shared, and Atomic
+ /// For users, this is the main class you want to worry about, along with Lock, Shared, Atomic, and GPU.
///
public static class Parallel
{
///
/// The dictionary for critical regions.
///
- private static volatile Dictionary<int, object> critical_lock = new Dictionary<int, object>();
+ private static volatile Dictionary<string, object> critical_lock = new Dictionary<string, object>();
///
/// The dictionary for single regions.
///
- private static volatile HashSet<int> single_thread = new HashSet<int>();
+ private static volatile HashSet<string> single_thread = new HashSet<string>();
///
/// The dictionary for ordered regions.
///
- private static volatile Dictionary<int, int> ordered = new Dictionary<int, int>();
+ private static volatile Dictionary<string, int> ordered = new Dictionary<string, int>();
///
/// Barrier object for DotMP.Parallel.Barrier()
///
@@ -174,6 +177,17 @@ private static void ValidateParams(int start = 0, int end = 0, IScheduler schedu
throw new InvalidArgumentsException(string.Format("Chunk size must be specified with user-defined schedulers, as it cannot be inferred."));
}
+ ///
+ /// Formats the caller information for determining uniqueness of a call.
+ ///
+ /// The calling file.
+ /// The calling line number.
+ /// A formatted string representing "{filename}:{linenum}"
+ private static string FormatCaller(string filename, int linenum)
+ {
+ return string.Format("{0}:{1}", filename, linenum);
+ }
+
///
/// Creates a for loop inside a parallel region.
/// A for loop created with For inside of a parallel region is executed in parallel, with iterations being distributed among the threads, and potentially out-of-order.
@@ -1096,11 +1110,16 @@ public static void ParallelSections(uint? num_threads = null, params Action[] ac
/// Creates a critical region.
/// A critical region is a region of code that can only be executed by one thread at a time.
/// If a thread encounters a critical region while another thread is inside a critical region, it will wait until the other thread is finished.
+ ///
+ /// THIS METHOD IS NOW DEPRECATED.
///
/// The ID of the critical region. Must be unique per region but consistent across all threads.
/// The action to be performed in the critical region.
/// The ID of the critical region.
/// Thrown when not in a parallel region.
+ [Obsolete("This version of Critical is deprecated. Omit the id parameter for the updated version. This overload will be removed in a future release.")]
+ [EditorBrowsable(EditorBrowsableState.Never)]
+ [ExcludeFromCodeCoverage]
public static int Critical(int id, Action action)
{
if (!InParallel())
@@ -1110,6 +1129,45 @@ public static int Critical(int id, Action action)
object lock_obj;
+ lock (critical_lock)
+ {
+ if (!critical_lock.ContainsKey(id.ToString()))
+ {
+ critical_lock.Add(id.ToString(), new object());
+ }
+
+ lock_obj = critical_lock[id.ToString()];
+ }
+
+ lock (lock_obj)
+ {
+ action();
+ }
+
+ return id;
+ }
+
+ ///
+ /// Creates a critical region.
+ /// A critical region is a region of code that can only be executed by one thread at a time.
+ /// If a thread encounters a critical region while another thread is inside a critical region, it will wait until the other thread is finished.
+ ///
+ /// The action to be performed in the critical region.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ /// The ID of the critical region.
+ /// Thrown when not in a parallel region.
+ public static void Critical(Action action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+ {
+ string id = FormatCaller(path, line);
+
+ if (!InParallel())
+ {
+ throw new NotInParallelRegionException("Cannot use DotMP Critical outside of a parallel region.");
+ }
+
+ object lock_obj;
+
lock (critical_lock)
{
if (!critical_lock.ContainsKey(id))
@@ -1124,8 +1182,6 @@ public static int Critical(int id, Action action)
{
action();
}
-
- return id;
}
///
@@ -1180,11 +1236,16 @@ public static void Master(Action action)
/// Creates a single region.
/// A single region is only executed once per Parallel.ParallelRegion.
/// The first thread to encounter the single region marks the region as encountered, then executes it.
+ ///
+ /// THIS METHOD IS NOW DEPRECATED.
///
/// The ID of the single region. Must be unique per region but consistent across all threads.
/// The action to be performed in the single region.
/// Thrown when not in a parallel region.
/// Thrown when nested inside another worksharing region.
+ [Obsolete("This version of Single is deprecated. Omit the id parameter for the updated version. This overload will be removed in a future release.")]
+ [EditorBrowsable(EditorBrowsableState.Never)]
+ [ExcludeFromCodeCoverage]
public static void Single(int id, Action action)
{
var freg = new ForkedRegion();
@@ -1204,6 +1265,55 @@ public static void Single(int id, Action action)
Interlocked.Increment(ref freg.in_workshare);
+ lock (single_thread)
+ {
+ if (!single_thread.Contains(id.ToString()))
+ {
+ single_thread.Add(id.ToString());
+ new_single = true;
+ }
+ }
+
+ if (new_single)
+ {
+ action();
+ }
+
+ Interlocked.Decrement(ref freg.in_workshare);
+
+ Barrier();
+ }
+
+ ///
+ /// Creates a single region.
+ /// A single region is only executed once per Parallel.ParallelRegion.
+ /// The first thread to encounter the single region marks the region as encountered, then executes it.
+ ///
+ /// The action to be performed in the single region.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ /// Thrown when not in a parallel region.
+ /// Thrown when nested inside another worksharing region.
+ public static void Single(Action action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+ {
+ string id = FormatCaller(path, line);
+ var freg = new ForkedRegion();
+ bool new_single = false;
+
+ if (!freg.in_parallel)
+ {
+ throw new NotInParallelRegionException("Cannot use DotMP Single outside of a parallel region.");
+ }
+
+ var ws = new WorkShare();
+
+ if (ws.in_for)
+ {
+ throw new CannotPerformNestedWorksharingException("Cannot use DotMP Single nested within other worksharing constructs.");
+ }
+
+ Interlocked.Increment(ref freg.in_workshare);
+
lock (single_thread)
{
if (!single_thread.Contains(id))
@@ -1227,10 +1337,15 @@ public static void Single(int id, Action action)
/// Creates an ordered region.
/// An ordered region is a region of code that is executed in order inside of a For() or ForReduction<T>() loop.
/// This also acts as an implicit Critical() region.
+ ///
+ /// THIS METHOD IS NOW DEPRECATED.
///
/// The ID of the ordered region. Must be unique per region but consistent across all threads.
/// The action to be performed in the ordered region.
/// Thrown when not in a parallel region.
+ [Obsolete("This version of Ordered is deprecated. Omit the id parameter for the updated version. This overload will be removed in a future release.")]
+ [EditorBrowsable(EditorBrowsableState.Never)]
+ [ExcludeFromCodeCoverage]
public static void Ordered(int id, Action action)
{
var freg = new ForkedRegion();
@@ -1240,6 +1355,46 @@ public static void Ordered(int id, Action action)
throw new NotInParallelRegionException("Cannot use DotMP Ordered outside of a parallel region.");
}
+ lock (ordered)
+ {
+ if (!ordered.ContainsKey(id.ToString()))
+ {
+ ordered.Add(id.ToString(), 0);
+ }
+ Thread.MemoryBarrier();
+ }
+
+ WorkShare ws = new WorkShare();
+
+ while (ordered[id.ToString()] != ws.working_iter) ;
+
+ action();
+
+ lock (ordered)
+ {
+ ordered[id.ToString()]++;
+ }
+ }
+
+ ///
+ /// Creates an ordered region.
+ /// An ordered region is a region of code that is executed in order inside of a For() or ForReduction<T>() loop.
+ /// This also acts as an implicit Critical() region.
+ ///
+ /// The action to be performed in the ordered region.
+ /// The line number this method was called from.
+ /// The path to the file this method was called from.
+ /// Thrown when not in a parallel region.
+ public static void Ordered(Action action, [CallerFilePath] string path = "", [CallerLineNumber] int line = 0)
+ {
+ string id = FormatCaller(path, line);
+ var freg = new ForkedRegion();
+
+ if (!freg.in_parallel)
+ {
+ throw new NotInParallelRegionException("Cannot use DotMP Ordered outside of a parallel region.");
+ }
+
lock (ordered)
{
if (!ordered.ContainsKey(id))
@@ -1270,7 +1425,7 @@ public static int GetNumThreads()
{
var freg = new ForkedRegion();
- return (freg.reg is not null)
+ return freg.in_parallel
? (int)freg.reg.num_threads
: 1;
}
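For callers, the upshot of these Parallel.cs changes is that the integer `id` parameter on Critical/Single/Ordered is obsolete; region identity now comes from [CallerFilePath]/[CallerLineNumber], so distinct call sites are automatically distinct regions. The migration is mechanical, as the test changes earlier in this diff show; a minimal before/after sketch (assuming `ParallelRegion` can be called with only the `action` argument):

```csharp
using System;

int counter = 0;

// 1.x style (still compiles, but the id overloads are now marked [Obsolete]):
DotMP.Parallel.ParallelRegion(action: () =>
{
    DotMP.Parallel.Critical(0, () => counter++);
    DotMP.Parallel.Single(1, () => Console.WriteLine("runs once"));
});

// 2.0 style: drop the id; each source location identifies its own region.
DotMP.Parallel.ParallelRegion(action: () =>
{
    DotMP.Parallel.Critical(() => counter++);
    DotMP.Parallel.Single(() => Console.WriteLine("runs once"));
});
```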
diff --git a/benchmarks/GPUHeatTransfer/GPUHeatTransfer.csproj b/benchmarks/GPUHeatTransfer/GPUHeatTransfer.csproj
new file mode 100644
index 00000000..9cf0a6f0
--- /dev/null
+++ b/benchmarks/GPUHeatTransfer/GPUHeatTransfer.csproj
@@ -0,0 +1,18 @@
+
+
+
+ Exe
+ net6.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/benchmarks/GPUHeatTransfer/Program.cs b/benchmarks/GPUHeatTransfer/Program.cs
new file mode 100644
index 00000000..75d0747f
--- /dev/null
+++ b/benchmarks/GPUHeatTransfer/Program.cs
@@ -0,0 +1,307 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Running;
+using BenchmarkDotNet.Diagnosers;
+
+/* jscpd:ignore-start */
+
+[SimpleJob(RuntimeMoniker.Net60)]
+[ThreadingDiagnoser]
+[HardwareCounters]
+[EventPipeProfiler(EventPipeProfile.CpuSampling)]
+// test heat transfer using Parallel.For
+public class HeatTransfer
+{
+ // scratch array
+ private double[,] scratch = new double[0, 0];
+ // grid array
+ private double[,] grid = new double[0, 0];
+
+ // parallel type enum
+ public enum ParType { DMPFor, DMPGPU }
+
+ // test dims of 100x100, 1000x1000, and 5000x5000
+ [Params(768)]
+ public int dim;
+
+ // test with 10 steps and 100 steps
+ [Params(100)]
+ public int steps;
+
+ // test with all 3 parallel types
+ [Params(ParType.DMPFor, ParType.DMPGPU)]
+ public ParType type;
+
+ // change this to configure the number of threads to use
+ public uint num_threads = 6;
+
+ // buffer for grid
+ private DotMP.GPU.Buffer<double> gridbuf;
+
+ // buffer for scratch
+ private DotMP.GPU.Buffer<double> scratchbuf;
+
+ // run the setup
+ [GlobalSetup]
+ public void Setup()
+ {
+ scratch = new double[dim, dim];
+ grid = new double[dim, dim];
+
+ for (int i = 0; i < dim; i++)
+ {
+ grid[0, i] = 100.0;
+ grid[i, 0] = 100.0;
+ grid[dim - 1, i] = 100.0;
+ grid[i, dim - 1] = 100.0;
+ }
+
+ if (type == ParType.DMPGPU)
+ {
+ gridbuf = new DotMP.GPU.Buffer<double>(grid, DotMP.GPU.Buffer.Behavior.ToFrom);
+ scratchbuf = new DotMP.GPU.Buffer<double>(scratch, DotMP.GPU.Buffer.Behavior.NoCopy);
+ }
+ }
+
+ //run the simulation
+ [Benchmark]
+ public void DoSimulation()
+ {
+ Action action = () =>
+ {
+ //do the steps
+ for (int i = 0; i < steps; i++)
+ {
+ DoStep();
+ }
+ };
+
+ if (type == ParType.DMPGPU)
+ {
+ action();
+ //gridbuf.Dispose();
+ //scratchbuf.Dispose();
+ }
+ else
+ {
+ // spawn a parallel region
+ DotMP.Parallel.ParallelRegion(num_threads: num_threads, action: action);
+ }
+ }
+
+ //do a step of the heat transfer simulation
+ public void DoStep()
+ {
+ switch (type)
+ {
+ case ParType.DMPFor:
+ //iterate over all cells not on the border
+ DotMP.Parallel.For(1, dim - 1, schedule: DotMP.Schedule.Guided, action: i =>
+ {
+ for (int j = 1; j < dim - 1; j++)
+ {
+ //set the scratch array to the average of the surrounding cells
+ scratch[i, j] = 0.25 * (grid[i - 1, j] + grid[i + 1, j] + grid[i, j - 1] + grid[i, j + 1]);
+ }
+ });
+
+ //copy the scratch array to the grid array
+ DotMP.Parallel.For(1, dim - 1, schedule: DotMP.Schedule.Guided, action: i =>
+ {
+ for (int j = 1; j < dim - 1; j++)
+ {
+ grid[i, j] = scratch[i, j];
+ }
+ });
+ break;
+
+ case ParType.DMPGPU:
+ DotMP.GPU.Parallel.ParallelForCollapse((1, dim - 1), (1, dim - 1), gridbuf, scratchbuf, (i, j, grid, scratch) =>
+ {
+ //set the scratch array to the average of the surrounding cells
+ scratch[i, j] = 0.25 * (grid[i - 1, j] + grid[i + 1, j] + grid[i, j - 1] + grid[i, j + 1]);
+ });
+
+ DotMP.GPU.Parallel.ParallelForCollapse((1, dim - 1), (1, dim - 1), gridbuf, scratchbuf, (i, j, grid, scratch) =>
+ {
+ grid[i, j] = scratch[i, j];
+ });
+ break;
+ }
+ }
+}
+
+// test heat transfer using Parallel.For
+public class HeatTransferVerify
+{
+ // scratch array
+ private double[,] scratch = new double[0, 0];
+ // grid array
+ private double[,] grid = new double[0, 0];
+
+ // parallel type enum
+ public enum ParType { DMPFor, DMPGPU }
+
+ // test dims of 100x100, 1000x1000, and 5000x5000
+ public int dim = 1000;
+
+ // test with 10 steps and 100 steps
+ public int steps = 100;
+
+ // test with all 3 parallel types
+ public ParType type = ParType.DMPFor;
+
+ // change this to configure the number of threads to use
+ public uint num_threads = 6;
+
+ // buffer for grid
+ private DotMP.GPU.Buffer<double> gridbuf;
+
+ // buffer for scratch
+ private DotMP.GPU.Buffer<double> scratchbuf;
+
+ // run the setup
+ public void Setup()
+ {
+ scratch = new double[dim, dim];
+ grid = new double[dim, dim];
+
+ for (int i = 0; i < dim; i++)
+ {
+ grid[0, i] = 100.0;
+ grid[i, 0] = 100.0;
+ grid[dim - 1, i] = 100.0;
+ grid[i, dim - 1] = 100.0;
+ }
+
+ if (type == ParType.DMPGPU)
+ {
+ gridbuf = new DotMP.GPU.Buffer<double>(grid, DotMP.GPU.Buffer.Behavior.ToFrom);
+ scratchbuf = new DotMP.GPU.Buffer<double>(scratch, DotMP.GPU.Buffer.Behavior.NoCopy);
+ }
+ }
+
+ //run the simulation
+ public void DoSimulation()
+ {
+ Action action = () =>
+ {
+ //do the steps
+ for (int i = 0; i < steps; i++)
+ {
+ DoStep();
+ }
+ };
+
+ if (type == ParType.DMPGPU)
+ {
+ action();
+ gridbuf.Dispose();
+ scratchbuf.Dispose();
+ }
+ else
+ {
+ // spawn a parallel region
+ DotMP.Parallel.ParallelRegion(num_threads: num_threads, action: action);
+ }
+ }
+
+ //do a step of the heat transfer simulation
+ public void DoStep()
+ {
+ switch (type)
+ {
+ case ParType.DMPFor:
+ //iterate over all cells not on the border
+ DotMP.Parallel.For(1, dim - 1, schedule: DotMP.Schedule.Guided, action: i =>
+ {
+ for (int j = 1; j < dim - 1; j++)
+ {
+ //set the scratch array to the average of the surrounding cells
+ scratch[i, j] = 0.25 * (grid[i - 1, j] + grid[i + 1, j] + grid[i, j - 1] + grid[i, j + 1]);
+ }
+ });
+
+ //copy the scratch array to the grid array
+ DotMP.Parallel.For(1, dim - 1, schedule: DotMP.Schedule.Guided, action: i =>
+ {
+ for (int j = 1; j < dim - 1; j++)
+ {
+ grid[i, j] = scratch[i, j];
+ }
+ });
+ break;
+
+ case ParType.DMPGPU:
+ DotMP.GPU.Parallel.ParallelForCollapse((1, dim - 1), (1, dim - 1), gridbuf, scratchbuf, (i, j, grid, scratch) =>
+ {
+ //set the scratch array to the average of the surrounding cells
+ scratch[i, j] = 0.25 * (grid[i - 1, j] + grid[i + 1, j] + grid[i, j - 1] + grid[i, j + 1]);
+ });
+
+ DotMP.GPU.Parallel.ParallelForCollapse((1, dim - 1), (1, dim - 1), gridbuf, scratchbuf, (i, j, grid, scratch) =>
+ {
+ grid[i, j] = scratch[i, j];
+ });
+ break;
+ }
+ }
+
+ public void Verify()
+ {
+ type = ParType.DMPFor;
+ Setup();
+ DoSimulation();
+ double[,] gridA = grid;
+
+ type = ParType.DMPGPU;
+ Setup();
+ DoSimulation();
+ double[,] gridB = grid;
+
+ bool wrong = false;
+
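+ // note: exact floating-point equality is used here; CPU and GPU results can differ in the last bits in general, so a tolerance-based comparison may be needed if this reports mismatches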
+ for (int i = 0; i < dim; i++)
+ for (int j = 0; j < dim; j++)
+ if (gridA[i, j] != gridB[i, j])
+ {
+ wrong = true;
+ Console.WriteLine("Wrong at ({0}, {1}), expected {2}, got {3}.", i, j, gridA[i, j], gridB[i, j]);
+ }
+
+ if (wrong)
+ Console.WriteLine("WRONG RESULT");
+ else
+ Console.WriteLine("RIGHT RESULT");
+ }
+}
+
+/* jscpd:ignore-end */
+
+// driver
+public class Program
+{
+ public static void Main(string[] args)
+ {
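+ // pass "verify" as the first argument (e.g. "dotnet run -c Release -- verify") to run the CPU/GPU verification; any other invocation runs the BenchmarkDotNet suite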
+ if (args.Length > 0 && args[0] == "verify")
+ new HeatTransferVerify().Verify();
+ else
+ BenchmarkRunner.Run();
+ }
+}
diff --git a/benchmarks/GPUOverhead/GPUOverhead.csproj b/benchmarks/GPUOverhead/GPUOverhead.csproj
new file mode 100644
index 00000000..9cf0a6f0
--- /dev/null
+++ b/benchmarks/GPUOverhead/GPUOverhead.csproj
@@ -0,0 +1,18 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <TargetFramework>net6.0</TargetFramework>
+ <ImplicitUsings>enable</ImplicitUsings>
+ <Nullable>enable</Nullable>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <!-- package/project references omitted -->
+ </ItemGroup>
+
+</Project>
diff --git a/benchmarks/GPUOverhead/Program.cs b/benchmarks/GPUOverhead/Program.cs
new file mode 100644
index 00000000..9c0dde2c
--- /dev/null
+++ b/benchmarks/GPUOverhead/Program.cs
@@ -0,0 +1,56 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Running;
+using BenchmarkDotNet.Diagnosers;
+
+/* jscpd:ignore-start */
+
+[SimpleJob(RuntimeMoniker.Net60)]
+[ThreadingDiagnoser]
+[HardwareCounters]
+[EventPipeProfiler(EventPipeProfile.CpuSampling)]
+public class Overhead
+{
+ DotMP.GPU.Buffer<int> buf;
+
+ // run the setup
+ [GlobalSetup]
+ public void Setup()
+ {
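+ // allocate a minimal 1x1 buffer with Behavior.NoCopy (assumed to skip host<->device transfers) so the benchmark isolates kernel-dispatch cost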
+ buf = new DotMP.GPU.Buffer<int>(new int[1, 1], DotMP.GPU.Buffer.Behavior.NoCopy);
+ }
+
+ // run the overhead benchmark
+ [Benchmark]
+ public void TestOverhead()
+ {
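+ // launch an empty kernel over a collapsed 500x500 iteration space; the measured time is dominated by DotMP's GPU dispatch overhead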
+ DotMP.GPU.Parallel.ParallelForCollapse((0, 500), (0, 500), buf, (i, j, buf) => { });
+ }
+}
+
+/* jscpd:ignore-end */
+
+// driver
+public class Program
+{
+ public static void Main(string[] args)
+ {
+ BenchmarkRunner.Run();
+ }
+}
diff --git a/benchmarks/ILGPUOverhead/ILGPUOverhead.csproj b/benchmarks/ILGPUOverhead/ILGPUOverhead.csproj
new file mode 100644
index 00000000..9cf0a6f0
--- /dev/null
+++ b/benchmarks/ILGPUOverhead/ILGPUOverhead.csproj
@@ -0,0 +1,18 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <TargetFramework>net6.0</TargetFramework>
+ <ImplicitUsings>enable</ImplicitUsings>
+ <Nullable>enable</Nullable>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <!-- package/project references omitted -->
+ </ItemGroup>
+
+</Project>
diff --git a/benchmarks/ILGPUOverhead/Program.cs b/benchmarks/ILGPUOverhead/Program.cs
new file mode 100644
index 00000000..6153183c
--- /dev/null
+++ b/benchmarks/ILGPUOverhead/Program.cs
@@ -0,0 +1,63 @@
+/*
+* DotMP - A collection of powerful abstractions for parallel programming in .NET with an OpenMP-like API.
+* Copyright (C) 2023 Phillip Allen Lane
+*
+* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser
+* General Public License as published by the Free Software Foundation; either version 2.1 of the License, or
+* (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
+* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+* License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License along with this library; if not,
+* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Running;
+using BenchmarkDotNet.Diagnosers;
+using System;
+using ILGPU;
+using ILGPU.Runtime;
+
+/* jscpd:ignore-start */
+
+[SimpleJob(RuntimeMoniker.Net60)]
+[ThreadingDiagnoser]
+[HardwareCounters]
+[EventPipeProfiler(EventPipeProfile.CpuSampling)]
+public class Overhead
+{
+ Action<KernelConfig, ArrayView1D<int, Stride1D.Dense>> kernel;
+ ArrayView1D<int, Stride1D.Dense> data;
+
+ // run the setup
+ [GlobalSetup]
+ public void Setup()
+ {
+ var context = Context.CreateDefault();
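+ // device index 1 is hard-coded here; this assumes the desired GPU is the second device ILGPU enumerates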
+ var accelerator = context.Devices[1].CreateAccelerator(context);
+ kernel = accelerator.LoadStreamKernel<ArrayView1D<int, Stride1D.Dense>>(arr => { });
+ data = accelerator.Allocate1D<int>(1);
+ }
+
+ // run the overhead benchmark
+ [Benchmark]
+ public void TestOverhead()
+ {
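+ // launch the empty kernel with a (1, 256) KernelConfig (one group of 256 threads) to measure raw ILGPU launch overhead for comparison against the DotMP wrapper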
+ kernel((1, 256), data);
+ }
+}
+
+/* jscpd:ignore-end */
+
+// driver
+public class Program
+{
+ public static void Main(string[] args)
+ {
+ BenchmarkRunner.Run();
+ }
+}