Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest]

steps:
- name: Setup .NET
Expand All @@ -38,24 +38,22 @@ jobs:
- name: Test (Release)
run: dotnet test -c Release --no-build

- name: Test (Debug, Avx2=Disabled)
- name: Test (Debug, AVX2=0)
env:
COMPlus_EnableAVX2: "0"
DOTNET_EnableAVX2: "0"
run: dotnet test -c Debug --no-build

- name: Test (Release, Avx2=Disabled)
- name: Test (Release, AVX2=0)
env:
COMPlus_EnableAVX2: "0"
DOTNET_EnableAVX2: "0"
run: dotnet test -c Release --no-build

- name: Test (Debug, Avx2=Disabled, Sse2=Disabled)
- name: Test (Debug, HWIntrinsic=0)
env:
COMPlus_EnableAVX2: "0"
COMPlus_EnableSSE2: "0"
DOTNET_EnableHWIntrinsic: "0"
run: dotnet test -c Debug --no-build

- name: Test (Release, Avx2=Disabled, Sse2=Disabled)
- name: Test (Release, HWIntrinsic=0)
env:
COMPlus_EnableAVX2: "0"
COMPlus_EnableSSE2: "0"
DOTNET_EnableHWIntrinsic: "0"
run: dotnet test -c Release --no-build
2 changes: 1 addition & 1 deletion Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<LangVersion>preview</LangVersion>
Expand Down
19 changes: 15 additions & 4 deletions Ramstack.Globbing.Tests/SimdConfigurationTests.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace Ramstack.Globbing;
Expand All @@ -8,10 +10,19 @@ public class SimdConfigurationTests
/// <summary>
/// Verifies that the hardware intrinsics reported by the runtime agree with the
/// <c>DOTNET_EnableHWIntrinsic</c> and <c>DOTNET_EnableAVX2</c> switches set for the test run.
/// </summary>
/// <remarks>
/// Only explicitly disabled switches are asserted. An unset switch says nothing about
/// the hardware: for example, <see cref="Avx2"/> and <see cref="Sse2"/> are never supported
/// on ARM, so equating "not disabled" with "supported" would fail there.
/// </remarks>
[Test]
public void VerifySimdConfiguration()
{
    var hwIntrinsicDisabled = Environment.GetEnvironmentVariable("DOTNET_EnableHWIntrinsic") == "0";
    var avx2Disabled = Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0";

    if (hwIntrinsicDisabled)
    {
        // With all hardware intrinsics turned off, no ISA may be reported, regardless of architecture.
        Assert.That(Sse2.IsSupported, Is.False);
        Assert.That(Sse41.IsSupported, Is.False);
        Assert.That(Avx2.IsSupported, Is.False);
        Assert.That(AdvSimd.IsSupported, Is.False);
    }
    else if (avx2Disabled && RuntimeInformation.ProcessArchitecture == Architecture.X64)
    {
        // Disabling only AVX2 must leave the SSE fallbacks available on x64.
        // This branch is mutually exclusive with the one above, which asserts the
        // opposite for SSE2/SSE4.1.
        Assert.That(Sse2.IsSupported, Is.True);
        Assert.That(Sse41.IsSupported, Is.True);
        Assert.That(Avx2.IsSupported, Is.False);
    }
}
}
116 changes: 100 additions & 16 deletions Ramstack.Globbing/Internal/PathHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace Ramstack.Globbing.Internal;
Expand Down Expand Up @@ -172,14 +173,46 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length)
}
while (i < tail);

//
// Process remaining chars
// NOTE: When 'length == Vector128<ushort>.Count', this results in one redundant write of the same vector
//

value = LoadVector128(ref p, tail);
mask = Sse2.CompareEqual(value, backslash);
result = Sse41.BlendVariable(value, slash, mask);
WriteVector128(ref p, tail, result);
}
else if (AdvSimd.IsSupported && length >= Vector128<ushort>.Count)
{
Vector128<ushort> value;
Vector128<ushort> mask;
Vector128<ushort> result;

var slash = Vector128.Create((ushort)'/');
var backslash = Vector128.Create((ushort)'\\');
var tail = length - Vector128<ushort>.Count;

do
{
value = LoadVector128(ref p, i);
mask = AdvSimd.CompareEqual(value, backslash);
result = AdvSimd.BitwiseSelect(mask, slash, value);
WriteVector128(ref p, i, result);

i += Vector128<ushort>.Count;
}
while (i < tail);

//
// Process remaining chars
// NOTE: When 'length == Vector128<ushort>.Count', this results in one redundant write of the same vector
//
value = LoadVector128(ref p, tail);
mask = AdvSimd.CompareEqual(value, backslash);
result = AdvSimd.BitwiseSelect(mask, slash, value);
WriteVector128(ref p, tail, result);
}
else
{
for (; i < length; i++)
Expand All @@ -198,10 +231,10 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length)
/// <returns>
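/// A 256-bit bitmask with all bits set when <paramref name="flags"/> is
/// <see cref="MatchFlags.Windows"/>; otherwise, a bitmask with all bits cleared.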
/// </returns>
private static Vector256<ushort> CreateAllowEscaping256Bitmask(MatchFlags flags)
private static Vector256<ushort> CreateBackslash256Bitmask(MatchFlags flags)
{
var mask = Vector256<ushort>.Zero;
if (flags != MatchFlags.Windows)
if (flags == MatchFlags.Windows)
mask = Vector256<ushort>.AllBitsSet;

return mask;
Expand All @@ -214,10 +247,10 @@ private static Vector256<ushort> CreateAllowEscaping256Bitmask(MatchFlags flags)
/// <returns>
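/// A 128-bit bitmask with all bits set when <paramref name="flags"/> is
/// <see cref="MatchFlags.Windows"/>; otherwise, a bitmask with all bits cleared.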
/// </returns>
private static Vector128<ushort> CreateAllowEscaping128Bitmask(MatchFlags flags)
private static Vector128<ushort> CreateBackslash128Bitmask(MatchFlags flags)
{
var mask = Vector128<ushort>.Zero;
if (flags != MatchFlags.Windows)
if (flags == MatchFlags.Windows)
mask = Vector128<ushort>.AllBitsSet;

return mask;
Expand Down Expand Up @@ -301,15 +334,37 @@ public PathSegmentIterator() =>

while ((int)_position < length)
{
if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0)
if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0)
{
var offset = BitOperations.TrailingZeroCount(_mask);
_last = (int)(_position + (nint)((uint)offset >> 1));
if (AdvSimd.IsSupported)
{
//
// On ARM, ExtractMostSignificantBits returns a mask where each bit
// represents one vector element (1 bit per ushort), so offset
// directly corresponds to the element index
//
_last = (int)(_position + (nint)(uint)offset);

//
// Clear the bits for the current separator to process the next position in the mask
//
_mask &= ~(0b_11u << offset);
//
// Clear the bits for the current separator
//
_mask &= ~(1u << offset);
}
else
{
//
// On x86, MoveMask (and ExtractMostSignificantBits on byte-based vectors)
// returns a mask where each bit represents one byte (2 bits per ushort),
// so we need to divide offset by 2 to get the actual element index
//
_last = (int)(_position + (nint)((uint)offset >> 1));

//
// Clear the bits for the current separator
//
_mask &= ~(0b_11u << offset);
}

//
// Advance position to the next chunk when no separators remain in the mask
Expand Down Expand Up @@ -340,14 +395,14 @@ public PathSegmentIterator() =>
if (Avx2.IsSupported && (int)_position + Vector256<ushort>.Count <= length)
{
var chunk = LoadVector256(ref source, _position);
var allowEscapingMask = CreateAllowEscaping256Bitmask(flags);
var backslashMask = CreateBackslash256Bitmask(flags);
var slash = Vector256.Create((ushort)'/');
var backslash = Vector256.Create((ushort)'\\');

var comparison = Avx2.Or(
Avx2.CompareEqual(chunk, slash),
Avx2.AndNot(
allowEscapingMask,
Avx2.And(
backslashMask,
Avx2.CompareEqual(chunk, backslash)));

//
Expand All @@ -367,14 +422,14 @@ public PathSegmentIterator() =>
else if (Sse2.IsSupported && !Avx2.IsSupported && (int)_position + Vector128<ushort>.Count <= length)
{
var chunk = LoadVector128(ref source, _position);
var allowEscapingMask = CreateAllowEscaping128Bitmask(flags);
var backslashMask = CreateBackslash128Bitmask(flags);
var slash = Vector128.Create((ushort)'/');
var backslash = Vector128.Create((ushort)'\\');

var comparison = Sse2.Or(
Sse2.CompareEqual(chunk, slash),
Sse2.AndNot(
allowEscapingMask,
Sse2.And(
backslashMask,
Sse2.CompareEqual(chunk, backslash)));

//
Expand All @@ -391,6 +446,35 @@ public PathSegmentIterator() =>
if (_mask == 0)
_position += Vector128<ushort>.Count;
}
#if NET7_0_OR_GREATER
else if (AdvSimd.IsSupported && (int)_position + Vector128<ushort>.Count <= length)
{
var chunk = LoadVector128(ref source, _position);
var backslashMask = CreateBackslash128Bitmask(flags);
var slash = Vector128.Create((ushort)'/');
var backslash = Vector128.Create((ushort)'\\');

var comparison = AdvSimd.Or(
AdvSimd.CompareEqual(chunk, slash),
AdvSimd.And(
backslashMask,
AdvSimd.CompareEqual(chunk, backslash)));

//
// Store the comparison bitmask and reuse it across iterations
// as long as it contains non-zero bits.
// This avoids reloading SIMD registers and repeating comparisons
// on the same chunk of data.
//
_mask = comparison.ExtractMostSignificantBits();

//
// Advance position to the next chunk when no separators found
//
if (_mask == 0)
_position += Vector128<ushort>.Count;
}
#endif
else
{
for (; (int)_position < length; _position++)
Expand Down
4 changes: 2 additions & 2 deletions Ramstack.Globbing/Ramstack.Globbing.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net7.0</TargetFrameworks>
<Description>Fast and zero-allocation .NET globbing library for matching file paths using glob patterns.</Description>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
Expand Down