diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b8dcdc..a3e9f9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest] steps: - name: Setup .NET @@ -38,24 +38,22 @@ jobs: - name: Test (Release) run: dotnet test -c Release --no-build - - name: Test (Debug, Avx2=Disabled) + - name: Test (Debug, AVX2=0) env: - COMPlus_EnableAVX2: "0" + DOTNET_EnableAVX2: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Avx2=Disabled) + - name: Test (Release, AVX2=0) env: - COMPlus_EnableAVX2: "0" + DOTNET_EnableAVX2: "0" run: dotnet test -c Release --no-build - - name: Test (Debug, Avx2=Disabled, Sse2=Disabled) + - name: Test (Debug, HWIntrinsic=0) env: - COMPlus_EnableAVX2: "0" - COMPlus_EnableSSE2: "0" + DOTNET_EnableHWIntrinsic: "0" run: dotnet test -c Debug --no-build - - name: Test (Release, Avx2=Disabled, Sse2=Disabled) + - name: Test (Release, HWIntrinsic=0) env: - COMPlus_EnableAVX2: "0" - COMPlus_EnableSSE2: "0" + DOTNET_EnableHWIntrinsic: "0" run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj b/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj index 2a75b0b..2f124f9 100644 --- a/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj +++ b/Ramstack.Globbing.Tests/Ramstack.Globbing.Tests.csproj @@ -1,6 +1,6 @@ - net6.0 + net6.0;net7.0 enable enable preview diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs index ba541a2..7371115 100644 --- a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -1,3 +1,5 @@ +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing; @@ -8,10 +10,19 @@ public class SimdConfigurationTests [Test] public void VerifySimdConfiguration() { - var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0"; - var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0"; + if (Environment.GetEnvironmentVariable("DOTNET_EnableHWIntrinsic") == "0") + { + Assert.That(Sse2.IsSupported, Is.False); + Assert.That(Sse41.IsSupported, Is.False); + Assert.That(Avx2.IsSupported, Is.False); + Assert.That(AdvSimd.IsSupported, Is.False); + } - Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); - Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); + if (RuntimeInformation.ProcessArchitecture == Architecture.X64 && Environment.GetEnvironmentVariable("DOTNET_EnableAVX2") == "0") + { + Assert.That(Sse2.IsSupported, Is.True); + Assert.That(Sse41.IsSupported, Is.True); + Assert.That(Avx2.IsSupported, Is.False); + } } } diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index 744faec..2cff6c5 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -4,6 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace Ramstack.Globbing.Internal; @@ -172,14 +173,46 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) } while (i < tail); + // // Process remaining chars // NOTE: An extra one write for the 'length == Vector128.Count' + // value = LoadVector128(ref p, tail); mask = Sse2.CompareEqual(value, backslash); result = Sse41.BlendVariable(value, slash, mask); WriteVector128(ref p, tail, result); } + else if (AdvSimd.IsSupported && length >= Vector128.Count) + { + Vector128 value; + Vector128 mask; + Vector128 result; + + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + var tail = length - Vector128.Count; + + do + { + value = LoadVector128(ref p, i); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, i, result); + + i += Vector128.Count; + } + while (i < tail); + + // + // Process remaining chars + // NOTE: An extra one write for the 'length == Vector128.Count' + // + value = LoadVector128(ref p, tail); + mask = AdvSimd.CompareEqual(value, backslash); + result = AdvSimd.BitwiseSelect(mask, slash, value); + WriteVector128(ref p, tail, result); + } else { for (; i < length; i++) @@ -198,10 +231,10 @@ static void ConvertPathToPosixStyleImpl(ref char p, nint length) /// /// A 256-bit bitmask for escaping characters. /// - private static Vector256 CreateAllowEscaping256Bitmask(MatchFlags flags) + private static Vector256 CreateBackslash256Bitmask(MatchFlags flags) { var mask = Vector256.Zero; - if (flags != MatchFlags.Windows) + if (flags == MatchFlags.Windows) mask = Vector256.AllBitsSet; return mask; @@ -214,10 +247,10 @@ private static Vector256 CreateAllowEscaping256Bitmask(MatchFlags flags) /// /// A 128-bit bitmask for escaping characters. /// - private static Vector128 CreateAllowEscaping128Bitmask(MatchFlags flags) + private static Vector128 CreateBackslash128Bitmask(MatchFlags flags) { var mask = Vector128.Zero; - if (flags != MatchFlags.Windows) + if (flags == MatchFlags.Windows) mask = Vector128.AllBitsSet; return mask; @@ -301,15 +334,37 @@ public PathSegmentIterator() => while ((int)_position < length) { - if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0) + if ((Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); - _last = (int)(_position + (nint)((uint)offset >> 1)); + if (AdvSimd.IsSupported) + { + // + // On ARM, ExtractMostSignificantBits returns a mask where each bit + // represents one vector element (1 bit per ushort), so offset + // directly corresponds to the element index + // + _last = (int)(_position + (nint)(uint)offset); - // - // Clear the bits for the current separator to process the next position in the mask - // - _mask &= ~(0b_11u << offset); + // + // Clear the bits for the current separator + // + _mask &= ~(1u << offset); + } + else + { + // + // On x86, MoveMask (and ExtractMostSignificantBits on byte-based vectors) + // returns a mask where each bit represents one byte (2 bits per ushort), + // so we need to divide offset by 2 to get the actual element index + // + _last = (int)(_position + (nint)((uint)offset >> 1)); + + // + // Clear the bits for the current separator + // + _mask &= ~(0b_11u << offset); + } // // Advance position to the next chunk when no separators remain in the mask @@ -340,14 +395,14 @@ public PathSegmentIterator() => if (Avx2.IsSupported && (int)_position + Vector256.Count <= length) { var chunk = LoadVector256(ref source, _position); - var allowEscapingMask = CreateAllowEscaping256Bitmask(flags); + var backslashMask = CreateBackslash256Bitmask(flags); var slash = Vector256.Create((ushort)'/'); var backslash = Vector256.Create((ushort)'\\'); var comparison = Avx2.Or( Avx2.CompareEqual(chunk, slash), - Avx2.AndNot( - allowEscapingMask, + Avx2.And( + backslashMask, Avx2.CompareEqual(chunk, backslash))); // @@ -367,14 +422,14 @@ public PathSegmentIterator() => else if (Sse2.IsSupported && !Avx2.IsSupported && (int)_position + Vector128.Count <= length) { var chunk = LoadVector128(ref source, _position); - var allowEscapingMask = CreateAllowEscaping128Bitmask(flags); + var backslashMask = CreateBackslash128Bitmask(flags); var slash = Vector128.Create((ushort)'/'); var backslash = Vector128.Create((ushort)'\\'); var comparison = Sse2.Or( Sse2.CompareEqual(chunk, slash), - Sse2.AndNot( - allowEscapingMask, + Sse2.And( + backslashMask, Sse2.CompareEqual(chunk, backslash))); // @@ -391,6 +446,35 @@ public PathSegmentIterator() => if (_mask == 0) _position += Vector128.Count; } + #if NET7_0_OR_GREATER + else if (AdvSimd.IsSupported && (int)_position + Vector128.Count <= length) + { + var chunk = LoadVector128(ref source, _position); + var backslashMask = CreateBackslash128Bitmask(flags); + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + + var comparison = AdvSimd.Or( + AdvSimd.CompareEqual(chunk, slash), + AdvSimd.And( + backslashMask, + AdvSimd.CompareEqual(chunk, backslash))); + + // + // Store the comparison bitmask and reuse it across iterations + // as long as it contains non-zero bits. + // This avoids reloading SIMD registers and repeating comparisons + // on the same chunk of data. + // + _mask = comparison.ExtractMostSignificantBits(); + + // + // Advance position to the next chunk when no separators found + // + if (_mask == 0) + _position += Vector128.Count; + } + #endif else { for (; (int)_position < length; _position++) diff --git a/Ramstack.Globbing/Ramstack.Globbing.csproj b/Ramstack.Globbing/Ramstack.Globbing.csproj index 4cbbbbb..78e0e99 100644 --- a/Ramstack.Globbing/Ramstack.Globbing.csproj +++ b/Ramstack.Globbing/Ramstack.Globbing.csproj @@ -1,6 +1,6 @@ - + - net6.0 + net6.0;net7.0 Fast and zero-allocation .NET globbing library for matching file paths using glob patterns. enable enable