diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8e97837..6b8dcdc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,3 +37,25 @@ jobs: - name: Test (Release) run: dotnet test -c Release --no-build + + - name: Test (Debug, Avx2=Disabled) + env: + COMPlus_EnableAVX2: "0" + run: dotnet test -c Debug --no-build + + - name: Test (Release, Avx2=Disabled) + env: + COMPlus_EnableAVX2: "0" + run: dotnet test -c Release --no-build + + - name: Test (Debug, Avx2=Disabled, Sse2=Disabled) + env: + COMPlus_EnableAVX2: "0" + COMPlus_EnableSSE2: "0" + run: dotnet test -c Debug --no-build + + - name: Test (Release, Avx2=Disabled, Sse2=Disabled) + env: + COMPlus_EnableAVX2: "0" + COMPlus_EnableSSE2: "0" + run: dotnet test -c Release --no-build diff --git a/Ramstack.Globbing.Tests/SimdConfigurationTests.cs b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs new file mode 100644 index 0000000..6ddb86a --- /dev/null +++ b/Ramstack.Globbing.Tests/SimdConfigurationTests.cs @@ -0,0 +1,17 @@ +using System.Runtime.Intrinsics.X86; + +namespace Ramstack.Globbing; + +[TestFixture] +public class SimdConfigurationTests +{ + [Test] + public void VerifySimdConfiguration() + { + var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0"; + var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0"; + + Assert.That(isAvx2Disabled, Is.EqualTo(!Avx2.IsSupported)); + Assert.That(isSse2Disabled, Is.EqualTo(!Sse2.IsSupported)); + } +} diff --git a/Ramstack.Globbing/Internal/PathHelper.cs b/Ramstack.Globbing/Internal/PathHelper.cs index f318b41..d37baf3 100644 --- a/Ramstack.Globbing/Internal/PathHelper.cs +++ b/Ramstack.Globbing/Internal/PathHelper.cs @@ -305,18 +305,32 @@ public PathSegmentIterator(int length) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public (int start, int final) GetNext(ref char source, MatchFlags flags) { + // + // Number of bits per char (ushort) in the MoveMask output + // + const uint BitsPerChar = 0b11; + var start = _last + 1; while (_position < _length) { - if (Avx2.IsSupported && _mask != 0) + if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0) { var offset = BitOperations.TrailingZeroCount(_mask); _last = _position + (nint)((uint)offset >> 1); - _mask &= ~(3u << offset); + // + // Clear the bits for the current separator to process the next position in the mask + // + _mask &= ~(BitsPerChar << offset); + + // + // Advance position to the next chunk when no separators remain in the mask + // if (_mask == 0) - _position += Vector256.Count; + _position += Avx2.IsSupported + ? Vector256.Count + : Vector128.Count; return ((int)start, (int)_last); } @@ -334,10 +348,47 @@ public PathSegmentIterator(int length) => allowEscapingMask, Avx2.CompareEqual(chunk, backslash))); + // + // Store the comparison bitmask and reuse it across iterations + // as long as it contains non-zero bits. + // This avoids reloading SIMD registers and repeating comparisons + // on the same chunk of data. + // _mask = (uint)Avx2.MoveMask(comparison.AsByte()); + + // + // Advance position to the next chunk when no separators found + // if (_mask == 0) _position += Vector256.Count; } + else if (Sse2.IsSupported && !Avx2.IsSupported && _position + Vector128.Count <= _length) + { + var chunk = LoadVector128(ref source, _position); + var allowEscapingMask = CreateAllowEscaping128Bitmask(flags); + var slash = Vector128.Create((ushort)'/'); + var backslash = Vector128.Create((ushort)'\\'); + + var comparison = Sse2.Or( + Sse2.CompareEqual(chunk, slash), + Sse2.AndNot( + allowEscapingMask, + Sse2.CompareEqual(chunk, backslash))); + + // + // Store the comparison bitmask and reuse it across iterations + // as long as it contains non-zero bits. + // This avoids reloading SIMD registers and repeating comparisons + // on the same chunk of data. + // + _mask = (uint)Sse2.MoveMask(comparison.AsByte()); + + // + // Advance position to the next chunk when no separators found + // + if (_mask == 0) + _position += Vector128.Count; + } else { for (; _position < _length; _position++)