diff --git a/src/Ramstack.Parsing/Collections/ArrayBuilder.cs b/src/Ramstack.Parsing/Collections/ArrayBuilder.cs index cd1270a..12eadc4 100644 --- a/src/Ramstack.Parsing/Collections/ArrayBuilder.cs +++ b/src/Ramstack.Parsing/Collections/ArrayBuilder.cs @@ -3,6 +3,20 @@ namespace Ramstack.Parsing.Collections; /// /// Represents an array builder for elements of type . /// +/// +/// This structure is optimized for performance and relies on proper initialization. +/// +/// Always initialize instances using one of the available constructors with the new keyword, +/// including the parameterless constructor: new ArrayBuilder<T>(). +/// +/// Do not use default(ArrayBuilder<T>) or similar patterns, as this results +/// in an uninitialized instance with an invalid internal state, +/// causing a when methods are called. +/// +/// This design choice was made to avoid unnecessary overhead and maintain control over +/// internal state initialization in modern C# versions where parameterless constructors +/// in structs are supported. +/// /// The type of elements stored in the array builder. [DebuggerDisplay("Count = {Count}")] [DebuggerTypeProxy(typeof(ArrayBuilderDebugView<>))] @@ -33,12 +47,10 @@ public readonly ref T this[int index] get { var array = _array; - if ((uint)index >= (uint)_count) + if ((uint)index >= (uint)_count || (uint)index >= (uint)array.Length) ThrowHelper.ThrowArgumentOutOfRangeException(); - return ref Unsafe.Add( - ref MemoryMarshal.GetArrayDataReference(array), - (nint)(uint)index); + return ref array[index]; } } diff --git a/src/Ramstack.Parsing/Literal.String.cs b/src/Ramstack.Parsing/Literal.String.cs index 266d14a..add7274 100644 --- a/src/Ramstack.Parsing/Literal.String.cs +++ b/src/Ramstack.Parsing/Literal.String.cs @@ -128,16 +128,26 @@ private static int TryParseImpl(ReadOnlySpan s, out TResult? value) } else { - if (index + 5 >= s.Length) + var p = s.Slice(index); + if (p.Length < 6) goto FAIL; - ref var r = ref Unsafe.AsRef(in s[index]); - - var v1 = (nint)Unsafe.Add(ref r, 2); - var v2 = (nint)Unsafe.Add(ref r, 3); - var v3 = (nint)Unsafe.Add(ref r, 4); - var v4 = (nint)Unsafe.Add(ref r, 5); - + var v1 = (nint)p[2]; + var v2 = (nint)p[3]; + var v3 = (nint)p[4]; + var v4 = (nint)p[5]; + + // We use Unsafe-methods here to index into the HexTable without bounds checks. + // + // The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values + // are within the range 0..127. + // + // This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4] + // without additional bounds checks, because HexTable has exactly 128 elements. + // By using Unsafe.Add, we avoid the overhead of safety checks and improve performance. + // + // Once the JIT compiler can properly recognize and optimize this pattern, we may be able + // to replace these unsafe calls with safe indexed access without performance loss. if ((uint)(v1 | v2 | v3 | v4) > 127) goto FAIL; diff --git a/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs b/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs index 20d814c..7464aa5 100644 --- a/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs +++ b/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs @@ -51,6 +51,17 @@ public override bool TryParse(ref ParseContext context, out T value) var v3 = (nint)s[4]; var v4 = (nint)s[5]; + // We use Unsafe-methods here to index into the HexTable without bounds checks. + // + // The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values + // are within the range 0..127. + // + // This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4] + // without additional bounds checks, because HexTable has exactly 128 elements. + // By using Unsafe.Add, we avoid the overhead of safety checks and improve performance. + // + // Once the JIT compiler can properly recognize and optimize this pattern, we may be able + // to replace these unsafe calls with safe indexed access without performance loss. if ((uint)(v1 | v2 | v3 | v4) <= 127) { ref var table = ref MemoryMarshal.GetReference(HexTable); diff --git a/src/Ramstack.Parsing/ParseContext.cs b/src/Ramstack.Parsing/ParseContext.cs index 94d0841..068b76c 100644 --- a/src/Ramstack.Parsing/ParseContext.cs +++ b/src/Ramstack.Parsing/ParseContext.cs @@ -32,6 +32,8 @@ public readonly ReadOnlySpan Remaining var source = _source; var length = source.Length - _position; + Debug.Assert(_source[_position..].Length == length); + ref var reference = ref Unsafe.Add( ref MemoryMarshal.GetReference(source), (nint)(uint)_position); diff --git a/src/Ramstack.Parsing/Parser.Repeat.cs b/src/Ramstack.Parsing/Parser.Repeat.cs index eda211c..513beee 100644 --- a/src/Ramstack.Parsing/Parser.Repeat.cs +++ b/src/Ramstack.Parsing/Parser.Repeat.cs @@ -451,7 +451,8 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out while (s.Length != 0) { var index = _searcher.IndexOfAnyExcept(s); - if (index >= 0) + + if ((uint)index < (uint)s.Length) { count += index; @@ -461,13 +462,21 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out index++; count++; - Debug.Assert((uint)index <= (uint)s.Length); - - s = MemoryMarshal.CreateReadOnlySpan( - ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index), - s.Length - index); + // At this point, "index" is guaranteed to be within [0..s.Length]. + // While the Slice method performs its own bounds check and would throw if out of range, + // our explicit check helps the JIT recognize that the call to Slice is safe. + // This allows the JIT to eliminate the internal bounds check + // and avoid generating unnecessary exception-handling code inside Slice. + // + // In other words, the check would happen anyway, but by placing it here explicitly, + // we eliminate the need for exception code inside Slice itself. + // + // Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length, + // the JIT cannot currently prove this identity and therefore cannot optimize away + // the redundant check in Slice. - // s = s.Slice(index); + if ((uint)index <= (uint)s.Length) + s = s.Slice(index); } else { @@ -535,7 +544,7 @@ public override bool TryParse(ref ParseContext context, out Unit value) while (s.Length != 0) { var index = _searcher.IndexOfAnyExcept(s); - if (index >= 0) + if ((uint)index < (uint)s.Length) { count += index; @@ -545,13 +554,21 @@ public override bool TryParse(ref ParseContext context, out Unit value) index++; count++; - Debug.Assert((uint)index <= (uint)s.Length); - - s = MemoryMarshal.CreateReadOnlySpan( - ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index), - s.Length - index); + // At this point, "index" is guaranteed to be within [0..s.Length]. + // While the Slice method performs its own bounds check and would throw if out of range, + // our explicit check helps the JIT recognize that the call to Slice is safe. + // This allows the JIT to eliminate the internal bounds check + // and avoid generating unnecessary exception-handling code inside Slice. + // + // In other words, the check would happen anyway, but by placing it here explicitly, + // we eliminate the need for exception code inside Slice itself. + // + // Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length, + // the JIT cannot currently prove this identity and therefore cannot optimize away + // the redundant check in Slice. - // s = s.Slice(index); + if ((uint)index <= (uint)s.Length) + s = s.Slice(index); } else { diff --git a/src/Ramstack.Parsing/Parser.Set.cs b/src/Ramstack.Parsing/Parser.Set.cs index b1ac036..326c6b2 100644 --- a/src/Ramstack.Parsing/Parser.Set.cs +++ b/src/Ramstack.Parsing/Parser.Set.cs @@ -374,8 +374,20 @@ public bool Contains(char ch) => /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExcept(ReadOnlySpan s) => - IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _start, _count); + public int IndexOfAnyExcept(ReadOnlySpan s) + { + static int IndexOfAnyExceptCore(ref char s, int length, uint start, uint count) + { + var span = MemoryMarshal.CreateReadOnlySpan(ref s, length); + for (var i = 0; i < span.Length; i++) + if (span[i] - start >= count) + return i; + + return -1; + } + + return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _start, _count); + } /// public CharClass GetCharClass(CharClassUnicodeCategory categories) @@ -387,22 +399,6 @@ public CharClass GetCharClass(CharClassUnicodeCategory categories) [CharClassRange.Create((char)lo, (char)hi)], categories); } - - private static int IndexOfAnyExcept(ref char s, int length, uint start, uint count) - { - ref var p = ref s; - - for (; length > 0; length--) - { - if (p - start >= count) - // ReSharper disable once RedundantCast - return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1); - - p = ref Unsafe.Add(ref p, 1); - } - - return -1; - } } #endregion @@ -423,37 +419,30 @@ public bool Contains(char ch) => /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExcept(ReadOnlySpan s) => - IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, chars); - - /// - public CharClass GetCharClass(CharClassUnicodeCategory categories) => - new CharClass(CharClassRange.Create(chars), categories); - - private static int IndexOfAnyExcept(ref char s, int length, string chars) + public int IndexOfAnyExcept(ReadOnlySpan s) { - _ = chars.Length; - - #if NET7_0_OR_GREATER - - return MemoryMarshal.CreateReadOnlySpan(ref s, length).IndexOfAnyExcept(chars); - - #else - - ref var p = ref s; - - for (; length > 0; length--) + static int IndexOfAnyExceptCore(ref char s, int length, string chars) { - if (!chars.Contains(p)) - return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1); - - p = ref Unsafe.Add(ref p, 1); + _ = chars.Length; + var span = MemoryMarshal.CreateReadOnlySpan(ref s, length); + + #if NET7_0_OR_GREATER + return span.IndexOfAnyExcept(chars); + #else + for (var i = 0; i < span.Length; i++) + if (!chars.Contains(span[i])) + return i; + + return -1; + #endif } - return -1; - - #endif + return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, chars); } + + /// + public CharClass GetCharClass(CharClassUnicodeCategory categories) => + new CharClass(CharClassRange.Create(chars), categories); } #endregion @@ -461,7 +450,7 @@ private static int IndexOfAnyExcept(ref char s, int length, string chars) #region Inner type: BitVectorSearcher /// - /// Represents a searcher that uses a bit vector to efficiently determine + /// Represents a searcher that uses bit vector to efficiently determine /// whether a character belongs to a specified set of ranges. /// /// The underlying storage type for the bit vector, which must be an unmanaged type. @@ -495,28 +484,26 @@ public bool Contains(char ch) => /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExcept(ReadOnlySpan s) => - IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, in _vector, _offset); - - /// - public CharClass GetCharClass(CharClassUnicodeCategory categories) => - new CharClass(_vector.ToCharClassRanges(_offset), categories); - - private static int IndexOfAnyExcept(ref char s, int length, in BitVector vector, int offset) + public int IndexOfAnyExcept(ReadOnlySpan s) { - ref var p = ref s; - - for (; length > 0; length--) + static int IndexOfAnyExceptCore(ref char s, int length, in BitVector vector, int offset) { - if (!vector.IsBitSet(p - offset)) - // ReSharper disable once RedundantCast - return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1); + var span = MemoryMarshal.CreateReadOnlySpan(ref s, length); + + for (var i = 0; i < span.Length; i++) + if (!vector.IsBitSet(span[i] - offset)) + return i; + + return -1; - p = ref Unsafe.Add(ref p, 1); } - return -1; + return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, in _vector, _offset); } + + /// + public CharClass GetCharClass(CharClassUnicodeCategory categories) => + new CharClass(_vector.ToCharClassRanges(_offset), categories); } #endregion @@ -524,7 +511,7 @@ private static int IndexOfAnyExcept(ref char s, int length, in BitVector - /// Represents a searcher that utilizes SIMD operations to efficiently determine + /// Represents a searcher that uses SIMD operations to efficiently determine /// whether a character falls within a specified set of ranges. /// /// The SIMD vector width, which must be an unmanaged type. @@ -558,8 +545,20 @@ public bool Contains(char ch) => /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExcept(ReadOnlySpan s) => - IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _ranges); + public int IndexOfAnyExcept(ReadOnlySpan s) + { + static int IndexOfAnyExceptCore(ref char s, int length, ushort[] ranges) + { + var span = MemoryMarshal.CreateReadOnlySpan(ref s, length); + for (var i = 0; i < span.Length; i++) + if (!ContainsCore(span[i], ranges)) + return i; + + return -1; + } + + return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _ranges); + } /// public CharClass GetCharClass(CharClassUnicodeCategory categories) @@ -641,22 +640,6 @@ private static bool ContainsCore(char ch, ushort[] ranges) return false; } - - private static int IndexOfAnyExcept(ref char s, int length, ushort[] ranges) - { - ref var p = ref s; - - for (; length > 0; length--) - { - if (!ContainsCore(p, ranges)) - // ReSharper disable once RedundantCast - return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1); - - p = ref Unsafe.Add(ref p, 1); - } - - return -1; - } } #endregion @@ -696,8 +679,20 @@ public bool Contains(char ch) => /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int IndexOfAnyExcept(ReadOnlySpan s) => - IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _ranges); + public int IndexOfAnyExcept(ReadOnlySpan s) + { + static int IndexOfAnyExceptCore(ref char s, int length, (uint lo, uint hi)[] ranges) + { + var span = MemoryMarshal.CreateReadOnlySpan(ref s, length); + for (var i = 0; i < span.Length; i++) + if (!ContainsCore(span[i], ranges)) + return i; + + return -1; + } + + return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _ranges); + } /// public CharClass GetCharClass(CharClassUnicodeCategory categories) @@ -745,22 +740,6 @@ private static bool ContainsCore(char ch, (uint lo, uint hi)[] ranges) return false; } - - private static int IndexOfAnyExcept(ref char s, int length, (uint lo, uint hi)[] ranges) - { - ref var p = ref s; - - for (; length > 0; length--) - { - if (!ContainsCore(p, ranges)) - // ReSharper disable once RedundantCast - return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1); - - p = ref Unsafe.Add(ref p, 1); - } - - return -1; - } } #endregion diff --git a/src/Ramstack.Parsing/Utilities/BitVector.cs b/src/Ramstack.Parsing/Utilities/BitVector.cs deleted file mode 100644 index 1fe2067..0000000 --- a/src/Ramstack.Parsing/Utilities/BitVector.cs +++ /dev/null @@ -1,92 +0,0 @@ -namespace Ramstack.Parsing.Utilities; - -/// -/// Represents a bit vector structure. -/// -internal readonly struct BitVector -{ - private readonly uint[] _values; - - /// - /// Gets the total number of bits that this instance can represent. - /// - public int Length => _values.Length * 32; - - /// - /// Initializes a new instance of the structure, - /// capable of storing bits (from index 0 to - 1). - /// - /// The total number of bits this vector should be able to store. - /// For example, if you pass 101, you can set and check bits from index 0 up to 100. - public BitVector(int length) - { - Argument.ThrowIfNegative(length); - _values = new uint[(length + 31) >>> 5]; - } - - /// - /// Sets the bit at the specified position to 1. - /// - /// The bit position to set. - /// - /// Ensure that is within the maximum allowed bit position - /// defined when creating this . - /// - public void Set(int position) - { - var index = (uint)position >> 5; - var value = 1u << position; - _values[index] |= value; - } - - /// - /// Checks whether the bit at the specified position is set to 1. - /// - /// The bit position to check. - /// - /// if the specified bit is set; otherwise, . - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool IsBitSet(int position) - { - var index = (uint)position >> 5; - var array = _values; - - if (index < (uint)array.Length) - { - return (Unsafe.Add( - ref MemoryMarshal.GetArrayDataReference(array), - index) & (1u << position)) != 0; - } - - return false; - } - - /// - /// Returns a object representing the internal bitset, - /// applying the specified offset to each character value. - /// - /// The offset to apply to the character values. Must be non-negative. - /// - /// A object representing the characters - /// indicated by the set bits in the internal representation. - /// - public CharClass GetCharClass(int offset) - { - Argument.ThrowIfNegative(offset); - - var count = Length; - var array = new ArrayBuilder(count); - - for (var i = 0; i < count; i++) - { - if (IsBitSet(i)) - { - var range = CharClassRange.Create((char)(i + offset)); - array.TryAdd(range); - } - } - - return new CharClass(array.ToArray()); - } -} diff --git a/src/Ramstack.Parsing/Utilities/StringBuffer.cs b/src/Ramstack.Parsing/Utilities/StringBuffer.cs index 5295be4..3d98310 100644 --- a/src/Ramstack.Parsing/Utilities/StringBuffer.cs +++ b/src/Ramstack.Parsing/Utilities/StringBuffer.cs @@ -5,6 +5,20 @@ namespace Ramstack.Parsing.Utilities; /// /// Represents a mutable buffer for building strings. /// +/// +/// This structure is optimized for performance and relies on proper initialization. +/// +/// Always initialize instances using one of the available constructors with the new keyword, +/// including the parameterless constructor: new StringBuffer(). +/// +/// Do not use default(StringBuffer) or similar patterns, as this results +/// in an uninitialized instance with an invalid internal state, +/// causing a when methods are called. +/// +/// This design choice was made to avoid unnecessary overhead and maintain control over +/// internal state initialization in modern C# versions where parameterless constructors +/// in structs are supported. +/// internal struct StringBuffer : IDisposable { private char[] _chars; @@ -51,26 +65,6 @@ public void Append(char c) _count = count + 1; } - /// - /// Attempts to append a single character to the string buffer. - /// - /// The character to append. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryAppend(char c) - { - var chars = _chars; - var count = _count; - - if ((uint)count < (uint)chars.Length) - { - chars[count] = c; - _count = count + 1; - return true; - } - - return false; - } - /// /// Appends a span of characters to the string buffer. /// @@ -109,24 +103,6 @@ ref MemoryMarshal.GetArrayDataReference(chars), } } - /// - /// Returns a representing the written data of the current instance. - /// - /// - /// A that represents the data within the current instance. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Span AsSpan() - { - var chars = _chars; - var count = _count; - _ = chars.Length; - - return MemoryMarshal.CreateSpan( - ref MemoryMarshal.GetArrayDataReference(chars), - count); - } - /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public override string ToString()