diff --git a/src/Ramstack.Parsing/Collections/ArrayBuilder.cs b/src/Ramstack.Parsing/Collections/ArrayBuilder.cs
index cd1270a..12eadc4 100644
--- a/src/Ramstack.Parsing/Collections/ArrayBuilder.cs
+++ b/src/Ramstack.Parsing/Collections/ArrayBuilder.cs
@@ -3,6 +3,20 @@ namespace Ramstack.Parsing.Collections;
///
/// Represents an array builder for elements of type .
///
+///
+/// This structure is optimized for performance and relies on proper initialization.
+///
+/// Always initialize instances using one of the available constructors with the new keyword,
+/// including the parameterless constructor: new ArrayBuilder<T>().
+///
+/// Do not use default(ArrayBuilder<T>) or similar patterns, as this results
+/// in an uninitialized instance with an invalid internal state,
+/// causing a when methods are called.
+///
+/// This design choice was made to avoid unnecessary overhead and maintain control over
+/// internal state initialization in modern C# versions where parameterless constructors
+/// in structs are supported.
+///
/// The type of elements stored in the array builder.
[DebuggerDisplay("Count = {Count}")]
[DebuggerTypeProxy(typeof(ArrayBuilderDebugView<>))]
@@ -33,12 +47,10 @@ public readonly ref T this[int index]
get
{
var array = _array;
- if ((uint)index >= (uint)_count)
+ if ((uint)index >= (uint)_count || (uint)index >= (uint)array.Length)
ThrowHelper.ThrowArgumentOutOfRangeException();
- return ref Unsafe.Add(
- ref MemoryMarshal.GetArrayDataReference(array),
- (nint)(uint)index);
+ return ref array[index];
}
}
diff --git a/src/Ramstack.Parsing/Literal.String.cs b/src/Ramstack.Parsing/Literal.String.cs
index 266d14a..add7274 100644
--- a/src/Ramstack.Parsing/Literal.String.cs
+++ b/src/Ramstack.Parsing/Literal.String.cs
@@ -128,16 +128,26 @@ private static int TryParseImpl(ReadOnlySpan s, out TResult? value)
}
else
{
- if (index + 5 >= s.Length)
+ var p = s.Slice(index);
+ if (p.Length < 6)
goto FAIL;
- ref var r = ref Unsafe.AsRef(in s[index]);
-
- var v1 = (nint)Unsafe.Add(ref r, 2);
- var v2 = (nint)Unsafe.Add(ref r, 3);
- var v3 = (nint)Unsafe.Add(ref r, 4);
- var v4 = (nint)Unsafe.Add(ref r, 5);
-
+ var v1 = (nint)p[2];
+ var v2 = (nint)p[3];
+ var v3 = (nint)p[4];
+ var v4 = (nint)p[5];
+
+ // We use Unsafe-methods here to index into the HexTable without bounds checks.
+ //
+ // The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values
+ // are within the range 0..127.
+ //
+ // This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4]
+ // without additional bounds checks, because HexTable has exactly 128 elements.
+ // By using Unsafe.Add, we avoid the overhead of safety checks and improve performance.
+ //
+ // Once the JIT compiler can properly recognize and optimize this pattern, we may be able
+ // to replace these unsafe calls with safe indexed access without performance loss.
if ((uint)(v1 | v2 | v3 | v4) > 127)
goto FAIL;
diff --git a/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs b/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs
index 20d814c..7464aa5 100644
--- a/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs
+++ b/src/Ramstack.Parsing/Literal.UnicodeEscapeSequence.cs
@@ -51,6 +51,17 @@ public override bool TryParse(ref ParseContext context, out T value)
var v3 = (nint)s[4];
var v4 = (nint)s[5];
+ // We use Unsafe-methods here to index into the HexTable without bounds checks.
+ //
+ // The check 'if ((uint)(v1 | v2 | v3 | v4) <= 127)' guarantees that all four values
+ // are within the range 0..127.
+ //
+ // This makes it safe to access HexTable[v1], HexTable[v2], HexTable[v3], and HexTable[v4]
+ // without additional bounds checks, because HexTable has exactly 128 elements.
+ // By using Unsafe.Add, we avoid the overhead of safety checks and improve performance.
+ //
+ // Once the JIT compiler can properly recognize and optimize this pattern, we may be able
+ // to replace these unsafe calls with safe indexed access without performance loss.
if ((uint)(v1 | v2 | v3 | v4) <= 127)
{
ref var table = ref MemoryMarshal.GetReference(HexTable);
diff --git a/src/Ramstack.Parsing/ParseContext.cs b/src/Ramstack.Parsing/ParseContext.cs
index 94d0841..068b76c 100644
--- a/src/Ramstack.Parsing/ParseContext.cs
+++ b/src/Ramstack.Parsing/ParseContext.cs
@@ -32,6 +32,8 @@ public readonly ReadOnlySpan Remaining
var source = _source;
var length = source.Length - _position;
+ Debug.Assert(_source[_position..].Length == length);
+
ref var reference = ref Unsafe.Add(
ref MemoryMarshal.GetReference(source),
(nint)(uint)_position);
diff --git a/src/Ramstack.Parsing/Parser.Repeat.cs b/src/Ramstack.Parsing/Parser.Repeat.cs
index eda211c..513beee 100644
--- a/src/Ramstack.Parsing/Parser.Repeat.cs
+++ b/src/Ramstack.Parsing/Parser.Repeat.cs
@@ -451,7 +451,8 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out
while (s.Length != 0)
{
var index = _searcher.IndexOfAnyExcept(s);
- if (index >= 0)
+
+ if ((uint)index < (uint)s.Length)
{
count += index;
@@ -461,13 +462,21 @@ public override bool TryParse(ref ParseContext context, [NotNullWhen(true)] out
index++;
count++;
- Debug.Assert((uint)index <= (uint)s.Length);
-
- s = MemoryMarshal.CreateReadOnlySpan(
- ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index),
- s.Length - index);
+ // At this point, "index" is guaranteed to be within [0..s.Length].
+ // While the Slice method performs its own bounds check and would throw if out of range,
+ // our explicit check helps the JIT recognize that the call to Slice is safe.
+ // This allows the JIT to eliminate the internal bounds check
+ // and avoid generating unnecessary exception-handling code inside Slice.
+ //
+ // In other words, the check would happen anyway, but by placing it here explicitly,
+ // we eliminate the need for exception code inside Slice itself.
+ //
+ // Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length,
+ // the JIT cannot currently prove this identity and therefore cannot optimize away
+ // the redundant check in Slice.
- // s = s.Slice(index);
+ if ((uint)index <= (uint)s.Length)
+ s = s.Slice(index);
}
else
{
@@ -535,7 +544,7 @@ public override bool TryParse(ref ParseContext context, out Unit value)
while (s.Length != 0)
{
var index = _searcher.IndexOfAnyExcept(s);
- if (index >= 0)
+ if ((uint)index < (uint)s.Length)
{
count += index;
@@ -545,13 +554,21 @@ public override bool TryParse(ref ParseContext context, out Unit value)
index++;
count++;
- Debug.Assert((uint)index <= (uint)s.Length);
-
- s = MemoryMarshal.CreateReadOnlySpan(
- ref Unsafe.Add(ref MemoryMarshal.GetReference(s), (nint)(uint)index),
- s.Length - index);
+ // At this point, "index" is guaranteed to be within [0..s.Length].
+ // While the Slice method performs its own bounds check and would throw if out of range,
+ // our explicit check helps the JIT recognize that the call to Slice is safe.
+ // This allows the JIT to eliminate the internal bounds check
+ // and avoid generating unnecessary exception-handling code inside Slice.
+ //
+ // In other words, the check would happen anyway, but by placing it here explicitly,
+ // we eliminate the need for exception code inside Slice itself.
+ //
+ // Even though logically (uint)index < (uint)s.Length implies (uint)index <= (uint)s.Length,
+ // the JIT cannot currently prove this identity and therefore cannot optimize away
+ // the redundant check in Slice.
- // s = s.Slice(index);
+ if ((uint)index <= (uint)s.Length)
+ s = s.Slice(index);
}
else
{
diff --git a/src/Ramstack.Parsing/Parser.Set.cs b/src/Ramstack.Parsing/Parser.Set.cs
index b1ac036..326c6b2 100644
--- a/src/Ramstack.Parsing/Parser.Set.cs
+++ b/src/Ramstack.Parsing/Parser.Set.cs
@@ -374,8 +374,20 @@ public bool Contains(char ch) =>
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int IndexOfAnyExcept(ReadOnlySpan s) =>
- IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _start, _count);
+ public int IndexOfAnyExcept(ReadOnlySpan s)
+ {
+ static int IndexOfAnyExceptCore(ref char s, int length, uint start, uint count)
+ {
+ var span = MemoryMarshal.CreateReadOnlySpan(ref s, length);
+ for (var i = 0; i < span.Length; i++)
+ if (span[i] - start >= count)
+ return i;
+
+ return -1;
+ }
+
+ return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _start, _count);
+ }
///
public CharClass GetCharClass(CharClassUnicodeCategory categories)
@@ -387,22 +399,6 @@ public CharClass GetCharClass(CharClassUnicodeCategory categories)
[CharClassRange.Create((char)lo, (char)hi)],
categories);
}
-
- private static int IndexOfAnyExcept(ref char s, int length, uint start, uint count)
- {
- ref var p = ref s;
-
- for (; length > 0; length--)
- {
- if (p - start >= count)
- // ReSharper disable once RedundantCast
- return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1);
-
- p = ref Unsafe.Add(ref p, 1);
- }
-
- return -1;
- }
}
#endregion
@@ -423,37 +419,30 @@ public bool Contains(char ch) =>
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int IndexOfAnyExcept(ReadOnlySpan s) =>
- IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, chars);
-
- ///
- public CharClass GetCharClass(CharClassUnicodeCategory categories) =>
- new CharClass(CharClassRange.Create(chars), categories);
-
- private static int IndexOfAnyExcept(ref char s, int length, string chars)
+ public int IndexOfAnyExcept(ReadOnlySpan s)
{
- _ = chars.Length;
-
- #if NET7_0_OR_GREATER
-
- return MemoryMarshal.CreateReadOnlySpan(ref s, length).IndexOfAnyExcept(chars);
-
- #else
-
- ref var p = ref s;
-
- for (; length > 0; length--)
+ static int IndexOfAnyExceptCore(ref char s, int length, string chars)
{
- if (!chars.Contains(p))
- return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1);
-
- p = ref Unsafe.Add(ref p, 1);
+ _ = chars.Length;
+ var span = MemoryMarshal.CreateReadOnlySpan(ref s, length);
+
+ #if NET7_0_OR_GREATER
+ return span.IndexOfAnyExcept(chars);
+ #else
+ for (var i = 0; i < span.Length; i++)
+ if (!chars.Contains(span[i]))
+ return i;
+
+ return -1;
+ #endif
}
- return -1;
-
- #endif
+ return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, chars);
}
+
+ ///
+ public CharClass GetCharClass(CharClassUnicodeCategory categories) =>
+ new CharClass(CharClassRange.Create(chars), categories);
}
#endregion
@@ -461,7 +450,7 @@ private static int IndexOfAnyExcept(ref char s, int length, string chars)
#region Inner type: BitVectorSearcher
///
- /// Represents a searcher that uses a bit vector to efficiently determine
+ /// Represents a searcher that uses bit vector to efficiently determine
/// whether a character belongs to a specified set of ranges.
///
/// The underlying storage type for the bit vector, which must be an unmanaged type.
@@ -495,28 +484,26 @@ public bool Contains(char ch) =>
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int IndexOfAnyExcept(ReadOnlySpan s) =>
- IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, in _vector, _offset);
-
- ///
- public CharClass GetCharClass(CharClassUnicodeCategory categories) =>
- new CharClass(_vector.ToCharClassRanges(_offset), categories);
-
- private static int IndexOfAnyExcept(ref char s, int length, in BitVector vector, int offset)
+ public int IndexOfAnyExcept(ReadOnlySpan s)
{
- ref var p = ref s;
-
- for (; length > 0; length--)
+ static int IndexOfAnyExceptCore(ref char s, int length, in BitVector vector, int offset)
{
- if (!vector.IsBitSet(p - offset))
- // ReSharper disable once RedundantCast
- return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1);
+ var span = MemoryMarshal.CreateReadOnlySpan(ref s, length);
+
+ for (var i = 0; i < span.Length; i++)
+ if (!vector.IsBitSet(span[i] - offset))
+ return i;
+
+ return -1;
- p = ref Unsafe.Add(ref p, 1);
}
- return -1;
+ return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, in _vector, _offset);
}
+
+ ///
+ public CharClass GetCharClass(CharClassUnicodeCategory categories) =>
+ new CharClass(_vector.ToCharClassRanges(_offset), categories);
}
#endregion
@@ -524,7 +511,7 @@ private static int IndexOfAnyExcept(ref char s, int length, in BitVector
- /// Represents a searcher that utilizes SIMD operations to efficiently determine
+ /// Represents a searcher that uses SIMD operations to efficiently determine
/// whether a character falls within a specified set of ranges.
///
/// The SIMD vector width, which must be an unmanaged type.
@@ -558,8 +545,20 @@ public bool Contains(char ch) =>
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int IndexOfAnyExcept(ReadOnlySpan s) =>
- IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _ranges);
+ public int IndexOfAnyExcept(ReadOnlySpan s)
+ {
+ static int IndexOfAnyExceptCore(ref char s, int length, ushort[] ranges)
+ {
+ var span = MemoryMarshal.CreateReadOnlySpan(ref s, length);
+ for (var i = 0; i < span.Length; i++)
+ if (!ContainsCore(span[i], ranges))
+ return i;
+
+ return -1;
+ }
+
+ return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _ranges);
+ }
///
public CharClass GetCharClass(CharClassUnicodeCategory categories)
@@ -641,22 +640,6 @@ private static bool ContainsCore(char ch, ushort[] ranges)
return false;
}
-
- private static int IndexOfAnyExcept(ref char s, int length, ushort[] ranges)
- {
- ref var p = ref s;
-
- for (; length > 0; length--)
- {
- if (!ContainsCore(p, ranges))
- // ReSharper disable once RedundantCast
- return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1);
-
- p = ref Unsafe.Add(ref p, 1);
- }
-
- return -1;
- }
}
#endregion
@@ -696,8 +679,20 @@ public bool Contains(char ch) =>
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int IndexOfAnyExcept(ReadOnlySpan s) =>
- IndexOfAnyExcept(ref MemoryMarshal.GetReference(s), s.Length, _ranges);
+ public int IndexOfAnyExcept(ReadOnlySpan s)
+ {
+ static int IndexOfAnyExceptCore(ref char s, int length, (uint lo, uint hi)[] ranges)
+ {
+ var span = MemoryMarshal.CreateReadOnlySpan(ref s, length);
+ for (var i = 0; i < span.Length; i++)
+ if (!ContainsCore(span[i], ranges))
+ return i;
+
+ return -1;
+ }
+
+ return IndexOfAnyExceptCore(ref MemoryMarshal.GetReference(s), s.Length, _ranges);
+ }
///
public CharClass GetCharClass(CharClassUnicodeCategory categories)
@@ -745,22 +740,6 @@ private static bool ContainsCore(char ch, (uint lo, uint hi)[] ranges)
return false;
}
-
- private static int IndexOfAnyExcept(ref char s, int length, (uint lo, uint hi)[] ranges)
- {
- ref var p = ref s;
-
- for (; length > 0; length--)
- {
- if (!ContainsCore(p, ranges))
- // ReSharper disable once RedundantCast
- return (int)((nint)Unsafe.ByteOffset(ref s, ref p) >>> 1);
-
- p = ref Unsafe.Add(ref p, 1);
- }
-
- return -1;
- }
}
#endregion
diff --git a/src/Ramstack.Parsing/Utilities/BitVector.cs b/src/Ramstack.Parsing/Utilities/BitVector.cs
deleted file mode 100644
index 1fe2067..0000000
--- a/src/Ramstack.Parsing/Utilities/BitVector.cs
+++ /dev/null
@@ -1,92 +0,0 @@
-namespace Ramstack.Parsing.Utilities;
-
-///
-/// Represents a bit vector structure.
-///
-internal readonly struct BitVector
-{
- private readonly uint[] _values;
-
- ///
- /// Gets the total number of bits that this instance can represent.
- ///
- public int Length => _values.Length * 32;
-
- ///
- /// Initializes a new instance of the structure,
- /// capable of storing bits (from index 0 to - 1).
- ///
- /// The total number of bits this vector should be able to store.
- /// For example, if you pass 101, you can set and check bits from index 0 up to 100.
- public BitVector(int length)
- {
- Argument.ThrowIfNegative(length);
- _values = new uint[(length + 31) >>> 5];
- }
-
- ///
- /// Sets the bit at the specified position to 1.
- ///
- /// The bit position to set.
- ///
- /// Ensure that is within the maximum allowed bit position
- /// defined when creating this .
- ///
- public void Set(int position)
- {
- var index = (uint)position >> 5;
- var value = 1u << position;
- _values[index] |= value;
- }
-
- ///
- /// Checks whether the bit at the specified position is set to 1.
- ///
- /// The bit position to check.
- ///
- /// if the specified bit is set; otherwise, .
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool IsBitSet(int position)
- {
- var index = (uint)position >> 5;
- var array = _values;
-
- if (index < (uint)array.Length)
- {
- return (Unsafe.Add(
- ref MemoryMarshal.GetArrayDataReference(array),
- index) & (1u << position)) != 0;
- }
-
- return false;
- }
-
- ///
- /// Returns a object representing the internal bitset,
- /// applying the specified offset to each character value.
- ///
- /// The offset to apply to the character values. Must be non-negative.
- ///
- /// A object representing the characters
- /// indicated by the set bits in the internal representation.
- ///
- public CharClass GetCharClass(int offset)
- {
- Argument.ThrowIfNegative(offset);
-
- var count = Length;
- var array = new ArrayBuilder(count);
-
- for (var i = 0; i < count; i++)
- {
- if (IsBitSet(i))
- {
- var range = CharClassRange.Create((char)(i + offset));
- array.TryAdd(range);
- }
- }
-
- return new CharClass(array.ToArray());
- }
-}
diff --git a/src/Ramstack.Parsing/Utilities/StringBuffer.cs b/src/Ramstack.Parsing/Utilities/StringBuffer.cs
index 5295be4..3d98310 100644
--- a/src/Ramstack.Parsing/Utilities/StringBuffer.cs
+++ b/src/Ramstack.Parsing/Utilities/StringBuffer.cs
@@ -5,6 +5,20 @@ namespace Ramstack.Parsing.Utilities;
///
/// Represents a mutable buffer for building strings.
///
+///
+/// This structure is optimized for performance and relies on proper initialization.
+///
+/// Always initialize instances using one of the available constructors with the new keyword,
+/// including the parameterless constructor: new StringBuffer().
+///
+/// Do not use default(StringBuffer) or similar patterns, as this results
+/// in an uninitialized instance with an invalid internal state,
+/// causing a when methods are called.
+///
+/// This design choice was made to avoid unnecessary overhead and maintain control over
+/// internal state initialization in modern C# versions where parameterless constructors
+/// in structs are supported.
+///
internal struct StringBuffer : IDisposable
{
private char[] _chars;
@@ -51,26 +65,6 @@ public void Append(char c)
_count = count + 1;
}
- ///
- /// Attempts to append a single character to the string buffer.
- ///
- /// The character to append.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool TryAppend(char c)
- {
- var chars = _chars;
- var count = _count;
-
- if ((uint)count < (uint)chars.Length)
- {
- chars[count] = c;
- _count = count + 1;
- return true;
- }
-
- return false;
- }
-
///
/// Appends a span of characters to the string buffer.
///
@@ -109,24 +103,6 @@ ref MemoryMarshal.GetArrayDataReference(chars),
}
}
- ///
- /// Returns a representing the written data of the current instance.
- ///
- ///
- /// A that represents the data within the current instance.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public readonly Span AsSpan()
- {
- var chars = _chars;
- var count = _count;
- _ = chars.Length;
-
- return MemoryMarshal.CreateSpan(
- ref MemoryMarshal.GetArrayDataReference(chars),
- count);
- }
-
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override string ToString()