Skip to content

Commit 79d5db8

Browse files
committed
Ref #352 - Make a performance improvement to use less "ord()" function
Signed-off-by: William Desportes <williamdes@wdes.fr>
1 parent 7b906ab commit 79d5db8

File tree

1 file changed

+123
-1
lines changed

1 file changed

+123
-1
lines changed

src/UtfString.php

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,119 @@ class UtfString implements ArrayAccess
6868
*/
6969
public $charLen = 0;
7070

71+
/**
72+
* Map from single-byte ASCII characters to their ASCII code points.
73+
* Intended to improve performance by letting getCharLength() avoid calling ord($byte).
* Only NUL, the common whitespace controls (HT, LF, VT, FF, CR) and bytes 32-127 are pre-filled;
* getCharLength() adds an entry for any other byte the first time it sees it.
* Note: PHP casts the digit keys '0'-'9' to the integer keys 0-9, so keys are int|string.
74+
*
75+
* Source: https://www.freecodecamp.org/news/ascii-table-hex-to-ascii-value-character-code-chart-2/
76+
*
77+
* @var array<int|string,int>
78+
*/
79+
protected static $asciiMap = [
80+
"\0" => 0, // (00000000) NUL Null
81+
"\t" => 9, // (00001001) HT Horizontal Tab
82+
"\n" => 10, // (00001010) LF Newline / Line Feed
83+
"\v" => 11, // (00001011) VT Vertical Tab
84+
"\f" => 12, // (00001100) FF Form Feed
85+
"\r" => 13, // (00001101) CR Carriage Return
86+
' ' => 32, // (00100000) SP Space
87+
'!' => 33, // (00100001) ! Exclamation mark
88+
'"' => 34, // (00100010) " Double quote
89+
'#' => 35, // (00100011) # Number
90+
'$' => 36, // (00100100) $ Dollar
91+
'%' => 37, // (00100101) % Percent
92+
'&' => 38, // (00100110) & Ampersand
93+
'\'' => 39, // (00100111) ' Single quote
94+
'(' => 40, // (00101000) ( Left parenthesis
95+
')' => 41, // (00101001) ) Right parenthesis
96+
'*' => 42, // (00101010) * Asterisk
97+
'+' => 43, // (00101011) + Plus
98+
',' => 44, // (00101100) , Comma
99+
'-' => 45, // (00101101) - Minus
100+
'.' => 46, // (00101110) . Period
101+
'/' => 47, // (00101111) / Slash
102+
'0' => 48, // (00110000) 0 Zero
103+
'1' => 49, // (00110001) 1 One
104+
'2' => 50, // (00110010) 2 Two
105+
'3' => 51, // (00110011) 3 Three
106+
'4' => 52, // (00110100) 4 Four
107+
'5' => 53, // (00110101) 5 Five
108+
'6' => 54, // (00110110) 6 Six
109+
'7' => 55, // (00110111) 7 Seven
110+
'8' => 56, // (00111000) 8 Eight
111+
'9' => 57, // (00111001) 9 Nine
112+
':' => 58, // (00111010) : Colon
113+
';' => 59, // (00111011) ; Semicolon
114+
'<' => 60, // (00111100) < Less than
115+
'=' => 61, // (00111101) = Equal sign
116+
'>' => 62, // (00111110) > Greater than
117+
'?' => 63, // (00111111) ? Question mark
118+
'@' => 64, // (01000000) @ At sign
119+
'A' => 65, // (01000001) A Uppercase A
120+
'B' => 66, // (01000010) B Uppercase B
121+
'C' => 67, // (01000011) C Uppercase C
122+
'D' => 68, // (01000100) D Uppercase D
123+
'E' => 69, // (01000101) E Uppercase E
124+
'F' => 70, // (01000110) F Uppercase F
125+
'G' => 71, // (01000111) G Uppercase G
126+
'H' => 72, // (01001000) H Uppercase H
127+
'I' => 73, // (01001001) I Uppercase I
128+
'J' => 74, // (01001010) J Uppercase J
129+
'K' => 75, // (01001011) K Uppercase K
130+
'L' => 76, // (01001100) L Uppercase L
131+
'M' => 77, // (01001101) M Uppercase M
132+
'N' => 78, // (01001110) N Uppercase N
133+
'O' => 79, // (01001111) O Uppercase O
134+
'P' => 80, // (01010000) P Uppercase P
135+
'Q' => 81, // (01010001) Q Uppercase Q
136+
'R' => 82, // (01010010) R Uppercase R
137+
'S' => 83, // (01010011) S Uppercase S
138+
'T' => 84, // (01010100) T Uppercase T
139+
'U' => 85, // (01010101) U Uppercase U
140+
'V' => 86, // (01010110) V Uppercase V
141+
'W' => 87, // (01010111) W Uppercase W
142+
'X' => 88, // (01011000) X Uppercase X
143+
'Y' => 89, // (01011001) Y Uppercase Y
144+
'Z' => 90, // (01011010) Z Uppercase Z
145+
'[' => 91, // (01011011) [ Left square bracket
146+
'\\' => 92, // (01011100) \ backslash
147+
']' => 93, // (01011101) ] Right square bracket
148+
'^' => 94, // (01011110) ^ Caret / circumflex
149+
'_' => 95, // (01011111) _ Underscore
150+
'`' => 96, // (01100000) ` Grave / accent
151+
'a' => 97, // (01100001) a Lowercase a
152+
'b' => 98, // (01100010) b Lowercase b
153+
'c' => 99, // (01100011) c Lowercase c
154+
'd' => 100, // (01100100) d Lowercase d
155+
'e' => 101, // (01100101) e Lowercase e
156+
'f' => 102, // (01100110) f Lowercase f
157+
'g' => 103, // (01100111) g Lowercase g
158+
'h' => 104, // (01101000) h Lowercase h
159+
'i' => 105, // (01101001) i Lowercase i
160+
'j' => 106, // (01101010) j Lowercase j
161+
'k' => 107, // (01101011) k Lowercase k
162+
'l' => 108, // (01101100) l Lowercase l
163+
'm' => 109, // (01101101) m Lowercase m
164+
'n' => 110, // (01101110) n Lowercase n
165+
'o' => 111, // (01101111) o Lowercase o
166+
'p' => 112, // (01110000) p Lowercase p
167+
'q' => 113, // (01110001) q Lowercase q
168+
'r' => 114, // (01110010) r Lowercase r
169+
's' => 115, // (01110011) s Lowercase s
170+
't' => 116, // (01110100) t Lowercase t
171+
'u' => 117, // (01110101) u Lowercase u
172+
'v' => 118, // (01110110) v Lowercase v
173+
'w' => 119, // (01110111) w Lowercase w
174+
'x' => 120, // (01111000) x Lowercase x
175+
'y' => 121, // (01111001) y Lowercase y
176+
'z' => 122, // (01111010) z Lowercase z
177+
'{' => 123, // (01111011) { Left curly bracket
178+
'|' => 124, // (01111100) | Vertical bar
179+
'}' => 125, // (01111101) } Right curly bracket
180+
'~' => 126, // (01111110) ~ Tilde
181+
"\x7f" => 127, // (01111111) DEL Delete
182+
];
183+
71184
/**
72185
* @param string $str the string
73186
*/
@@ -182,7 +295,16 @@ public function offsetUnset($offset)
182295
*/
183296
public static function getCharLength($byte)
184297
{
185-
$byte = ord($byte);
298+
// Use the default ASCII map as queries are mostly ASCII chars
299+
// ord($byte) has a performance cost
300+
301+
if (! isset(static::$asciiMap[$byte])) {
302+
// Complete the cache with missing items
303+
static::$asciiMap[$byte] = ord($byte);
304+
}
305+
306+
$byte = static::$asciiMap[$byte];
307+
186308
if ($byte < 128) {
187309
return 1;
188310
}

0 commit comments

Comments
 (0)