@@ -23,7 +23,7 @@ format ::= '$' int | '${' int '}' | '${' int ':' modifier '}' | '${' int ':+' if
2323
2424regex ::= JS regex value
2525
26- options ::= JS regex options // NOTE: Unrecognised options should be ignored for the best fault tolerance (can log a warning though)
26+ options ::= JS regex options
2727
2828modifier = '/' var
2929
@@ -52,73 +52,123 @@ int ::= [0-9]+
5252// Grab anything that isn't \ or $, then try to build a special node out of it, and (at the top level) if that fails then just accept it as text
5353topLevelContent = content :(text / escapedTopLevel / tabStop / choice / variable / any )* { return coalesce (content); }
5454
55- tabStopContent = content :(tabStopText / escapedTabStop / tabStop / choice / variable )* { return coalesce (content); }
55+ // Placeholder content. The same as top level, except we need to fail on '}' so that it can end the tab stop (the `any` rule would eat it if we used it here)
56+ tabStopContent = content :(tabStopText / escapedTabStop / tabStop / choice / variable / notCloseBrace )* { return coalesce (content); }
5657
57- tabStop = tabStopSimple / tabStopWithoutPlaceholder / tabStopWithPlaceholder / tabStopWithTransform
58+ // The forms of a tab stop. They all start with '$', so we pull that out here.
59+ tabStop = '$' t :(tabStopSimple / tabStopWithoutPlaceholder / tabStopWithPlaceholder / tabStopWithTransform ) { return t; }
5860
59- tabStopSimple = '$' n :integer { return { index: n, content: [] }; }
61+ // The simplest form is just $n for some integer `n`
62+ tabStopSimple = n :integer { return { index: n, content: [] }; }
6063
61- tabStopWithoutPlaceholder = '${' n :integer '}' { return { index: n, content: [] }; }
64+ // The next simplest form is equivalent to the above, but wrapped in `{}`
65+ tabStopWithoutPlaceholder = '{' n :integer '}' { return { index: n, content: [] }; }
6266
63- tabStopWithPlaceholder = '${' n :integer ':' content :tabStopContent '}' { return { index: n, content }; }
67+ // When a ':' follows `n`, the content after the ':' is the placeholder and it can be anything
68+ tabStopWithPlaceholder = '{' n :integer ':' content :tabStopContent '}' { return { index: n, content }; }
6469
65- tabStopWithTransform = '${' n :integer t :transformation '}' { return { index: n, transformation: t }; }
70+ // When a transformation follows `n` directly (indicated by '${n/...'; there is no ':' before the '/')
71+ tabStopWithTransform = '{' n :integer t :transformation '}' { return { index: n, transformation: t }; }
6672
67- transformation = '/' capture :regexString '/' replace :replace '/' flags :flags { return { capture, flags, replace }; }
73+ // Builds a capture regex and substitution tree. If the capture is not a valid regex, then the match fails
74+ transformation = '/' find :regexString '/' replace :replace '/' flags :flags & {
75+ // Predicate: only succeed if the `find` + `flags` values make a valid regex.
76+ // Labels are handed to the predicate and to the action as separate function arguments, so reassigning `find` here would not reach the action; the regex is built again there.
77+ // `flags` may arrive as an array of single characters, so it is joined into one flag string first.
78+ try {
79+ new RegExp (find, Array .isArray (flags) ? flags .join ("") : flags);
80+ return true ;
81+ } catch (e) {
82+ return false ;
83+ }
84+ } {
85+ return { find: new RegExp (find, Array .isArray (flags) ? flags .join ("") : flags), replace };
86+ }
6887
69- // TODO: enforce this is a valid regex, or fail (can do at transform level where we make regex though)
88+ // Pulls out the portion that would be for the find regex. Validation is done
89+ // higher up, where we also have access to the flags.
7090regexString = r :([^/\\ ] / '\\ ' c :. { return ' \\ ' + c } )* { return r .join (" " ); }
7191
72- replace = (format / replaceText / replaceModifier / escapedReplace )*
92+ // The form of a substitution for a transformation. It is a mix of plain text + modifiers + backreferences to the find capture groups
93+ // It cannot access tab stop values.
94+ replace = (replaceText / format / replaceModifier / escapedReplace )*
7395
74- format = formatSimple / formatPlain / formatWithModifier / formatWithIf / formatWithIfElse / formatWithElse
96+ // A reference to a capture group of the find regex of a transformation. Can conditionally
97+ // resolve based on if the match occurred, and have arbitrary modifiers applied to it.
98+ // The common '$' prefix has been pulled out.
99+ format = '$' f :(formatSimple / formatPlain / formatWithModifier / formatWithIf / formatWithIfElse / formatWithElse ) { return f; }
75100
76- formatSimple = '$' n :integer { return { backreference: n }; }
101+ // The simplest format form, resembles a simple tab stop except `n` refers to the capture group index, not a tab stop
102+ formatSimple = n :integer { return { backreference: n }; }
77103
78- formatPlain = '${' n :integer '}' { return { backreference: n }; }
104+ // The same as the simple variant, but `n` is enclosed in {}
105+ formatPlain = '{' n :integer '}' { return { backreference: n }; }
79106
80- formatWithModifier = '${' n :integer ':' modifier :modifier '}' { return { backreference: n, modifier }; }
107+ // A modifier is something like "/upcase", "/pascalcase". If recognised, it resolves to the
108+ // application of a JS function to the `n`th captured group.
109+ formatWithModifier = '{' n :integer ':' modifier :modifier '}' { return { backreference: n, modifier }; }
81110
82- formatWithIf = '${' n :integer ':+' ifContent :replace '}' { return { backreference: n, ifContent }; }
111+ // If the `n`th capture group is non-empty, then resolve to the `ifContent` value, else an empty string
112+ // Note that ifContent is a replace itself; its formats still refer to the original transformation find though,
113+ // as transformations cannot be nested.
114+ formatWithIf = '{' n :integer ':+' ifContent :replace '}' { return { backreference: n, ifContent }; }
83115
84- formatWithIfElse = '${' n :integer ':?' ifContent :replace ':' elseContent :replace '}' { return { backreference: n, ifContent, elseContent }; }
116+ // Same as the if case, but resolve to `elseContent` if empty instead of the empty string
117+ formatWithIfElse = '{' n :integer ':?' ifContent :replace ':' elseContent :replace '}' { return { backreference: n, ifContent, elseContent }; }
85118
86- formatWithElse = '${' n :integer ':' '-' ? elseContent :replace { return { backreference: n, elseContent }; }
119+ // Same as the if case, but reversed behaviour with empty vs non-empty `n`th match. The '-' after ':' is optional, and the closing '}' is consumed here like in the other braced format forms.
120+ formatWithElse = '{' n :integer ':' '-' ? elseContent :replace '}' { return { backreference: n, elseContent }; }
87121
122+ // Used in `format`s to transform a string using a JS function
88123modifier = '/' modifier :var { return modifier; }
89124
125+ // Regex flags, returned as one string (e.g. "gi") rather than an array of characters, so they can be passed straight to RegExp. Validation is performed when the regex itself is also known.
90126flags = f :[a-z]* { return f .join (""); }
91127
128+ // A tab stop that offers a choice between several fixed values. These values are plain text only.
129+ // This feature is not implemented, but the syntax is parsed to reserve it for future use.
130+ // It will currently just default to a regular tab stop with the first value as its placeholder.
92131choice = '${' n :integer '|' choiceText (',' choiceText )* '|}'
93132
94- variable = variableSimple / variablePlain / variableWithPlaceholder / variableWithTransform
133+ // Syntactically looks like a named tab stop. Variables are resolved in JS and may be
134+ // further processed with a transformation. Unrecognised variables are transformed into
135+ // tab stops with the variable name as a placeholder.
136+ variable = '$' v :(variableSimple / variablePlain / variableWithPlaceholder / variableWithTransform ) { return v; }
95137
96- variableSimple = '$' v :var { return { variable: v }; }
138+ variableSimple = v :var { return { variable: v }; }
97139
98- variablePlain = '$ {' v :var '}' { return { variable: v }; }
140+ variablePlain = '{' v :var '}' { return { variable: v }; }
99141
100- variableWithPlaceholder = '$ {' v :var ':' content :tabStopContent '}' { return { variable: v, content }; }
142+ variableWithPlaceholder = '{' v :var ':' content :tabStopContent '}' { return { variable: v, content }; }
101143
102- variableWithTransform = '$ {' v :var t :transformation '}' { return { variable: v, transformation: t }; }
144+ variableWithTransform = '{' v :var t :transformation '}' { return { variable: v, transformation: t }; }
103145
146+ // Top level text. Anything that cannot be the start of something special. False negatives are handled later by the `any` rule
104147text = t :([^$\\ }])+ { return t .join (" " ) }
105148
149+ // Non-special text inside a tab stop placeholder. Should be no different to regular top level text.
106150tabStopText = text
107151
152+ // Non-special text inside a choice. $, {, }, etc. are all regular text in this context.
108153choiceText = t :[^,|]+ { return t .join (" " ); }
109154
155+ // Non-special text inside a replace (substitution part of transformation). Same as normal text, but `/` is special (the end of the regex-like pattern)
110156replaceText = t :[^$\\ }/]+ { return t .join (" " ); }
111157
112158// Match an escaped character. The set of characters that can be escaped is based on context, generally restricted to the minimum set that enables expressing any text content
113159escapedTopLevel = '\\ ' c :[$\\ }] { return c; }
114160
161+ // Characters that can be escaped in a tab stop placeholder are the same as top level
115162escapedTabStop = escapedTopLevel
116163
164+ // Only `,` and `|` can be escaped in a choice, as everything else is plain text
117165escapedChoice = '\\ ' c :[$\\ ,|] { return c; }
118166
119- replaceModifier = '\\ ' m :[uUlL] { return { modifier: m }; }
167+ // Same as top level, but `/` can also be escaped
168+ escapedReplace = '\\ ' c :[$\\ /] { return c; }
120169
121- escapedReplace = '\\ ' c :[$\\ ] { return c; }
170+ // We handle 'modifiers' separately to escapes. These indicate a change in state when building the replacement (e.g., capitalisation)
171+ replaceModifier = '\\ ' m :[uUlL] { return { modifier: m }; }
122172
123173// Match nonnegative integers like those used for tab stop ordering
124174integer = digits :[0-9]+ { return parseInt (digits .join (" " ), 10 ); }
@@ -128,3 +178,6 @@ var = a:[a-zA-Z_] b:[a-zA-Z_0-9]* { return a + b.join(""); }
128178
129179// Match any single character. Useful to resolve any parse errors where something that looked like it would be special had malformed syntax.
130180any = a :. { return a; }
181+
182+ // Match anything that isn't a '}'. Useful for parse errors inside placeholder text, as `}` should be used to end the tab stop region
183+ notCloseBrace = a :[^}] { return a; }
0 commit comments