Skip to content

Commit 5e843c6

Browse files
CopilotcodingjoeCopilotpre-commit-ci[bot]
authored
Replace string replacement with HTML parser in render method (#361)
* Initial plan * Replace fragile string replacement with HTML parser for render method Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> * Refactor HTML parser to reduce code duplication and improve documentation Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> * Compress parser code and add comprehensive unit tests Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> * Add test for non-file input with self-closing tag to cover line 52 Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> * Update s3file/forms.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add entity and character reference handlers with comprehensive tests Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> * Add tests for comment, declaration, and processing instruction handlers Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: codingjoe <1772890+codingjoe@users.noreply.github.com> Co-authored-by: Johannes Maron <johannes@maron.family> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 2d36f16 commit 5e843c6

File tree

2 files changed

+178
-5
lines changed

2 files changed

+178
-5
lines changed

s3file/forms.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import base64
2+
import html
23
import logging
34
import pathlib
45
import uuid
6+
from html.parser import HTMLParser
57

68
from django.conf import settings
79
from django.templatetags.static import static
@@ -16,6 +18,71 @@
1618
logger = logging.getLogger("s3file")
1719

1820

21+
class InputToS3FileRewriter(HTMLParser):
22+
"""HTML parser that rewrites <input type="file"> to <s3-file> custom elements."""
23+
24+
def __init__(self):
25+
super().__init__()
26+
self.output = []
27+
28+
def handle_starttag(self, tag, attrs):
29+
if tag == "input" and dict(attrs).get("type") == "file":
30+
self.output.append("<s3-file")
31+
for name, value in attrs:
32+
if name != "type":
33+
self.output.append(
34+
f' {name}="{html.escape(value, quote=True)}"'
35+
if value
36+
else f" {name}"
37+
)
38+
self.output.append(">")
39+
else:
40+
self.output.append(self.get_starttag_text())
41+
42+
def handle_endtag(self, tag):
43+
self.output.append(f"</{tag}>")
44+
45+
def handle_data(self, data):
46+
self.output.append(data)
47+
48+
def handle_startendtag(self, tag, attrs):
49+
if tag == "input" and dict(attrs).get("type") == "file":
50+
self.output.append("<s3-file")
51+
for name, value in attrs:
52+
if name != "type":
53+
self.output.append(
54+
f' {name}="{html.escape(value, quote=True)}"'
55+
if value
56+
else f" {name}"
57+
)
58+
self.output.append(">")
59+
else:
60+
self.output.append(self.get_starttag_text())
61+
62+
def handle_comment(self, data):
63+
# Preserve HTML comments in the output
64+
self.output.append(f"<!--{data}-->")
65+
66+
def handle_decl(self, decl):
67+
# Preserve declarations such as <!DOCTYPE ...> in the output
68+
self.output.append(f"<!{decl}>")
69+
70+
def handle_pi(self, data):
71+
# Preserve processing instructions such as <?xml ...?> in the output
72+
self.output.append(f"<?{data}>")
73+
74+
def handle_entityref(self, name):
75+
# Preserve HTML entities like &amp;, &lt;, &gt;
76+
self.output.append(f"&{name};")
77+
78+
def handle_charref(self, name):
79+
# Preserve character references like &#39;, &#x27;
80+
self.output.append(f"&#{name};")
81+
82+
def get_html(self):
83+
return "".join(self.output)
84+
85+
1986
@html_safe
2087
class Asset:
2188
"""A generic asset that can be included in a template."""
@@ -99,11 +166,10 @@ def build_attrs(self, *args, **kwargs):
99166

100167
def render(self, name, value, attrs=None, renderer=None):
101168
"""Render the widget as a custom element for Safari compatibility."""
102-
return mark_safe( # noqa: S308
103-
str(super().render(name, value, attrs=attrs, renderer=renderer)).replace(
104-
f'<input type="{self.input_type}"', "<s3-file"
105-
)
106-
)
169+
html_output = str(super().render(name, value, attrs=attrs, renderer=renderer))
170+
parser = InputToS3FileRewriter()
171+
parser.feed(html_output)
172+
return mark_safe(parser.get_html()) # noqa: S308
107173

108174
def get_conditions(self, accept):
109175
conditions = [

tests/test_forms.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,98 @@ def test_str(self, settings):
7272
assert str(js) == '<script src="/static/path" type="module"></script>'
7373

7474

75+
class TestInputToS3FileRewriter:
76+
def test_transforms_file_input(self):
77+
parser = forms.InputToS3FileRewriter()
78+
parser.feed('<input type="file" name="test">')
79+
assert parser.get_html() == '<s3-file name="test">'
80+
81+
def test_preserves_non_file_input(self):
82+
parser = forms.InputToS3FileRewriter()
83+
parser.feed('<input type="text" name="test">')
84+
assert parser.get_html() == '<input type="text" name="test">'
85+
86+
def test_handles_attribute_ordering(self):
87+
parser = forms.InputToS3FileRewriter()
88+
parser.feed('<input name="test" type="file" class="foo">')
89+
result = parser.get_html()
90+
assert result.startswith("<s3-file")
91+
assert 'name="test"' in result
92+
assert 'class="foo"' in result
93+
assert 'type="file"' not in result
94+
95+
def test_handles_multiple_attributes(self):
96+
parser = forms.InputToS3FileRewriter()
97+
parser.feed(
98+
'<input type="file" name="test" accept="image/*" required multiple>'
99+
)
100+
result = parser.get_html()
101+
assert result.startswith("<s3-file")
102+
assert 'name="test"' in result
103+
assert 'accept="image/*"' in result
104+
assert "required" in result
105+
assert "multiple" in result
106+
107+
def test_escapes_html_entities(self):
108+
parser = forms.InputToS3FileRewriter()
109+
parser.feed('<input type="file" name="test" data-value="test&value">')
110+
result = parser.get_html()
111+
assert 'data-value="test&amp;value"' in result
112+
113+
def test_preserves_existing_html_entities(self):
114+
# Test that already-escaped entities in input are preserved (not double-escaped)
115+
parser = forms.InputToS3FileRewriter()
116+
parser.feed('<input type="file" name="test" data-value="test&amp;value">')
117+
result = parser.get_html()
118+
# Should preserve the &amp; entity, not convert to &amp;amp;
119+
assert 'data-value="test&amp;value"' in result
120+
assert '&amp;amp;' not in result
121+
122+
def test_preserves_character_references(self):
123+
# Test that character references are preserved (may be in decimal or hex format)
124+
parser = forms.InputToS3FileRewriter()
125+
parser.feed('<input type="file" name="test" data-value="test&#39;s">')
126+
result = parser.get_html()
127+
# The character reference should be preserved (either &#39; or &#x27; both represent ')
128+
assert ('data-value="test&#39;s"' in result or 'data-value="test&#x27;s"' in result)
129+
# Verify the actual apostrophe character is NOT directly in the output (should be a reference)
130+
assert 'data-value="test\'s"' not in result or '&#' in result
131+
132+
def test_handles_self_closing_tag(self):
133+
parser = forms.InputToS3FileRewriter()
134+
parser.feed('<input type="file" name="test" />')
135+
assert parser.get_html() == '<s3-file name="test">'
136+
137+
def test_preserves_non_file_self_closing_tag(self):
138+
parser = forms.InputToS3FileRewriter()
139+
parser.feed('<input type="text" name="test" />')
140+
assert parser.get_html() == '<input type="text" name="test" />'
141+
142+
def test_preserves_surrounding_elements(self):
143+
parser = forms.InputToS3FileRewriter()
144+
parser.feed('<p><input type="file" name="test"></p>')
145+
result = parser.get_html()
146+
assert result == '<p><s3-file name="test"></p>'
147+
148+
def test_preserves_html_comments(self):
149+
parser = forms.InputToS3FileRewriter()
150+
parser.feed('<!-- comment --><input type="file" name="test">')
151+
result = parser.get_html()
152+
assert result == '<!-- comment --><s3-file name="test">'
153+
154+
def test_preserves_declarations(self):
155+
parser = forms.InputToS3FileRewriter()
156+
parser.feed('<!DOCTYPE html><input type="file" name="test">')
157+
result = parser.get_html()
158+
assert result == '<!DOCTYPE html><s3-file name="test">'
159+
160+
def test_preserves_processing_instructions(self):
161+
parser = forms.InputToS3FileRewriter()
162+
parser.feed('<?xml version="1.0"?><input type="file" name="test">')
163+
result = parser.get_html()
164+
assert result == '<?xml version="1.0"?><s3-file name="test">'
165+
166+
75167
@contextmanager
76168
def wait_for_page_load(driver, timeout=30):
77169
old_page = driver.find_element(By.TAG_NAME, "html")
@@ -186,6 +278,21 @@ def test_render_wraps_in_s3_file_element(self, freeze_upload_folder):
186278
# Check that the output is the s3-file custom element
187279
assert html.startswith("<s3-file")
188280

281+
def test_render_preserves_attributes(self, freeze_upload_folder):
282+
widget = ClearableFileInput(attrs={"class": "test-class", "accept": "image/*"})
283+
html = widget.render(name="file", value=None)
284+
assert html.startswith("<s3-file")
285+
assert 'name="file"' in html
286+
assert 'class="test-class"' in html
287+
assert 'accept="image/*"' in html
288+
assert 'type="file"' not in html
289+
290+
def test_render_excludes_type_attribute(self, freeze_upload_folder):
291+
widget = ClearableFileInput()
292+
html = widget.render(name="file", value=None)
293+
assert 'type="file"' not in html
294+
assert html.startswith("<s3-file")
295+
189296
@pytest.mark.selenium
190297
def test_no_js_error(self, driver, live_server):
191298
driver.get(live_server + self.create_url)

0 commit comments

Comments
 (0)