Coverage for tdom/parser_test.py: 100%
95 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-31 17:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-31 17:14 +0000
1import pytest
2from markupsafe import Markup
4from .nodes import Comment, DocumentType, Element, Fragment, Text
5from .parser import parse_html
def test_parse_empty():
    """An empty input string parses to ``Text("")``."""
    assert parse_html("") == Text("")
def test_parse_text():
    """Markup-free input parses to one ``Text`` node holding the same string."""
    parsed = parse_html("Hello, world!")
    expected = Text("Hello, world!")
    assert parsed == expected
def test_parse_text_with_entities():
    """A character reference in plain text is decoded in the parsed ``Text``."""
    # The input has to contain a real entity; a literal apostrophe would only
    # repeat the plain-text case and leave entity decoding untested.
    node = parse_html("Panini&#39;s")
    assert node == Text("Panini's")
def test_parse_void_element():
    """A bare ``<br>`` parses to ``Element("br")`` without needing a close tag."""
    expected = Element("br")
    assert parse_html("<br>") == expected
def test_parse_void_element_self_closed():
    """The self-closed spelling ``<br />`` also parses to ``Element("br")``."""
    parsed = parse_html("<br />")
    assert parsed == Element("br")
def test_parse_uppercase_void_element():
    """An upper-case ``<BR>`` compares equal to the lower-case ``br`` element."""
    lowercase_br = Element("br")
    assert parse_html("<BR>") == lowercase_br
def test_parse_standard_element_with_text():
    """Text between an open and close tag becomes the element's only child."""
    greeting = Text("Hello, world!")
    expected = Element("div", children=[greeting])
    assert parse_html("<div>Hello, world!</div>") == expected
def test_parse_nested_elements():
    """A child element and the text after it are kept, in order, under the parent."""
    inner_span = Element("span", children=[Text("Nested")])
    trailing_text = Text(" content")
    expected = Element("div", children=[inner_span, trailing_text])
    assert parse_html("<div><span>Nested</span> content</div>") == expected
def test_parse_element_with_attributes():
    """Quoted attributes on a tag end up in the element's ``attrs`` mapping."""
    html = '<a href="https://example.com" target="_blank">Link</a>'
    expected_attrs = {"href": "https://example.com", "target": "_blank"}
    expected = Element("a", attrs=expected_attrs, children=[Text("Link")])
    assert parse_html(html) == expected
def test_parse_comment():
    """A comment parses to ``Comment`` with its inner text, padding included."""
    parsed = parse_html("<!-- This is a comment -->")
    # The spaces inside the delimiters are part of the expected comment text.
    assert parsed == Comment(" This is a comment ")
def test_parse_doctype():
    """``<!DOCTYPE html>`` parses to ``DocumentType("html")``."""
    expected = DocumentType("html")
    assert parse_html("<!DOCTYPE html>") == expected
def test_parse_explicit_fragment_empty():
    """The explicit fragment syntax ``<></>`` parses to a ``Fragment`` with no children."""
    empty_fragment = Fragment(children=[])
    assert parse_html("<></>") == empty_fragment
def test_parse_explicit_fragment_with_content():
    """Sibling elements inside ``<>...</>`` become the fragment's children in order."""
    first_item = Element("div", children=[Text("Item 1")])
    second_item = Element("div", children=[Text("Item 2")])
    parsed = parse_html("<><div>Item 1</div><div>Item 2</div></>")
    assert parsed == Fragment(children=[first_item, second_item])
def test_parse_explicit_fragment_with_text():
    """Text and elements may be interleaved directly inside an explicit fragment."""
    expected_children = [
        Text("Hello, "),
        Element("span", children=[Text("world")]),
        Text("!"),
    ]
    parsed = parse_html("<>Hello, <span>world</span>!</>")
    assert parsed == Fragment(children=expected_children)
def test_parse_explicit_fragment_nested():
    """An explicit fragment inside an element is kept as a ``Fragment`` child."""
    inner_fragment = Fragment(
        children=[
            Text("Nested "),
            Element("span", children=[Text("fragment")]),
        ]
    )
    expected = Element("div", children=[inner_fragment])
    assert parse_html("<div><>Nested <span>fragment</span></></div>") == expected
def test_parse_multiple_voids():
    """A run of void tags, in either spelling, parses to sibling elements in order."""
    # Expected tag names, in the order the tags appear in the input.
    expected_tags = ("br", "hr", "hr", "hr", "br", "br", "br")
    expected = Fragment(children=[Element(tag) for tag in expected_tags])
    assert parse_html("<br><hr><hr /><hr /><br /><br><br>") == expected
def test_parse_mixed_content():
    """Doctype, comments, elements, attributes and text all parse together."""
    html = (
        '<!DOCTYPE html><!-- Comment --><div class="container">'
        "Hello, <br class='funky' />world <!-- neato -->!</div>"
    )
    # Expected children of the <div>, in source order.
    container_children = [
        Text("Hello, "),
        Element("br", attrs={"class": "funky"}),
        Text("world "),
        Comment(" neato "),
        Text("!"),
    ]
    container = Element(
        "div",
        attrs={"class": "container"},
        children=container_children,
    )
    expected = Fragment(
        children=[DocumentType("html"), Comment(" Comment "), container]
    )
    assert parse_html(html) == expected
def test_parse_entities_are_escaped():
    """Entities decode to literal text when parsed and are re-escaped by ``str()``."""
    # The inner "</p>" must be written with entities. A literal "</p>" would be
    # a stray closing tag, which the parser rejects with ValueError.
    node = parse_html("<p>&lt;/p&gt;</p>")
    assert node == Element(
        "p",
        children=[Text("</p>")],
    )
    # Serializing must escape the plain text again so the markup round-trips.
    assert str(node) == "<p>&lt;/p&gt;</p>"
def test_parse_script_tag_content():
    """``<``, ``>`` and ``&&`` inside ``<script>`` survive parsing and ``str()`` unchanged."""
    expected = Element(
        "script",
        children=[Text(Markup("if (a < b && c > d) { alert('wow'); }"))],
    )
    parsed = parse_html("<script>if (a < b && c > d) { alert('wow'); }</script>")
    assert parsed == expected
    rendered = str(parsed)
    assert rendered == "<script>if (a < b && c > d) { alert('wow'); }</script>"
def test_parse_script_with_entities():
    """An entity inside ``<script>`` is kept verbatim, not decoded."""
    # The <script> tag (and <style>) tag uses the CDATA content model, so the
    # input must contain a real entity ("&amp;") to show it is left untouched;
    # a bare "&" would not exercise entity handling at all.
    node = parse_html("<script>var x = 'a &amp; b';</script>")
    assert node == Element(
        "script",
        children=[Text(Markup("var x = 'a &amp; b';"))],
    )
    assert str(node) == "<script>var x = 'a &amp; b';</script>"
def test_parse_textarea_tag_content():
    """``<``, ``>`` and ``&&`` inside ``<textarea>`` survive parsing and ``str()`` unchanged."""
    expected = Element(
        "textarea",
        children=[Text(Markup("if (a < b && c > d) { alert('wow'); }"))],
    )
    parsed = parse_html("<textarea>if (a < b && c > d) { alert('wow'); }</textarea>")
    assert parsed == expected
    rendered = str(parsed)
    assert rendered == "<textarea>if (a < b && c > d) { alert('wow'); }</textarea>"
def test_parse_textarea_with_entities():
    """An ampersand inside ``<textarea>`` round-trips through parsing and ``str()``."""
    # The <textarea> (and <title>) tag uses the RCDATA content model.
    # NOTE(review): the input holds a bare "&" rather than an entity such as
    # "&amp;"; confirm whether an actual entity was intended here.
    expected = Element(
        "textarea",
        children=[Text(Markup("var x = 'a & b';"))],
    )
    parsed = parse_html("<textarea>var x = 'a & b';</textarea>")
    assert parsed == expected
    assert str(parsed) == "<textarea>var x = 'a & b';</textarea>"
def test_parse_title_unusual():
    """A ``<title>`` containing ``&`` and a tag-like ``<Site>`` is kept as text."""
    title_text = Text(Markup("My & Awesome <Site>"))
    expected = Element("title", children=[title_text])
    parsed = parse_html("<title>My & Awesome <Site></title>")
    assert parsed == expected
    assert str(parsed) == "<title>My & Awesome <Site></title>"
def test_parse_mismatched_tags():
    """Closing tags in the wrong order raise ``ValueError``."""
    crossed_tags = "<div><span>Mismatched</div></span>"
    with pytest.raises(ValueError):
        parse_html(crossed_tags)
def test_parse_unclosed_tag():
    """An element that is never closed raises ``ValueError``."""
    unclosed = "<div>Unclosed"
    with pytest.raises(ValueError):
        parse_html(unclosed)
def test_parse_unexpected_closing_tag():
    """A closing tag with no matching open tag raises ``ValueError``."""
    stray_close = "Unopened</div>"
    with pytest.raises(ValueError):
        parse_html(stray_close)
def test_nested_self_closing_tags():
    """An empty ``<div></div>`` followed by ``<br>`` parses to two sibling elements."""
    expected_siblings = [Element("div"), Element("br")]
    parsed = parse_html("<div></div><br>")
    assert parsed == Fragment(children=expected_siblings)
def test_parse_html_iter_preserves_chunks():
    """Given a list of chunks, adjacent text chunks stay as separate ``Text`` nodes."""
    expected = Element(
        "div",
        children=[
            # "Hello " and "there, " arrive as separate chunks and stay separate.
            Text("Hello "),
            Text("there, "),
            Element("span", children=[Text("world")]),
            Text("!"),
        ],
    )
    parsed = parse_html(
        [
            "<div>",
            "Hello ",
            "there, ",
            "<span>world</span>",
            "!</div>",
        ]
    )
    assert parsed == expected