Coverage for tdom/parser_test.py: 100%
90 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-17 19:54 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-17 19:54 +0000
1import pytest
3from .nodes import Comment, DocumentType, Element, Fragment, Text
4from .parser import parse_html
def test_parse_empty():
    """Parsing the empty string yields a Text node holding an empty string."""
    markup = ""
    parsed = parse_html(markup)
    expected = Text("")
    assert parsed == expected
def test_parse_text():
    """Markup-free input is returned as a single Text node with that content."""
    markup = "Hello, world!"
    parsed = parse_html(markup)
    expected = Text("Hello, world!")
    assert parsed == expected
def test_parse_text_with_entities():
    """A character reference in plain text is decoded in the resulting Text.

    NOTE(review): the reviewed copy of this test passed ``"Panini's"`` with a
    literal apostrophe, so no entity was ever parsed and the test duplicated
    ``test_parse_text``. The input now uses the numeric reference ``&#x27;``
    so the decoding path is actually exercised; confirm the exact entity
    against the upstream file.
    """
    node = parse_html("Panini&#x27;s")
    # The expected text carries the decoded apostrophe, not the reference.
    assert node == Text("Panini's")
def test_parse_void_element():
    """A lone ``<br>`` parses to a ``br`` Element without a closing tag."""
    markup = "<br>"
    parsed = parse_html(markup)
    expected = Element("br")
    assert parsed == expected
def test_parse_void_element_self_closed():
    """The self-closed spelling ``<br />`` parses to the same ``br`` Element."""
    markup = "<br />"
    parsed = parse_html(markup)
    expected = Element("br")
    assert parsed == expected
def test_parse_uppercase_void_element():
    """An upper-case ``<BR>`` compares equal to the lower-case ``br`` Element."""
    markup = "<BR>"
    parsed = parse_html(markup)
    expected = Element("br")
    assert parsed == expected
def test_parse_standard_element_with_text():
    """A ``<div>`` wrapping text parses to an Element with one Text child."""
    markup = "<div>Hello, world!</div>"
    parsed = parse_html(markup)
    inner_text = Text("Hello, world!")
    expected = Element("div", children=[inner_text])
    assert parsed == expected
def test_parse_nested_elements():
    """A ``<span>`` followed by trailing text nests correctly inside a ``<div>``."""
    markup = "<div><span>Nested</span> content</div>"
    parsed = parse_html(markup)

    span = Element("span", children=[Text("Nested")])
    trailing = Text(" content")
    expected = Element("div", children=[span, trailing])

    assert parsed == expected
def test_parse_element_with_attributes():
    """Attributes on an ``<a>`` tag are collected into the Element's attrs."""
    markup = '<a href="https://example.com" target="_blank">Link</a>'
    parsed = parse_html(markup)

    expected_attrs = {"href": "https://example.com", "target": "_blank"}
    expected = Element("a", attrs=expected_attrs, children=[Text("Link")])

    assert parsed == expected
def test_parse_comment():
    """An HTML comment parses to a Comment keeping its inner whitespace."""
    markup = "<!-- This is a comment -->"
    parsed = parse_html(markup)
    expected = Comment(" This is a comment ")
    assert parsed == expected
def test_parse_doctype():
    """``<!DOCTYPE html>`` parses to ``DocumentType("html")``."""
    markup = "<!DOCTYPE html>"
    parsed = parse_html(markup)
    expected = DocumentType("html")
    assert parsed == expected
def test_parse_explicit_fragment_empty():
    """The explicit fragment syntax ``<></>`` yields a Fragment with no children."""
    markup = "<></>"
    parsed = parse_html(markup)
    expected = Fragment(children=[])
    assert parsed == expected
def test_parse_explicit_fragment_with_content():
    """Sibling ``<div>`` elements inside ``<>...</>`` become Fragment children."""
    markup = "<><div>Item 1</div><div>Item 2</div></>"
    parsed = parse_html(markup)

    # One div per label, in document order.
    items = [
        Element("div", children=[Text(label)])
        for label in ("Item 1", "Item 2")
    ]
    expected = Fragment(children=items)

    assert parsed == expected
def test_parse_explicit_fragment_with_text():
    """Text and elements mixed inside an explicit fragment keep their order."""
    markup = "<>Hello, <span>world</span>!</>"
    parsed = parse_html(markup)

    leading = Text("Hello, ")
    span = Element("span", children=[Text("world")])
    trailing = Text("!")
    expected = Fragment(children=[leading, span, trailing])

    assert parsed == expected
def test_parse_explicit_fragment_nested():
    """An explicit fragment inside a ``<div>`` is kept as a Fragment child."""
    markup = "<div><>Nested <span>fragment</span></></div>"
    parsed = parse_html(markup)

    span = Element("span", children=[Text("fragment")])
    inner_fragment = Fragment(children=[Text("Nested "), span])
    expected = Element("div", children=[inner_fragment])

    assert parsed == expected
def test_parse_multiple_voids():
    """A run of void elements, self-closed or not, parses to sibling Elements."""
    markup = "<br><hr><hr /><hr /><br /><br><br>"
    parsed = parse_html(markup)

    # Tag names in the order they appear in the markup above.
    tag_sequence = ("br", "hr", "hr", "hr", "br", "br", "br")
    expected = Fragment(children=[Element(tag) for tag in tag_sequence])

    assert parsed == expected
def test_parse_mixed_content():
    """A doctype, comments, elements, attributes and text parse together."""
    markup = (
        '<!DOCTYPE html><!-- Comment --><div class="container">'
        "Hello, <br class='funky' />world <!-- neato -->!</div>"
    )
    parsed = parse_html(markup)

    # Children of the container div, in document order.
    div_children = [
        Text("Hello, "),
        Element("br", attrs={"class": "funky"}),
        Text("world "),
        Comment(" neato "),
        Text("!"),
    ]
    container = Element(
        "div",
        attrs={"class": "container"},
        children=div_children,
    )
    expected = Fragment(
        children=[DocumentType("html"), Comment(" Comment "), container]
    )

    assert parsed == expected
def test_parse_script_tag_content():
    """``<`` , ``>`` and ``&&`` inside ``<script>`` stay as one Text child.

    Also checks that ``str()`` of the parsed node equals the input markup.
    """
    markup = "<script>if (a < b && c > d) { alert('wow'); }</script>"
    parsed = parse_html(markup)

    body = Text("if (a < b && c > d) { alert('wow'); }")
    expected = Element("script", children=[body])
    assert parsed == expected

    rendered = str(parsed)
    assert rendered == "<script>if (a < b && c > d) { alert('wow'); }</script>"
def test_parse_script_with_entities():
    """An ampersand inside ``<script>`` survives parsing and ``str()`` unchanged.

    NOTE(review): despite the test name, the markup below contains a bare
    ``&`` rather than a character reference. This may be an artifact of how
    this listing was produced; confirm against the original file.
    """
    # The <script> tag (and the <style> tag) uses the CDATA content model.
    markup = "<script>var x = 'a & b';</script>"
    parsed = parse_html(markup)

    body = Text("var x = 'a & b';")
    expected = Element("script", children=[body])
    assert parsed == expected

    rendered = str(parsed)
    assert rendered == "<script>var x = 'a & b';</script>"
def test_parse_textarea_tag_content():
    """``<`` , ``>`` and ``&&`` inside ``<textarea>`` stay as one Text child.

    Also checks that ``str()`` of the parsed node equals the input markup.
    """
    markup = "<textarea>if (a < b && c > d) { alert('wow'); }</textarea>"
    parsed = parse_html(markup)

    body = Text("if (a < b && c > d) { alert('wow'); }")
    expected = Element("textarea", children=[body])
    assert parsed == expected

    rendered = str(parsed)
    assert rendered == "<textarea>if (a < b && c > d) { alert('wow'); }</textarea>"
def test_parse_textarea_with_entities():
    """An ampersand inside ``<textarea>`` survives parsing and ``str()``.

    NOTE(review): despite the test name, the markup below contains a bare
    ``&`` rather than a character reference. This may be an artifact of how
    this listing was produced; confirm against the original file.
    """
    # The <textarea> tag (and the <title> tag) uses the RCDATA content model.
    markup = "<textarea>var x = 'a & b';</textarea>"
    parsed = parse_html(markup)

    body = Text("var x = 'a & b';")
    expected = Element("textarea", children=[body])
    assert parsed == expected

    rendered = str(parsed)
    assert rendered == "<textarea>var x = 'a & b';</textarea>"
def test_parse_title_unusual():
    """A ``<title>`` holding ``&`` and a tag-like ``<Site>`` is a single Text.

    Also checks that ``str()`` of the parsed node equals the input markup.
    """
    markup = "<title>My & Awesome <Site></title>"
    parsed = parse_html(markup)

    # "<Site>" is expected as literal text, not as a child element.
    body = Text("My & Awesome <Site>")
    expected = Element("title", children=[body])
    assert parsed == expected

    rendered = str(parsed)
    assert rendered == "<title>My & Awesome <Site></title>"
def test_parse_mismatched_tags():
    """Closing tags in the wrong order make ``parse_html`` raise ValueError."""
    markup = "<div><span>Mismatched</div></span>"
    with pytest.raises(ValueError):
        parse_html(markup)
def test_parse_unclosed_tag():
    """An element that is never closed makes ``parse_html`` raise ValueError."""
    markup = "<div>Unclosed"
    with pytest.raises(ValueError):
        parse_html(markup)
def test_parse_unexpected_closing_tag():
    """A closing tag with no matching opener makes ``parse_html`` raise ValueError."""
    markup = "Unopened</div>"
    with pytest.raises(ValueError):
        parse_html(markup)
def test_nested_self_closing_tags():
    """An empty ``<div>`` followed by ``<br>`` parses to two sibling Elements.

    NOTE(review): the test name mentions nesting, but the markup exercised
    here places the two elements side by side.
    """
    markup = "<div></div><br>"
    parsed = parse_html(markup)

    siblings = [Element("div"), Element("br")]
    expected = Fragment(children=siblings)

    assert parsed == expected
def test_parse_html_iter_preserves_chunks():
    """Passing a list of strings keeps adjacent text chunks as separate Texts.

    "Hello " and "there, " arrive as two list items and are expected as two
    distinct Text children rather than one merged node.
    """
    pieces = ["<div>", "Hello ", "there, ", "<span>world</span>", "!</div>"]
    parsed = parse_html(pieces)

    span = Element("span", children=[Text("world")])
    expected_children = [Text("Hello "), Text("there, "), span, Text("!")]
    expected = Element("div", children=expected_children)

    assert parsed == expected