Coverage for tdom/parser_test.py: 100%

90 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-17 19:54 +0000

1import pytest 

2 

3from .nodes import Comment, DocumentType, Element, Fragment, Text 

4from .parser import parse_html 

5 

6 

def test_parse_empty():
    """An empty string parses to a single empty Text node."""
    expected = Text("")
    assert parse_html("") == expected

10 

11 

def test_parse_text():
    """Plain text without markup parses to one Text node with the same content."""
    markup = "Hello, world!"
    expected = Text("Hello, world!")
    assert parse_html(markup) == expected

15 

16 

def test_parse_text_with_entities():
    """Character references in plain text are decoded into the Text node."""
    # The input must actually contain an entity; with a literal apostrophe
    # this test would only repeat what test_parse_text already checks.
    node = parse_html("Panini&#39;s")
    assert node == Text("Panini's")

20 

21 

def test_parse_void_element():
    """A bare ``<br>`` parses to a childless ``br`` Element."""
    expected = Element("br")
    assert parse_html("<br>") == expected

25 

26 

def test_parse_void_element_self_closed():
    """The self-closed spelling ``<br />`` parses to the same ``br`` Element."""
    expected = Element("br")
    assert parse_html("<br />") == expected

30 

31 

def test_parse_uppercase_void_element():
    """An upper-case ``<BR>`` compares equal to a lower-case ``br`` Element."""
    expected = Element("br")
    assert parse_html("<BR>") == expected

35 

36 

def test_parse_standard_element_with_text():
    """A div wrapping text parses to an Element with one Text child."""
    markup = "<div>Hello, world!</div>"
    greeting = Text("Hello, world!")
    assert parse_html(markup) == Element("div", children=[greeting])

40 

41 

def test_parse_nested_elements():
    """A span followed by trailing text become sibling children of the div."""
    markup = "<div><span>Nested</span> content</div>"
    inner_span = Element("span", children=[Text("Nested")])
    trailing = Text(" content")
    expected = Element("div", children=[inner_span, trailing])
    assert parse_html(markup) == expected

51 

52 

def test_parse_element_with_attributes():
    """Quoted attributes end up in the Element's ``attrs`` mapping."""
    markup = '<a href="https://example.com" target="_blank">Link</a>'
    expected_attrs = {"href": "https://example.com", "target": "_blank"}
    expected = Element("a", attrs=expected_attrs, children=[Text("Link")])
    assert parse_html(markup) == expected

60 

61 

def test_parse_comment():
    """A lone HTML comment parses to a Comment that keeps its inner whitespace."""
    expected = Comment(" This is a comment ")
    assert parse_html("<!-- This is a comment -->") == expected

65 

66 

def test_parse_doctype():
    """``<!DOCTYPE html>`` parses to ``DocumentType("html")``."""
    expected = DocumentType("html")
    assert parse_html("<!DOCTYPE html>") == expected

70 

71 

def test_parse_explicit_fragment_empty():
    """The explicit fragment syntax ``<></>`` parses to a Fragment with no children."""
    expected = Fragment(children=[])
    assert parse_html("<></>") == expected

75 

76 

def test_parse_explicit_fragment_with_content():
    """Elements inside ``<>...</>`` become the Fragment's children, in order."""
    markup = "<><div>Item 1</div><div>Item 2</div></>"
    first = Element("div", children=[Text("Item 1")])
    second = Element("div", children=[Text("Item 2")])
    assert parse_html(markup) == Fragment(children=[first, second])

85 

86 

def test_parse_explicit_fragment_with_text():
    """Text and elements may be mixed directly inside an explicit fragment."""
    markup = "<>Hello, <span>world</span>!</>"
    expected_children = [
        Text("Hello, "),
        Element("span", children=[Text("world")]),
        Text("!"),
    ]
    assert parse_html(markup) == Fragment(children=expected_children)

96 

97 

def test_parse_explicit_fragment_nested():
    """An explicit fragment inside an element is kept as a Fragment child."""
    markup = "<div><>Nested <span>fragment</span></></div>"
    inner_fragment = Fragment(
        children=[
            Text("Nested "),
            Element("span", children=[Text("fragment")]),
        ]
    )
    expected = Element("div", children=[inner_fragment])
    assert parse_html(markup) == expected

111 

112 

def test_parse_multiple_voids():
    """A run of void elements, bare or self-closed, parses to sibling Elements."""
    markup = "<br><hr><hr /><hr /><br /><br><br>"
    # One expected tag per void element in the markup, in document order.
    expected_tags = ("br", "hr", "hr", "hr", "br", "br", "br")
    expected = Fragment(children=[Element(tag) for tag in expected_tags])
    assert parse_html(markup) == expected

126 

127 

def test_parse_mixed_content():
    """Doctype, comments, attributes, voids and text parse together correctly."""
    markup = (
        '<!DOCTYPE html><!-- Comment --><div class="container">'
        "Hello, <br class='funky' />world <!-- neato -->!</div>"
    )
    container = Element(
        "div",
        attrs={"class": "container"},
        children=[
            Text("Hello, "),
            Element("br", attrs={"class": "funky"}),
            Text("world "),
            Comment(" neato "),
            Text("!"),
        ],
    )
    expected = Fragment(
        children=[DocumentType("html"), Comment(" Comment "), container]
    )
    assert parse_html(markup) == expected

150 

151 

def test_parse_script_tag_content():
    """``<`` and ``>`` inside a script body stay as text and survive ``str()``."""
    parsed = parse_html("<script>if (a < b && c > d) { alert('wow'); }</script>")
    script_body = Text("if (a < b && c > d) { alert('wow'); }")
    assert parsed == Element("script", children=[script_body])
    # Serializing the node must reproduce the original markup.
    rendered = str(parsed)
    assert rendered == ("<script>if (a < b && c > d) { alert('wow'); }</script>")

159 

160 

def test_parse_script_with_entities():
    """Entity-like text inside a script is left untouched by parsing and ``str()``."""
    # <script> (like <style>) uses the CDATA content model, so "&amp;" is
    # expected to pass through verbatim rather than being decoded.
    parsed = parse_html("<script>var x = 'a &amp; b';</script>")
    script_body = Text("var x = 'a &amp; b';")
    assert parsed == Element("script", children=[script_body])
    rendered = str(parsed)
    assert rendered == "<script>var x = 'a &amp; b';</script>"

169 

170 

def test_parse_textarea_tag_content():
    """``<`` and ``>`` inside a textarea stay as text and survive ``str()``."""
    parsed = parse_html("<textarea>if (a < b && c > d) { alert('wow'); }</textarea>")
    area_body = Text("if (a < b && c > d) { alert('wow'); }")
    assert parsed == Element("textarea", children=[area_body])
    # Serializing the node must reproduce the original markup.
    rendered = str(parsed)
    assert rendered == "<textarea>if (a < b && c > d) { alert('wow'); }</textarea>"

178 

179 

def test_parse_textarea_with_entities():
    """An entity inside a textarea is decoded in the resulting Text child."""
    # <textarea> (like <title>) uses the RCDATA content model, so "&amp;"
    # is expected to come out as a plain "&".
    parsed = parse_html("<textarea>var x = 'a &amp; b';</textarea>")
    area_body = Text("var x = 'a & b';")
    assert parsed == Element("textarea", children=[area_body])
    rendered = str(parsed)
    assert rendered == "<textarea>var x = 'a & b';</textarea>"

188 

189 

def test_parse_title_unusual():
    """A bare ``&`` and a tag-like ``<Site>`` inside a title are kept as text."""
    parsed = parse_html("<title>My & Awesome <Site></title>")
    title_body = Text("My & Awesome <Site>")
    assert parsed == Element("title", children=[title_body])
    rendered = str(parsed)
    assert rendered == "<title>My & Awesome <Site></title>"

197 

198 

def test_parse_mismatched_tags():
    """Closing tags in the wrong order are rejected with ValueError."""
    markup = "<div><span>Mismatched</div></span>"
    with pytest.raises(ValueError):
        parse_html(markup)

202 

203 

def test_parse_unclosed_tag():
    """An element that is never closed is rejected with ValueError."""
    markup = "<div>Unclosed"
    with pytest.raises(ValueError):
        parse_html(markup)

207 

208 

def test_parse_unexpected_closing_tag():
    """A closing tag with no matching opener is rejected with ValueError."""
    markup = "Unopened</div>"
    with pytest.raises(ValueError):
        parse_html(markup)

212 

213 

def test_nested_self_closing_tags():
    """An empty div followed by a void ``<br>`` parses to a two-child Fragment."""
    markup = "<div></div><br>"
    expected_children = [Element("div"), Element("br")]
    assert parse_html(markup) == Fragment(children=expected_children)

222 

223 

def test_parse_html_iter_preserves_chunks():
    """Given a list of string chunks, adjacent text chunks stay separate Text nodes."""
    pieces = ["<div>", "Hello ", "there, ", "<span>world</span>", "!</div>"]
    expected = Element(
        "div",
        children=[
            # "Hello " and "there, " arrive as separate chunks and are
            # expected to remain two Text nodes rather than being merged.
            Text("Hello "),
            Text("there, "),
            Element("span", children=[Text("world")]),
            Text("!"),
        ],
    )
    assert parse_html(pieces) == expected