Coverage for tdom/parser_test.py: 100%

95 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-31 17:14 +0000

1import pytest 

2from markupsafe import Markup 

3 

4from .nodes import Comment, DocumentType, Element, Fragment, Text 

5from .parser import parse_html 

6 

7 

def test_parse_empty():
    """An empty input string parses to a Text node wrapping an empty string."""
    assert parse_html("") == Text("")

11 

12 

def test_parse_text():
    """Markup-free text parses to a single Text node with the same content."""
    source = "Hello, world!"
    parsed = parse_html(source)
    assert parsed == Text(source)

16 

17 

def test_parse_text_with_entities():
    """Check that a character reference in plain text is decoded in the Text node.

    The input deliberately contains a real character reference (``&#39;``);
    a bare apostrophe would make this test identical in spirit to
    ``test_parse_text`` and would not exercise entity handling at all.
    """
    node = parse_html("Panini&#39;s")
    # The parsed Text is expected to carry the decoded apostrophe, not the
    # raw ``&#39;`` reference.
    assert node == Text("Panini's")

21 

22 

def test_parse_void_element():
    """A lone ``<br>`` with no closing tag parses to ``Element("br")``."""
    parsed = parse_html("<br>")
    expected = Element("br")
    assert parsed == expected

26 

27 

def test_parse_void_element_self_closed():
    """The self-closed spelling ``<br />`` also parses to ``Element("br")``."""
    assert parse_html("<br />") == Element("br")

31 

32 

def test_parse_uppercase_void_element():
    """An uppercase ``<BR>`` compares equal to the lowercase ``br`` Element."""
    parsed = parse_html("<BR>")
    lowercase_br = Element("br")
    assert parsed == lowercase_br

36 

37 

def test_parse_standard_element_with_text():
    """A div wrapping text parses to an Element with one Text child."""
    parsed = parse_html("<div>Hello, world!</div>")
    greeting = Text("Hello, world!")
    assert parsed == Element("div", children=[greeting])

41 

42 

def test_parse_nested_elements():
    """A span nested in a div, followed by trailing text, parses in order."""
    parsed = parse_html("<div><span>Nested</span> content</div>")
    inner_span = Element("span", children=[Text("Nested")])
    trailing_text = Text(" content")
    assert parsed == Element("div", children=[inner_span, trailing_text])

52 

53 

def test_parse_element_with_attributes():
    """Quoted attributes on an anchor end up in the Element's ``attrs``."""
    parsed = parse_html('<a href="https://example.com" target="_blank">Link</a>')
    expected_attrs = {"href": "https://example.com", "target": "_blank"}
    expected = Element("a", attrs=expected_attrs, children=[Text("Link")])
    assert parsed == expected

61 

62 

def test_parse_comment():
    """An HTML comment parses to a Comment keeping its inner whitespace."""
    assert parse_html("<!-- This is a comment -->") == Comment(" This is a comment ")

66 

67 

def test_parse_doctype():
    """``<!DOCTYPE html>`` parses to ``DocumentType("html")``."""
    parsed = parse_html("<!DOCTYPE html>")
    expected = DocumentType("html")
    assert parsed == expected

71 

72 

def test_parse_explicit_fragment_empty():
    """The explicit fragment syntax ``<></>`` parses to a Fragment with no children."""
    assert parse_html("<></>") == Fragment(children=[])

76 

77 

def test_parse_explicit_fragment_with_content():
    """Sibling divs inside ``<>...</>`` become the Fragment's children."""
    parsed = parse_html("<><div>Item 1</div><div>Item 2</div></>")
    items = [
        Element("div", children=[Text(label)])
        for label in ("Item 1", "Item 2")
    ]
    assert parsed == Fragment(children=items)

86 

87 

def test_parse_explicit_fragment_with_text():
    """Text and elements mixed inside an explicit fragment keep their order."""
    parsed = parse_html("<>Hello, <span>world</span>!</>")
    leading = Text("Hello, ")
    span = Element("span", children=[Text("world")])
    trailing = Text("!")
    assert parsed == Fragment(children=[leading, span, trailing])

97 

98 

def test_parse_explicit_fragment_nested():
    """An explicit fragment inside a div is kept as a Fragment child of the div."""
    parsed = parse_html("<div><>Nested <span>fragment</span></></div>")
    inner_fragment = Fragment(
        children=[
            Text("Nested "),
            Element("span", children=[Text("fragment")]),
        ]
    )
    expected = Element("div", children=[inner_fragment])
    assert parsed == expected

112 

113 

def test_parse_multiple_voids():
    """A run of void tags, self-closed or not, parses to sibling Elements."""
    parsed = parse_html("<br><hr><hr /><hr /><br /><br><br>")
    # Tag names in document order; both spellings yield the same Element.
    tag_sequence = ("br", "hr", "hr", "hr", "br", "br", "br")
    expected = Fragment(children=[Element(tag) for tag in tag_sequence])
    assert parsed == expected

127 

128 

def test_parse_mixed_content():
    """Doctype, comments, elements, attributes and text parse together in order."""
    prologue = '<!DOCTYPE html><!-- Comment --><div class="container">'
    remainder = "Hello, <br class='funky' />world <!-- neato -->!</div>"
    parsed = parse_html(prologue + remainder)

    container_children = [
        Text("Hello, "),
        Element("br", attrs={"class": "funky"}),
        Text("world "),
        Comment(" neato "),
        Text("!"),
    ]
    container = Element(
        "div",
        attrs={"class": "container"},
        children=container_children,
    )
    expected = Fragment(
        children=[DocumentType("html"), Comment(" Comment "), container]
    )
    assert parsed == expected

151 

152 

def test_parse_entities_are_escaped():
    """Entities decode into the Text node and are re-escaped by ``str()``."""
    html = "<p>&lt;/p&gt;</p>"
    parsed = parse_html(html)
    # The tree holds the decoded characters ...
    assert parsed == Element("p", children=[Text("</p>")])
    # ... and serializing produces the escaped source again.
    assert str(parsed) == html

160 

161 

def test_parse_script_tag_content():
    """Script content with ``<``, ``>`` and ``&&`` is kept verbatim and round-trips."""
    html = "<script>if (a < b && c > d) { alert('wow'); }</script>"
    parsed = parse_html(html)
    script_body = Text(Markup("if (a < b && c > d) { alert('wow'); }"))
    assert parsed == Element("script", children=[script_body])
    assert str(parsed) == html

169 

170 

def test_parse_script_with_entities():
    """An ``&amp;`` inside a script is left undecoded and round-trips unchanged."""
    # The <script> tag (like <style>) uses the CDATA content model.
    html = "<script>var x = 'a &amp; b';</script>"
    parsed = parse_html(html)
    script_body = Text(Markup("var x = 'a &amp; b';"))
    assert parsed == Element("script", children=[script_body])
    assert str(parsed) == html

179 

180 

def test_parse_textarea_tag_content():
    """Textarea content with ``<``, ``>`` and ``&&`` is kept verbatim and round-trips."""
    html = "<textarea>if (a < b && c > d) { alert('wow'); }</textarea>"
    parsed = parse_html(html)
    textarea_body = Text(Markup("if (a < b && c > d) { alert('wow'); }"))
    assert parsed == Element("textarea", children=[textarea_body])
    assert str(parsed) == html

188 

189 

def test_parse_textarea_with_entities():
    """An ``&amp;`` inside a textarea is decoded and serialized as a bare ``&``."""
    # The <textarea> tag (like <title>) uses the RCDATA content model.
    parsed = parse_html("<textarea>var x = 'a &amp; b';</textarea>")
    decoded_body = Text(Markup("var x = 'a & b';"))
    assert parsed == Element("textarea", children=[decoded_body])
    serialized = str(parsed)
    assert serialized == "<textarea>var x = 'a & b';</textarea>"

198 

199 

def test_parse_title_unusual():
    """A title holding a bare ``&`` and a tag-like ``<Site>`` is kept as raw text."""
    html = "<title>My & Awesome <Site></title>"
    parsed = parse_html(html)
    title_text = Text(Markup("My & Awesome <Site>"))
    assert parsed == Element("title", children=[title_text])
    assert str(parsed) == html

207 

208 

def test_parse_mismatched_tags():
    """Closing tags in the wrong order must raise ValueError."""
    malformed = "<div><span>Mismatched</div></span>"
    with pytest.raises(ValueError):
        parse_html(malformed)

212 

213 

def test_parse_unclosed_tag():
    """An opening tag that is never closed must raise ValueError."""
    malformed = "<div>Unclosed"
    with pytest.raises(ValueError):
        parse_html(malformed)

217 

218 

def test_parse_unexpected_closing_tag():
    """A closing tag with no matching opening tag must raise ValueError."""
    malformed = "Unopened</div>"
    with pytest.raises(ValueError):
        parse_html(malformed)

222 

223 

def test_nested_self_closing_tags():
    """An empty div followed by a ``<br>`` parses to a two-element Fragment."""
    parsed = parse_html("<div></div><br>")
    siblings = [Element("div"), Element("br")]
    assert parsed == Fragment(children=siblings)

232 

233 

def test_parse_html_iter_preserves_chunks():
    """Text passed as separate list items stays as separate Text nodes."""
    pieces = ["<div>", "Hello ", "there, ", "<span>world</span>", "!</div>"]
    parsed = parse_html(pieces)
    # "Hello " and "there, " arrive as separate chunks and are expected to
    # remain two adjacent Text children.
    expected_children = [
        Text("Hello "),
        Text("there, "),
        Element("span", children=[Text("world")]),
        Text("!"),
    ]
    assert parsed == Element("div", children=expected_children)