Coverage for tdom / parser.py: 99%

166 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-03 21:23 +0000

1from collections.abc import Sequence 

2from dataclasses import dataclass, field 

3from html.parser import HTMLParser 

4from string.templatelib import Interpolation, Template 

5 

6from .htmlspec import VOID_ELEMENTS 

7from .placeholders import PlaceholderState 

8from .template_utils import combine_template_refs 

9from .tnodes import ( 

10 TAttribute, 

11 TComment, 

12 TComponent, 

13 TDocumentType, 

14 TElement, 

15 TFragment, 

16 TInterpolatedAttribute, 

17 TLiteralAttribute, 

18 TNode, 

19 TSpreadAttribute, 

20 TTemplatedAttribute, 

21 TText, 

22) 

23 

# A single raw attribute as passed to the HTMLParser tag callbacks below:
# a (name, value) pair. The value may be None; make_tattr treats a None
# value as "no value to inspect for placeholders".
type HTMLAttribute = tuple[str, str | None]

# Mapping form of raw attributes (name -> value or None). Not referenced by
# the code visible in this module; presumably used by importers.
type HTMLAttributesDict = dict[str, str | None]

26 

27 

28@dataclass 

29class OpenTElement: 

30 tag: str 

31 attrs: tuple[TAttribute, ...] 

32 children: list[TNode] = field(default_factory=list) 

33 

34 

35@dataclass 

36class OpenTFragment: 

37 children: list[TNode] = field(default_factory=list) 

38 

39 

40@dataclass 

41class OpenTComponent: 

42 start_i_index: int 

43 attrs: tuple[TAttribute, ...] 

44 children: list[TNode] = field(default_factory=list) 

45 

46 

# Any node that is still under construction: the parser keeps these on its
# stack (and as its root) and converts them to TNodes via finalize_tag.
type OpenTag = OpenTElement | OpenTFragment | OpenTComponent

48 

49 

50@dataclass 

51class SourceTracker: 

52 """Tracks source locations within a Template for error reporting.""" 

53 

54 # TODO: write utilities to generate complete error messages, with the 

55 # template itself in context and the relevant line/column underlined/etc. 

56 

57 template: Template 

58 i_index: int = -1 # The current interpolation index. 

59 

60 @property 

61 def interpolations(self) -> tuple[Interpolation, ...]: 

62 return self.template.interpolations 

63 

64 def advance_interpolation(self) -> int: 

65 """Call before processing an interpolation to move to the next one.""" 

66 self.i_index += 1 

67 return self.i_index 

68 

69 def get_expression( 

70 self, i_index: int, fallback_prefix: str = "interpolation" 

71 ) -> str: 

72 """ 

73 Resolve an interpolation index to its original expression for error messages. 

74 Falls back to a synthetic expression if the original is empty. 

75 """ 

76 ip = self.interpolations[i_index] 

77 return ip.expression if ip.expression else f"{{{fallback_prefix}-{i_index}}}" 

78 

79 def format_starttag(self, i_index: int) -> str: 

80 """Format a component start tag for error messages.""" 

81 return self.get_expression(i_index, fallback_prefix="component-starttag") 

82 

83 

84class TemplateParser(HTMLParser): 

85 root: OpenTFragment 

86 stack: list[OpenTag] 

87 placeholders: PlaceholderState 

88 source: SourceTracker | None 

89 

90 def __init__(self, *, convert_charrefs: bool = True): 

91 # This calls HTMLParser.reset() which we override to set up our state. 

92 super().__init__(convert_charrefs=convert_charrefs) 

93 

94 # ------------------------------------------ 

95 # Parse state helpers 

96 # ------------------------------------------ 

97 

98 def get_parent(self) -> OpenTag: 

99 """Return the current parent node to which new children should be added.""" 

100 return self.stack[-1] if self.stack else self.root 

101 

102 def append_child(self, child: TNode) -> None: 

103 parent = self.get_parent() 

104 parent.children.append(child) 

105 

106 # ------------------------------------------ 

107 # Attribute Helpers 

108 # ------------------------------------------ 

109 

110 def make_tattr(self, attr: HTMLAttribute) -> TAttribute: 

111 """Build a TAttribute from a raw attribute tuple.""" 

112 

113 name, value = attr 

114 

115 name_ref = self.placeholders.remove_placeholders(name) 

116 value_ref = ( 

117 self.placeholders.remove_placeholders(value) if value is not None else None 

118 ) 

119 

120 if name_ref.is_literal: 

121 if value_ref is None or value_ref.is_literal: 

122 return TLiteralAttribute(name=name, value=value) 

123 elif value_ref.is_singleton: 

124 return TInterpolatedAttribute( 

125 name=name, value_i_index=value_ref.i_indexes[0] 

126 ) 

127 else: 

128 return TTemplatedAttribute(name=name, value_ref=value_ref) 

129 if value_ref is not None: 

130 raise ValueError( 

131 "Attribute names cannot contain interpolations if the value is also interpolated." 

132 ) 

133 if not name_ref.is_singleton: 

134 raise ValueError( 

135 "Spread attributes must have exactly one interpolation in the name." 

136 ) 

137 return TSpreadAttribute(i_index=name_ref.i_indexes[0]) 

138 

139 def make_tattrs(self, attrs: Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]: 

140 """Build TAttributes from raw attribute tuples.""" 

141 return tuple(self.make_tattr(attr) for attr in attrs) 

142 

143 # ------------------------------------------ 

144 # Tag Helpers 

145 # ------------------------------------------ 

146 

147 def make_open_tag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> OpenTag: 

148 """Build an OpenTag from a raw tag and attribute tuples.""" 

149 tag_ref = self.placeholders.remove_placeholders(tag) 

150 

151 if tag_ref.is_literal: 

152 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs)) 

153 

154 if not tag_ref.is_singleton: 

155 raise ValueError( 

156 "Component element tags must have exactly one interpolation." 

157 ) 

158 

159 # HERE BE DRAGONS: the interpolation at i_index should be a 

160 # component callable. We do not check this in the parser, instead 

161 # relying on higher layers to validate types and render correctly. 

162 i_index = tag_ref.i_indexes[0] 

163 return OpenTComponent( 

164 start_i_index=i_index, 

165 attrs=self.make_tattrs(attrs), 

166 ) 

167 

168 def finalize_tag( 

169 self, open_tag: OpenTag, endtag_i_index: int | None = None 

170 ) -> TNode: 

171 """Finalize an OpenTag into a TNode.""" 

172 match open_tag: 

173 case OpenTElement(tag=tag, attrs=attrs, children=children): 

174 return TElement(tag=tag, attrs=attrs, children=tuple(children)) 

175 case OpenTFragment(children=children): 

176 return TFragment(children=tuple(children)) 

177 case OpenTComponent( 

178 start_i_index=start_i_index, 

179 attrs=attrs, 

180 children=children, 

181 ): 

182 return TComponent( 

183 start_i_index=start_i_index, 

184 end_i_index=endtag_i_index, 

185 attrs=attrs, 

186 children=tuple(children), 

187 ) 

188 

189 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: 

190 """Validate that closing tag matches open tag. Return component end index if applicable.""" 

191 assert self.source, "Parser source tracker not initialized." 

192 tag_ref = self.placeholders.remove_placeholders(tag) 

193 

194 match open_tag: 

195 case OpenTElement(): 

196 if not tag_ref.is_literal: 

197 raise ValueError( 

198 f"Component closing tag found for element <{open_tag.tag}>." 

199 ) 

200 if tag != open_tag.tag: 

201 raise ValueError( 

202 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>." 

203 ) 

204 return None 

205 

206 case OpenTFragment(): 

207 raise NotImplementedError("We do not support anonymous fragments.") 

208 

209 case OpenTComponent(start_i_index=start_i_index): 

210 if tag_ref.is_literal: 

211 raise ValueError( 

212 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}." 

213 ) 

214 if not tag_ref.is_singleton: 

215 raise ValueError( 

216 "Component end tags must have exactly one interpolation." 

217 ) 

218 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a 

219 # component callable that matches the start tag. We do not check 

220 # any of this in the parser, instead relying on higher layers. 

221 return tag_ref.i_indexes[0] 

222 

223 # ------------------------------------------ 

224 # HTMLParser tag callbacks 

225 # ------------------------------------------ 

226 

227 def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: 

228 open_tag = self.make_open_tag(tag, attrs) 

229 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS: 

230 final_tag = self.finalize_tag(open_tag) 

231 self.append_child(final_tag) 

232 else: 

233 self.stack.append(open_tag) 

234 

235 def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: 

236 """Dispatch a self-closing tag, `<tag />` to specialized handlers.""" 

237 open_tag = self.make_open_tag(tag, attrs) 

238 final_tag = self.finalize_tag(open_tag) 

239 self.append_child(final_tag) 

240 

241 def handle_endtag(self, tag: str) -> None: 

242 if not self.stack: 

243 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.") 

244 

245 open_tag = self.stack.pop() 

246 endtag_i_index = self.validate_end_tag(tag, open_tag) 

247 final_tag = self.finalize_tag(open_tag, endtag_i_index) 

248 self.append_child(final_tag) 

249 

250 # ------------------------------------------ 

251 # HTMLParser other callbacks 

252 # ------------------------------------------ 

253 

254 def handle_data(self, data: str) -> None: 

255 ref = self.placeholders.remove_placeholders(data) 

256 parent = self.get_parent() 

257 if parent.children and isinstance(parent.children[-1], TText): 

258 parent.children[-1] = TText( 

259 ref=combine_template_refs(parent.children[-1].ref, ref) 

260 ) 

261 else: 

262 self.append_child(TText(ref=ref)) 

263 

264 def handle_comment(self, data: str) -> None: 

265 ref = self.placeholders.remove_placeholders(data) 

266 comment = TComment(ref) 

267 self.append_child(comment) 

268 

269 def handle_decl(self, decl: str) -> None: 

270 ref = self.placeholders.remove_placeholders(decl) 

271 if not ref.is_literal: 

272 raise ValueError("Interpolations are not allowed in declarations.") 

273 elif decl.upper().startswith("DOCTYPE "): 

274 doctype_content = decl[7:].strip() 

275 doctype = TDocumentType(doctype_content) 

276 self.append_child(doctype) 

277 else: 

278 raise NotImplementedError( 

279 "Only well formed DOCTYPE declarations are currently supported." 

280 ) 

281 

282 def reset(self): 

283 super().reset() 

284 self.root = OpenTFragment() 

285 self.stack = [] 

286 self.placeholders = PlaceholderState() 

287 self.source = None 

288 

289 def close(self) -> None: 

290 if self.stack: 

291 raise ValueError("Invalid HTML structure: unclosed tags remain.") 

292 if not self.placeholders.is_empty: 

293 raise ValueError("Some placeholders were never resolved.") 

294 super().close() 

295 

296 # ------------------------------------------ 

297 # Getting the parsed node tree 

298 # ------------------------------------------ 

299 

300 def get_tnode(self) -> TNode: 

301 """Get the Node tree parsed from the input HTML.""" 

302 # TODO: consider always returning a TTag? 

303 if len(self.root.children) > 1: 

304 # The parse structure results in multiple root elements, so we 

305 # return a Fragment to hold them all. 

306 return self.finalize_tag(self.root) 

307 elif len(self.root.children) == 1: 

308 # The parse structure results in a single root element, so we 

309 # return that element directly. This will be a non-Fragment Node. 

310 return self.root.children[0] 

311 else: 

312 # Special case: the parse structure is empty; we treat 

313 # this as an empty document fragment. 

314 # CONSIDER: or as an empty text node? 

315 return self.finalize_tag(self.root) 

316 

317 # ------------------------------------------ 

318 # Feeding and parsing 

319 # ------------------------------------------ 

320 

321 def feed_str(self, s: str) -> None: 

322 """Feed a string part of a Template to the parser.""" 

323 self.feed(s) 

324 

325 def feed_interpolation(self, index: int) -> None: 

326 placeholder = self.placeholders.add_placeholder(index) 

327 self.feed(placeholder) 

328 

329 def feed_template(self, template: Template) -> None: 

330 """Feed a Template's content to the parser.""" 

331 assert self.source is None, "Did you forget to call reset?" 

332 self.source = SourceTracker(template) 

333 for i_index in range(len(template.interpolations)): 

334 self.feed_str(template.strings[i_index]) 

335 self.source.advance_interpolation() 

336 self.feed_interpolation(i_index) 

337 self.feed_str(template.strings[-1]) 

338 

339 @staticmethod 

340 def parse(t: Template) -> TNode: 

341 """ 

342 Parse a Template containing valid HTML and substitutions and return 

343 a TNode tree representing its structure. This cachable structure can later 

344 be resolved against actual interpolation values to produce a Node tree. 

345 """ 

346 parser = TemplateParser() 

347 parser.feed_template(t) 

348 parser.close() 

349 return parser.get_tnode()