Coverage for tdom / parser.py: 99%

166 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-12 16:43 +0000

1import typing as t 

2from dataclasses import dataclass, field 

3from html.parser import HTMLParser 

4from string.templatelib import Interpolation, Template 

5 

6from .nodes import VOID_ELEMENTS 

7from .placeholders import PlaceholderState 

8from .tnodes import ( 

9 TAttribute, 

10 TComment, 

11 TComponent, 

12 TDocumentType, 

13 TElement, 

14 TFragment, 

15 TInterpolatedAttribute, 

16 TLiteralAttribute, 

17 TNode, 

18 TSpreadAttribute, 

19 TTemplatedAttribute, 

20 TText, 

21) 

22from .template_utils import combine_template_refs 

23 

24 

25type HTMLAttribute = tuple[str, str | None] 

26type HTMLAttributesDict = dict[str, str | None] 

27 

28 

@dataclass
class OpenTElement:
    """Mutable, in-progress form of an element; finalize_tag() turns it into a TElement."""

    # Tag name exactly as the parser callback received it.
    tag: str
    # Attributes already converted from the raw start-tag attribute pairs.
    attrs: tuple[TAttribute, ...]
    # Children collected so far; each instance starts with its own empty list.
    children: list[TNode] = field(default_factory=list)

34 

35 

@dataclass
class OpenTFragment:
    """Mutable, in-progress form of a fragment; finalize_tag() turns it into a TFragment."""

    # Children collected so far; each instance starts with its own empty list.
    children: list[TNode] = field(default_factory=list)

39 

40 

@dataclass
class OpenTComponent:
    """Mutable, in-progress form of a component; finalize_tag() turns it into a TComponent."""

    # Index of the interpolation that was used as the component's start tag.
    start_i_index: int
    # Attributes already converted from the raw start-tag attribute pairs.
    attrs: tuple[TAttribute, ...]
    # Children collected so far; each instance starts with its own empty list.
    children: list[TNode] = field(default_factory=list)

46 

47 

48type OpenTag = OpenTElement | OpenTFragment | OpenTComponent 

49 

50 

@dataclass
class SourceTracker:
    """Keeps track of the position inside a Template so errors can refer to it."""

    # TODO: write utilities to generate complete error messages, with the
    # template itself in context and the relevant line/column underlined/etc.

    template: Template
    i_index: int = -1  # Index of the current interpolation; -1 until the first advance.

    @property
    def interpolations(self) -> tuple[Interpolation, ...]:
        """The tracked template's interpolations."""
        return self.template.interpolations

    def advance_interpolation(self) -> int:
        """Move on to the next interpolation and return its index.

        Intended to be called right before that interpolation is processed.
        """
        self.i_index += 1
        return self.i_index

    def get_expression(
        self, i_index: int, fallback_prefix: str = "interpolation"
    ) -> str:
        """Return the source expression of interpolation ``i_index`` for error messages.

        When the interpolation carries no expression text, a synthetic
        ``{<fallback_prefix>-<i_index>}`` label is returned instead.
        """
        expression = self.interpolations[i_index].expression
        if expression:
            return expression
        return f"{{{fallback_prefix}-{i_index}}}"

    def format_starttag(self, i_index: int) -> str:
        """Describe the component start tag at ``i_index`` for error messages."""
        return self.get_expression(i_index, fallback_prefix="component-starttag")

83 

84 

85class TemplateParser(HTMLParser): 

86 root: OpenTFragment 

87 stack: list[OpenTag] 

88 placeholders: PlaceholderState 

89 source: SourceTracker | None 

90 

91 def __init__(self, *, convert_charrefs: bool = True): 

92 # This calls HTMLParser.reset() which we override to set up our state. 

93 super().__init__(convert_charrefs=convert_charrefs) 

94 

95 # ------------------------------------------ 

96 # Parse state helpers 

97 # ------------------------------------------ 

98 

99 def get_parent(self) -> OpenTag: 

100 """Return the current parent node to which new children should be added.""" 

101 return self.stack[-1] if self.stack else self.root 

102 

103 def append_child(self, child: TNode) -> None: 

104 parent = self.get_parent() 

105 parent.children.append(child) 

106 

107 # ------------------------------------------ 

108 # Attribute Helpers 

109 # ------------------------------------------ 

110 

111 def make_tattr(self, attr: HTMLAttribute) -> TAttribute: 

112 """Build a TAttribute from a raw attribute tuple.""" 

113 

114 name, value = attr 

115 name_ref = self.placeholders.remove_placeholders(name) 

116 value_ref = ( 

117 self.placeholders.remove_placeholders(value) if value is not None else None 

118 ) 

119 

120 if name_ref.is_literal: 

121 if value_ref is None or value_ref.is_literal: 

122 return TLiteralAttribute(name=name, value=value) 

123 elif value_ref.is_singleton: 

124 return TInterpolatedAttribute( 

125 name=name, value_i_index=value_ref.i_indexes[0] 

126 ) 

127 else: 

128 return TTemplatedAttribute(name=name, value_ref=value_ref) 

129 if value_ref is not None: 

130 raise ValueError( 

131 "Attribute names cannot contain interpolations if the value is also interpolated." 

132 ) 

133 if not name_ref.is_singleton: 

134 raise ValueError( 

135 "Spread attributes must have exactly one interpolation in the name." 

136 ) 

137 return TSpreadAttribute(i_index=name_ref.i_indexes[0]) 

138 

139 def make_tattrs(self, attrs: t.Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]: 

140 """Build TAttributes from raw attribute tuples.""" 

141 return tuple(self.make_tattr(attr) for attr in attrs) 

142 

143 # ------------------------------------------ 

144 # Tag Helpers 

145 # ------------------------------------------ 

146 

147 def make_open_tag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> OpenTag: 

148 """Build an OpenTag from a raw tag and attribute tuples.""" 

149 tag_ref = self.placeholders.remove_placeholders(tag) 

150 

151 if tag_ref.is_literal: 

152 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs)) 

153 

154 if not tag_ref.is_singleton: 

155 raise ValueError( 

156 "Component element tags must have exactly one interpolation." 

157 ) 

158 

159 # HERE BE DRAGONS: the interpolation at i_index should be a 

160 # component callable. We do not check this in the parser, instead 

161 # relying on higher layers to validate types and render correctly. 

162 i_index = tag_ref.i_indexes[0] 

163 return OpenTComponent( 

164 start_i_index=i_index, 

165 attrs=self.make_tattrs(attrs), 

166 ) 

167 

168 def finalize_tag( 

169 self, open_tag: OpenTag, endtag_i_index: int | None = None 

170 ) -> TNode: 

171 """Finalize an OpenTag into a TNode.""" 

172 match open_tag: 

173 case OpenTElement(tag=tag, attrs=attrs, children=children): 

174 return TElement(tag=tag, attrs=attrs, children=tuple(children)) 

175 case OpenTFragment(children=children): 

176 return TFragment(children=tuple(children)) 

177 case OpenTComponent( 

178 start_i_index=start_i_index, 

179 attrs=attrs, 

180 children=children, 

181 ): 

182 return TComponent( 

183 start_i_index=start_i_index, 

184 end_i_index=endtag_i_index, 

185 attrs=attrs, 

186 children=tuple(children), 

187 ) 

188 

189 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: 

190 """Validate that closing tag matches open tag. Return component end index if applicable.""" 

191 assert self.source, "Parser source tracker not initialized." 

192 tag_ref = self.placeholders.remove_placeholders(tag) 

193 

194 match open_tag: 

195 case OpenTElement(): 

196 if not tag_ref.is_literal: 

197 raise ValueError( 

198 f"Component closing tag found for element <{open_tag.tag}>." 

199 ) 

200 if tag != open_tag.tag: 

201 raise ValueError( 

202 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>." 

203 ) 

204 return None 

205 

206 case OpenTFragment(): 

207 raise NotImplementedError("We do not support anonymous fragments.") 

208 

209 case OpenTComponent(start_i_index=start_i_index): 

210 if tag_ref.is_literal: 

211 raise ValueError( 

212 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}." 

213 ) 

214 if not tag_ref.is_singleton: 

215 raise ValueError( 

216 "Component end tags must have exactly one interpolation." 

217 ) 

218 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a 

219 # component callable that matches the start tag. We do not check 

220 # any of this in the parser, instead relying on higher layers. 

221 return tag_ref.i_indexes[0] 

222 

223 # ------------------------------------------ 

224 # HTMLParser tag callbacks 

225 # ------------------------------------------ 

226 

227 def handle_starttag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None: 

228 open_tag = self.make_open_tag(tag, attrs) 

229 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS: 

230 final_tag = self.finalize_tag(open_tag) 

231 self.append_child(final_tag) 

232 else: 

233 self.stack.append(open_tag) 

234 

235 def handle_startendtag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None: 

236 """Dispatch a self-closing tag, `<tag />` to specialized handlers.""" 

237 open_tag = self.make_open_tag(tag, attrs) 

238 final_tag = self.finalize_tag(open_tag) 

239 self.append_child(final_tag) 

240 

241 def handle_endtag(self, tag: str) -> None: 

242 if not self.stack: 

243 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.") 

244 

245 open_tag = self.stack.pop() 

246 endtag_i_index = self.validate_end_tag(tag, open_tag) 

247 final_tag = self.finalize_tag(open_tag, endtag_i_index) 

248 self.append_child(final_tag) 

249 

250 # ------------------------------------------ 

251 # HTMLParser other callbacks 

252 # ------------------------------------------ 

253 

254 def handle_data(self, data: str) -> None: 

255 ref = self.placeholders.remove_placeholders(data) 

256 parent = self.get_parent() 

257 if parent.children and isinstance(parent.children[-1], TText): 

258 parent.children[-1] = TText( 

259 ref=combine_template_refs(parent.children[-1].ref, ref) 

260 ) 

261 else: 

262 self.append_child(TText(ref=ref)) 

263 

264 def handle_comment(self, data: str) -> None: 

265 ref = self.placeholders.remove_placeholders(data) 

266 comment = TComment(ref) 

267 self.append_child(comment) 

268 

269 def handle_decl(self, decl: str) -> None: 

270 ref = self.placeholders.remove_placeholders(decl) 

271 if not ref.is_literal: 

272 raise ValueError("Interpolations are not allowed in declarations.") 

273 elif decl.upper().startswith("DOCTYPE "): 

274 doctype_content = decl[7:].strip() 

275 doctype = TDocumentType(doctype_content) 

276 self.append_child(doctype) 

277 else: 

278 raise NotImplementedError( 

279 "Only well formed DOCTYPE declarations are currently supported." 

280 ) 

281 

282 def reset(self): 

283 super().reset() 

284 self.root = OpenTFragment() 

285 self.stack = [] 

286 self.placeholders = PlaceholderState() 

287 self.source = None 

288 

289 def close(self) -> None: 

290 if self.stack: 

291 raise ValueError("Invalid HTML structure: unclosed tags remain.") 

292 if not self.placeholders.is_empty: 

293 raise ValueError("Some placeholders were never resolved.") 

294 super().close() 

295 

296 # ------------------------------------------ 

297 # Getting the parsed node tree 

298 # ------------------------------------------ 

299 

300 def get_tnode(self) -> TNode: 

301 """Get the Node tree parsed from the input HTML.""" 

302 # TODO: consider always returning a TTag? 

303 if len(self.root.children) > 1: 

304 # The parse structure results in multiple root elements, so we 

305 # return a Fragment to hold them all. 

306 return self.finalize_tag(self.root) 

307 elif len(self.root.children) == 1: 

308 # The parse structure results in a single root element, so we 

309 # return that element directly. This will be a non-Fragment Node. 

310 return self.root.children[0] 

311 else: 

312 # Special case: the parse structure is empty; we treat 

313 # this as an empty document fragment. 

314 # CONSIDER: or as an empty text node? 

315 return self.finalize_tag(self.root) 

316 

317 # ------------------------------------------ 

318 # Feeding and parsing 

319 # ------------------------------------------ 

320 

321 def feed_str(self, s: str) -> None: 

322 """Feed a string part of a Template to the parser.""" 

323 self.feed(s) 

324 

325 def feed_interpolation(self, index: int) -> None: 

326 placeholder = self.placeholders.add_placeholder(index) 

327 self.feed(placeholder) 

328 

329 def feed_template(self, template: Template) -> None: 

330 """Feed a Template's content to the parser.""" 

331 assert self.source is None, "Did you forget to call reset?" 

332 self.source = SourceTracker(template) 

333 for i_index in range(len(template.interpolations)): 

334 self.feed_str(template.strings[i_index]) 

335 self.source.advance_interpolation() 

336 self.feed_interpolation(i_index) 

337 self.feed_str(template.strings[-1]) 

338 

339 @staticmethod 

340 def parse(t: Template) -> TNode: 

341 """ 

342 Parse a Template containing valid HTML and substitutions and return 

343 a TNode tree representing its structure. This cachable structure can later 

344 be resolved against actual interpolation values to produce a Node tree. 

345 """ 

346 parser = TemplateParser() 

347 parser.feed_template(t) 

348 parser.close() 

349 return parser.get_tnode()