Coverage for tdom / parser.py: 90%

187 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-17 23:32 +0000

1import typing as t 

2from dataclasses import dataclass, field 

3from html.parser import HTMLParser 

4from string.templatelib import Interpolation, Template 

5 

6from .nodes import VOID_ELEMENTS 

7from .placeholders import FRAGMENT_TAG, PlaceholderState 

8from .tnodes import ( 

9 TAttribute, 

10 TComment, 

11 TComponent, 

12 TDocumentType, 

13 TElement, 

14 TFragment, 

15 TInterpolatedAttribute, 

16 TLiteralAttribute, 

17 TNode, 

18 TSpreadAttribute, 

19 TTemplatedAttribute, 

20 TText, 

21) 

22 

# A raw attribute as a (name, value) pair. The value may be None; make_tattr
# only inspects the value for placeholders when it is not None.
type HTMLAttribute = tuple[str, str | None]

# Mapping of attribute name to optional value. Not referenced by the code
# visible in this file section.
type HTMLAttributesDict = dict[str, str | None]

25 

26 

@dataclass
class OpenTElement:
    """An element whose start tag has been seen and whose children are still being collected."""

    # Literal tag name, exactly as passed in by the parser.
    tag: str
    # Attributes already converted from raw (name, value) pairs.
    attrs: tuple[TAttribute, ...]
    # Mutable while the element is open; each instance gets its own list.
    children: list[TNode] = field(default_factory=list)

32 

33 

@dataclass
class OpenTFragment:
    """A fragment that is still open; it carries children only."""

    # Mutable while the fragment is open; each instance gets its own list.
    children: list[TNode] = field(default_factory=list)

37 

38 

@dataclass
class OpenTComponent:
    """A component tag that is still open, identified by the interpolation used as its tag."""

    # TODO: hold on to start_s_index when we start to need it.

    # Index of the interpolation that appeared in the start-tag position.
    start_i_index: int
    # Attributes already converted from raw (name, value) pairs.
    attrs: tuple[TAttribute, ...]
    # Mutable while the component is open; each instance gets its own list.
    children: list[TNode] = field(default_factory=list)

45 

46 

# Any of the three "still open" tag records defined in this file.
type OpenTag = OpenTElement | OpenTFragment | OpenTComponent

48 

49 

@dataclass
class SourceTracker:
    """Remember the parser's position inside a Template so errors can cite the source."""

    # TODO: write utilities to generate complete error messages, with the
    # template itself in context and the relevant line/column underlined/etc.

    template: Template
    # Index of the interpolation currently being processed; -1 until
    # advance_interpolation() has been called for the first time.
    i_index: int = -1

    @property
    def interpolations(self) -> tuple[Interpolation, ...]:
        """The interpolations of the tracked template."""
        return self.template.interpolations

    @property
    def s_index(self) -> int:
        """The current string index, always one ahead of ``i_index``."""
        return 1 + self.i_index

    def advance_interpolation(self) -> int:
        """Step to the next interpolation (call before processing it) and return its index."""
        self.i_index = self.i_index + 1
        return self.i_index

    def get_expression(
        self, i_index: int, fallback_prefix: str = "interpolation"
    ) -> str:
        """
        Return the source expression of the interpolation at ``i_index``.

        When the interpolation has no expression text, a synthetic
        ``{<fallback_prefix>-<i_index>}`` label is returned instead so error
        messages still have something to point at.
        """
        expression = self.interpolations[i_index].expression
        if expression:
            return expression
        return f"{{{fallback_prefix}-{i_index}}}"

    def get_interpolation_value(self, i_index: int):
        """Return the runtime value of the interpolation at ``i_index``."""
        interpolation = self.interpolations[i_index]
        return interpolation.value

    def format_starttag(self, i_index: int) -> str:
        """Describe a component start tag for use in an error message."""
        return self.get_expression(i_index, fallback_prefix="component-starttag")

    def format_endtag(self, i_index: int) -> str:
        """Describe a component end tag for use in an error message."""
        return self.get_expression(i_index, fallback_prefix="component-endtag")

    def format_open_tag(self, open_tag: OpenTag) -> str:
        """Describe any kind of open tag for use in an error message."""
        if isinstance(open_tag, OpenTElement):
            return open_tag.tag
        if isinstance(open_tag, OpenTFragment):
            # Fragments have no name to show.
            return ""
        if isinstance(open_tag, OpenTComponent):
            return self.format_starttag(open_tag.start_i_index)

105 

106 

107class TemplateParser(HTMLParser): 

108 root: OpenTFragment 

109 stack: list[OpenTag] 

110 placeholders: PlaceholderState 

111 source: SourceTracker | None 

112 

113 def __init__(self, *, convert_charrefs: bool = True): 

114 # This calls HTMLParser.reset() which we override to set up our state. 

115 super().__init__(convert_charrefs=convert_charrefs) 

116 

117 # ------------------------------------------ 

118 # Parse state helpers 

119 # ------------------------------------------ 

120 

121 def get_parent(self) -> OpenTag: 

122 """Return the current parent node to which new children should be added.""" 

123 return self.stack[-1] if self.stack else self.root 

124 

125 def append_child(self, child: TNode) -> None: 

126 parent = self.get_parent() 

127 parent.children.append(child) 

128 

129 # ------------------------------------------ 

130 # Attribute Helpers 

131 # ------------------------------------------ 

132 

133 def make_tattr(self, attr: HTMLAttribute) -> TAttribute: 

134 """Build a TAttribute from a raw attribute tuple.""" 

135 

136 name, value = attr 

137 name_ref = self.placeholders.remove_placeholders(name) 

138 value_ref = ( 

139 self.placeholders.remove_placeholders(value) if value is not None else None 

140 ) 

141 

142 if name_ref.is_literal: 

143 if value_ref is None or value_ref.is_literal: 

144 return TLiteralAttribute(name=name, value=value) 

145 elif value_ref.is_singleton: 

146 return TInterpolatedAttribute( 

147 name=name, value_i_index=value_ref.i_indexes[0] 

148 ) 

149 else: 

150 return TTemplatedAttribute(name=name, value_ref=value_ref) 

151 if value_ref is not None: 

152 raise ValueError( 

153 "Attribute names cannot contain interpolations if the value is also interpolated." 

154 ) 

155 if not name_ref.is_singleton: 

156 raise ValueError( 

157 "Spread attributes must have exactly one interpolation in the name." 

158 ) 

159 return TSpreadAttribute(i_index=name_ref.i_indexes[0]) 

160 

161 def make_tattrs(self, attrs: t.Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]: 

162 """Build TAttributes from raw attribute tuples.""" 

163 return tuple(self.make_tattr(attr) for attr in attrs) 

164 

165 # ------------------------------------------ 

166 # Tag Helpers 

167 # ------------------------------------------ 

168 

169 def make_open_tag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> OpenTag: 

170 """Build an OpenTag from a raw tag and attribute tuples.""" 

171 tag_ref = self.placeholders.remove_placeholders(tag) 

172 

173 if tag_ref.is_literal: 

174 if tag == FRAGMENT_TAG: 

175 if attrs: 

176 raise ValueError("Fragments cannot have attributes.") 

177 return OpenTFragment() 

178 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs)) 

179 

180 if not tag_ref.is_singleton: 

181 raise ValueError( 

182 "Component element tags must have exactly one interpolation." 

183 ) 

184 

185 # HERE BE DRAGONS: the interpolation at i_index should be a 

186 # component callable. We do not check this in the parser, instead 

187 # relying on higher layers to validate types and render correctly. 

188 i_index = tag_ref.i_indexes[0] 

189 return OpenTComponent( 

190 start_i_index=i_index, 

191 attrs=self.make_tattrs(attrs), 

192 ) 

193 

194 def finalize_tag( 

195 self, open_tag: OpenTag, endtag_i_index: int | None = None 

196 ) -> TNode: 

197 """Finalize an OpenTag into a TNode.""" 

198 match open_tag: 

199 case OpenTElement(tag=tag, attrs=attrs, children=children): 

200 return TElement(tag=tag, attrs=attrs, children=tuple(children)) 

201 case OpenTFragment(children=children): 

202 return TFragment(children=tuple(children)) 

203 case OpenTComponent( 

204 start_i_index=start_i_index, 

205 attrs=attrs, 

206 children=children, 

207 ): 

208 return TComponent( 

209 start_i_index=start_i_index, 

210 end_i_index=endtag_i_index, 

211 attrs=attrs, 

212 children=tuple(children), 

213 ) 

214 

215 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: 

216 """Validate that closing tag matches open tag. Return component end index if applicable.""" 

217 assert self.source, "Parser source tracker not initialized." 

218 tag_ref = self.placeholders.remove_placeholders(tag) 

219 

220 match open_tag: 

221 case OpenTElement(): 

222 if not tag_ref.is_literal: 

223 raise ValueError( 

224 f"Component closing tag found for element <{open_tag.tag}>." 

225 ) 

226 if tag != open_tag.tag: 

227 raise ValueError( 

228 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>." 

229 ) 

230 return None 

231 

232 case OpenTFragment(): 

233 if not tag_ref.is_literal: 

234 raise ValueError("Component closing tag found for fragment.") 

235 if tag != FRAGMENT_TAG: 

236 raise ValueError(f"Mismatched closing tag </{tag}> for fragment.") 

237 return None 

238 

239 case OpenTComponent(start_i_index=start_i_index): 

240 if tag_ref.is_literal: 

241 raise ValueError( 

242 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}." 

243 ) 

244 if not tag_ref.is_singleton: 

245 raise ValueError( 

246 "Component end tags must have exactly one interpolation." 

247 ) 

248 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a 

249 # component callable that matches the start tag. We do not check 

250 # any of this in the parser, instead relying on higher layers. 

251 return tag_ref.i_indexes[0] 

252 

253 # ------------------------------------------ 

254 # HTMLParser tag callbacks 

255 # ------------------------------------------ 

256 

257 def handle_starttag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None: 

258 open_tag = self.make_open_tag(tag, attrs) 

259 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS: 

260 final_tag = self.finalize_tag(open_tag) 

261 self.append_child(final_tag) 

262 else: 

263 self.stack.append(open_tag) 

264 

265 def handle_startendtag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None: 

266 """Dispatch a self-closing tag, `<tag />` to specialized handlers.""" 

267 open_tag = self.make_open_tag(tag, attrs) 

268 final_tag = self.finalize_tag(open_tag) 

269 self.append_child(final_tag) 

270 

271 def handle_endtag(self, tag: str) -> None: 

272 if not self.stack: 

273 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.") 

274 

275 open_tag = self.stack.pop() 

276 endtag_i_index = self.validate_end_tag(tag, open_tag) 

277 final_tag = self.finalize_tag(open_tag, endtag_i_index) 

278 self.append_child(final_tag) 

279 

280 # ------------------------------------------ 

281 # HTMLParser other callbacks 

282 # ------------------------------------------ 

283 

284 def handle_data(self, data: str) -> None: 

285 ref = self.placeholders.remove_placeholders(data) 

286 text = TText(ref) 

287 self.append_child(text) 

288 

289 def handle_comment(self, data: str) -> None: 

290 ref = self.placeholders.remove_placeholders(data) 

291 comment = TComment(ref) 

292 self.append_child(comment) 

293 

294 def handle_decl(self, decl: str) -> None: 

295 ref = self.placeholders.remove_placeholders(decl) 

296 if not ref.is_literal: 

297 raise ValueError("Interpolations are not allowed in declarations.") 

298 if not decl.upper().startswith("DOCTYPE"): 

299 raise NotImplementedError( 

300 "Only DOCTYPE declarations are currently supported." 

301 ) 

302 doctype_content = decl[7:].strip() 

303 doctype = TDocumentType(doctype_content) 

304 self.append_child(doctype) 

305 

306 def reset(self): 

307 super().reset() 

308 self.root = OpenTFragment() 

309 self.stack = [] 

310 self.placeholders = PlaceholderState() 

311 self.source = None 

312 

313 def close(self) -> None: 

314 if self.stack: 

315 raise ValueError("Invalid HTML structure: unclosed tags remain.") 

316 if not self.placeholders.is_empty: 

317 raise ValueError("Some placeholders were never resolved.") 

318 super().close() 

319 

320 # ------------------------------------------ 

321 # Getting the parsed node tree 

322 # ------------------------------------------ 

323 

324 def get_tnode(self) -> TNode: 

325 """Get the Node tree parsed from the input HTML.""" 

326 # TODO: consider always returning a TTag? 

327 if len(self.root.children) > 1: 

328 # The parse structure results in multiple root elements, so we 

329 # return a Fragment to hold them all. 

330 return TFragment(children=tuple(self.root.children)) 

331 elif len(self.root.children) == 1: 

332 # The parse structure results in a single root element, so we 

333 # return that element directly. This will be a non-Fragment Node. 

334 return self.root.children[0] 

335 else: 

336 # Special case: the parse structure is empty; we treat 

337 # this as an empty document fragment. 

338 # CONSIDER: or as an empty text node? 

339 return TFragment(children=tuple()) 

340 

341 # ------------------------------------------ 

342 # Feeding and parsing 

343 # ------------------------------------------ 

344 

345 def feed_str(self, s: str) -> None: 

346 """Feed a string part of a Template to the parser.""" 

347 # TODO: add tracking for this, or maybe just deprecate it? 

348 s = s.replace("<>", f"<{FRAGMENT_TAG}>").replace("</>", f"</{FRAGMENT_TAG}>") 

349 self.feed(s) 

350 

351 def feed_interpolation(self, index: int) -> None: 

352 placeholder = self.placeholders.add_placeholder(index) 

353 self.feed(placeholder) 

354 

355 def feed_template(self, template: Template) -> None: 

356 """Feed a Template's content to the parser.""" 

357 assert self.source is None, "Did you forget to call reset?" 

358 self.source = SourceTracker(template) 

359 for i_index in range(len(template.interpolations)): 

360 self.feed_str(template.strings[i_index]) 

361 self.source.advance_interpolation() 

362 self.feed_interpolation(i_index) 

363 self.feed_str(template.strings[-1]) 

364 

365 @staticmethod 

366 def parse(t: Template) -> TNode: 

367 """ 

368 Parse a Template containing valid HTML and substitutions and return 

369 a TNode tree representing its structure. This cachable structure can later 

370 be resolved against actual interpolation values to produce a Node tree. 

371 """ 

372 parser = TemplateParser() 

373 parser.feed_template(t) 

374 parser.close() 

375 return parser.get_tnode()