Coverage for tdom / parser.py: 99%
166 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-12 16:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-12 16:43 +0000
1import typing as t
2from dataclasses import dataclass, field
3from html.parser import HTMLParser
4from string.templatelib import Interpolation, Template
6from .nodes import VOID_ELEMENTS
7from .placeholders import PlaceholderState
8from .tnodes import (
9 TAttribute,
10 TComment,
11 TComponent,
12 TDocumentType,
13 TElement,
14 TFragment,
15 TInterpolatedAttribute,
16 TLiteralAttribute,
17 TNode,
18 TSpreadAttribute,
19 TTemplatedAttribute,
20 TText,
21)
22from .template_utils import combine_template_refs
# A raw attribute as received by the HTMLParser start-tag callbacks below:
# a (name, value) pair whose value is None when the attribute has no value.
type HTMLAttribute = tuple[str, str | None]
# Attribute name -> optional value mapping. Not referenced in the portion of
# the file visible here; presumably used by callers elsewhere (TODO confirm).
type HTMLAttributesDict = dict[str, str | None]
@dataclass
class OpenTElement:
    """
    An element whose start tag has been seen but whose end tag has not.

    Children are accumulated in a mutable list while the element is open;
    the parser later converts it into an immutable TElement.
    """

    # Literal tag name (contains no interpolation placeholders).
    tag: str
    # Attributes already converted from raw (name, value) pairs.
    attrs: tuple[TAttribute, ...]
    # Child nodes collected so far; each instance gets its own fresh list.
    children: list[TNode] = field(default_factory=list)
@dataclass
class OpenTFragment:
    """A tag-less container that collects child nodes while parsing is in progress."""

    # Child nodes collected so far; each instance gets its own fresh list.
    children: list[TNode] = field(default_factory=list)
@dataclass
class OpenTComponent:
    """
    A component whose start tag has been seen but whose end tag has not.

    The component itself is identified only by the index of the
    interpolation that appeared in the start tag's name position.
    """

    # Index of the interpolation used as the start tag's name.
    start_i_index: int
    # Attributes already converted from raw (name, value) pairs.
    attrs: tuple[TAttribute, ...]
    # Child nodes collected so far; each instance gets its own fresh list.
    children: list[TNode] = field(default_factory=list)
# Any in-progress container that the parser can append children to.
type OpenTag = OpenTElement | OpenTFragment | OpenTComponent
@dataclass
class SourceTracker:
    """Keeps a position within a Template so error messages can cite the source."""

    # TODO: write utilities to generate complete error messages, with the
    # template itself in context and the relevant line/column underlined/etc.

    template: Template
    # Index of the interpolation currently being processed; -1 until the
    # first call to advance_interpolation().
    i_index: int = -1

    @property
    def interpolations(self) -> tuple[Interpolation, ...]:
        """The interpolations of the tracked template."""
        return self.template.interpolations

    def advance_interpolation(self) -> int:
        """Step to the next interpolation (call before processing it) and return its index."""
        self.i_index = self.i_index + 1
        return self.i_index

    def get_expression(
        self, i_index: int, fallback_prefix: str = "interpolation"
    ) -> str:
        """
        Return the source expression of the interpolation at ``i_index``.

        When the interpolation carries no expression text, a synthetic
        ``{<fallback_prefix>-<i_index>}`` label is returned instead so that
        error messages always have something to show.
        """
        expression = self.interpolations[i_index].expression
        if expression:
            return expression
        return f"{{{fallback_prefix}-{i_index}}}"

    def format_starttag(self, i_index: int) -> str:
        """Describe the component start tag at ``i_index`` for error messages."""
        return self.get_expression(i_index, "component-starttag")
85class TemplateParser(HTMLParser):
86 root: OpenTFragment
87 stack: list[OpenTag]
88 placeholders: PlaceholderState
89 source: SourceTracker | None
91 def __init__(self, *, convert_charrefs: bool = True):
92 # This calls HTMLParser.reset() which we override to set up our state.
93 super().__init__(convert_charrefs=convert_charrefs)
95 # ------------------------------------------
96 # Parse state helpers
97 # ------------------------------------------
99 def get_parent(self) -> OpenTag:
100 """Return the current parent node to which new children should be added."""
101 return self.stack[-1] if self.stack else self.root
103 def append_child(self, child: TNode) -> None:
104 parent = self.get_parent()
105 parent.children.append(child)
107 # ------------------------------------------
108 # Attribute Helpers
109 # ------------------------------------------
111 def make_tattr(self, attr: HTMLAttribute) -> TAttribute:
112 """Build a TAttribute from a raw attribute tuple."""
114 name, value = attr
115 name_ref = self.placeholders.remove_placeholders(name)
116 value_ref = (
117 self.placeholders.remove_placeholders(value) if value is not None else None
118 )
120 if name_ref.is_literal:
121 if value_ref is None or value_ref.is_literal:
122 return TLiteralAttribute(name=name, value=value)
123 elif value_ref.is_singleton:
124 return TInterpolatedAttribute(
125 name=name, value_i_index=value_ref.i_indexes[0]
126 )
127 else:
128 return TTemplatedAttribute(name=name, value_ref=value_ref)
129 if value_ref is not None:
130 raise ValueError(
131 "Attribute names cannot contain interpolations if the value is also interpolated."
132 )
133 if not name_ref.is_singleton:
134 raise ValueError(
135 "Spread attributes must have exactly one interpolation in the name."
136 )
137 return TSpreadAttribute(i_index=name_ref.i_indexes[0])
139 def make_tattrs(self, attrs: t.Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]:
140 """Build TAttributes from raw attribute tuples."""
141 return tuple(self.make_tattr(attr) for attr in attrs)
143 # ------------------------------------------
144 # Tag Helpers
145 # ------------------------------------------
147 def make_open_tag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> OpenTag:
148 """Build an OpenTag from a raw tag and attribute tuples."""
149 tag_ref = self.placeholders.remove_placeholders(tag)
151 if tag_ref.is_literal:
152 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs))
154 if not tag_ref.is_singleton:
155 raise ValueError(
156 "Component element tags must have exactly one interpolation."
157 )
159 # HERE BE DRAGONS: the interpolation at i_index should be a
160 # component callable. We do not check this in the parser, instead
161 # relying on higher layers to validate types and render correctly.
162 i_index = tag_ref.i_indexes[0]
163 return OpenTComponent(
164 start_i_index=i_index,
165 attrs=self.make_tattrs(attrs),
166 )
168 def finalize_tag(
169 self, open_tag: OpenTag, endtag_i_index: int | None = None
170 ) -> TNode:
171 """Finalize an OpenTag into a TNode."""
172 match open_tag:
173 case OpenTElement(tag=tag, attrs=attrs, children=children):
174 return TElement(tag=tag, attrs=attrs, children=tuple(children))
175 case OpenTFragment(children=children):
176 return TFragment(children=tuple(children))
177 case OpenTComponent(
178 start_i_index=start_i_index,
179 attrs=attrs,
180 children=children,
181 ):
182 return TComponent(
183 start_i_index=start_i_index,
184 end_i_index=endtag_i_index,
185 attrs=attrs,
186 children=tuple(children),
187 )
189 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None:
190 """Validate that closing tag matches open tag. Return component end index if applicable."""
191 assert self.source, "Parser source tracker not initialized."
192 tag_ref = self.placeholders.remove_placeholders(tag)
194 match open_tag:
195 case OpenTElement():
196 if not tag_ref.is_literal:
197 raise ValueError(
198 f"Component closing tag found for element <{open_tag.tag}>."
199 )
200 if tag != open_tag.tag:
201 raise ValueError(
202 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>."
203 )
204 return None
206 case OpenTFragment():
207 raise NotImplementedError("We do not support anonymous fragments.")
209 case OpenTComponent(start_i_index=start_i_index):
210 if tag_ref.is_literal:
211 raise ValueError(
212 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}."
213 )
214 if not tag_ref.is_singleton:
215 raise ValueError(
216 "Component end tags must have exactly one interpolation."
217 )
218 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a
219 # component callable that matches the start tag. We do not check
220 # any of this in the parser, instead relying on higher layers.
221 return tag_ref.i_indexes[0]
223 # ------------------------------------------
224 # HTMLParser tag callbacks
225 # ------------------------------------------
227 def handle_starttag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:
228 open_tag = self.make_open_tag(tag, attrs)
229 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS:
230 final_tag = self.finalize_tag(open_tag)
231 self.append_child(final_tag)
232 else:
233 self.stack.append(open_tag)
235 def handle_startendtag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:
236 """Dispatch a self-closing tag, `<tag />` to specialized handlers."""
237 open_tag = self.make_open_tag(tag, attrs)
238 final_tag = self.finalize_tag(open_tag)
239 self.append_child(final_tag)
241 def handle_endtag(self, tag: str) -> None:
242 if not self.stack:
243 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.")
245 open_tag = self.stack.pop()
246 endtag_i_index = self.validate_end_tag(tag, open_tag)
247 final_tag = self.finalize_tag(open_tag, endtag_i_index)
248 self.append_child(final_tag)
250 # ------------------------------------------
251 # HTMLParser other callbacks
252 # ------------------------------------------
254 def handle_data(self, data: str) -> None:
255 ref = self.placeholders.remove_placeholders(data)
256 parent = self.get_parent()
257 if parent.children and isinstance(parent.children[-1], TText):
258 parent.children[-1] = TText(
259 ref=combine_template_refs(parent.children[-1].ref, ref)
260 )
261 else:
262 self.append_child(TText(ref=ref))
264 def handle_comment(self, data: str) -> None:
265 ref = self.placeholders.remove_placeholders(data)
266 comment = TComment(ref)
267 self.append_child(comment)
269 def handle_decl(self, decl: str) -> None:
270 ref = self.placeholders.remove_placeholders(decl)
271 if not ref.is_literal:
272 raise ValueError("Interpolations are not allowed in declarations.")
273 elif decl.upper().startswith("DOCTYPE "):
274 doctype_content = decl[7:].strip()
275 doctype = TDocumentType(doctype_content)
276 self.append_child(doctype)
277 else:
278 raise NotImplementedError(
279 "Only well formed DOCTYPE declarations are currently supported."
280 )
282 def reset(self):
283 super().reset()
284 self.root = OpenTFragment()
285 self.stack = []
286 self.placeholders = PlaceholderState()
287 self.source = None
289 def close(self) -> None:
290 if self.stack:
291 raise ValueError("Invalid HTML structure: unclosed tags remain.")
292 if not self.placeholders.is_empty:
293 raise ValueError("Some placeholders were never resolved.")
294 super().close()
296 # ------------------------------------------
297 # Getting the parsed node tree
298 # ------------------------------------------
300 def get_tnode(self) -> TNode:
301 """Get the Node tree parsed from the input HTML."""
302 # TODO: consider always returning a TTag?
303 if len(self.root.children) > 1:
304 # The parse structure results in multiple root elements, so we
305 # return a Fragment to hold them all.
306 return self.finalize_tag(self.root)
307 elif len(self.root.children) == 1:
308 # The parse structure results in a single root element, so we
309 # return that element directly. This will be a non-Fragment Node.
310 return self.root.children[0]
311 else:
312 # Special case: the parse structure is empty; we treat
313 # this as an empty document fragment.
314 # CONSIDER: or as an empty text node?
315 return self.finalize_tag(self.root)
317 # ------------------------------------------
318 # Feeding and parsing
319 # ------------------------------------------
321 def feed_str(self, s: str) -> None:
322 """Feed a string part of a Template to the parser."""
323 self.feed(s)
325 def feed_interpolation(self, index: int) -> None:
326 placeholder = self.placeholders.add_placeholder(index)
327 self.feed(placeholder)
329 def feed_template(self, template: Template) -> None:
330 """Feed a Template's content to the parser."""
331 assert self.source is None, "Did you forget to call reset?"
332 self.source = SourceTracker(template)
333 for i_index in range(len(template.interpolations)):
334 self.feed_str(template.strings[i_index])
335 self.source.advance_interpolation()
336 self.feed_interpolation(i_index)
337 self.feed_str(template.strings[-1])
339 @staticmethod
340 def parse(t: Template) -> TNode:
341 """
342 Parse a Template containing valid HTML and substitutions and return
343 a TNode tree representing its structure. This cachable structure can later
344 be resolved against actual interpolation values to produce a Node tree.
345 """
346 parser = TemplateParser()
347 parser.feed_template(t)
348 parser.close()
349 return parser.get_tnode()