Coverage for tdom / parser.py: 90%
187 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-17 23:32 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-17 23:32 +0000
1import typing as t
2from dataclasses import dataclass, field
3from html.parser import HTMLParser
4from string.templatelib import Interpolation, Template
6from .nodes import VOID_ELEMENTS
7from .placeholders import FRAGMENT_TAG, PlaceholderState
8from .tnodes import (
9 TAttribute,
10 TComment,
11 TComponent,
12 TDocumentType,
13 TElement,
14 TFragment,
15 TInterpolatedAttribute,
16 TLiteralAttribute,
17 TNode,
18 TSpreadAttribute,
19 TTemplatedAttribute,
20 TText,
21)
23type HTMLAttribute = tuple[str, str | None]
24type HTMLAttributesDict = dict[str, str | None]
@dataclass
class OpenTElement:
    """An element whose start tag has been parsed but which is not yet closed.

    Children accumulate in ``children`` while the element is on the parser's
    stack; the parser later converts it into an immutable ``TElement``.
    """

    # Tag name as reported by HTMLParser.
    tag: str
    # Attributes already converted to TAttribute form.
    attrs: tuple[TAttribute, ...]
    # Child nodes collected so far (a fresh list per instance).
    children: list[TNode] = field(default_factory=list)
@dataclass
class OpenTFragment:
    """A fragment that is still open; it carries only a growing child list."""

    # Child nodes collected so far (a fresh list per instance).
    children: list[TNode] = field(default_factory=list)
@dataclass
class OpenTComponent:
    """A component invocation whose start tag was parsed but is not yet closed."""

    # TODO: hold on to start_s_index when we start to need it.

    # Index of the interpolation that appeared in the start tag position.
    start_i_index: int
    # Attributes already converted to TAttribute form.
    attrs: tuple[TAttribute, ...]
    # Child nodes collected so far (a fresh list per instance).
    children: list[TNode] = field(default_factory=list)
47type OpenTag = OpenTElement | OpenTFragment | OpenTComponent
@dataclass
class SourceTracker:
    """Keeps a position within a Template so errors can refer back to it."""

    # TODO: write utilities to generate complete error messages, with the
    # template itself in context and the relevant line/column underlined/etc.

    template: Template
    i_index: int = -1  # The current interpolation index.

    @property
    def interpolations(self) -> tuple[Interpolation, ...]:
        """The interpolations of the tracked template."""
        return self.template.interpolations

    @property
    def s_index(self) -> int:
        """The current string index (always one past ``i_index``)."""
        return self.i_index + 1

    def advance_interpolation(self) -> int:
        """Step to the next interpolation and return its index.

        Call this before processing an interpolation.
        """
        upcoming = self.i_index + 1
        self.i_index = upcoming
        return upcoming

    def get_expression(
        self, i_index: int, fallback_prefix: str = "interpolation"
    ) -> str:
        """
        Return the source expression of the interpolation at ``i_index``.

        When the interpolation has no (or an empty) expression, a synthetic
        ``{<fallback_prefix>-<i_index>}`` label is returned instead so that
        error messages always have something to show.
        """
        expression = self.interpolations[i_index].expression
        if expression:
            return expression
        return f"{{{fallback_prefix}-{i_index}}}"

    def get_interpolation_value(self, i_index: int):
        """Return the runtime value of the interpolation at ``i_index``."""
        interpolation = self.interpolations[i_index]
        return interpolation.value

    def format_starttag(self, i_index: int) -> str:
        """Describe a component start tag for use in error messages."""
        return self.get_expression(i_index, fallback_prefix="component-starttag")

    def format_endtag(self, i_index: int) -> str:
        """Describe a component end tag for use in error messages."""
        return self.get_expression(i_index, fallback_prefix="component-endtag")

    def format_open_tag(self, open_tag: OpenTag) -> str:
        """Describe any kind of open tag for use in error messages."""
        if isinstance(open_tag, OpenTElement):
            return open_tag.tag
        if isinstance(open_tag, OpenTFragment):
            # Fragments have no name to show.
            return ""
        if isinstance(open_tag, OpenTComponent):
            return self.format_starttag(open_tag.start_i_index)
107class TemplateParser(HTMLParser):
108 root: OpenTFragment
109 stack: list[OpenTag]
110 placeholders: PlaceholderState
111 source: SourceTracker | None
113 def __init__(self, *, convert_charrefs: bool = True):
114 # This calls HTMLParser.reset() which we override to set up our state.
115 super().__init__(convert_charrefs=convert_charrefs)
117 # ------------------------------------------
118 # Parse state helpers
119 # ------------------------------------------
121 def get_parent(self) -> OpenTag:
122 """Return the current parent node to which new children should be added."""
123 return self.stack[-1] if self.stack else self.root
125 def append_child(self, child: TNode) -> None:
126 parent = self.get_parent()
127 parent.children.append(child)
129 # ------------------------------------------
130 # Attribute Helpers
131 # ------------------------------------------
133 def make_tattr(self, attr: HTMLAttribute) -> TAttribute:
134 """Build a TAttribute from a raw attribute tuple."""
136 name, value = attr
137 name_ref = self.placeholders.remove_placeholders(name)
138 value_ref = (
139 self.placeholders.remove_placeholders(value) if value is not None else None
140 )
142 if name_ref.is_literal:
143 if value_ref is None or value_ref.is_literal:
144 return TLiteralAttribute(name=name, value=value)
145 elif value_ref.is_singleton:
146 return TInterpolatedAttribute(
147 name=name, value_i_index=value_ref.i_indexes[0]
148 )
149 else:
150 return TTemplatedAttribute(name=name, value_ref=value_ref)
151 if value_ref is not None:
152 raise ValueError(
153 "Attribute names cannot contain interpolations if the value is also interpolated."
154 )
155 if not name_ref.is_singleton:
156 raise ValueError(
157 "Spread attributes must have exactly one interpolation in the name."
158 )
159 return TSpreadAttribute(i_index=name_ref.i_indexes[0])
161 def make_tattrs(self, attrs: t.Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]:
162 """Build TAttributes from raw attribute tuples."""
163 return tuple(self.make_tattr(attr) for attr in attrs)
165 # ------------------------------------------
166 # Tag Helpers
167 # ------------------------------------------
169 def make_open_tag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> OpenTag:
170 """Build an OpenTag from a raw tag and attribute tuples."""
171 tag_ref = self.placeholders.remove_placeholders(tag)
173 if tag_ref.is_literal:
174 if tag == FRAGMENT_TAG:
175 if attrs:
176 raise ValueError("Fragments cannot have attributes.")
177 return OpenTFragment()
178 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs))
180 if not tag_ref.is_singleton:
181 raise ValueError(
182 "Component element tags must have exactly one interpolation."
183 )
185 # HERE BE DRAGONS: the interpolation at i_index should be a
186 # component callable. We do not check this in the parser, instead
187 # relying on higher layers to validate types and render correctly.
188 i_index = tag_ref.i_indexes[0]
189 return OpenTComponent(
190 start_i_index=i_index,
191 attrs=self.make_tattrs(attrs),
192 )
194 def finalize_tag(
195 self, open_tag: OpenTag, endtag_i_index: int | None = None
196 ) -> TNode:
197 """Finalize an OpenTag into a TNode."""
198 match open_tag:
199 case OpenTElement(tag=tag, attrs=attrs, children=children):
200 return TElement(tag=tag, attrs=attrs, children=tuple(children))
201 case OpenTFragment(children=children):
202 return TFragment(children=tuple(children))
203 case OpenTComponent(
204 start_i_index=start_i_index,
205 attrs=attrs,
206 children=children,
207 ):
208 return TComponent(
209 start_i_index=start_i_index,
210 end_i_index=endtag_i_index,
211 attrs=attrs,
212 children=tuple(children),
213 )
215 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None:
216 """Validate that closing tag matches open tag. Return component end index if applicable."""
217 assert self.source, "Parser source tracker not initialized."
218 tag_ref = self.placeholders.remove_placeholders(tag)
220 match open_tag:
221 case OpenTElement():
222 if not tag_ref.is_literal:
223 raise ValueError(
224 f"Component closing tag found for element <{open_tag.tag}>."
225 )
226 if tag != open_tag.tag:
227 raise ValueError(
228 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>."
229 )
230 return None
232 case OpenTFragment():
233 if not tag_ref.is_literal:
234 raise ValueError("Component closing tag found for fragment.")
235 if tag != FRAGMENT_TAG:
236 raise ValueError(f"Mismatched closing tag </{tag}> for fragment.")
237 return None
239 case OpenTComponent(start_i_index=start_i_index):
240 if tag_ref.is_literal:
241 raise ValueError(
242 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}."
243 )
244 if not tag_ref.is_singleton:
245 raise ValueError(
246 "Component end tags must have exactly one interpolation."
247 )
248 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a
249 # component callable that matches the start tag. We do not check
250 # any of this in the parser, instead relying on higher layers.
251 return tag_ref.i_indexes[0]
253 # ------------------------------------------
254 # HTMLParser tag callbacks
255 # ------------------------------------------
257 def handle_starttag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:
258 open_tag = self.make_open_tag(tag, attrs)
259 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS:
260 final_tag = self.finalize_tag(open_tag)
261 self.append_child(final_tag)
262 else:
263 self.stack.append(open_tag)
265 def handle_startendtag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:
266 """Dispatch a self-closing tag, `<tag />` to specialized handlers."""
267 open_tag = self.make_open_tag(tag, attrs)
268 final_tag = self.finalize_tag(open_tag)
269 self.append_child(final_tag)
271 def handle_endtag(self, tag: str) -> None:
272 if not self.stack:
273 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.")
275 open_tag = self.stack.pop()
276 endtag_i_index = self.validate_end_tag(tag, open_tag)
277 final_tag = self.finalize_tag(open_tag, endtag_i_index)
278 self.append_child(final_tag)
280 # ------------------------------------------
281 # HTMLParser other callbacks
282 # ------------------------------------------
284 def handle_data(self, data: str) -> None:
285 ref = self.placeholders.remove_placeholders(data)
286 text = TText(ref)
287 self.append_child(text)
289 def handle_comment(self, data: str) -> None:
290 ref = self.placeholders.remove_placeholders(data)
291 comment = TComment(ref)
292 self.append_child(comment)
294 def handle_decl(self, decl: str) -> None:
295 ref = self.placeholders.remove_placeholders(decl)
296 if not ref.is_literal:
297 raise ValueError("Interpolations are not allowed in declarations.")
298 if not decl.upper().startswith("DOCTYPE"):
299 raise NotImplementedError(
300 "Only DOCTYPE declarations are currently supported."
301 )
302 doctype_content = decl[7:].strip()
303 doctype = TDocumentType(doctype_content)
304 self.append_child(doctype)
306 def reset(self):
307 super().reset()
308 self.root = OpenTFragment()
309 self.stack = []
310 self.placeholders = PlaceholderState()
311 self.source = None
313 def close(self) -> None:
314 if self.stack:
315 raise ValueError("Invalid HTML structure: unclosed tags remain.")
316 if not self.placeholders.is_empty:
317 raise ValueError("Some placeholders were never resolved.")
318 super().close()
320 # ------------------------------------------
321 # Getting the parsed node tree
322 # ------------------------------------------
324 def get_tnode(self) -> TNode:
325 """Get the Node tree parsed from the input HTML."""
326 # TODO: consider always returning a TTag?
327 if len(self.root.children) > 1:
328 # The parse structure results in multiple root elements, so we
329 # return a Fragment to hold them all.
330 return TFragment(children=tuple(self.root.children))
331 elif len(self.root.children) == 1:
332 # The parse structure results in a single root element, so we
333 # return that element directly. This will be a non-Fragment Node.
334 return self.root.children[0]
335 else:
336 # Special case: the parse structure is empty; we treat
337 # this as an empty document fragment.
338 # CONSIDER: or as an empty text node?
339 return TFragment(children=tuple())
341 # ------------------------------------------
342 # Feeding and parsing
343 # ------------------------------------------
345 def feed_str(self, s: str) -> None:
346 """Feed a string part of a Template to the parser."""
347 # TODO: add tracking for this, or maybe just deprecate it?
348 s = s.replace("<>", f"<{FRAGMENT_TAG}>").replace("</>", f"</{FRAGMENT_TAG}>")
349 self.feed(s)
351 def feed_interpolation(self, index: int) -> None:
352 placeholder = self.placeholders.add_placeholder(index)
353 self.feed(placeholder)
355 def feed_template(self, template: Template) -> None:
356 """Feed a Template's content to the parser."""
357 assert self.source is None, "Did you forget to call reset?"
358 self.source = SourceTracker(template)
359 for i_index in range(len(template.interpolations)):
360 self.feed_str(template.strings[i_index])
361 self.source.advance_interpolation()
362 self.feed_interpolation(i_index)
363 self.feed_str(template.strings[-1])
365 @staticmethod
366 def parse(t: Template) -> TNode:
367 """
368 Parse a Template containing valid HTML and substitutions and return
369 a TNode tree representing its structure. This cachable structure can later
370 be resolved against actual interpolation values to produce a Node tree.
371 """
372 parser = TemplateParser()
373 parser.feed_template(t)
374 parser.close()
375 return parser.get_tnode()