Coverage for tdom/processor.py: 100%
220 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-31 17:14 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-31 17:14 +0000
1import random
2import re
3import string
4import sys
5import typing as t
6from collections.abc import Iterable
7from dataclasses import dataclass
8from functools import lru_cache
9from string.templatelib import Interpolation, Template
11from markupsafe import Markup
13from .callables import CallableInfo, get_callable_info
14from .classnames import classnames
15from .nodes import Element, Fragment, Node, Text
16from .parser import parse_html
17from .utils import format_interpolation as base_format_interpolation
@t.runtime_checkable
class HasHTMLDunder(t.Protocol):
    """Structural type for objects exposing an ``__html__()`` method.

    The protocol is runtime-checkable, so it can be used with ``isinstance``
    and in ``match`` class patterns.
    """

    def __html__(self) -> str:  # pragma: no cover
        ...
25# --------------------------------------------------------------------------
26# Value formatting
27# --------------------------------------------------------------------------
def _format_safe(value: object, format_spec: str) -> str:
    """Wrap *value* in ``Markup`` so that it is marked as safe HTML.

    This is the handler registered for the ``safe`` format specifier;
    *format_spec* is asserted to be exactly that.
    """
    assert format_spec == "safe"
    trusted = Markup(value)
    return trusted
def _format_unsafe(value: object, format_spec: str) -> str:
    """Turn *value* into a plain ``str`` so that it is treated as unsafe text.

    This is the handler registered for the ``unsafe`` format specifier;
    *format_spec* is asserted to be exactly that.
    """
    assert format_spec == "unsafe"
    plain = str(value)
    return plain
# Pairs of (format specifier, handler) handed to the base interpolation
# formatter by format_interpolation().
CUSTOM_FORMATTERS = (
    ("safe", _format_safe),
    ("unsafe", _format_unsafe),
)
def format_interpolation(interpolation: Interpolation) -> object:
    """Format *interpolation* with the base formatter and ``CUSTOM_FORMATTERS``."""
    formatted = base_format_interpolation(interpolation, formatters=CUSTOM_FORMATTERS)
    return formatted
52# --------------------------------------------------------------------------
53# Instrumentation, Parsing, and Caching
54# --------------------------------------------------------------------------
# Placeholder text is "<prefix><index><suffix>". The prefix and suffix each
# embed two random lowercase letters, drawn once when the module is imported.
_PLACEHOLDER_PREFIX = "t🐍" + "".join(random.choices(string.ascii_lowercase, k=2)) + "-"
_PLACEHOLDER_SUFFIX = "-" + "".join(random.choices(string.ascii_lowercase, k=2)) + "🐍t"
# Matches one placeholder and captures its decimal interpolation index.
_PLACEHOLDER_PATTERN = re.compile(
    "".join((re.escape(_PLACEHOLDER_PREFIX), r"(\d+)", re.escape(_PLACEHOLDER_SUFFIX)))
)
def _placeholder(i: int) -> str:
    """Build the placeholder text that stands in for interpolation number *i*."""
    return "".join((_PLACEHOLDER_PREFIX, str(i), _PLACEHOLDER_SUFFIX))
68@dataclass(frozen=True, slots=True)
69class _PlaceholderMatch:
70 start: int
71 end: int
72 index: int | None
def _find_placeholder(s: str) -> int | None:
    """
    Return the interpolation index when *s* is exactly one placeholder.

    Any other string (no placeholder, or a placeholder with extra text around
    it) gives None.
    """
    whole = _PLACEHOLDER_PATTERN.fullmatch(s)
    if whole is None:
        return None
    return int(whole.group(1))
def _find_all_placeholders(s: str) -> t.Iterable[_PlaceholderMatch]:
    """
    Split *s* into consecutive spans of placeholders and plain text.

    Spans are yielded in order. A placeholder span carries its interpolation
    index; a stretch of non-placeholder text carries index None. An empty
    string yields nothing.
    """
    cursor = 0
    for found in _PLACEHOLDER_PATTERN.finditer(s):
        begin, stop = found.span()
        if cursor < begin:
            # Plain text sitting between the previous span and this placeholder.
            yield _PlaceholderMatch(cursor, begin, None)
        yield _PlaceholderMatch(begin, stop, int(found.group(1)))
        cursor = stop
    if cursor < len(s):
        # Trailing plain text after the last placeholder (or the whole string).
        yield _PlaceholderMatch(cursor, len(s), None)
def _replace_placeholders(
    value: str, interpolations: tuple[Interpolation, ...]
) -> tuple[bool, object]:
    """
    Replace any placeholders embedded within a string attribute value.

    If there are no placeholders (including when *value* is the empty
    string), return False and the original string.

    If there is exactly one placeholder and nothing else, return True and the
    corresponding formatted interpolation value, without converting it to a
    string.

    If there are multiple placeholders or surrounding text, return True and
    a concatenated string with all placeholders replaced and interpolations
    formatted and converted to strings.
    """
    matches = tuple(_find_all_placeholders(value))

    # Case 1: No placeholders found. An empty string produces no spans at
    # all, so test "no span is a placeholder" rather than "exactly one text
    # span"; otherwise "" would be reported as containing placeholders.
    if all(match.index is None for match in matches):
        return False, value

    # Case 2: Single placeholder and no surrounding text. Case 1 already
    # ruled out a lone text span, so the only span here is a placeholder.
    if len(matches) == 1:
        index = matches[0].index
        return True, format_interpolation(interpolations[index])

    # Case 3: Multiple placeholders or surrounding text
    parts = [
        value[match.start : match.end]
        if match.index is None
        else str(format_interpolation(interpolations[match.index]))
        for match in matches
    ]
    return True, "".join(parts)
def _instrument(
    strings: tuple[str, ...], callable_infos: tuple[CallableInfo | None, ...]
) -> t.Iterable[str]:
    """
    Yield the template strings interleaved with interpolation placeholders.

    The joined output is what gets parsed as HTML; the placeholders mark where
    interpolated values are substituted into the parse tree afterwards.

    The placeholders are chosen to be unlikely to collide with typical HTML
    content.
    """
    last = len(strings) - 1

    # First placeholder handed out for each callable, keyed by its info id.
    shared: dict[int, str] = {}

    for position, chunk in enumerate(strings):
        yield chunk
        if position >= last:
            # N strings are separated by N-1 placeholders; none follows the
            # final string.
            continue

        marker = _placeholder(position)

        # A component callable may appear in both its opening and closing
        # tag; reuse the first placeholder so that the two tags match.
        info = callable_infos[position]
        if info:
            marker = shared.setdefault(info.id, marker)

        yield marker
@lru_cache(maxsize=0 if "pytest" in sys.modules else 512)
def _instrument_and_parse_internal(
    strings: tuple[str, ...], callable_infos: tuple[CallableInfo | None, ...]
) -> Node:
    """
    Parse the instrumented form of *strings* into a tree of nodes.

    Results are memoized with ``lru_cache`` on both arguments, so the same
    template is not re-parsed. The cache size is 0 when ``pytest`` has been
    imported, and 512 otherwise.
    """
    return parse_html(_instrument(strings, callable_infos))
186def _callable_info(value: object) -> CallableInfo | None:
187 """Return a unique identifier for a callable, or None if not callable."""
188 return get_callable_info(value) if callable(value) else None
def _instrument_and_parse(template: Template) -> Node:
    """Instrument and parse *template*, returning a tree of Nodes."""
    # Thin wrapper around the cached function that does the real work.
    #
    # Components are invoked by interpolating the callable itself into both
    # the opening *and* the closing tag. Work out which interpolations are
    # callables here, so that the cached function can give matching tags the
    # same placeholder.
    #
    # A generic closing tag such as <//> would make this unnecessary, but that
    # syntax is easier to miss when reading and may be unfamiliar to users
    # coming from other templating systems.
    infos = []
    for interpolation in template.interpolations:
        infos.append(_callable_info(interpolation.value))
    return _instrument_and_parse_internal(template.strings, tuple(infos))
209# --------------------------------------------------------------------------
210# Placeholder Substitution
211# --------------------------------------------------------------------------
def _force_dict(value: t.Any, *, kind: str) -> dict:
    """
    Build a dict from *value*.

    Raises TypeError, naming the value's type and the attribute *kind*, when
    ``dict(value)`` fails with TypeError or ValueError.
    """
    try:
        converted = dict(value)
    except (TypeError, ValueError):
        message = f"Cannot use {type(value).__name__} as value for {kind} attributes"
        raise TypeError(message) from None
    return converted
def _process_aria_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Expand the value interpolated for "aria" into individual aria-* attributes.

    True and False become the strings "true" and "false", None entries are
    skipped, and anything else is converted with ``str()``.
    """
    for name, setting in _force_dict(value, kind="aria").items():
        if setting is None:
            continue
        attr_name = f"aria-{name}"
        if setting is True:
            text = "true"
        elif setting is False:
            text = "false"
        else:
            text = str(setting)
        yield attr_name, text
def _process_data_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Produce data-* attributes based on the interpolated value for "data".

    True yields a bare attribute (value None). False and None entries are
    omitted. Every other value, including falsy ones such as 0 or "", is
    converted with ``str()``.

    Raises TypeError (via ``_force_dict``) if *value* cannot be turned into
    a dict.
    """
    d = _force_dict(value, kind="data")
    for sub_k, sub_v in d.items():
        if sub_v is True:
            yield f"data-{sub_k}", None
        # Compare by identity: `sub_v not in (False, None)` uses ==, and
        # 0 == False, so numeric zero would be silently dropped.
        elif sub_v is not False and sub_v is not None:
            yield f"data-{sub_k}", str(sub_v)
def _process_class_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """Yield one "class" attribute whose value is ``classnames(value)``."""
    combined = classnames(value)
    yield "class", combined
def _process_style_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Yield one "style" attribute built from the interpolated value.

    A string is used verbatim. Anything else is converted to a dict and
    rendered as "key: value" declarations joined with "; ". A TypeError
    raised along the way is replaced by one stating that a string or dict is
    required.
    """
    if not isinstance(value, str):
        try:
            declarations = _force_dict(value, kind="style")
            rendered = "; ".join(
                f"{prop}: {setting}" for prop, setting in declarations.items()
            )
            yield "style", rendered
        except TypeError:
            raise TypeError("'style' attribute value must be a string or dict") from None
    else:
        yield "style", value
def _substitute_spread_attrs(
    value: object,
) -> t.Iterable[tuple[str, object | None]]:
    """
    Expand a spread attribute into the attributes it stands for.

    A spread attribute is one whose key is a placeholder. The interpolated
    value must be convertible to a dict (a dict or an iterable of key-value
    pairs); each entry is run through ``_process_attr``.
    """
    for name, item in _force_dict(value, kind="spread").items():
        for pair in _process_attr(name, item):
            yield pair
# Handlers for attribute names that have special semantics, keyed by attribute
# name. _process_attr() consults this table first and only applies its general
# True/False/None handling when no handler is registered for the name.
CUSTOM_ATTR_PROCESSORS: dict[
    str, t.Callable[[object], t.Iterable[tuple[str, str | None]]]
] = {
    "class": _process_class_attr,
    "data": _process_data_attr,
    "style": _process_style_attr,
    "aria": _process_aria_attr,
}
def _process_attr(
    key: str,
    value: object,
) -> t.Iterable[tuple[str, object | None]]:
    """
    Turn one attribute key and its interpolated value into output attributes.

    Keys registered in ``CUSTOM_ATTR_PROCESSORS`` are delegated to their
    handler, which may yield several attributes or none. For every other key:
    True yields the bare attribute (value None), False and None yield nothing
    so the attribute is omitted, and any other value is yielded unchanged.
    """
    handler = CUSTOM_ATTR_PROCESSORS.get(key)
    if handler:
        # Attribute names with special semantics.
        yield from handler(value)
    elif value is True:
        yield key, None
    elif value is not False and value is not None:
        yield key, value
def _substitute_interpolated_attrs(
    attrs: dict[str, str | None], interpolations: tuple[Interpolation, ...]
) -> dict[str, object | None]:
    """
    Swap placeholders found in parsed attributes for their interpolated values.

    This is step (1) only, value substitution: placeholder values are
    replaced, and a placeholder key is expanded as a spread attribute. The
    True/False/None conversion for ordinary attributes is left to
    ``_process_html_attrs``.
    """
    resolved: dict[str, object | None] = {}
    for name, raw in attrs.items():
        if raw is not None:
            found, replacement = _replace_placeholders(raw, interpolations)
            if found:
                resolved[name] = replacement
                continue

        spread_index = _find_placeholder(name)
        if spread_index is None:
            # Static attribute: keep it unchanged.
            resolved[name] = raw
            continue

        # Spread attribute: the key itself is a placeholder.
        spread_value = format_interpolation(interpolations[spread_index])
        resolved.update(_substitute_spread_attrs(spread_value))
    return resolved
def _process_html_attrs(attrs: dict[str, object]) -> dict[str, str | None]:
    """
    Convert substituted attributes into their final HTML form.

    This covers steps (2) and (3): each attribute goes through
    ``_process_attr`` (special names, True -> None, False -> omitted, ...),
    and each resulting value is converted to a string unless it is None.
    """
    rendered: dict[str, str | None] = {}
    for name, raw in attrs.items():
        for out_name, out_value in _process_attr(name, raw):
            # None marks a bare attribute and must survive as None.
            rendered[out_name] = None if out_value is None else str(out_value)
    return rendered
def _substitute_attrs(
    attrs: dict[str, str | None], interpolations: tuple[Interpolation, ...]
) -> dict[str, str | None]:
    """
    Run the full attribute pipeline for an HTML element.

    Placeholder substitution happens first, then HTML attribute processing.
    """
    return _process_html_attrs(_substitute_interpolated_attrs(attrs, interpolations))
def _substitute_and_flatten_children(
    children: t.Iterable[Node], interpolations: tuple[Interpolation, ...]
) -> list[Node]:
    """Substitute placeholders in each child, splicing Fragment results in place."""
    flattened: list[Node] = []
    for original in children:
        replaced = _substitute_node(original, interpolations)
        if not isinstance(replaced, Fragment):
            flattened.append(replaced)
            continue
        # An interpolation can produce a Fragment (for instance when its value
        # is iterable); its children are added directly to the list.
        flattened.extend(replaced.children)
    return flattened
def _node_from_value(value: object) -> Node:
    """
    Turn an arbitrary value into a Node.

    This is what happens to an interpolation that appears in child content.
    The checks run in order, and the first one that applies wins.
    """
    if isinstance(value, str):
        return Text(value)
    if isinstance(value, Node):
        return value
    if isinstance(value, Template):
        return html(value)
    # Consider: falsey values, not just False and None?
    if value is False or value is None:
        return Fragment(children=[])
    if isinstance(value, Iterable):
        return Fragment(children=[_node_from_value(item) for item in value])
    if isinstance(value, HasHTMLDunder):
        # CONSIDER: should we do this lazily?
        return Text(Markup(value.__html__()))
    if callable(value):
        # A callable in child content is called with no arguments and its
        # return value is rendered.
        return _node_from_value(value())
    # CONSIDER: should we do this lazily?
    return Text(str(value))
def _kebab_to_snake(name: str) -> str:
    """Return *name* lower-cased, with every hyphen turned into an underscore."""
    underscored = "_".join(name.split("-"))
    return underscored.lower()
def _invoke_component(
    new_attrs: dict[str, object | None],
    new_children: list[Node],
    interpolation: Interpolation,
) -> Node:
    """
    Invoke a component callable with the provided attributes and children.

    Components are any callable that meets the required calling signature.
    Typically, that's a function, but it could also be the constructor or
    __call__() method for a class; dataclass constructors match our expected
    invocation style.

    We validate the callable's signature and invoke it with keyword-only
    arguments, then convert the result to a Node.

    Component invocation rules:

    1. All arguments are passed as keywords only. Components cannot require
       positional arguments.

    2. Children are passed via a "children" parameter whenever the callable
       accepts "children" OR has **kwargs. If the template has no child
       content, an empty tuple is passed.

    3. All other attributes are converted from kebab-case to snake_case
       and passed as keyword arguments if the callable accepts them (or has
       **kwargs). Attributes that don't match parameters are silently ignored.

    Raises:
        TypeError: if the interpolated value is not callable, if the callable
            has required positional arguments, or if required named
            parameters are not supplied.
    """
    value = format_interpolation(interpolation)
    if not callable(value):
        raise TypeError(
            f"Expected a callable for component invocation, got {type(value).__name__}"
        )
    callable_info = get_callable_info(value)

    if callable_info.requires_positional:
        raise TypeError(
            "Component callables cannot have required positional arguments."
        )

    kwargs: dict[str, object] = {}

    # Add all supported attributes
    for attr_name, attr_value in new_attrs.items():
        snake_name = _kebab_to_snake(attr_name)
        if snake_name in callable_info.named_params or callable_info.kwargs:
            kwargs[snake_name] = attr_value

    # Add children if appropriate. This runs after the attribute loop, so it
    # replaces any attribute that was itself named "children".
    if "children" in callable_info.named_params or callable_info.kwargs:
        kwargs["children"] = tuple(new_children)

    # Check to make sure we've fully satisfied the callable's requirements
    missing = callable_info.required_named_params - kwargs.keys()
    if missing:
        # Sort so that the message is deterministic; set iteration order is
        # arbitrary.
        raise TypeError(
            f"Missing required parameters for component: {', '.join(sorted(missing))}"
        )

    result = value(**kwargs)
    return _node_from_value(result)
496def _substitute_node(p_node: Node, interpolations: tuple[Interpolation, ...]) -> Node:
497 """Substitute placeholders in a node based on the corresponding interpolations."""
498 match p_node:
499 case Text(text) if (index := _find_placeholder(text)) is not None:
500 interpolation = interpolations[index]
501 value = format_interpolation(interpolation)
502 return _node_from_value(value)
503 case Element(tag=tag, attrs=attrs, children=children):
504 new_children = _substitute_and_flatten_children(children, interpolations)
505 if (index := _find_placeholder(tag)) is not None:
506 component_attrs = _substitute_interpolated_attrs(attrs, interpolations)
507 return _invoke_component(
508 component_attrs, new_children, interpolations[index]
509 )
510 else:
511 html_attrs = _substitute_attrs(attrs, interpolations)
512 return Element(tag=tag, attrs=html_attrs, children=new_children)
513 case Fragment(children=children):
514 new_children = _substitute_and_flatten_children(children, interpolations)
515 return Fragment(children=new_children)
516 case _:
517 return p_node
520# --------------------------------------------------------------------------
521# Public API
522# --------------------------------------------------------------------------
def html(template: Template) -> Node:
    """Parse a t-string and return a tree of Nodes."""
    # Parsing produces a tree that still contains placeholders where the
    # interpolations go; substitution then fills them in.
    return _substitute_node(_instrument_and_parse(template), template.interpolations)