Coverage for tdom/processor.py: 100%

220 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-31 17:14 +0000

1import random 

2import re 

3import string 

4import sys 

5import typing as t 

6from collections.abc import Iterable 

7from dataclasses import dataclass 

8from functools import lru_cache 

9from string.templatelib import Interpolation, Template 

10 

11from markupsafe import Markup 

12 

13from .callables import CallableInfo, get_callable_info 

14from .classnames import classnames 

15from .nodes import Element, Fragment, Node, Text 

16from .parser import parse_html 

17from .utils import format_interpolation as base_format_interpolation 

18 

19 

@t.runtime_checkable
class HasHTMLDunder(t.Protocol):
    """
    Structural type for objects that expose an ``__html__()`` method.

    The protocol is runtime-checkable, so ``isinstance(obj, HasHTMLDunder)``
    and ``case HasHTMLDunder():`` succeed for any object that has an
    ``__html__`` attribute, regardless of its class hierarchy.
    """

    def __html__(self) -> str: ...  # pragma: no cover

23 

24 

25# -------------------------------------------------------------------------- 

26# Value formatting 

27# -------------------------------------------------------------------------- 

28 

29 

def _format_safe(value: object, format_spec: str) -> str:
    """
    Formatter for the ``safe`` format spec.

    Wraps ``value`` in ``Markup`` so that it is treated as already-safe HTML.
    The formatter must only ever be dispatched for the ``"safe"`` spec; the
    assertion guards that internal invariant.
    """
    assert format_spec == "safe"
    trusted = Markup(value)
    return trusted

34 

35 

def _format_unsafe(value: object, format_spec: str) -> str:
    """
    Formatter for the ``unsafe`` format spec.

    Returns ``str(value)``, i.e. a plain string rendering of the value that
    carries no "safe" marking. The formatter must only ever be dispatched for
    the ``"unsafe"`` spec; the assertion guards that internal invariant.
    """
    assert format_spec == "unsafe"
    plain = str(value)
    return plain

40 

41 

# Pairs of (format spec, formatter) handed to the base interpolation
# formatter by format_interpolation() below.
CUSTOM_FORMATTERS = (
    ("safe", _format_safe),
    ("unsafe", _format_unsafe),
)

43 

44 

def format_interpolation(interpolation: Interpolation) -> object:
    """
    Format a single interpolation using this module's custom formatters.

    Delegates to the base ``format_interpolation`` helper, supplying
    ``CUSTOM_FORMATTERS`` (the ``safe`` and ``unsafe`` specs) as the
    ``formatters`` argument, and returns whatever that helper returns.
    """
    formatted = base_format_interpolation(interpolation, formatters=CUSTOM_FORMATTERS)
    return formatted

50 

51 

52# -------------------------------------------------------------------------- 

53# Instrumentation, Parsing, and Caching 

54# -------------------------------------------------------------------------- 

55 

# Placeholders have the shape  t🐍xx-<index>-yy🐍t  where "xx" and "yy" are
# two random lowercase ASCII letters chosen once, at import time.
_PLACEHOLDER_PREFIX = "t🐍" + "".join(random.choices(string.ascii_lowercase, k=2)) + "-"
_PLACEHOLDER_SUFFIX = "-" + "".join(random.choices(string.ascii_lowercase, k=2)) + "🐍t"

# Matches one placeholder; group 1 captures the decimal interpolation index.
_PLACEHOLDER_PATTERN = re.compile(
    rf"{re.escape(_PLACEHOLDER_PREFIX)}(\d+){re.escape(_PLACEHOLDER_SUFFIX)}"
)

61 

62 

def _placeholder(i: int) -> str:
    """Build the placeholder text that stands in for interpolation number ``i``."""
    return _PLACEHOLDER_PREFIX + f"{i}" + _PLACEHOLDER_SUFFIX

66 

67 

68@dataclass(frozen=True, slots=True) 

69class _PlaceholderMatch: 

70 start: int 

71 end: int 

72 index: int | None 

73 

74 

def _find_placeholder(s: str) -> int | None:
    """
    Return the interpolation index when ``s`` is exactly one placeholder.

    Any other string (no placeholder, extra surrounding text, or several
    placeholders) yields None.
    """
    whole = _PLACEHOLDER_PATTERN.fullmatch(s)
    if whole is None:
        return None
    return int(whole.group(1))

81 

82 

def _find_all_placeholders(s: str) -> t.Iterable[_PlaceholderMatch]:
    """
    Split a string into consecutive placeholder and literal-text segments.

    Segments are yielded in order of position. A placeholder segment carries
    the interpolation index it refers to; a run of literal text before,
    between, or after placeholders is yielded with ``index=None``. An empty
    string yields nothing.
    """
    cursor = 0  # offset just past the previously emitted segment
    for found in _PLACEHOLDER_PATTERN.finditer(s):
        begin, finish = found.span()
        if cursor < begin:
            # Literal text sitting between the previous segment and this one.
            yield _PlaceholderMatch(cursor, begin, None)
        yield _PlaceholderMatch(begin, finish, int(found.group(1)))
        cursor = finish

    total = len(s)
    if cursor < total:
        # Trailing literal text after the final placeholder (or the whole
        # string when it contains no placeholder at all).
        yield _PlaceholderMatch(cursor, total, None)

100 

101 

def _replace_placeholders(
    value: str, interpolations: tuple[Interpolation, ...]
) -> tuple[bool, object]:
    """
    Replace any placeholders embedded within a string attribute value.

    Returns a ``(has_placeholders, new_value)`` pair:

    - If there are no placeholders (including when ``value`` is the empty
      string), return False and the original string.

    - If there is exactly one placeholder and nothing else, return True and
      the corresponding formatted interpolation value, which is NOT converted
      to a string.

    - If there are multiple placeholders or surrounding text, return True and
      a concatenated string with all placeholders replaced by their formatted
      interpolations converted with ``str()``.
    """
    matches = tuple(_find_all_placeholders(value))

    # Case 1: No placeholders found. An empty string produces no segments at
    # all, so test "no segment is a placeholder" rather than "exactly one
    # literal segment"; otherwise "" would be misreported as substituted.
    if all(match.index is None for match in matches):
        return False, value

    # Case 2: Single placeholder and no surrounding text. The raw formatted
    # value is preserved so callers can handle non-string values specially.
    if len(matches) == 1:
        index = matches[0].index
        return True, format_interpolation(interpolations[index])

    # Case 3: Multiple placeholders or surrounding text
    parts = [
        value[match.start : match.end]
        if match.index is None
        else str(format_interpolation(interpolations[match.index]))
        for match in matches
    ]
    return True, "".join(parts)

137 

138 

def _instrument(
    strings: tuple[str, ...], callable_infos: tuple[CallableInfo | None, ...]
) -> t.Iterable[str]:
    """
    Interleave the static strings with placeholders for the interpolations.

    Yields the pieces of an instrumented template: each static string,
    followed (except after the final one) by the placeholder for the
    interpolation at that position. The result is meant to be parsed as HTML
    so that real values can later be substituted into the parse tree.

    ``callable_infos[i]`` describes interpolation ``i`` when it is a callable
    and is None otherwise. Interpolations whose callable info has the same
    ``id`` all receive the placeholder of the first such interpolation.

    The placeholders are chosen to be unlikely to collide with typical HTML
    content.
    """
    final_position = len(strings) - 1

    # Placeholder already assigned to each callable, keyed by callable id.
    assigned: dict[int, str] = {}

    for position, literal in enumerate(strings):
        yield literal

        # N strings are separated by N-1 interpolations, so nothing follows
        # the final string.
        if position >= final_position:
            continue

        marker = _placeholder(position)
        info = callable_infos[position]
        if info:
            # A component callable used in both its opening and closing tag
            # must produce the same tag name in both places, so reuse the
            # first placeholder handed out for it.
            marker = assigned.setdefault(info.id, marker)
        yield marker

171 

172 

@lru_cache(maxsize=0 if "pytest" in sys.modules else 512)
def _instrument_and_parse_internal(
    strings: tuple[str, ...], callable_infos: tuple[CallableInfo | None, ...]
) -> Node:
    """
    Build the instrumented template text and parse it as HTML.

    Results are memoized on ``(strings, callable_infos)`` so an identical
    template is not re-parsed; on a cache hit the very same Node object is
    returned. The cache holds up to 512 entries, and is disabled
    (``maxsize=0``) when ``pytest`` has been imported at the time this module
    is loaded.
    """
    return parse_html(_instrument(strings, callable_infos))

184 

185 

def _callable_info(value: object) -> CallableInfo | None:
    """Return the CallableInfo describing ``value``, or None if it is not callable."""
    if not callable(value):
        return None
    return get_callable_info(value)

189 

190 

def _instrument_and_parse(template: Template) -> Node:
    """
    Instrument and parse a template, returning a tree of Nodes.

    The tree still contains placeholders where the interpolations go; see
    _substitute_node() for the substitution step.
    """
    # All real work happens in the cached internal function; this wrapper
    # only computes the extra cache-key component it needs.
    #
    # Component invocation syntax puts the callable itself in an
    # interpolation in both the opening *and* the closing tag, and the two
    # tags have to end up with matching names. To make that possible the
    # internal function must know which interpolations are callables (and
    # which of them are the same callable), so we pass that along.
    #
    # A generic component closing tag such as <//> would make this
    # unnecessary, but it is easy to overlook when reading a template and
    # may be unfamiliar to users coming from other templating systems.
    infos = tuple(_callable_info(item.value) for item in template.interpolations)
    return _instrument_and_parse_internal(template.strings, infos)

207 

208 

209# -------------------------------------------------------------------------- 

210# Placeholder Substitution 

211# -------------------------------------------------------------------------- 

212 

213 

def _force_dict(value: t.Any, *, kind: str) -> dict:
    """
    Coerce ``value`` to a dict via ``dict(value)``.

    ``kind`` names the attribute family being processed and only appears in
    the error message.

    Raises:
        TypeError: if ``dict(value)`` fails with TypeError or ValueError. The
            original exception is suppressed.
    """
    try:
        converted = dict(value)
    except (TypeError, ValueError):
        message = f"Cannot use {type(value).__name__} as value for {kind} attributes"
        raise TypeError(message) from None
    return converted

222 

223 

def _process_aria_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Expand the value interpolated for ``aria`` into ``aria-*`` attributes.

    ``value`` must be convertible to a dict. For each entry: True becomes the
    string "true", False becomes "false", None is skipped, and anything else
    is rendered with ``str()``.
    """
    for name, setting in _force_dict(value, kind="aria").items():
        if setting is None:
            continue
        attr_name = f"aria-{name}"
        if setting is True:
            yield attr_name, "true"
        elif setting is False:
            yield attr_name, "false"
        else:
            yield attr_name, str(setting)

236 

237 

def _process_data_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Produce data-* attributes based on the interpolated value for "data".

    ``value`` must be convertible to a dict. For each entry: True yields a
    bare attribute (value None), False and None are omitted, and anything
    else is rendered with ``str()``.
    """
    d = _force_dict(value, kind="data")
    for sub_k, sub_v in d.items():
        if sub_v is True:
            yield f"data-{sub_k}", None
        # Compare by identity, not equality: `0 in (False, None)` is True
        # because 0 == False, which would silently drop legitimate values
        # such as data-count="0".
        elif sub_v is not False and sub_v is not None:
            yield f"data-{sub_k}", str(sub_v)

246 

247 

def _process_class_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """Yield one ``class`` attribute whose value is ``classnames(value)``."""
    combined = classnames(value)
    yield "class", combined

251 

252 

def _process_style_attr(value: object) -> t.Iterable[tuple[str, str | None]]:
    """
    Substitute a style attribute based on the interpolated value.

    A string is passed through unchanged. Any other value must be convertible
    to a dict and is rendered as ``"key: value"`` declarations joined by
    ``"; "``.

    Raises:
        TypeError: if the value is neither a string nor convertible to a dict.
    """
    if isinstance(value, str):
        yield ("style", value)
        return

    # Only the dict conversion is guarded: a TypeError raised while formatting
    # an individual key or value is a different problem and must not be
    # relabelled as "must be a string or dict".
    try:
        d = _force_dict(value, kind="style")
    except TypeError:
        raise TypeError("'style' attribute value must be a string or dict") from None

    style_str = "; ".join(f"{k}: {v}" for k, v in d.items())
    yield ("style", style_str)

264 

265 

def _substitute_spread_attrs(
    value: object,
) -> t.Iterable[tuple[str, object | None]]:
    """
    Expand a spread attribute into the individual attributes it stands for.

    A spread attribute is one whose *key* is a placeholder: the interpolated
    value supplies a whole set of attributes. The value must be a dict or an
    iterable of key-value pairs; each entry is run through _process_attr(),
    so an entry may itself expand to zero or more attributes.
    """
    for name, item in _force_dict(value, kind="spread").items():
        yield from _process_attr(name, item)

279 

280 

# Handlers for attribute names that carry special semantics. Each handler
# receives the interpolated value and yields the (name, value) pairs to emit.
# _process_attr() consults this table before applying its generic rules.
CUSTOM_ATTR_PROCESSORS = {
    "class": _process_class_attr,
    "data": _process_data_attr,
    "style": _process_style_attr,
    "aria": _process_aria_attr,
}

290 

291 

def _process_attr(
    key: str,
    value: object,
) -> t.Iterable[tuple[str, object | None]]:
    """
    Turn one attribute key and its value into the attributes to emit.

    One input attribute can produce several output attributes (for example
    when a custom handler expands a dict) or none at all.

    Attribute names listed in CUSTOM_ATTR_PROCESSORS are handled entirely by
    their handler. For every other name: True yields a bare attribute (value
    None), False and None yield nothing, and any other value is yielded
    unchanged.
    """
    handler = CUSTOM_ATTR_PROCESSORS.get(key)
    if handler:
        # Names with special semantics bypass the generic rules below.
        yield from handler(value)
    elif value is True:
        yield key, None
    elif value is not False and value is not None:
        yield key, value

317 

318 

def _substitute_interpolated_attrs(
    attrs: dict[str, str | None], interpolations: tuple[Interpolation, ...]
) -> dict[str, object | None]:
    """
    Swap placeholders in parsed attributes for their interpolated values.

    This is step (1) only, value substitution. Except for entries produced by
    a spread, no special handling of attribute names or value types happens
    here.

    For each parsed attribute:

    - a value containing placeholders is replaced by the substituted value;
    - otherwise, a key that is itself a placeholder is a spread: the
      interpolated value is expanded into individual attributes;
    - otherwise the attribute is static and is copied as-is.
    """
    resolved: dict[str, object | None] = {}
    for name, raw in attrs.items():
        if raw is not None:
            substituted, replacement = _replace_placeholders(raw, interpolations)
            if substituted:
                resolved[name] = replacement
                continue

        spread_index = _find_placeholder(name)
        if spread_index is None:
            # Static attribute
            resolved[name] = raw
            continue

        # Spread attributes
        spread_value = format_interpolation(interpolations[spread_index])
        resolved.update(_substitute_spread_attrs(spread_value))
    return resolved

346 

347 

def _process_html_attrs(attrs: dict[str, object]) -> dict[str, str | None]:
    """
    Apply HTML attribute rules to already-substituted attributes.

    This covers steps (2) and (3): special attribute names are expanded via
    _process_attr(), and the resulting values are normalised: None stays
    None, everything else is converted with ``str()``. When several inputs
    produce the same output name, the last one wins.
    """
    return {
        out_name: (None if out_value is None else str(out_value))
        for name, raw in attrs.items()
        for out_name, out_value in _process_attr(name, raw)
    }

361 

362 

def _substitute_attrs(
    attrs: dict[str, str | None], interpolations: tuple[Interpolation, ...]
) -> dict[str, str | None]:
    """
    Run the full attribute pipeline for an HTML element.

    Placeholders are first replaced with interpolated values, then the result
    is processed according to the HTML attribute rules.
    """
    return _process_html_attrs(_substitute_interpolated_attrs(attrs, interpolations))

373 

374 

def _substitute_and_flatten_children(
    children: t.Iterable[Node], interpolations: tuple[Interpolation, ...]
) -> list[Node]:
    """
    Substitute placeholders in each child and splice Fragments in place.

    A child that substitutes to a Fragment (for instance because the
    interpolated value was iterable) contributes its own children directly;
    only that one level is flattened here.
    """
    flattened: list[Node] = []
    for child in children:
        node = _substitute_node(child, interpolations)
        flattened += node.children if isinstance(node, Fragment) else [node]
    return flattened

389 

390 

def _node_from_value(value: object) -> Node:
    """
    Convert an arbitrary value into a Node.

    This is what happens to an interpolation that appears in child-content
    position. The checks are applied in order, and the first that matches
    wins:

    1. ``str``            -> Text node holding the string
    2. ``Node``           -> returned unchanged
    3. ``Template``       -> rendered recursively through html()
    4. ``False`` / None   -> empty Fragment
    5. any Iterable       -> Fragment of the converted items
    6. has ``__html__()`` -> Text node holding Markup of its result
    7. callable           -> called with no arguments; the result is converted
    8. anything else      -> Text node holding ``str(value)``
    """
    if isinstance(value, str):
        return Text(value)
    if isinstance(value, Node):
        return value
    if isinstance(value, Template):
        return html(value)
    # Consider: falsey values, not just False and None?
    if value is False or value is None:
        return Fragment(children=[])
    if isinstance(value, Iterable):
        return Fragment(children=[_node_from_value(item) for item in value])
    if isinstance(value, HasHTMLDunder):
        # CONSIDER: should we do this lazily?
        return Text(Markup(value.__html__()))
    if callable(value):
        # Callables in child content positions are treated as zero-argument
        # functions that return the value to render.
        return _node_from_value(value())
    # CONSIDER: should we do this lazily?
    return Text(str(value))

421 

422 

def _kebab_to_snake(name: str) -> str:
    """Lower-case ``name`` and turn every hyphen into an underscore."""
    return "_".join(name.split("-")).lower()

426 

427 

def _invoke_component(
    new_attrs: dict[str, object | None],
    new_children: list[Node],
    interpolation: Interpolation,
) -> Node:
    """
    Invoke a component callable with the provided attributes and children.

    Components are any callable that meets the required calling signature.
    Typically, that's a function, but it could also be the constructor or
    __call__() method for a class; dataclass constructors match our expected
    invocation style.

    We validate the callable's signature and invoke it with keyword-only
    arguments, then convert the result to a Node.

    Component invocation rules:

    1. All arguments are passed as keywords only. Components cannot require
       positional arguments.

    2. Attributes are converted from kebab-case to snake_case and passed as
       keyword arguments if the callable accepts them (or has **kwargs).
       Attributes that don't match parameters are silently ignored.

    3. If the callable accepts "children" (or has **kwargs), the child nodes
       are passed as a tuple via the "children" keyword, an empty tuple when
       there is no child content. This takes precedence over an attribute
       that maps to the same name.

    Raises:
        TypeError: if the interpolated value is not callable, if the callable
            requires positional arguments, or if a required named parameter
            was not supplied.
    """
    value = format_interpolation(interpolation)
    if not callable(value):
        raise TypeError(
            f"Expected a callable for component invocation, got {type(value).__name__}"
        )
    callable_info = get_callable_info(value)

    if callable_info.requires_positional:
        raise TypeError(
            "Component callables cannot have required positional arguments."
        )

    kwargs: dict[str, object] = {}

    # Add all supported attributes
    for attr_name, attr_value in new_attrs.items():
        snake_name = _kebab_to_snake(attr_name)
        if snake_name in callable_info.named_params or callable_info.kwargs:
            kwargs[snake_name] = attr_value

    # Add children if appropriate
    if "children" in callable_info.named_params or callable_info.kwargs:
        kwargs["children"] = tuple(new_children)

    # Check to make sure we've fully satisfied the callable's requirements
    missing = callable_info.required_named_params - kwargs.keys()
    if missing:
        # Sort so the message is deterministic regardless of set ordering.
        raise TypeError(
            f"Missing required parameters for component: {', '.join(sorted(missing))}"
        )

    result = value(**kwargs)
    return _node_from_value(result)

494 

495 

496def _substitute_node(p_node: Node, interpolations: tuple[Interpolation, ...]) -> Node: 

497 """Substitute placeholders in a node based on the corresponding interpolations.""" 

498 match p_node: 

499 case Text(text) if (index := _find_placeholder(text)) is not None: 

500 interpolation = interpolations[index] 

501 value = format_interpolation(interpolation) 

502 return _node_from_value(value) 

503 case Element(tag=tag, attrs=attrs, children=children): 

504 new_children = _substitute_and_flatten_children(children, interpolations) 

505 if (index := _find_placeholder(tag)) is not None: 

506 component_attrs = _substitute_interpolated_attrs(attrs, interpolations) 

507 return _invoke_component( 

508 component_attrs, new_children, interpolations[index] 

509 ) 

510 else: 

511 html_attrs = _substitute_attrs(attrs, interpolations) 

512 return Element(tag=tag, attrs=html_attrs, children=new_children) 

513 case Fragment(children=children): 

514 new_children = _substitute_and_flatten_children(children, interpolations) 

515 return Fragment(children=new_children) 

516 case _: 

517 return p_node 

518 

519 

520# -------------------------------------------------------------------------- 

521# Public API 

522# -------------------------------------------------------------------------- 

523 

524 

def html(template: Template) -> Node:
    """
    Parse a t-string and return a tree of Nodes.

    The template's static strings are first parsed into a tree that has
    placeholders where the interpolations go; those placeholders are then
    replaced using the template's interpolations.
    """
    return _substitute_node(_instrument_and_parse(template), template.interpolations)