Coverage for tdom/parser.py: 90%

1import typing as t

2from dataclasses import dataclass, field

3from html.parser import HTMLParser

4from string.templatelib import Interpolation, Template

6from .nodes import VOID_ELEMENTS

7from .placeholders import FRAGMENT_TAG, PlaceholderState

8from .tnodes import (

9 TAttribute,

10 TComment,

11 TComponent,

12 TDocumentType,

13 TElement,

14 TFragment,

15 TInterpolatedAttribute,

16 TLiteralAttribute,

17 TNode,

18 TSpreadAttribute,

19 TTemplatedAttribute,

20 TText,

21)

23type HTMLAttribute = tuple[str, str | None]

24type HTMLAttributesDict = dict[str, str | None]

27@dataclass

28class OpenTElement:

29 tag: str

30 attrs: tuple[TAttribute, ...]

31 children: list[TNode] = field(default_factory=list)

34@dataclass

35class OpenTFragment:

36 children: list[TNode] = field(default_factory=list)

39@dataclass

40class OpenTComponent:

41 # TODO: hold on to start_s_index when we start to need it.

42 start_i_index: int

43 attrs: tuple[TAttribute, ...]

44 children: list[TNode] = field(default_factory=list)

47type OpenTag = OpenTElement | OpenTFragment | OpenTComponent

50@dataclass

51class SourceTracker:

52 """Tracks source locations within a Template for error reporting."""

54 # TODO: write utilities to generate complete error messages, with the

55 # template itself in context and the relevant line/column underlined/etc.

57 template: Template

58 i_index: int = -1 # The current interpolation index.

60 @property

61 def interpolations(self) -> tuple[Interpolation, ...]:

62 return self.template.interpolations

64 @property

65 def s_index(self) -> int:

66 """The current string index."""

67 return self.i_index + 1

69 def advance_interpolation(self) -> int:

70 """Call before processing an interpolation to move to the next one."""

71 self.i_index += 1

72 return self.i_index

74 def get_expression(

75 self, i_index: int, fallback_prefix: str = "interpolation"

76 ) -> str:

77 """

78 Resolve an interpolation index to its original expression for error messages.

79 Falls back to a synthetic expression if the original is empty.

80 """

81 ip = self.interpolations[i_index]

82 return ip.expression if ip.expression else f"{{{fallback_prefix}-{i_index}}}"

84 def get_interpolation_value(self, i_index: int):

85 """Get the runtime value at the given interpolation index."""

86 return self.interpolations[i_index].value

88 def format_starttag(self, i_index: int) -> str:

89 """Format a component start tag for error messages."""

90 return self.get_expression(i_index, fallback_prefix="component-starttag")

92 def format_endtag(self, i_index: int) -> str:

93 """Format a component end tag for error messages."""

94 return self.get_expression(i_index, fallback_prefix="component-endtag")

96 def format_open_tag(self, open_tag: OpenTag) -> str:

97 """Format any open tag for error messages."""

98 match open_tag:

99 case OpenTElement(tag=tag):

100 return tag

101 case OpenTFragment():

102 return ""

103 case OpenTComponent(start_i_index=i_index):

104 return self.format_starttag(i_index)

105

106

107class TemplateParser(HTMLParser):

108 root: OpenTFragment

109 stack: list[OpenTag]

110 placeholders: PlaceholderState

111 source: SourceTracker | None

112

113 def __init__(self, *, convert_charrefs: bool = True):

114 # This calls HTMLParser.reset() which we override to set up our state.

115 super().__init__(convert_charrefs=convert_charrefs)

116

117 # ------------------------------------------

118 # Parse state helpers

119 # ------------------------------------------

120

121 def get_parent(self) -> OpenTag:

122 """Return the current parent node to which new children should be added."""

123 return self.stack[-1] if self.stack else self.root

124

125 def append_child(self, child: TNode) -> None:

126 parent = self.get_parent()

127 parent.children.append(child)

128

129 # ------------------------------------------

130 # Attribute Helpers

131 # ------------------------------------------

132

133 def make_tattr(self, attr: HTMLAttribute) -> TAttribute:

134 """Build a TAttribute from a raw attribute tuple."""

135

136 name, value = attr

137 name_ref = self.placeholders.remove_placeholders(name)

138 value_ref = (

139 self.placeholders.remove_placeholders(value) if value is not None else None

140 )

141

142 if name_ref.is_literal:

143 if value_ref is None or value_ref.is_literal:

144 return TLiteralAttribute(name=name, value=value)

145 elif value_ref.is_singleton:

146 return TInterpolatedAttribute(

147 name=name, value_i_index=value_ref.i_indexes[0]

148 )

149 else:

150 return TTemplatedAttribute(name=name, value_ref=value_ref)

151 if value_ref is not None:

152 raise ValueError(

153 "Attribute names cannot contain interpolations if the value is also interpolated."

154 )

155 if not name_ref.is_singleton:

156 raise ValueError(

157 "Spread attributes must have exactly one interpolation in the name."

158 )

159 return TSpreadAttribute(i_index=name_ref.i_indexes[0])

160

161 def make_tattrs(self, attrs: t.Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]:

162 """Build TAttributes from raw attribute tuples."""

163 return tuple(self.make_tattr(attr) for attr in attrs)

164

165 # ------------------------------------------

166 # Tag Helpers

167 # ------------------------------------------

168

169 def make_open_tag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> OpenTag:

170 """Build an OpenTag from a raw tag and attribute tuples."""

171 tag_ref = self.placeholders.remove_placeholders(tag)

172

173 if tag_ref.is_literal:

174 if tag == FRAGMENT_TAG:

175 if attrs:

176 raise ValueError("Fragments cannot have attributes.")

177 return OpenTFragment()

178 return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs))

179

180 if not tag_ref.is_singleton:

181 raise ValueError(

182 "Component element tags must have exactly one interpolation."

183 )

184

185 # HERE BE DRAGONS: the interpolation at i_index should be a

186 # component callable. We do not check this in the parser, instead

187 # relying on higher layers to validate types and render correctly.

188 i_index = tag_ref.i_indexes[0]

189 return OpenTComponent(

190 start_i_index=i_index,

191 attrs=self.make_tattrs(attrs),

192 )

193

194 def finalize_tag(

195 self, open_tag: OpenTag, endtag_i_index: int | None = None

196 ) -> TNode:

197 """Finalize an OpenTag into a TNode."""

198 match open_tag:

199 case OpenTElement(tag=tag, attrs=attrs, children=children):

200 return TElement(tag=tag, attrs=attrs, children=tuple(children))

201 case OpenTFragment(children=children):

202 return TFragment(children=tuple(children))

203 case OpenTComponent(

204 start_i_index=start_i_index,

205 attrs=attrs,

206 children=children,

207 ):

208 return TComponent(

209 start_i_index=start_i_index,

210 end_i_index=endtag_i_index,

211 attrs=attrs,

212 children=tuple(children),

213 )

214

215 def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None:

216 """Validate that closing tag matches open tag. Return component end index if applicable."""

217 assert self.source, "Parser source tracker not initialized."

218 tag_ref = self.placeholders.remove_placeholders(tag)

219

220 match open_tag:

221 case OpenTElement():

222 if not tag_ref.is_literal:

223 raise ValueError(

224 f"Component closing tag found for element <{open_tag.tag}>."

225 )

226 if tag != open_tag.tag:

227 raise ValueError(

228 f"Mismatched closing tag </{tag}> for element <{open_tag.tag}>."

229 )

230 return None

231

232 case OpenTFragment():

233 if not tag_ref.is_literal:

234 raise ValueError("Component closing tag found for fragment.")

235 if tag != FRAGMENT_TAG:

236 raise ValueError(f"Mismatched closing tag </{tag}> for fragment.")

237 return None

238

239 case OpenTComponent(start_i_index=start_i_index):

240 if tag_ref.is_literal:

241 raise ValueError(

242 f"Mismatched closing tag </{tag}> for component starting at {self.source.format_starttag(start_i_index)}."

243 )

244 if not tag_ref.is_singleton:

245 raise ValueError(

246 "Component end tags must have exactly one interpolation."

247 )

248 # HERE BE DRAGONS: the interpolation at end_i_index shuld be a

249 # component callable that matches the start tag. We do not check

250 # any of this in the parser, instead relying on higher layers.

251 return tag_ref.i_indexes[0]

252

253 # ------------------------------------------

254 # HTMLParser tag callbacks

255 # ------------------------------------------

256

257 def handle_starttag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:

258 open_tag = self.make_open_tag(tag, attrs)

259 if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS:

260 final_tag = self.finalize_tag(open_tag)

261 self.append_child(final_tag)

262 else:

263 self.stack.append(open_tag)

264

265 def handle_startendtag(self, tag: str, attrs: t.Sequence[HTMLAttribute]) -> None:

266 """Dispatch a self-closing tag, `<tag />` to specialized handlers."""

267 open_tag = self.make_open_tag(tag, attrs)

268 final_tag = self.finalize_tag(open_tag)

269 self.append_child(final_tag)

270

271 def handle_endtag(self, tag: str) -> None:

272 if not self.stack:

273 raise ValueError(f"Unexpected closing tag </{tag}> with no open tag.")

274

275 open_tag = self.stack.pop()

276 endtag_i_index = self.validate_end_tag(tag, open_tag)

277 final_tag = self.finalize_tag(open_tag, endtag_i_index)

278 self.append_child(final_tag)

279

280 # ------------------------------------------

281 # HTMLParser other callbacks

282 # ------------------------------------------

283

284 def handle_data(self, data: str) -> None:

285 ref = self.placeholders.remove_placeholders(data)

286 text = TText(ref)

287 self.append_child(text)

288

289 def handle_comment(self, data: str) -> None:

290 ref = self.placeholders.remove_placeholders(data)

291 comment = TComment(ref)

292 self.append_child(comment)

293

294 def handle_decl(self, decl: str) -> None:

295 ref = self.placeholders.remove_placeholders(decl)

296 if not ref.is_literal:

297 raise ValueError("Interpolations are not allowed in declarations.")

298 if not decl.upper().startswith("DOCTYPE"):

299 raise NotImplementedError(

300 "Only DOCTYPE declarations are currently supported."

301 )

302 doctype_content = decl[7:].strip()

303 doctype = TDocumentType(doctype_content)

304 self.append_child(doctype)

305

306 def reset(self):

307 super().reset()

308 self.root = OpenTFragment()

309 self.stack = []

310 self.placeholders = PlaceholderState()

311 self.source = None

312

313 def close(self) -> None:

314 if self.stack:

315 raise ValueError("Invalid HTML structure: unclosed tags remain.")

316 if not self.placeholders.is_empty:

317 raise ValueError("Some placeholders were never resolved.")

318 super().close()

319

320 # ------------------------------------------

321 # Getting the parsed node tree

322 # ------------------------------------------

323

324 def get_tnode(self) -> TNode:

325 """Get the Node tree parsed from the input HTML."""

326 # TODO: consider always returning a TTag?

327 if len(self.root.children) > 1:

328 # The parse structure results in multiple root elements, so we

329 # return a Fragment to hold them all.

330 return TFragment(children=tuple(self.root.children))

331 elif len(self.root.children) == 1:

332 # The parse structure results in a single root element, so we

333 # return that element directly. This will be a non-Fragment Node.

334 return self.root.children[0]

335 else:

336 # Special case: the parse structure is empty; we treat

337 # this as an empty document fragment.

338 # CONSIDER: or as an empty text node?

339 return TFragment(children=tuple())

340

341 # ------------------------------------------

342 # Feeding and parsing

343 # ------------------------------------------

344

345 def feed_str(self, s: str) -> None:

346 """Feed a string part of a Template to the parser."""

347 # TODO: add tracking for this, or maybe just deprecate it?

348 s = s.replace("<>", f"<{FRAGMENT_TAG}>").replace("</>", f"</{FRAGMENT_TAG}>")

349 self.feed(s)

350

351 def feed_interpolation(self, index: int) -> None:

352 placeholder = self.placeholders.add_placeholder(index)

353 self.feed(placeholder)

354

355 def feed_template(self, template: Template) -> None:

356 """Feed a Template's content to the parser."""

357 assert self.source is None, "Did you forget to call reset?"

358 self.source = SourceTracker(template)

359 for i_index in range(len(template.interpolations)):

360 self.feed_str(template.strings[i_index])

361 self.source.advance_interpolation()

362 self.feed_interpolation(i_index)

363 self.feed_str(template.strings[-1])

364

365 @staticmethod

366 def parse(t: Template) -> TNode:

367 """

368 Parse a Template containing valid HTML and substitutions and return

369 a TNode tree representing its structure. This cachable structure can later

370 be resolved against actual interpolation values to produce a Node tree.

371 """

372 parser = TemplateParser()

373 parser.feed_template(t)

374 parser.close()

375 return parser.get_tnode()

Coverage for tdom / parser.py: 90%

187 statements