Coverage for tdom / nodes.py: 96%

69 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-17 23:32 +0000

1from dataclasses import dataclass, field 

2 

3from .escaping import ( 

4 escape_html_comment, 

5 escape_html_script, 

6 escape_html_style, 

7 escape_html_text, 

8) 

9 

# Tag names that Element treats as void (it refuses to give them children).
# See https://developer.mozilla.org/en-US/docs/Glossary/Void_element
VOID_ELEMENTS = frozenset(
    {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
    }
)


# Two disjoint groups of tag names; CONTENT_ELEMENTS is simply their union.
CDATA_CONTENT_ELEMENTS = frozenset({"script", "style"})
RCDATA_CONTENT_ELEMENTS = frozenset({"textarea", "title"})
CONTENT_ELEMENTS = CDATA_CONTENT_ELEMENTS.union(RCDATA_CONTENT_ELEMENTS)

34 

35 

36# FUTURE: add a pretty-printer to nodes for debugging 

37# FUTURE: make nodes frozen (and have the parser work with mutable builders) 

38 

39 

40@dataclass(slots=True) 

41class Node: 

42 def __html__(self) -> str: 

43 """Return the HTML representation of the node.""" 

44 # By default, just return the string representation 

45 return str(self) 

46 

47 

@dataclass(slots=True)
class Text(Node):
    """A text node; its string form is ``escape_html_text(text)``."""

    text: str  # which may be markupsafe.Markup in practice.

    def __str__(self) -> str:
        # Escaping is delegated entirely to escape_html_text.
        raw = self.text
        return escape_html_text(raw)

    def __eq__(self, other: object) -> bool:
        """Compare by rendered output rather than by the raw ``text`` field.

        Primarily useful in tests: two Text nodes are equal exactly when
        their string representations match; any non-Text object is unequal.
        """
        if not isinstance(other, Text):
            return False
        return str(self) == str(other)

60 

61 

@dataclass(slots=True)
class Fragment(Node):
    """A bare sequence of child nodes with no wrapping markup of its own."""

    children: list[Node] = field(default_factory=list)

    def __str__(self) -> str:
        # Render each child in order and concatenate with no separator.
        return "".join(map(str, self.children))

68 

69 

@dataclass(slots=True)
class Comment(Node):
    """An HTML comment node, rendered between ``<!--`` and ``-->``."""

    text: str

    def __str__(self) -> str:
        # The comment body is passed through escape_html_comment before
        # being wrapped in the comment delimiters.
        escaped = escape_html_comment(self.text)
        return f"<!--{escaped}-->"

76 

77 

@dataclass(slots=True)
class DocumentType(Node):
    """A doctype declaration node; ``text`` defaults to ``"html"``."""

    text: str = "html"

    def __str__(self) -> str:
        # NOTE: ``text`` is interpolated as-is; no escaping is applied here.
        doctype = self.text
        return f"<!DOCTYPE {doctype}>"

84 

85 

@dataclass(slots=True)
class Element(Node):
    """An HTML element: a tag name, its attributes, and its child nodes.

    An attribute whose value is ``None`` is rendered as a bare attribute
    name with no ``="..."`` part. Tags listed in ``VOID_ELEMENTS`` may not
    have children and are rendered in self-closing form.
    """

    tag: str
    attrs: dict[str, str | None] = field(default_factory=dict)
    children: list[Node] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Ensure all preconditions are met.

        Raises:
            ValueError: If ``tag`` is empty, or if a void element was
                constructed with children.
        """
        if not self.tag:
            raise ValueError("Element tag cannot be empty.")

        # Void elements cannot have children
        if self.is_void and self.children:
            raise ValueError(f"Void element <{self.tag}> cannot have children.")

    @property
    def is_void(self) -> bool:
        """Whether ``tag`` is a member of ``VOID_ELEMENTS``."""
        return self.tag in VOID_ELEMENTS

    @property
    def is_content(self) -> bool:
        """Whether ``tag`` is a member of ``CONTENT_ELEMENTS``."""
        return self.tag in CONTENT_ELEMENTS

    def _children_to_str(self) -> str:
        """Serialize the children of this element to a single string.

        For ``script`` and ``style`` elements the raw ``text`` of every
        child is concatenated first and then escaped once, as a whole, with
        the tag-specific escaper. For every other tag each child is rendered
        with ``str()`` and the results are concatenated.

        Raises:
            ValueError: If a ``script`` or ``style`` element contains a
                child that is not a ``Text`` node.
        """
        if not self.children:
            return ""

        # Pick the bulk escaper for the two raw-text tags; everything else
        # lets each child render (and escape) itself.
        if self.tag == "script":
            escape_raw = escape_html_script
        elif self.tag == "style":
            escape_raw = escape_html_style
        else:
            return "".join(str(child) for child in self.children)

        chunks = []
        for child in self.children:
            if not isinstance(child, Text):
                # Name the actual tag so <style> failures are not reported
                # as script errors.
                raise ValueError(
                    f"Cannot serialize non-text content inside a {self.tag} tag."
                )
            # Use the unescaped text: escaping happens once over the whole
            # concatenated body below.
            chunks.append(child.text)
        return escape_raw("".join(chunks))

    def __str__(self) -> str:
        """Render the element, its attributes, and its children as HTML."""
        # Attribute values are passed through escape_html_text; a value of
        # None produces a bare attribute name with no value.
        attrs_str = "".join(
            f" {key}" if value is None else f' {key}="{escape_html_text(value)}"'
            for key, value in self.attrs.items()
        )
        if self.is_void:
            return f"<{self.tag}{attrs_str} />"
        if not self.children:
            return f"<{self.tag}{attrs_str}></{self.tag}>"
        children_str = self._children_to_str()
        return f"<{self.tag}{attrs_str}>{children_str}</{self.tag}>"

142 children_str = self._children_to_str() 

143 return f"<{self.tag}{attrs_str}>{children_str}</{self.tag}>"