Coverage for tdom / escaping.py: 100%
37 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-03 21:23 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-03 21:23 +0000
1import re
3from markupsafe import escape as markup_escape
5from .protocols import HasHTMLDunder
# Expose markupsafe's `escape` under this module's `escape_html_*` naming so
# text escaping is reachable through the same API as the other escapers.
escape_html_text = markup_escape
# Character references substituted for raw angle brackets while escaping.
# They must be the entity forms: with the literal characters every
# replacement in `escape_html_comment` would reproduce its input unchanged.
GT = "&gt;"
LT = "&lt;"


def escape_html_comment(text: str, allow_markup: bool = False) -> str:
    """Escape text injected into an HTML comment.

    The result does not start with ">" or "->", does not contain "<!--",
    "-->" or "--!>", and does not end with "<!-"; the offending angle bracket
    in each of those sequences is replaced by its character reference.

    Args:
        text: The text to place inside the comment. Falsy values are
            returned unchanged.
        allow_markup: When true and ``text`` satisfies ``HasHTMLDunder``, the
            result of ``text.__html__()`` is returned without any escaping.

    Returns:
        The escaped text.
    """
    if not text:
        return text
    if allow_markup and isinstance(text, HasHTMLDunder):
        return text.__html__()
    if not allow_markup and type(text) is not str:
        # String manipulation triggers regular html escapes on Markup, so the
        # `str` subclass is coerced into a true `str` before manipulating it.
        # The exact-type check is deliberate: isinstance() would not detect
        # a subclass.
        text = str(text)

    # - text must not start with the string ">"
    if text.startswith(">"):
        text = GT + text[1:]

    # - nor start with the string "->"
    if text.startswith("->"):
        text = "-" + GT + text[2:]

    # - nor contain the strings "<!--", "-->", or "--!>"
    text = text.replace("<!--", LT + "!--")
    text = text.replace("-->", "--" + GT)
    text = text.replace("--!>", "--!" + GT)

    # - nor end with the string "<!-".
    if text.endswith("<!-"):
        text = text[:-3] + LT + "!-"

    return text
# Pairs of (compiled pattern, replacement template) applied in order by
# `escape_html_style`.
# @NOTE: We use a group to preserve the case of the tagname, ie. StylE -> StylE
# @NOTE: The template is a raw string so that `\g<tagname>` reaches `re`
#        intact; in a regular string the backslash would have to be doubled.
# @NOTE: The leading "<" is spelled as the "&lt;" character reference right in
#        the template; a literal "<" would reproduce the closing tag unchanged.
STYLE_RES = (
    (
        re.compile(r"</(?P<tagname>style)>", re.IGNORECASE | re.ASCII),
        r"&lt;/\g<tagname>>",
    ),
)
def escape_html_style(text: str, allow_markup: bool = False) -> str:
    """Escape text injected into an HTML style element.

    Every (pattern, replacement) pair in ``STYLE_RES`` is applied to the text
    in order.

    Args:
        text: The text to place inside the style element.
        allow_markup: When true and ``text`` satisfies ``HasHTMLDunder``, the
            result of ``text.__html__()`` is returned without any escaping.

    Returns:
        The escaped text.
    """
    if allow_markup and isinstance(text, HasHTMLDunder):
        return text.__html__()
    for pattern, replacement in STYLE_RES:
        text = pattern.sub(replacement, text)
    return text
# Pairs of (compiled pattern, replacement template) applied in order by
# `escape_html_script`.
# @NOTE: `re` processes backslash escapes inside the replacement template in
#        ADDITION to Python's own string-literal escapes. Emitting a single
#        literal backslash therefore takes four backslashes in a regular
#        string, or two in a raw string, which is the form used here.
# @NOTE: We use a group to preserve the case of the tagname,
#        ie. ScripT->ScripT.
# @NOTE: Raw strings are also needed for `\g<tagname>` to resolve correctly,
#        otherwise that backslash would have to be doubled as well.
SCRIPT_RES = (
    (re.compile(r"<!--", re.IGNORECASE | re.ASCII), r"\\x3c!--"),
    (
        re.compile(r"<(?P<tagname>script)", re.IGNORECASE | re.ASCII),
        r"\\x3c\g<tagname>",
    ),
    (
        re.compile(r"</(?P<tagname>script)", re.IGNORECASE | re.ASCII),
        r"\\x3c/\g<tagname>",
    ),
)
def escape_html_script(text: str, allow_markup: bool = False) -> str:
    r"""Escape text injected into an HTML script element.

    https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements

    (from link) The easiest and safest way to avoid the rather strange
    restrictions described in this section is to always escape an ASCII
    case-insensitive match for:
      - "<!--" as "\x3c!--"
      - "<script" as "\x3cscript"
      - "</script" as "\x3c/script"

    Every (pattern, replacement) pair in ``SCRIPT_RES`` is applied to the text
    in order.

    Args:
        text: The text to place inside the script element.
        allow_markup: When true and ``text`` satisfies ``HasHTMLDunder``, the
            result of ``text.__html__()`` is returned without any escaping.

    Returns:
        The escaped text.
    """
    # The docstring is a raw string on purpose: in a regular string Python
    # would turn the `\x3c` examples into a literal "<".
    if allow_markup and isinstance(text, HasHTMLDunder):
        return text.__html__()
    for pattern, replacement in SCRIPT_RES:
        text = pattern.sub(replacement, text)
    return text