Coverage for tdom / escaping.py: 100%
28 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-17 23:32 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-17 23:32 +0000
1import re
3from markupsafe import escape as markup_escape
escape_html_text = markup_escape  # unify api for the rest of the project
# HTML character references for the two angle brackets. These have to be the
# entity spellings rather than the raw characters: every substitution in
# `escape_html_comment` is built from them, and with the raw characters each
# forbidden sequence would simply be replaced by itself.
GT = "&gt;"
LT = "&lt;"


def escape_html_comment(text: str) -> str:
    """Escape text injected into an HTML comment.

    Rewrites the angle bracket of every sequence listed in the inline
    comments below as a character reference (``GT`` / ``LT``), so that the
    sequence no longer appears verbatim in the result.

    Args:
        text: The text to place inside a comment.

    Returns:
        The escaped text.  Empty input is returned unchanged.
    """
    if not text:
        return text

    # - text must not start with the string ">"
    if text.startswith(">"):
        text = GT + text[1:]

    # - nor start with the string "->"
    if text.startswith("->"):
        text = "-" + GT + text[2:]

    # - nor contain the strings "<!--", "-->", or "--!>"
    text = text.replace("<!--", LT + "!--")
    text = text.replace("-->", "--" + GT)
    text = text.replace("--!>", "--!" + GT)

    # - nor end with the string "<!-".
    # Checked last, after the replacements above have settled the final tail.
    if text.endswith("<!-"):
        text = text[:-3] + LT + "!-"

    return text
# Substitution table for <style> contents: (compiled pattern, replacement).
#
# @NOTE: We use a group to preserve the case of the tagname, ie. StylE -> StylE
# @NOTE: The replacement is a raw string so that `\g<tagname>` reaches `re`
#   intact; in a plain literal the backslash would have to be doubled.
# @NOTE: The replacement starts with the `&lt;` character reference; a literal
#   "<" there would put the matched text straight back.
# @NOTE: The pattern stops right after the tag name instead of requiring ">".
#   An end tag is also recognised when the name is followed by whitespace or
#   "/", so "</style >" must be escaped as well.
STYLE_RES = ((re.compile(r"</(?P<tagname>style)", re.I | re.A), r"&lt;/\g<tagname>"),)


def escape_html_style(text: str) -> str:
    """Escape text injected into an HTML style element.

    Every ASCII case-insensitive occurrence of ``</style`` has its leading
    ``<`` rewritten as ``&lt;``; the tag name keeps its original casing and
    whatever follows it is left untouched.

    Args:
        text: The text to place inside a style element.

    Returns:
        The escaped text.
    """
    for pattern, replacement in STYLE_RES:
        text = pattern.sub(replacement, text)
    return text
# Substitution table for <script> contents: (compiled pattern, replacement).
#
# @NOTE: `re` runs its own backslash processing on the replacement template,
#   on top of the processing Python applies to string literals.  Emitting a
#   single backslash would therefore take four of them in a plain literal;
#   a raw string brings that down to the two used below.
# @NOTE: The `tagname` group echoes the tag name back with its original
#   casing, ie. ScripT -> ScripT.
# @NOTE: Raw strings also let `\g<tagname>` reach `re` without any extra
#   escaping.
SCRIPT_RES = (
    (re.compile(r"<!--", re.I | re.A), r"\\x3c!--"),
    (re.compile(r"<(?P<tagname>script)", re.I | re.A), r"\\x3c\g<tagname>"),
    (re.compile(r"</(?P<tagname>script)", re.I | re.A), r"\\x3c/\g<tagname>"),
)


def escape_html_script(text: str) -> str:
    r"""
    Escape text injected into an HTML script element.

    https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements

    (from link) The easiest and safest way to avoid the rather strange
    restrictions described in that section is to always escape an ASCII
    case-insensitive match for:

    - "<!--" as "\x3c!--"
    - "<script" as "\x3cscript"
    - "</script" as "\x3c/script"

    The substitutions in ``SCRIPT_RES`` are applied one after another, in
    the order they are listed.
    """
    escaped = text
    for pattern, template in SCRIPT_RES:
        escaped = pattern.sub(template, escaped)
    return escaped