Coverage for tdom/escaping.py: 100%

1import re

3from markupsafe import escape as markup_escape

# Re-export markupsafe's ``escape`` under the ``escape_html_*`` naming scheme
# so the rest of the project can use one uniform escaping API.
escape_html_text = markup_escape

# HTML character references substituted for the literal angle brackets.
# These must be the entity forms: with the raw characters every replacement
# in escape_html_comment() would be a no-op and nothing would be escaped.
GT = "&gt;"
LT = "&lt;"


def escape_html_comment(text: str) -> str:
    """Escape text injected into an HTML comment.

    Rewrites the sequences that are not allowed in comment text (listed in
    the inline comments below) by replacing the offending angle bracket with
    its character reference, so the text cannot close the comment early.

    Args:
        text: The raw text to place between ``<!--`` and ``-->``.

    Returns:
        The escaped text. Empty input is returned unchanged.
    """
    if not text:
        return text

    # - text must not start with the string ">"
    if text.startswith(">"):
        text = GT + text[1:]

    # - nor start with the string "->"
    if text.startswith("->"):
        text = "-" + GT + text[2:]

    # - nor contain the strings "<!--", "-->", or "--!>"
    text = text.replace("<!--", LT + "!--")
    text = text.replace("-->", "--" + GT)
    text = text.replace("--!>", "--!" + GT)

    # - nor end with the string "<!-".
    if text.endswith("<!-"):
        text = text[:-3] + LT + "!-"

    return text

# @NOTE: We use a group to preserve the case of the tagname, ie. StylE -> StylE
# @NOTE: A raw string is needed for the replacement so that `\g<tagname>`
# reaches `re` intact; otherwise the backslash must be escaped again.
# @NOTE: The pattern intentionally does not require the closing ">". The HTML
# tokenizer also ends a style element on "</style" followed by whitespace or
# "/", so only the "</style" prefix is matched (same approach as the script
# patterns).
# @NOTE: The replacement spells out the "&lt;" character reference; writing a
# literal "<" here would leave the closing tag intact.
STYLE_RES = (
    (re.compile(r"</(?P<tagname>style)", re.I | re.A), r"&lt;/\g<tagname>"),
)


def escape_html_style(text: str) -> str:
    """Escape text injected into an HTML style element.

    Every ASCII case-insensitive occurrence of ``</style`` has its ``<``
    replaced with ``&lt;`` so the text cannot close the surrounding style
    element. The original letter case of the tag name is kept.

    Args:
        text: The raw stylesheet text.

    Returns:
        The text with each ``</style`` occurrence neutralised.
    """
    for match_re, replace_text in STYLE_RES:
        text = match_re.sub(replace_text, text)
    return text

SCRIPT_RES = (
    # @NOTE: Backslashes are unescaped inside `repl` text in ADDITION to
    # python's default unescaping. So for a regular python str() you need
    # `\\` but for a python str() in re.sub(*, repl, *) you need 4
    # backslashes, `\\\\`, but we can use a rawstring to only need 2
    # backslashes, ie. `\\`, in order to get a single backslash out the
    # other side.
    # @NOTE: We use a group to preserve the case of the tagname,
    # ie. ScripT->ScripT.
    # @NOTE: Rawstrings are also needed for the groupname to resolve correctly
    # otherwise the backslash must be escaped twice again.
    (re.compile(r"<!--", re.I | re.A), r"\\x3c!--"),
    (re.compile(r"<(?P<tagname>script)", re.I | re.A), r"\\x3c\g<tagname>"),
    (re.compile(r"</(?P<tagname>script)", re.I | re.A), r"\\x3c/\g<tagname>"),
)


def escape_html_script(text: str) -> str:
    r"""
    Escape text injected into an HTML script element.

    https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements

    (from link) The easiest and safest way to avoid the rather strange
    restrictions described in this section is to always escape an ASCII
    case-insensitive match for:
      - "<!--" as "\x3c!--"
      - "<script" as "\x3cscript"
      - "</script" as "\x3c/script"

    The replacement inserts a literal backslash followed by ``x3c``; the
    letter case of the matched tag name is preserved.

    Args:
        text: The raw script source.

    Returns:
        The text with each of the three sequences above rewritten.
    """
    # This docstring is a raw string on purpose: in a normal string literal
    # "\x3c" would be evaluated to "<" and the mapping above would read as a
    # no-op.
    for match_re, replace_text in SCRIPT_RES:
        text = match_re.sub(replace_text, text)
    return text

Coverage for tdom/escaping.py: 100%

28 statements