Coverage for tdom / escaping.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-17 23:32 +0000

1import re 

2 

3from markupsafe import escape as markup_escape 

4 

escape_html_text = markup_escape  # alias so text escaping shares the escape_html_* naming of the helpers below

6 

7 

# Entity replacements for the angle brackets. These must be the HTML entities,
# not the raw characters: with ">" / "<" every substitution below would replace
# a string with itself and nothing would be escaped.
GT = "&gt;"
LT = "&lt;"


def escape_html_comment(text: str) -> str:
    """Escape text injected into an HTML comment.

    Rewrites the sequences that would terminate or corrupt a comment,
    replacing the offending angle bracket with its HTML entity.

    Args:
        text: Raw text to place between ``<!--`` and ``-->``.

    Returns:
        The text with the forbidden sequences neutralised. Empty input is
        returned unchanged.
    """
    if not text:
        return text

    # - text must not start with the string ">"
    if text.startswith(">"):
        text = GT + text[1:]

    # - nor start with the string "->"
    if text.startswith("->"):
        text = "-" + GT + text[2:]

    # - nor contain the strings "<!--", "-->", or "--!>"
    text = text.replace("<!--", LT + "!--")
    text = text.replace("-->", "--" + GT)
    text = text.replace("--!>", "--!" + GT)

    # - nor end with the string "<!-".
    if text.endswith("<!-"):
        text = text[:-3] + LT + "!-"

    return text

34 

35 

# @NOTE: We use a group to preserve the case of the tagname, ie. StylE -> StylE
# @NOTE: A raw string is needed for the replacement so that `\g<tagname>`
#        reaches `re` intact; otherwise the backslash must be doubled.
# @NOTE: The lookahead covers every character that terminates an end-tag name
#        in the HTML tokenizer (whitespace, "/" or ">"), so `</style >` and
#        `</style/>` are escaped as well as `</style>`. The terminator itself
#        is not consumed and stays in the output.
# @NOTE: The replacement spells out the `&lt;` entity; a raw "<" here would
#        rewrite the closing tag to itself.
STYLE_RES = (
    (
        re.compile(r"</(?P<tagname>style)(?=[\t\n\f\r />])", re.I | re.A),
        r"&lt;/\g<tagname>",
    ),
)


def escape_html_style(text: str) -> str:
    """Escape text injected into an HTML style element.

    Neutralises any ASCII case-insensitive ``</style`` end tag so the text
    cannot close the surrounding element early.

    Args:
        text: Raw stylesheet text.

    Returns:
        The text with the leading ``<`` of each closing style tag replaced
        by ``&lt;``; the tag name keeps its original case.
    """
    for pattern, replacement in STYLE_RES:
        text = pattern.sub(replacement, text)
    return text

47 

48 

SCRIPT_RES = (
    # @NOTE: `re` processes backslash escapes inside the replacement text in
    #        ADDITION to python's own string-literal unescaping. A regular
    #        python str() would therefore need four backslashes to emit a
    #        single one; a raw string only needs two.
    # @NOTE: We use a group to preserve the case of the tagname,
    #        ie. ScripT -> ScripT.
    # @NOTE: Raw strings are also needed so that `\g<tagname>` reaches `re`
    #        intact; otherwise the backslash must be doubled again.
    (re.compile(r"<!--", re.I | re.A), r"\\x3c!--"),
    (re.compile(r"<(?P<tagname>script)", re.I | re.A), r"\\x3c\g<tagname>"),
    (re.compile(r"</(?P<tagname>script)", re.I | re.A), r"\\x3c/\g<tagname>"),
)


def escape_html_script(text: str) -> str:
    r"""
    Escape text injected into an HTML script element.

    https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements

    (from link) The easiest and safest way to avoid the rather strange restrictions
    described in this section is to always escape an ASCII case-insensitive
    match for:
    - "<!--" as "\x3c!--"
    - "<script" as "\x3cscript"
    - "</script" as "\x3c/script"

    Args:
        text: Raw script source.

    Returns:
        The text with the leading "<" of each sequence above replaced by the
        four characters ``\x3c``; the tag name keeps its original case.
    """
    for pattern, replacement in SCRIPT_RES:
        text = pattern.sub(replacement, text)
    return text