77class HTMLSanitizerMixin (object ):
88 """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
99
# HTML element names on the sanitizer's accept list (post-change version:
# the earlier list plus the newer HTML5-era and legacy element names).
# Kept as a plain list so it can still be concatenated with other lists.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
# NOTE(review): the code that folds this into `allowed_elements` is not
# visible in this view -- confirm before relying on it.
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
    'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
    'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
    'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
    'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
    'figure', 'footer', 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4',
    'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'keygen', 'kbd',
    'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', 'multicol',
    'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', 'p', 'pre',
    'progress', 'q', 's', 'samp', 'section', 'select', 'small', 'sound',
    'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
    'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt',
    'u', 'ul', 'var', 'video']
1923
2024 mathml_elements = ['maction' , 'math' , 'merror' , 'mfrac' , 'mi' ,
2125 'mmultiscripts' , 'mn' , 'mo' , 'mover' , 'mpadded' , 'mphantom' ,
@@ -24,24 +28,35 @@ class HTMLSanitizerMixin(object):
2428 'munderover' , 'none' ]
2529
# SVG element names on the sanitizer's accept list (post-change version,
# which adds 'clipPath').
# Every entry is written without padding: a name such as 'circle ' or
# 'font-face- name' could never equal a real tag name in a membership test.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
3236
# HTML attribute names on the sanitizer's accept list (post-change version:
# the earlier list plus newer form/media attributes and a number of legacy
# presentational ones).
# Kept as a plain list so it can still be concatenated with other lists.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
    'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
    'background', 'balance', 'bgcolor', 'bgproperties', 'border',
    'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
    'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
    'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
    'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
    'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
    'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
    'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
    'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
    'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
    'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
    'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
    'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
    'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
    'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
    'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
    'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
    'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
    'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
    'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
    'width', 'wrap', 'xml:lang']
4560
4661 mathml_attributes = ['actiontype' , 'align' , 'columnalign' , 'columnalign' ,
4762 'columnalign' , 'columnlines' , 'columnspacing' , 'columnspan' , 'depth' ,
@@ -54,43 +69,45 @@ class HTMLSanitizerMixin(object):
5469 'xlink:type' , 'xmlns' , 'xmlns:xlink' ]
5570
# SVG attribute names on the sanitizer's accept list (post-change version,
# which adds 'clip-path').
# Every entry is written without padding: names such as ' color',
# 'horiz-adv -x' or ' xml:base' could never equal a real attribute name in
# a membership test.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
    'arabic-form', 'ascent', 'attributeName', 'attributeType',
    'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
    'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
    'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
    'fill-opacity', 'fill-rule', 'font-family', 'font-size',
    'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
    'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
    'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
    'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
    'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
    'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
    'opacity', 'orient', 'origin', 'overline-position',
    'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
    'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
    'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
    'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
    'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
    'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
    'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
    'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
    'transform', 'type', 'u1', 'u2', 'underline-position',
    'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
    'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
    'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
    'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
    'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
    'y1', 'y2', 'zoomAndPan']
8499
# Attribute names listed as holding a URI value.
# NOTE(review): the code that consumes this list is not visible in this
# view -- presumably these values get a URI/scheme check; confirm.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
    'xlink:href', 'xml:base']
87102
# SVG attribute names listed as allowed to carry a reference in their value.
# NOTE(review): the code that consumes this list is not visible in this
# view -- presumably it governs url(...)-style references; confirm.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
    'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
    'mask', 'stroke']
90106
# SVG element names listed as allowed to keep a local href.
# NOTE(review): the code that consumes this list is not visible in this
# view -- presumably it restricts xlink:href on these elements to
# same-document targets; confirm.
# NOTE(review): in the source file this is an attribute of
# HTMLSanitizerMixin -- re-indent when splicing back into the class body.
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
    'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
    'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
    'set', 'use']
94111
95112 acceptable_css_properties = ['azimuth' , 'background-color' ,
96113 'border-bottom-color' , 'border-collapse' , 'border-color' ,
@@ -140,7 +157,13 @@ class HTMLSanitizerMixin(object):
140157 # sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
141158 # => <a>Click here for $100</a>
142159 def sanitize_token (self , token ):
143- if token ["type" ] in (tokenTypes ["StartTag" ], tokenTypes ["EndTag" ],
160+
161+ # accommodate filters which use token_type differently
162+ token_type = token ["type" ]
163+ if token_type in tokenTypes .keys ():
164+ token_type = tokenTypes [token_type ]
165+
166+ if token_type in (tokenTypes ["StartTag" ], tokenTypes ["EndTag" ],
144167 tokenTypes ["EmptyTag" ]):
145168 if token ["name" ] in self .allowed_elements :
146169 if token .has_key ("data" ):
@@ -172,19 +195,24 @@ def sanitize_token(self, token):
172195 token ["data" ] = [[name ,val ] for name ,val in attrs .items ()]
173196 return token
174197 else :
175- if token [ "type" ] == tokenTypes ["EndTag" ]:
198+ if token_type == tokenTypes ["EndTag" ]:
176199 token ["data" ] = "</%s>" % token ["name" ]
177200 elif token ["data" ]:
178201 attrs = '' .join ([' %s="%s"' % (k ,escape (v )) for k ,v in token ["data" ]])
179202 token ["data" ] = "<%s%s>" % (token ["name" ],attrs )
180203 else :
181204 token ["data" ] = "<%s>" % token ["name" ]
182- if token [ "selfClosing" ] :
205+ if token . get ( "selfClosing" ) :
183206 token ["data" ]= token ["data" ][:- 1 ] + "/>"
184- token ["type" ] = tokenTypes ["Characters" ]
207+
208+ if token ["type" ] in tokenTypes .keys ():
209+ token ["type" ] = "Characters"
210+ else :
211+ token ["type" ] = tokenTypes ["Characters" ]
212+
185213 del token ["name" ]
186214 return token
187- elif token [ "type" ] == tokenTypes ["Comment" ]:
215+ elif token_type == tokenTypes ["Comment" ]:
188216 pass
189217 else :
190218 return token
0 commit comments