Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/html/sanitizer/htmlsanitizer.js
2868 views
1
// Copyright 2016 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
16
/**
17
* @fileoverview An HTML sanitizer that can satisfy a variety of security
18
* policies.
19
*
20
* This package provides html sanitizing functions. It does not enforce string
21
* to string conversion, instead returning a dom-like element when possible.
22
*
23
* Examples of usage of the static {@code goog.html.sanitizer.sanitize}:
24
* <pre>
25
* var safeHtml = goog.html.sanitizer.sanitize('<script src="xss.js" />');
26
* goog.dom.safe.setInnerHtml(el, safeHtml);
27
* </pre>
28
*
29
* @supported IE 10+, Chrome 26+, Firefox 22+, Safari 7.1+, Opera 15+
30
*/
31
32
goog.provide('goog.html.sanitizer.HtmlSanitizer');
33
goog.provide('goog.html.sanitizer.HtmlSanitizer.Builder');
34
goog.provide('goog.html.sanitizer.HtmlSanitizerAttributePolicy');
35
goog.provide('goog.html.sanitizer.HtmlSanitizerPolicy');
36
goog.provide('goog.html.sanitizer.HtmlSanitizerPolicyContext');
37
goog.provide('goog.html.sanitizer.HtmlSanitizerPolicyHints');
38
goog.provide('goog.html.sanitizer.HtmlSanitizerUrlPolicy');
39
40
goog.require('goog.array');
41
goog.require('goog.asserts');
42
goog.require('goog.dom');
43
goog.require('goog.dom.NodeType');
44
goog.require('goog.functions');
45
goog.require('goog.html.SafeHtml');
46
goog.require('goog.html.SafeStyle');
47
goog.require('goog.html.SafeUrl');
48
goog.require('goog.html.sanitizer.AttributeSanitizedWhitelist');
49
goog.require('goog.html.sanitizer.AttributeWhitelist');
50
goog.require('goog.html.sanitizer.CssSanitizer');
51
goog.require('goog.html.sanitizer.TagBlacklist');
52
goog.require('goog.html.sanitizer.TagWhitelist');
53
goog.require('goog.html.uncheckedconversions');
54
goog.require('goog.object');
55
goog.require('goog.string');
56
goog.require('goog.string.Const');
57
goog.require('goog.userAgent');
58
59
60
/**
 * Optional hints handed to policy handler functions. Every field may be left
 * undefined.
 * @typedef {{
 *     tagName: (string|undefined),
 *     attributeName: (string|undefined),
 *     cssProperty: (string|undefined)
 * }}
 */
goog.html.sanitizer.HtmlSanitizerPolicyHints;


/**
 * Optional context object handed to policy handler functions.
 * @typedef {{
 *     cssStyle: (?CSSStyleDeclaration|undefined)
 * }}
 */
goog.html.sanitizer.HtmlSanitizerPolicyContext;


/**
 * A policy function. It takes the string value to check, followed by optional
 * hints, an optional context object and an optional policy, and returns a
 * string or null.
 * @typedef {function(string, goog.html.sanitizer.HtmlSanitizerPolicyHints=,
 *     goog.html.sanitizer.HtmlSanitizerPolicyContext=,
 *     goog.html.sanitizer.HtmlSanitizerPolicy=):?string}
 */
goog.html.sanitizer.HtmlSanitizerPolicy;


/**
 * A URL policy function. It takes the URL string and optional hints, and
 * returns a SafeUrl or null.
 *
 * @typedef {function(string, !goog.html.sanitizer.HtmlSanitizerPolicyHints=):
 *     ?goog.html.SafeUrl}
 */
goog.html.sanitizer.HtmlSanitizerUrlPolicy;


/**
 * Configuration record pairing a tag name and an attribute name with an
 * optional (nullable) policy.
 * @typedef {{
 *     tagName: string,
 *     attributeName: string,
 *     policy: ?goog.html.sanitizer.HtmlSanitizerPolicy
 * }}
 */
goog.html.sanitizer.HtmlSanitizerAttributePolicy;
107
108
109
/**
 * Whether the HTML sanitizer is supported. For now this mainly excludes IE9
 * and below, where the sanitizer is known to be insecure.
 * @const @private {boolean}
 */
goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_ =
    !goog.userAgent.IE || document.documentMode >= 10;


/**
 * Whether the template tag is supported.
 * @const @package
 */
goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED =
    !goog.userAgent.IE || document.documentMode == null;


/**
 * Prefix shared by all internal HTML sanitizer bookkeeping properties.
 * @const @private {string}
 */
goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ = 'data-sanitizer-';


/**
 * Name of the temporary attribute the HTML sanitizer uses for bookkeeping.
 * @const @private {string}
 */
goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_ =
    goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'elem-num';


/**
 * Attribute name added to span tags that replace unknown tags. The value of
 * this attribute is the name of the tag before the sanitization occurred.
 * @const @private
 */
goog.html.sanitizer.HTML_SANITIZER_SANITIZED_ATTR_NAME_ =
    goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'original-tag';


/**
 * Attribute name added to blacklisted tags so that they can then be filtered
 * from the output.
 * @const @private
 */
goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_ =
    goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_ + 'blacklisted-tag';


/**
 * Map of property descriptors, looked up once here to avoid looking up the
 * prototypes multiple times. The map is empty when the sanitizer is not
 * supported.
 * @const @private {!Object<string, !ObjectPropertyDescriptor>}
 */
goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_ =
    goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_ ? {
      'attributes':
          Object.getOwnPropertyDescriptor(Element.prototype, 'attributes'),
      'setAttribute':
          Object.getOwnPropertyDescriptor(Element.prototype, 'setAttribute'),
      'innerHTML':
          Object.getOwnPropertyDescriptor(Element.prototype, 'innerHTML'),
      'nodeName': Object.getOwnPropertyDescriptor(Node.prototype, 'nodeName'),
      'parentNode':
          Object.getOwnPropertyDescriptor(Node.prototype, 'parentNode'),
      'childNodes':
          Object.getOwnPropertyDescriptor(Node.prototype, 'childNodes'),
      'style': Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'style')
    } :
                                                    {};
179
180
181
/**
 * Creates an HTML sanitizer configured from a builder. When no builder is
 * passed, a newly constructed Builder supplies the configuration.
 * @param {!goog.html.sanitizer.HtmlSanitizer.Builder=} opt_builder
 * @final @constructor @struct
 */
goog.html.sanitizer.HtmlSanitizer = function(opt_builder) {
  var builder = opt_builder || new goog.html.sanitizer.HtmlSanitizer.Builder();

  // Policies are installed on the builder first; the whitelists are copied
  // from it afterwards.
  builder.installPolicies_();

  /** @private {boolean} */
  this.shouldSanitizeTemplateContents_ =
      builder.shouldSanitizeTemplateContents_;

  /** @private {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>} */
  this.attributeHandlers_ = goog.object.clone(builder.attributeWhitelist_);

  /** @private {!Object<string, boolean>} */
  this.tagBlacklist_ = goog.object.clone(builder.tagBlacklist_);

  /** @private {!Object<string, boolean>} */
  this.tagWhitelist_ = goog.object.clone(builder.tagWhitelist_);

  /** @private {boolean} */
  this.shouldAddOriginalTagNames_ = builder.shouldAddOriginalTagNames_;

  // Register each whitelisted data-* attribute from the builder as a
  // tag-agnostic handler that only cleans up the value. data-* attributes are
  // inert as per HTML5 specs, so not much sanitization is needed.
  var handlers = this.attributeHandlers_;
  var bookkeepingPrefix =
      goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_;
  goog.array.forEach(builder.dataAttributeWhitelist_, function(name) {
    goog.asserts.assert(goog.string.startsWith(name, 'data-'));
    goog.asserts.assert(!goog.string.startsWith(name, bookkeepingPrefix));

    handlers['* ' + name.toUpperCase()] =
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
            goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
  });
};
220
221
222
223
/**
 * Adapts a HtmlSanitizerUrlPolicy into a HtmlSanitizerPolicy. The returned
 * policy cleans up the raw value, passes it to the URL policy and unwraps the
 * SafeUrl that comes back. It yields null when the URL policy returns nothing
 * or when the unwrapped value is goog.html.SafeUrl.INNOCUOUS_STRING.
 * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy} customUrlPolicy
 * @return {!goog.html.sanitizer.HtmlSanitizerPolicy}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_ = function(customUrlPolicy) {
  var adaptedPolicy = function(url, policyHints) {
    var cleanedUrl =
        goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_(url, policyHints);
    var safeUrl = customUrlPolicy(cleanedUrl, policyHints);
    if (!safeUrl) {
      return null;
    }
    var unwrapped = goog.html.SafeUrl.unwrap(safeUrl);
    return unwrapped != goog.html.SafeUrl.INNOCUOUS_STRING ? unwrapped : null;
  };
  return /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
      adaptedPolicy);
};
245
246
247
248
/**
 * The builder for the HTML Sanitizer. All methods except build return this.
 * @final @constructor @struct
 */
goog.html.sanitizer.HtmlSanitizer.Builder = function() {
  /**
   * A set of attribute sanitization functions. Default built-in handlers are
   * all tag-agnostic by design. Note that some attributes behave differently
   * when attached to different nodes (for example, the href attribute will
   * generally not make a network request, but &lt;link href=""&gt; does), and
   * so when necessary a tag-specific handler can be used to override a
   * tag-agnostic one.
   * @private {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>}
   */
  this.attributeWhitelist_ = {};

  // Seed every key of both built-in attribute whitelists with the plain
  // clean-up handler.
  var attributeWhitelist = this.attributeWhitelist_;
  var builtInWhitelists = [
    goog.html.sanitizer.AttributeWhitelist,
    goog.html.sanitizer.AttributeSanitizedWhitelist
  ];
  goog.array.forEach(builtInWhitelists, function(builtIn) {
    goog.array.forEach(goog.object.getKeys(builtIn), function(key) {
      attributeWhitelist[key] =
          /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */
          (goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
    });
  });

  /**
   * A set of attribute handlers that should not inherit their default policy
   * during build().
   * @private {!Object<string, boolean>}
   */
  this.attributeOverrideList_ = {};


  /**
   * Keeps track of whether we allow form tags.
   * @private {boolean}
   */
  this.allowFormTag_ = false;

  /**
   * Whether the content of TEMPLATE tags (assuming TEMPLATE is whitelisted)
   * should be sanitized or passed through.
   * @private {boolean}
   */
  this.shouldSanitizeTemplateContents_ = true;

  /**
   * List of data attributes to whitelist. Data-attributes are inert and don't
   * require sanitization.
   * @private {!Array<string>}
   */
  this.dataAttributeWhitelist_ = [];

  /**
   * A tag blacklist, to effectively remove an element and its children from
   * the dom.
   * @private {!Object<string, boolean>}
   */
  this.tagBlacklist_ = {};

  /**
   * A tag whitelist, to effectively allow an element and its children from
   * the dom. Starts as a copy of goog.html.sanitizer.TagWhitelist.
   * @private {!Object<string, boolean>}
   */
  this.tagWhitelist_ = goog.object.clone(goog.html.sanitizer.TagWhitelist);

  /**
   * Whether non-whitelisted and non-blacklisted tags that have been converted
   * to &lt;span&gt; tags will contain the original tag in a data attribute.
   * @private {boolean}
   */
  this.shouldAddOriginalTagNames_ = false;

  /**
   * A function to be applied to URLs found on the parsing process which do
   * not trigger requests.
   * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
   */
  this.urlPolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultUrlPolicy_;

  /**
   * A function to be applied to urls found on the parsing process which may
   * trigger requests.
   * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
   */
  this.networkRequestUrlPolicy_ =
      goog.html.sanitizer.HtmlSanitizer.defaultNetworkRequestUrlPolicy_;

  /**
   * A function to be applied to names found on the parsing process.
   * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
   */
  this.namePolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultNamePolicy_;

  /**
   * A function to be applied to other tokens (i.e. classes and IDs) found on
   * the parsing process.
   * @private {!goog.html.sanitizer.HtmlSanitizerPolicy}
   */
  this.tokenPolicy_ = goog.html.sanitizer.HtmlSanitizer.defaultTokenPolicy_;

  /**
   * A function to sanitize inline CSS styles. Left undefined until CSS styles
   * are explicitly allowed.
   * @private {(undefined|function(
   *     !goog.html.sanitizer.HtmlSanitizerPolicy,
   *     string,
   *     !goog.html.sanitizer.HtmlSanitizerPolicyHints,
   *     !goog.html.sanitizer.HtmlSanitizerPolicyContext):?string)}
   */
  this.sanitizeCssPolicy_ = undefined;

  /**
   * True iff policies have been installed for the instance.
   * @private {boolean}
   */
  this.policiesInstalled_ = false;
};
370
371
372
/**
 * Appends the given names to the builder's list of allowed data attributes.
 * @param {!Array<string>} dataAttributeWhitelist Data attribute names to add.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowDataAttributes =
    function(dataAttributeWhitelist) {
  var allowedSoFar = this.dataAttributeWhitelist_;
  goog.array.extend(allowedSoFar, dataAttributeWhitelist);
  return this;
};
382
383
384
/**
 * Allows form tags in the HTML. Unless this is called, FORM is added to the
 * tag blacklist when the policies are installed, so form tags and their
 * content are dropped.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowFormTag = function() {
  // Read by installPolicies_ to decide whether FORM gets blacklisted.
  this.allowFormTag_ = true;
  return this;
};
393
394
395
/**
 * Extends the tag whitelist (Package-internal utility method only). Tag names
 * are upper-cased before being recorded.
 * @param {!Array<string>} tags The list of tags to be added to the whitelist.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 * @package
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype
    .alsoAllowTagsPrivateDoNotAccessOrElse = function(tags) {
  var whitelist = this.tagWhitelist_;
  goog.array.forEach(tags, function(tagName) {
    whitelist[tagName.toUpperCase()] = true;
  });
  return this;
};
408
409
410
/**
 * Extends the attribute whitelist (Package-internal utility method only).
 * A plain string entry is treated as that attribute on all tags ("*") with no
 * custom policy. Every added handler is also recorded in the override list,
 * so it is not wrapped with a default policy later.
 * @param {!Array<(string|!goog.html.sanitizer.HtmlSanitizerAttributePolicy)>}
 *     attrs The list of attributes to be added to the whitelist.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 * @package
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype
    .alsoAllowAttributesPrivateDoNotAccessOrElse = function(attrs) {
  var whitelist = this.attributeWhitelist_;
  var overrides = this.attributeOverrideList_;
  goog.array.forEach(attrs, function(entry) {
    var attrPolicy = goog.isString(entry) ?
        {tagName: '*', attributeName: entry, policy: null} :
        entry;
    var handlerName = goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(
        attrPolicy.tagName, attrPolicy.attributeName);
    // Fall back to the plain clean-up handler when no policy was supplied.
    whitelist[handlerName] = attrPolicy.policy ||
        /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
            goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
    overrides[handlerName] = true;
  });
  return this;
};
433
434
435
/**
 * Turns off sanitization of template tag contents so that they are passed
 * through unmodified (Package-internal utility method only).
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 * @throws {!Error} If HTML_SANITIZER_TEMPLATE_SUPPORTED is false.
 * @package
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype
    .keepUnsanitizedTemplateContentsPrivateDoNotAccessOrElse = function() {
  if (goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED) {
    this.shouldSanitizeTemplateContents_ = false;
    return this;
  }
  throw new Error(
      'Cannot let unsanitized template contents through on ' +
      'browsers that do not support TEMPLATE');
};
452
453
454
/**
 * Allows only the provided whitelist of tags. Tags still need to be in the
 * TagWhitelist to be allowed.
 * <p>
 * SPAN tags are ALWAYS ALLOWED as part of the mechanism required to preserve
 * the HTML tree structure (when removing non-blacklisted tags and
 * non-whitelisted tags).
 * <p>
 * The builder's tag whitelist is replaced only after every requested tag has
 * been validated; if this method throws, the previous whitelist is left
 * unchanged.
 * @param {!Array<string>} tagWhitelist Tag names to allow (case-insensitive).
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 * @throws {Error} Thrown if an attempt is made to allow a non-whitelisted tag.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.onlyAllowTags = function(
    tagWhitelist) {
  // Assemble the replacement on the side: assigning to this.tagWhitelist_
  // up front would leave the builder with a half-replaced whitelist whenever
  // a later tag is rejected.
  var newWhitelist = {'SPAN': true};
  goog.array.forEach(tagWhitelist, function(tag) {
    tag = tag.toUpperCase();
    if (!goog.html.sanitizer.TagWhitelist[tag]) {
      throw new Error(
          'Only whitelisted tags can be allowed. See ' +
          'goog.html.sanitizer.TagWhitelist');
    }
    newWhitelist[tag] = true;
  });
  this.tagWhitelist_ = newWhitelist;
  return this;
};
480
481
482
/**
 * Allows only the provided whitelist of attributes, possibly setting a custom
 * policy for them. The set of tag/attribute combinations need to be a subset
 * of the currently allowed combinations.
 * <p>
 * Note that you cannot define a generic handler for an attribute if only a
 * tag-specific one is present, and vice versa. To configure the sanitizer to
 * accept an attribute only for a specific tag when only a generic handler is
 * whitelisted, use the goog.html.sanitizer.HtmlSanitizerPolicyHints parameter
 * and simply reject the attribute in unwanted tags.
 * <p>
 * Also note that the sanitizer's policy is still called after the provided
 * one, to ensure that supplying misconfigured policy cannot introduce
 * vulnerabilities. To completely override an existing attribute policy or to
 * allow new attributes, see the goog.html.sanitizer.unsafe package.
 * <p>
 * The builder's attribute whitelist is replaced only after every requested
 * attribute has been validated; if this method throws, the previous whitelist
 * is left unchanged.
 * @param {!Array<(string|!goog.html.sanitizer.HtmlSanitizerAttributePolicy)>}
 *     attrWhitelist The subset of attributes that the sanitizer will accept.
 *     Attributes can come in of two forms:
 *     - string: allow all values for this attribute on all tags.
 *     - HtmlSanitizerAttributePolicy: allows specifying a policy for a
 *         particular tag. The tagName can be "*", which means all tags. If no
 *         policy is passed, the default is to allow all values.
 *     The tag and attribute names are case-insensitive.
 *     Note that the policy for id, URLs, names etc is controlled separately
 *     (using withCustom* methods).
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 * @throws {Error} Thrown if an attempt is made to allow a non-whitelisted
 *     attribute.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.onlyAllowAttributes =
    function(attrWhitelist) {
  var oldWhitelist = this.attributeWhitelist_;
  // Assemble the replacement on the side: clearing this.attributeWhitelist_
  // up front would leave the builder with an empty or partial whitelist
  // whenever a later attribute is rejected.
  var newWhitelist = {};
  goog.array.forEach(attrWhitelist, function(attr) {
    if (goog.typeOf(attr) === 'string') {
      attr = {tagName: '*', attributeName: attr.toUpperCase(), policy: null};
    }
    var handlerName = goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(
        attr.tagName, attr.attributeName);
    if (!oldWhitelist[handlerName]) {
      throw new Error('Only whitelisted attributes can be allowed.');
    }
    newWhitelist[handlerName] = attr.policy ?
        attr.policy :
        /** @type {goog.html.sanitizer.HtmlSanitizerPolicy} */ (
            goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_);
  });
  this.attributeWhitelist_ = newWhitelist;
  return this;
};
531
532
533
/**
 * Adds the original tag name in the data attribute 'original-tag' when
 * unknown tags are sanitized to &lt;span&gt;, so that the caller can
 * distinguish them from actual &lt;span&gt; tags.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.addOriginalTagNames =
    function() {
  // Copied onto the sanitizer when it is constructed from this builder.
  this.shouldAddOriginalTagNames_ = true;
  return this;
};
544
545
546
/**
 * Sets a custom network URL policy. The URL policy is adapted with
 * sanitizeUrl_ before it is stored.
 * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy}
 *     customNetworkReqUrlPolicy
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype
    .withCustomNetworkRequestUrlPolicy = function(customNetworkReqUrlPolicy) {
  var adaptedPolicy =
      goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(customNetworkReqUrlPolicy);
  this.networkRequestUrlPolicy_ = adaptedPolicy;
  return this;
};
558
559
560
/**
 * Sets a custom non-network URL policy. The URL policy is adapted with
 * sanitizeUrl_ before it is stored.
 * @param {!goog.html.sanitizer.HtmlSanitizerUrlPolicy} customUrlPolicy
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomUrlPolicy =
    function(customUrlPolicy) {
  var adaptedPolicy =
      goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(customUrlPolicy);
  this.urlPolicy_ = adaptedPolicy;
  return this;
};
571
572
573
/**
 * Sets a custom name policy, replacing the one currently stored on the
 * builder.
 * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} customNamePolicy
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomNamePolicy =
    function(customNamePolicy) {
  // Stored as-is; installPolicies_ binds it into the name sanitizer.
  this.namePolicy_ = customNamePolicy;
  return this;
};
583
584
585
/**
 * Sets a custom token policy, replacing the one currently stored on the
 * builder.
 * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} customTokenPolicy
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.withCustomTokenPolicy =
    function(customTokenPolicy) {
  // Stored as-is; installPolicies_ binds it into the class and id sanitizers.
  this.tokenPolicy_ = customTokenPolicy;
  return this;
};
595
596
597
/**
 * Allows inline CSS styles by selecting sanitizeCssBlock_ as the CSS policy.
 * @return {!goog.html.sanitizer.HtmlSanitizer.Builder} This builder.
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.allowCssStyles =
    function() {
  // installPolicies_ installs a drop-everything STYLE handler while this
  // field is unset.
  this.sanitizeCssPolicy_ = goog.html.sanitizer.HtmlSanitizer.sanitizeCssBlock_;
  return this;
};
606
607
608
/**
 * Chains a custom policy function with the sanitizer's default policy. The
 * custom policy runs first; when it yields null or undefined the result is
 * null, otherwise its output is passed through the default policy.
 * @param {?goog.html.sanitizer.HtmlSanitizerPolicy} customPolicy The custom
 *     policy for the tag/attribute combination.
 * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} defaultPolicy The
 *     sanitizer's policy that is always called after the custom policy.
 * @return {!goog.html.sanitizer.HtmlSanitizerPolicy}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.wrapPolicy_ = function(
    customPolicy, defaultPolicy) {
  var chained = function(value, hints, ctx, policy) {
    var customResult = customPolicy(value, hints, ctx, policy);
    if (customResult == null) {
      return null;
    }
    return defaultPolicy(customResult, hints, ctx, policy);
  };
  return /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (chained);
};
625
626
627
/**
 * Installs the sanitizer's default policy for a specific tag/attribute
 * combination on the provided whitelist. Nothing happens unless a policy
 * already exists for the key and the key is absent from the override list;
 * in that case the existing policy is wrapped so that the default policy
 * runs after it.
 * @param {!Object<string, !goog.html.sanitizer.HtmlSanitizerPolicy>}
 *     whitelist The whitelist to modify.
 * @param {!Object<string, boolean>} overrideList The set of attributes
 *     handlers that should not be wrapped with a default policy.
 * @param {string} key The tag/attribute combination
 * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} defaultPolicy The
 *     sanitizer's policy.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.installDefaultPolicy_ = function(
    whitelist, overrideList, key, defaultPolicy) {
  var existingPolicy = whitelist[key];
  if (!existingPolicy || overrideList[key]) {
    return;
  }
  whitelist[key] = goog.html.sanitizer.HtmlSanitizer.wrapPolicy_(
      existingPolicy, defaultPolicy);
};
646
647
648
/**
 * Builds and returns a goog.html.sanitizer.HtmlSanitizer object configured
 * from this builder.
 * @return {!goog.html.sanitizer.HtmlSanitizer}
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.build = function() {
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer(this);
  return sanitizer;
};
655
656
/**
 * Installs the sanitization policies for the attributes, and blacklists the
 * FORM tag unless form tags were allowed.
 * May only be called once; a second call throws.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.Builder.prototype.installPolicies_ =
    function() {
  if (this.policiesInstalled_) {
    throw new Error('HtmlSanitizer.Builder.build() can only be used once.');
  }

  if (!this.allowFormTag_) {
    this.tagBlacklist_['FORM'] = true;
  }

  var whitelist = this.attributeWhitelist_;
  var overrides = this.attributeOverrideList_;

  /**
   * Installs one default policy for each of the given handler keys.
   * @param {!Array<string>} keys Tag/attribute handler keys.
   * @param {!goog.html.sanitizer.HtmlSanitizerPolicy} policy
   */
  var installForAll = function(keys, policy) {
    for (var i = 0; i < keys.length; i++) {
      goog.html.sanitizer.HtmlSanitizer.installDefaultPolicy_(
          whitelist, overrides, keys[i], policy);
    }
  };

  // Binding all the non-trivial attribute sanitizers to the appropriate,
  // potentially customizable, handling functions at build().
  installForAll(
      ['* USEMAP'],
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (
          goog.html.sanitizer.HtmlSanitizer.sanitizeUrlFragment_));

  installForAll(['* ACTION', '* CITE', '* HREF'], this.urlPolicy_);

  installForAll(
      [
        // LONGDESC can result in a network request. See b/23381636.
        '* LONGDESC', '* SRC', 'LINK HREF'
      ],
      this.networkRequestUrlPolicy_);

  installForAll(
      ['* FOR', '* HEADERS', '* NAME'],
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
          goog.html.sanitizer.HtmlSanitizer.sanitizeName_,
          this.namePolicy_)));

  installForAll(
      ['A TARGET'],
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
          goog.html.sanitizer.HtmlSanitizer.allowedAttributeValues_,
          ['_blank', '_self'])));

  installForAll(
      ['* CLASS'],
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
          goog.html.sanitizer.HtmlSanitizer.sanitizeClasses_,
          this.tokenPolicy_)));

  installForAll(
      ['* ID'],
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
          goog.html.sanitizer.HtmlSanitizer.sanitizeId_, this.tokenPolicy_)));

  // Inline styles go through the CSS policy when one was selected; otherwise
  // the STYLE handler always yields null.
  var stylePolicy = this.sanitizeCssPolicy_ ?
      /** @type {!goog.html.sanitizer.HtmlSanitizerPolicy} */ (goog.partial(
          this.sanitizeCssPolicy_, this.networkRequestUrlPolicy_)) :
      goog.functions.NULL;
  installForAll(['* STYLE'], stylePolicy);

  this.policiesInstalled_ = true;
};
735
736
737
/**
 * The default policy for URLs: the value is run through
 * goog.html.SafeUrl.sanitize (adapted via sanitizeUrl_), so the result is
 * null whenever that yields goog.html.SafeUrl.INNOCUOUS_STRING.
 * @param {string} token The URL to undergo this policy.
 * @return {?string}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.defaultUrlPolicy_ =
    goog.html.sanitizer.HtmlSanitizer.sanitizeUrl_(goog.html.SafeUrl.sanitize);


/**
 * The default policy for URLs which cause network requests: drop all.
 * @param {string} token The URL to undergo this policy.
 * @return {null}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.defaultNetworkRequestUrlPolicy_ =
    goog.functions.NULL;


/**
 * The default policy for attribute names: drop all.
 * @param {string} token The name to undergo this policy.
 * @return {?string}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.defaultNamePolicy_ = goog.functions.NULL;


/**
 * The default policy for other tokens (i.e. class names and IDs): drop all.
 * @param {string} token The token to undergo this policy.
 * @return {?string}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.defaultTokenPolicy_ = goog.functions.NULL;
773
774
775
776
/**
 * Computes the key used in the attribute handlers dictionary for a node name
 * and an attribute name: the two joined by a single space and upper-cased.
 * When the node name is missing or empty, '*' is used, yielding a key that
 * applies to all nodes.
 * @param {?string} nodeName
 * @param {string} attributeName
 * @return {string} key into attribute handlers dict
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.attrIdentifier_ = function(
    nodeName, attributeName) {
  var tagPart = nodeName || '*';
  var key = tagPart + ' ' + attributeName;
  return key.toUpperCase();
};
792
793
794
/**
 * Sanitizes a block of CSS rules taken from the policy context's cssStyle.
 * Returns null when the context carries no cssStyle or when the sanitized
 * style is the empty string.
 * @param {goog.html.sanitizer.HtmlSanitizerPolicy} policySanitizeUrl Policy
 *     applied to each URI found in the style.
 * @param {string} attrValue
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Its
 *     cssProperty field is overwritten for each rewritten URI.
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyContext} policyContext
 * @return {?string} sanitizedCss from the policyContext
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeCssBlock_ = function(
    policySanitizeUrl, attrValue, policyHints, policyContext) {
  var cssStyle = policyContext.cssStyle;
  if (!cssStyle) {
    return null;
  }

  // Rewrites each URI by running it through the URL policy; a rejected URI
  // becomes the empty string.
  var rewriteUri = function(uri, prop) {
    policyHints.cssProperty = prop;
    var rewritten = policySanitizeUrl(uri, policyHints) || '';
    return goog.html.uncheckedconversions
        .safeUrlFromStringKnownToSatisfyTypeContract(
            goog.string.Const.from(
                'HtmlSanitizerPolicy created with networkRequestUrlPolicy_ ' +
                'when installing \'* STYLE\' handler.'),
            rewritten);
  };

  var safeStyle = goog.html.sanitizer.CssSanitizer.sanitizeInlineStyle(
      cssStyle, rewriteUri);
  var styleText = goog.html.SafeStyle.unwrap(safeStyle);
  return styleText == '' ? null : styleText;
};
822
823
824
/**
 * Handler for attribute values that need no particular sanitization. For now
 * the value is only trimmed with goog.string.trim; the hints are unused.
 * @param {string} attrValue
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
 * @return {string} sanitizedAttrValue
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.cleanUpAttribute_ = function(
    attrValue, policyHints) {
  var trimmedValue = goog.string.trim(attrValue);
  return trimmedValue;
};
836
837
838
/**
 * Accepts an attribute value only if it belongs to a fixed set. The value is
 * trimmed and compared in lower case; on a match the trimmed value is
 * returned with its original casing, otherwise null.
 * @param {!Array<string>} allowedValues Set of allowed values lowercased.
 * @param {string} attrValue
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
 * @return {?string} sanitizedAttrValue
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.allowedAttributeValues_ = function(
    allowedValues, attrValue, policyHints) {
  var candidate = goog.string.trim(attrValue);
  if (goog.array.contains(allowedValues, candidate.toLowerCase())) {
    return candidate;
  }
  return null;
};
852
853
854
/**
 * Sanitizes URL fragments: the trimmed value is returned only when it starts
 * with '#'; anything else yields null.
 * @param {string} urlFragment
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
 * @return {?string} sanitizedAttrValue
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeUrlFragment_ = function(
    urlFragment, policyHints) {
  var fragment = goog.string.trim(urlFragment);
  var startsWithHash = fragment && fragment.charAt(0) == '#';
  // We do not apply the name or token policy to Url Fragments by design.
  return startsWithHash ? fragment : null;
};
870
871
872
/**
 * Trims an attribute name and runs it through a name policy.
 * @param {goog.html.sanitizer.HtmlSanitizerPolicy} namePolicy
 * @param {string} attrName
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints
 * @return {?string} sanitizedAttrValue
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeName_ = function(
    namePolicy, attrName, policyHints) {
  var candidateName = goog.string.trim(attrName);
  /* TODO(user): fail on names which contain illegal characters.
   * NOTE(jasvir):
   * There are two cases to be concerned about - escaped quotes in attribute
   * values which is the responsibility of the serializer and illegal
   * characters. The latter does violate the spec but I do not believe it has
   * a security consequence.
   */
  var sanitizedName = namePolicy(candidateName, policyHints);
  return sanitizedName;
};
892
893
894
/**
 * Runs every whitespace-separated token of a class attribute (i.e. every class
 * name) through the token policy and joins the surviving tokens with a single
 * space.
 *
 * The value is trimmed before splitting and empty tokens are skipped, so the
 * policy is only ever invoked with actual class names. Without this, leading
 * or trailing whitespace would hand the policy an empty string, and a
 * prefixing policy would emit a bare prefix as a bogus class.
 * @param {goog.html.sanitizer.HtmlSanitizerPolicy} tokenPolicy
 * @param {string} attrValue
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Forwarded
 *     unchanged to the policy for every token.
 * @return {?string} sanitizedAttrValue The space-joined sanitized classes, or
 *     null when no class survives (including an empty or blank value).
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeClasses_ = function(
    tokenPolicy, attrValue, policyHints) {
  // TODO(user): use a browser-supplied class list instead of a string.
  var trimmed = goog.string.trim(attrValue);
  if (!trimmed) {
    return null;
  }
  var classes = trimmed.split(/\s+/);
  var sanitizedClasses = [];
  for (var i = 0; i < classes.length; i++) {
    // Defensive: never pass an empty token to the policy.
    if (!classes[i]) {
      continue;
    }
    // TODO(user): skip classes which contain illegal characters.
    var sanitizedClass = tokenPolicy(classes[i], policyHints);
    if (sanitizedClass) {
      sanitizedClasses.push(sanitizedClass);
    }
  }
  return sanitizedClasses.length == 0 ? null : sanitizedClasses.join(' ');
};
917
918
919
/**
 * Trims an id attribute value and hands it to the supplied token policy (which
 * is what ensures the id prefix is present).
 * @param {goog.html.sanitizer.HtmlSanitizerPolicy} tokenPolicy
 * @param {string} attrValue
 * @param {goog.html.sanitizer.HtmlSanitizerPolicyHints} policyHints Forwarded
 *     unchanged to the policy.
 * @return {?string} sanitizedAttrValue Whatever the token policy returns.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.sanitizeId_ = function(
    tokenPolicy, attrValue, policyHints) {
  var id = goog.string.trim(attrValue);
  // TODO(user): fail on IDs which contain illegal characters.
  var sanitizedId = tokenPolicy(id, policyHints);
  return sanitizedId;
};
933
934
935
/**
 * Parses a string of unsanitized HTML and provides an iterator over the
 * resulting DOM tree nodes. This DOM parsing must be wholly inert (that is,
 * it does not cause execution of any active content or cause the browser to
 * issue any requests). The returned iterator is guaranteed to iterate over a
 * parent element before iterating over any of its children.
 *
 * The markup is parsed into the content of a <template> element when the
 * browser exposes one, and into the body of a freshly created HTMLDocument
 * otherwise. The walker visits element and text nodes only.
 * @param {string} unsanitizedHtml
 * @return {!TreeWalker} Dom tree iterator
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getDomTreeWalker_ = function(
    unsanitizedHtml) {
  var walkerRoot;
  var template = document.createElement('template');
  if (!('content' in template)) {
    // In browsers where <template> is not implemented, use an HTMLDocument.
    var inertDocument = document.implementation.createHTMLDocument('x');
    walkerRoot = inertDocument.body;
    inertDocument.body.innerHTML = unsanitizedHtml;
  } else {
    // Use a <template> element if possible.
    template.innerHTML = unsanitizedHtml;
    walkerRoot = template.content;
  }
  var whatToShow = NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT;
  return document.createTreeWalker(walkerRoot, whatToShow, null, false);
};
963
964
// TODO(pelizzi): both getAttribute* functions accept a Node but are defined on
965
// Element. Investigate.
966
967
/**
 * Returns an element's attributes without falling prey to things like
 * &lt;form&gt;&lt;input name="attributes"&gt;
 * &lt;input name="attributes"&gt;&lt;/form&gt;.
 *
 * The getter from the 'attributes' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; otherwise
 * the plain property is read and only trusted if it really is a NamedNodeMap.
 * @param {!Node} node
 * @return {?NamedNodeMap} The attribute map, or null when the fallback read
 *     did not yield a NamedNodeMap.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getAttributes_ = function(node) {
  var descriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['attributes'];
  if (!descriptor || !descriptor.get) {
    return node.attributes instanceof NamedNodeMap ? node.attributes : null;
  }
  return descriptor.get.apply(node);
};
984
985
/**
 * Returns a specific attribute from an element without falling prey to
 * clobbering, by invoking Element.prototype.getAttribute directly instead of
 * the node's own (possibly shadowed) property.
 * @param {!Node} node
 * @param {string} attrName
 * @return {string} The attribute value; the empty string when the node is not
 *     an Element, the prototype method is unavailable, or the lookup returns
 *     a falsy value.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getAttribute_ = function(node, attrName) {
  var nativeGetAttribute = Element.prototype.getAttribute;
  if (!nativeGetAttribute || !(node instanceof Element)) {
    return '';
  }
  var value =
      nativeGetAttribute.call(/** @type {!Element} */ (node), attrName);
  // FireFox returns null
  return value || '';
};
1002
1003
/**
 * Sets an element's attributes without falling prey to things like
 * &lt;form&gt;&lt;input name="attributes"&gt;
 * &lt;input name="attributes"&gt;&lt;/form&gt;.
 *
 * The function stored in the 'setAttribute' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is invoked on the node. When that entry
 * is missing, nothing is set.
 * @param {!Node} node
 * @param {string} name
 * @param {string} value
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.setAttribute_ = function(node, name, value) {
  var descriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['setAttribute'];
  if (!descriptor || !descriptor.value) {
    return;
  }
  try {
    descriptor.value.call(node, name, value);
  } catch (e) {
    // IE throws an exception if the src attribute contains HTTP credentials.
    // However the attribute gets set anyway.
    if (e.message.indexOf('A security problem occurred') != -1) {
      return;
    }
    throw e;
  }
};
1027
1028
1029
/**
 * Returns a node's innerHTML property value without falling prey to clobbering.
 *
 * The getter from the 'innerHTML' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; otherwise
 * the plain property is read and only trusted if it is a string.
 * @param {!Node} node
 * @return {string} The markup, or the empty string when the fallback read did
 *     not yield a string.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getInnerHTML_ = function(node) {
  var innerHtmlDescriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['innerHTML'];
  if (!innerHtmlDescriptor || !innerHtmlDescriptor.get) {
    return (typeof node.innerHTML == 'string') ? node.innerHTML : '';
  }
  return innerHtmlDescriptor.get.apply(node);
};
1044
1045
1046
/**
 * Returns an element's style without falling prey to things like
 * &lt;form&gt;&lt;input name="style"&gt;
 * &lt;input name="style"&gt;&lt;/form&gt;.
 *
 * For HTMLElement instances the getter from the 'style' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; in every
 * other case the plain property is read and only trusted if it really is a
 * CSSStyleDeclaration.
 * @param {!Node} node
 * @return {?CSSStyleDeclaration} The style declaration, or null when the
 *     fallback read did not yield a CSSStyleDeclaration.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getStyle_ = function(node) {
  var descriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['style'];
  var canUseDescriptor =
      node instanceof HTMLElement && descriptor && descriptor.get;
  if (canUseDescriptor) {
    return descriptor.get.apply(node);
  }
  return node.style instanceof CSSStyleDeclaration ? node.style : null;
};
1063
1064
1065
/**
 * Returns a node's nodeName without falling prey to things like
 * &lt;form&gt;&lt;input name="nodeName"&gt;&lt;/form&gt;.
 *
 * The getter from the 'nodeName' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; otherwise
 * the plain property is read and only trusted if it is a string.
 * @param {!Node} node
 * @return {string} The node name, or 'unknown' when the fallback read did not
 *     yield a string.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getNodeName_ = function(node) {
  var descriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['nodeName'];
  if (!descriptor || !descriptor.get) {
    return (typeof node.nodeName == 'string') ? node.nodeName : 'unknown';
  }
  return descriptor.get.apply(node);
};
1081
1082
1083
/**
 * Returns a node's parentNode without falling prey to things like
 * &lt;form&gt;&lt;input name="parentNode"&gt;&lt;/form&gt;.
 *
 * The getter from the 'parentNode' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; otherwise a
 * heuristic on the plain property is applied (see below).
 * @param {?Node} node
 * @return {?Node} The parent, or null when node is null/undefined or the
 *     fallback read looks clobbered.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getParentNode_ = function(node) {
  if (node == null) {
    return null;
  }
  var descriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['parentNode'];
  if (descriptor && descriptor.get) {
    return descriptor.get.apply(node);
  }
  // We need to ensure that parentNode is returning the actual parent node
  // and not a child node that happens to have a name of "parentNode".
  // We check that the node returned by parentNode is itself not named
  // "parentNode" - this could happen legitimately but on IE we have no better
  // means of avoiding the pitfall.
  var candidate = node.parentNode;
  var looksClobbered = candidate && candidate.name &&
      typeof candidate.name == 'string' &&
      candidate.name.toLowerCase() == 'parentnode';
  return looksClobbered ? null : candidate;
};
1113
1114
1115
/**
 * Returns the value of node.childNodes without falling prey to clobbering.
 *
 * For elements the getter from the 'childNodes' entry of
 * HTML_SANITIZER_PROPERTY_DESCRIPTORS_ is used when there is one; in every
 * other case the plain property is read and only trusted if it really is a
 * NodeList.
 * @param {!Node} node
 * @return {?NodeList} The child list, or null when the fallback read did not
 *     yield a NodeList.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getChildNodes_ = function(node) {
  var childNodesDescriptor =
      goog.html.sanitizer.HTML_SANITIZER_PROPERTY_DESCRIPTORS_['childNodes'];
  var canUseDescriptor = goog.dom.isElement(node) && childNodesDescriptor &&
      childNodesDescriptor.get;
  if (canUseDescriptor) {
    return childNodesDescriptor.get.apply(node);
  }
  return node.childNodes instanceof NodeList ? node.childNodes : null;
};
1130
1131
1132
/**
 * Parses the DOM tree of a given HTML string, then walks the tree. For each
 * element, it creates a new sanitized version, applies sanitized attributes,
 * and returns a SafeHtml object representing the sanitized tree.
 *
 * The work is delegated to sanitizeToDomNode; the resulting span is serialized
 * with XMLSerializer and the wrapping span tag is stripped from the string.
 * @param {?string} unsanitizedHtml
 * @return {!goog.html.SafeHtml} Sanitized HTML
 * @final
 */
goog.html.sanitizer.HtmlSanitizer.prototype.sanitize = function(
    unsanitizedHtml) {
  var wrapperSpan = this.sanitizeToDomNode(unsanitizedHtml);
  var serialized = new XMLSerializer().serializeToString(wrapperSpan);
  var closingTag = '</span>';

  // Remove the outer span added in sanitizeToDomNode. We could create an
  // element from it and then pull out the innerHTML, but this is more
  // performant.
  if (goog.string.startsWith(serialized, '<span')) {
    if (goog.string.endsWith(serialized, closingTag)) {
      // Keep what lies between the end of the opening tag and the closing tag.
      var contentStart = serialized.indexOf('>') + 1;
      serialized = serialized.slice(contentStart, -closingTag.length);
    } else if (goog.string.endsWith(serialized, '/>')) {
      // Self-closed span: there is no content at all.
      serialized = '';
    }
  }

  return goog.html.uncheckedconversions
      .safeHtmlFromStringKnownToSatisfyTypeContract(
          goog.string.Const.from('Output of HTML sanitizer'), serialized);
};
1161
1162
1163
/**
 * Parses the DOM tree of a given HTML string, then walks the tree. For each
 * element, it creates a new sanitized version, applies sanitized attributes,
 * and returns a span element containing the sanitized content.
 *
 * An empty span is returned when the sanitizer is not supported, when the
 * input is empty/null, or when building the inert DOM tree throws.
 * @param {?string} unsanitizedHtml
 * @return {!HTMLSpanElement} Sanitized HTML
 * @final
 */
goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeToDomNode = function(
    unsanitizedHtml) {
  var outputSpan =
      /** @type {!HTMLSpanElement} */ (document.createElement('span'));

  if (!goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_ || !unsanitizedHtml) {
    // TODO(danesh): IE9 or earlier versions don't provide an easy way to
    // parse HTML inertly. Handle in a way other than an empty span perhaps.
    return outputSpan;
  }

  // Get the treeWalker initialized.
  var walker;
  try {
    walker =
        goog.html.sanitizer.HtmlSanitizer.getDomTreeWalker_(unsanitizedHtml);
  } catch (e) {
    return outputSpan;
  }

  // Maps the bookkeeping number stamped on each dirty element to its clean
  // counterpart, so that the clean parent of a node can be found later.
  var cleanNodesByNumber = {};
  // Running counter providing a unique number for each visited node.
  var nodeNumber = 0;
  for (var dirtyNode = walker.nextNode(); dirtyNode;
       dirtyNode = walker.nextNode()) {
    nodeNumber++;

    // Get a clean (sanitized) version of the dirty node.
    var cleanNode = this.sanitizeElement_(dirtyNode);
    if (cleanNode.nodeType != goog.dom.NodeType.TEXT) {
      this.sanitizeAttrs_(dirtyNode, cleanNode);
      cleanNodesByNumber[nodeNumber] = cleanNode;
      // Stamp the dirty node so its children can look up the clean parent.
      goog.html.sanitizer.HtmlSanitizer.setAttribute_(
          dirtyNode, goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_,
          String(nodeNumber));
    }

    // TODO(pelizzi): [IMPROVEMENT] type-checking against clobbering (e.g.
    // ClobberedNode wrapper). Closure can unwrap these at compile time, see
    // ClosureOptimizePrimitives.java, jakubvrana has created one for
    // goog.dom.Tag. Alternatively, create two actual wrappers that expose
    // clobber-safe functions, getters and setters for Node and Element.

    // TODO(pelizzi): [IMPROVEMENT] consider switching from the number-keyed
    // map to a WeakMap for browsers that support it (e.g. use a ElementWeakMap
    // that falls back to using data attributes).
    // @type {ElementWeakMap<ClobberedNode, Node>}

    // TODO(pelizzi): [IMPROVEMENT] add an API to sanitize *from* DOM nodes so
    // that we don't have to use innerHTML on template recursion but instead we
    // can use importNode. The API could also be public as it is still a way to
    // make a document fragment conform to a policy, somewhat useful.

    // Template tag contents require special handling as they are not traversed
    // by the treewalker.
    var dirtyNodeName =
        goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode);
    if (goog.html.sanitizer.HTML_SANITIZER_TEMPLATE_SUPPORTED &&
        dirtyNodeName.toLowerCase() === 'template' &&
        !cleanNode.hasAttribute(
            goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_)) {
      this.processTemplateContents_(dirtyNode, cleanNode);
    }

    // Finds the parent to which cleanNode should be appended.
    var dirtyParent =
        goog.html.sanitizer.HtmlSanitizer.getParentNode_(dirtyNode);
    var attachToOutputSpan = false;
    if (goog.isNull(dirtyParent)) {
      attachToOutputSpan = true;
    } else {
      var parentIsBody =
          goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyParent)
              .toLowerCase() == 'body';
      var parentIsFragment =
          dirtyParent.nodeType == goog.dom.NodeType.DOCUMENT_FRAGMENT;
      if (parentIsBody || parentIsFragment) {
        var dirtyGrandParent =
            goog.html.sanitizer.HtmlSanitizer.getParentNode_(dirtyParent);
        if (parentIsFragment && goog.isNull(dirtyGrandParent)) {
          // The node is an immediate child of the inert parent template
          // element.
          attachToOutputSpan = true;
        } else if (parentIsBody) {
          // Check whether the node is an immediate child of the inert parent
          // HtmlDocument.
          var dirtyGreatGrandParent =
              goog.html.sanitizer.HtmlSanitizer.getParentNode_(
                  dirtyGrandParent);
          attachToOutputSpan =
              goog.isNull(goog.html.sanitizer.HtmlSanitizer.getParentNode_(
                  dirtyGreatGrandParent));
        }
      }
    }

    var target = (attachToOutputSpan || !dirtyParent) ?
        outputSpan :
        cleanNodesByNumber[goog.html.sanitizer.HtmlSanitizer.getAttribute_(
            dirtyParent,
            goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_ATTR_NAME_)];
    // Children of a template element live in its content fragment.
    if (target.content) {
      target = target.content;
    }

    // Do not attach blacklisted tags that have been sanitized into templates.
    var isBlacklistedPlaceholder = goog.dom.isElement(cleanNode) &&
        cleanNode.hasAttribute(
            goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_);
    if (!isBlacklistedPlaceholder) {
      target.appendChild(cleanNode);
    }
  }

  return outputSpan;
};
1287
1288
1289
/**
 * Returns a sanitized version of an element, with no children or user-provided
 * attributes.
 *
 * Text nodes are copied. Every other node is replaced by a fresh element whose
 * tag depends on which list the upper-cased node name is found in:
 * blacklisted names become a marked 'template', whitelisted names are kept,
 * and anything else becomes a 'span'.
 * @param {!Node} dirtyNode
 * @return {!Node}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeElement_ = function(
    dirtyNode) {
  // Text nodes don't need to be sanitized.
  if (dirtyNode.nodeType == goog.dom.NodeType.TEXT) {
    return document.createTextNode(dirtyNode.data);
  }

  // Non text nodes get an empty node based on black/white lists.
  var upperName =
      goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode).toUpperCase();
  var isBlacklisted = upperName in goog.html.sanitizer.TagBlacklist ||
      upperName in this.tagBlacklist_;
  var replacedBySpan = false;
  var cleanTagName;
  if (isBlacklisted) {
    // If it's in the inert blacklist, replace with template (and then add a
    // special data attribute to distinguish it from real template tags).
    // Note that this node will not be added to the final output, i.e. the
    // template tag is only an internal representation, and eventually will be
    // deleted.
    cleanTagName = 'template';
  } else if (this.tagWhitelist_[upperName]) {
    // If it's in the whitelist, keep as is.
    cleanTagName = upperName;
  } else {
    // If it's not in any list, replace with span. If the relevant builder
    // option is enabled, they will bear the original tag name in a data
    // attribute.
    cleanTagName = 'span';
    replacedBySpan = true;
  }

  var cleanElement = document.createElement(cleanTagName);
  if (this.shouldAddOriginalTagNames_ && replacedBySpan) {
    goog.html.sanitizer.HtmlSanitizer.setAttribute_(
        cleanElement, goog.html.sanitizer.HTML_SANITIZER_SANITIZED_ATTR_NAME_,
        upperName.toLowerCase());
  }
  if (isBlacklisted) {
    goog.html.sanitizer.HtmlSanitizer.setAttribute_(
        cleanElement, goog.html.sanitizer.HTML_SANITIZER_BLACKLISTED_TAG_, '');
  }
  return cleanElement;
};
1339
1340
1341
/**
 * Applies sanitized versions of attributes from a dirtyNode to a corresponding
 * cleanNode. Attributes whose sanitized value is null are not copied.
 * @param {!Node} dirtyNode
 * @param {!Node} cleanNode
 * @return {!Node} cleanNode with sanitized attributes
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeAttrs_ = function(
    dirtyNode, cleanNode) {
  var dirtyAttributes =
      goog.html.sanitizer.HtmlSanitizer.getAttributes_(dirtyNode);
  if (dirtyAttributes == null) {
    return cleanNode;
  }
  var index = 0;
  var dirtyAttribute;
  // Stops at the first falsy entry, i.e. past the end of the attribute map.
  while ((dirtyAttribute = dirtyAttributes[index++])) {
    if (!dirtyAttribute.specified) {
      continue;
    }
    var sanitizedValue = this.sanitizeAttribute_(dirtyNode, dirtyAttribute);
    if (goog.isNull(sanitizedValue)) {
      continue;
    }
    goog.html.sanitizer.HtmlSanitizer.setAttribute_(
        cleanNode, dirtyAttribute.name, sanitizedValue);
  }
  return cleanNode;
};
1366
1367
1368
/**
 * Sanitizes an attribute value by looking up an attribute handler for the given
 * node and attribute names.
 *
 * Attributes whose name starts with the sanitizer's bookkeeping prefix are
 * always dropped. For any other attribute, a handler registered for this
 * specific tag is preferred; failing that, the handler registered for the
 * wildcard tag is used; with no handler at all the attribute is dropped.
 * @param {!Node} dirtyNode
 * @param {!Attr} attribute
 * @return {?string} sanitizedAttrValue The handler's result, or null when the
 *     attribute must not be copied to the output.
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.prototype.sanitizeAttribute_ = function(
    dirtyNode, attribute) {
  var attributeName = attribute.name;
  // goog.string.startsWith takes (str, prefix): the attribute name is the
  // string under test and the bookkeeping prefix is the prefix. With the
  // arguments swapped, bookkeeping attributes would slip through while short
  // names that are a leading substring of the prefix would be dropped.
  if (goog.string.startsWith(
          attributeName,
          goog.html.sanitizer.HTML_SANITIZER_BOOKKEEPING_PREFIX_)) {
    return null;
  }

  var nodeName = goog.html.sanitizer.HtmlSanitizer.getNodeName_(dirtyNode);
  var unsanitizedAttrValue = attribute.value;

  // Create policy hints object
  var policyHints = {
    tagName: goog.string.trim(nodeName).toLowerCase(),
    attributeName: goog.string.trim(attributeName).toLowerCase()
  };
  var policyContext = goog.html.sanitizer.HtmlSanitizer.getContext_(
      policyHints.attributeName, dirtyNode);

  var handler;
  // Prefer attribute handler for this specific tag.
  var tagHandlerIndex = goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(
      nodeName, attributeName);
  if (tagHandlerIndex in this.attributeHandlers_) {
    handler = this.attributeHandlers_[tagHandlerIndex];
    return handler(unsanitizedAttrValue, policyHints, policyContext);
  }
  // Fall back on attribute handler for wildcard tag.
  var genericHandlerIndex =
      goog.html.sanitizer.HtmlSanitizer.attrIdentifier_(null, attributeName);
  if (genericHandlerIndex in this.attributeHandlers_) {
    handler = this.attributeHandlers_[genericHandlerIndex];
    return handler(unsanitizedAttrValue, policyHints, policyContext);
  }
  return null;
};
1412
1413
1414
/**
 * Processes the contents of a template tag. These are not traversed through the
 * treewalker because they belong to a separate document, and thus require
 * special handling.
 * <p>
 * If the relevant builder option is enabled and the template tag is allowed,
 * this method copies the contents over to the output DOM tree without
 * sanitization, otherwise the template contents are sanitized recursively.
 * <p>
 * In both branches the child list is copied into an array before any node is
 * moved. childNodes is a live list, so appending a child elsewhere removes it
 * from the list being iterated and would make the loop skip every other node.
 * @param {!Node} dirtyNode
 * @param {!Node} cleanNode
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.prototype.processTemplateContents_ = function(
    dirtyNode, cleanNode) {
  var moveIntoCleanNode = function(node) {
    cleanNode.appendChild(node);
  };
  // If the template element was sanitized into a span tag, do not insert
  // unsanitized tags!
  if (this.shouldSanitizeTemplateContents_ ||
      cleanNode.nodeName.toLowerCase() !== 'template') {
    var dirtyNodeHTML =
        goog.html.sanitizer.HtmlSanitizer.getInnerHTML_(dirtyNode);
    var templateSpan = this.sanitizeToDomNode(dirtyNodeHTML);
    // appendChild with a forEach instead of an innerHTML as the latter is
    // slower. Iterate over a snapshot because childNodes is live.
    goog.array.forEach(
        goog.array.toArray(templateSpan.childNodes), moveIntoCleanNode);
  } else {
    var templateDoc =
        /** @type {!HTMLTemplateElement} */ (cleanNode).content.ownerDocument;
    var dirtyCopy =
        goog.asserts.assert(templateDoc.importNode(dirtyNode, true));
    var dirtyCopyChildren =
        goog.html.sanitizer.HtmlSanitizer.getChildNodes_(dirtyCopy);
    // getChildNodes_ returns null when it cannot obtain a trustworthy list;
    // in that case copy nothing rather than fail on a null list.
    if (dirtyCopyChildren) {
      // appendChild with a forEach instead of an innerHTML as the latter is
      // slower and vulnerable to mXSS. Iterate over a snapshot because
      // childNodes is live.
      goog.array.forEach(
          goog.array.toArray(dirtyCopyChildren), moveIntoCleanNode);
    }
  }
};
1454
1455
1456
/**
 * Retrieves a HtmlSanitizerPolicyContext from a dirty node given an attribute
 * name. The context's cssStyle field is filled with the node's style only for
 * the 'style' attribute, and is left undefined for any other attribute.
 * @param {string} attributeName
 * @param {!Node} dirtyNode
 * @return {!goog.html.sanitizer.HtmlSanitizerPolicyContext}
 * @private
 */
goog.html.sanitizer.HtmlSanitizer.getContext_ = function(
    attributeName, dirtyNode) {
  var isStyleAttribute = attributeName == 'style';
  return {
    cssStyle: isStyleAttribute ?
        goog.html.sanitizer.HtmlSanitizer.getStyle_(dirtyNode) :
        undefined
  };
};
1473
1474
1475
/**
 * Sanitizes a HTML string using a sanitizer with default options, i.e. one
 * obtained from a Builder on which no option has been set.
 * @param {string} unsanitizedHtml
 * @return {!goog.html.SafeHtml} sanitizedHtml
 */
goog.html.sanitizer.HtmlSanitizer.sanitize = function(unsanitizedHtml) {
  return new goog.html.sanitizer.HtmlSanitizer.Builder().build().sanitize(
      unsanitizedHtml);
};
1484
1485