CoCalc -- htmlprettyprinter.js

GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/format/htmlprettyprinter.js
²⁸⁶⁸ views
1
// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
/**
16
 * @fileoverview Provides functions to parse and pretty-print HTML strings.
17
 *
18
 */
19

20
goog.provide('goog.format.HtmlPrettyPrinter');
21
goog.provide('goog.format.HtmlPrettyPrinter.Buffer');
22

23
goog.require('goog.dom.TagName');
24
goog.require('goog.object');
25
goog.require('goog.string.StringBuffer');
26

27

28

29
/**
 * Formats HTML so that it is easier for a human to read, by inserting line
 * breaks around selected tags.
 * TODO(user): Add hierarchical indentation.
 * @param {number=} opt_timeOutMillis Maximum number of milliseconds a single
 *     #format call may spend. When the budget is exceeded the remaining input
 *     is returned unformatted. Omitted, 0, or a negative number means no
 *     timeout.
 * @constructor
 * @final
 */
goog.format.HtmlPrettyPrinter = function(opt_timeOutMillis) {
  // Anything that is not a positive number (including undefined) disables the
  // timeout.
  var timeOut = 0;
  if (opt_timeOutMillis > 0) {
    timeOut = opt_timeOutMillis;
  }

  /**
   * Time budget for #format in milliseconds; 0 means unlimited.
   * @type {number}
   * @private
   */
  this.timeOutMillis_ = timeOut;
};
47

48

49
/**
 * Shared printer used by the static #format helper. Stays null until
 * #getInstance_ is first called.
 * @private {goog.format.HtmlPrettyPrinter?}
 */
goog.format.HtmlPrettyPrinter.instance_ = null;


/**
 * Returns the shared printer, constructing it (without a timeout) on first
 * use.
 * @return {!goog.format.HtmlPrettyPrinter} Singleton.
 * @private
 */
goog.format.HtmlPrettyPrinter.getInstance_ = function() {
  var Printer = goog.format.HtmlPrettyPrinter;
  return Printer.instance_ || (Printer.instance_ = new Printer());
};
68

69

70
/**
 * Static convenience wrapper that formats with the shared singleton printer.
 * See prototype #format.
 * @param {string} html The HTML text to pretty print.
 * @return {string} Formatted result.
 */
goog.format.HtmlPrettyPrinter.format = function(html) {
  var printer = goog.format.HtmlPrettyPrinter.getInstance_();
  return printer.format(html);
};
78

79

80
/**
 * Pattern used to tokenize HTML for pretty printing. It is a global regex, so
 * successive exec() calls walk through the input one token at a time.
 * Alternatives are tried in the order listed below. Capture group 1 is '/' for
 * end tags and group 2 is the tag name; both are only set when the tag
 * alternative matched.
 * Note that '.' does not match line terminators, so a comment or meta-tag
 * that spans several lines is not matched as a single token.
 * @private {!RegExp}
 * @const
 */
goog.format.HtmlPrettyPrinter.TOKEN_REGEX_ = new RegExp(
    '(?:' +
        [
          '<!--.*?-->',            // comment
          '<!.*?>',                // meta-tag
          '<(\\/?)(\\w+)[^<>]*>',  // tag
          '[^<]+',                 // text
          '<'                      // other less-than characters
        ].join('|') +
        ')',
    'g');
89

90

91
/**
 * Tags whose contents are copied through without any added line breaks.
 * Keys are upper-case tag names.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_ =
    goog.object.createSet(
        goog.dom.TagName.SCRIPT,
        goog.dom.TagName.STYLE,
        goog.dom.TagName.PRE,
        'XMP');
99

100

101
/**
 * 'Block' tags. A line break is requested before the start tag and after the
 * end tag of each of these during pretty printing. Tags drawn mostly from the
 * HTML4 definitions for block and other non-inline tags, excepting the ones in
 * #goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.BLOCK_TAGS_ = goog.object.createSet(
    goog.dom.TagName.ADDRESS,
    goog.dom.TagName.APPLET,
    goog.dom.TagName.AREA,
    goog.dom.TagName.BASE,
    goog.dom.TagName.BASEFONT,
    goog.dom.TagName.BLOCKQUOTE,
    goog.dom.TagName.BODY,
    goog.dom.TagName.CAPTION,
    goog.dom.TagName.CENTER,
    goog.dom.TagName.COL,
    goog.dom.TagName.COLGROUP,
    goog.dom.TagName.DIR,
    goog.dom.TagName.DIV,
    goog.dom.TagName.DL,
    goog.dom.TagName.FIELDSET,
    goog.dom.TagName.FORM,
    goog.dom.TagName.FRAME,
    goog.dom.TagName.FRAMESET,
    goog.dom.TagName.H1,
    goog.dom.TagName.H2,
    goog.dom.TagName.H3,
    goog.dom.TagName.H4,
    goog.dom.TagName.H5,
    goog.dom.TagName.H6,
    goog.dom.TagName.HEAD,
    goog.dom.TagName.HR,
    goog.dom.TagName.HTML,
    goog.dom.TagName.IFRAME,
    goog.dom.TagName.ISINDEX,
    goog.dom.TagName.LEGEND,
    goog.dom.TagName.LINK,
    goog.dom.TagName.MENU,
    goog.dom.TagName.META,
    goog.dom.TagName.NOFRAMES,
    goog.dom.TagName.NOSCRIPT,
    goog.dom.TagName.OL,
    goog.dom.TagName.OPTGROUP,
    goog.dom.TagName.OPTION,
    goog.dom.TagName.P,
    goog.dom.TagName.PARAM,
    goog.dom.TagName.TABLE,
    goog.dom.TagName.TBODY,
    goog.dom.TagName.TD,
    goog.dom.TagName.TFOOT,
    goog.dom.TagName.TH,
    goog.dom.TagName.THEAD,
    goog.dom.TagName.TITLE,
    goog.dom.TagName.TR,
    goog.dom.TagName.UL);
127

128

129
/**
 * Non-block tags that break flow. A line break is requested after, but not
 * before, these. Tags drawn from HTML4 definitions.
 * NOTE(review): NOFRAMES is also listed in BLOCK_TAGS_, and #format checks
 * BLOCK_TAGS_ first, so the NOFRAMES entry here looks redundant -- confirm
 * before removing.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.BREAKS_FLOW_TAGS_ =
    goog.object.createSet(
        goog.dom.TagName.BR,
        goog.dom.TagName.DD,
        goog.dom.TagName.DT,
        goog.dom.TagName.LI,
        goog.dom.TagName.NOFRAMES);
138

139

140
/**
 * Empty tags. These are treated as both start and end tags, i.e. #format
 * applies the end-tag line break rule to them even without a leading '/'.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.EMPTY_TAGS_ =
    goog.object.createSet(
        goog.dom.TagName.BR,
        goog.dom.TagName.HR,
        goog.dom.TagName.ISINDEX);
147

148

149
/**
 * Breaks up HTML so it's easily readable by the user.
 *
 * The input is tokenized with TOKEN_REGEX_ and every token is copied to the
 * output unchanged; the only characters ever added are line breaks around
 * block and flow-breaking tags. Nothing is added inside SCRIPT/STYLE/PRE/XMP
 * blocks. If a timeout was configured and is exceeded, the rest of the input
 * is appended unformatted.
 *
 * @param {string} html The HTML text to pretty print.
 * @return {string} Formatted result.
 * @throws {Error} Regex error, data loss, or endless loop detected.
 */
goog.format.HtmlPrettyPrinter.prototype.format = function(html) {
  // Trim leading whitespace, but preserve first indent; in other words, keep
  // any spaces immediately before the first non-whitespace character (that's
  // what $1 is), but remove all other leading whitespace. This adjustment
  // historically had been made in Docs. The motivation is that some
  // browsers prepend several line breaks in designMode.
  html = html.replace(/^\s*?( *\S)/, '$1');

  // Trim trailing whitespace.
  html = html.replace(/\s+$/, '');

  // Keep track of how much time we've used.
  var timeOutMillis = this.timeOutMillis_;
  var startMillis = timeOutMillis ? goog.now() : 0;

  // Handles concatenation of the result and required line breaks.
  var buffer = new goog.format.HtmlPrettyPrinter.Buffer();

  // Declare these for efficiency since we access them in a loop.
  var tokenRegex = goog.format.HtmlPrettyPrinter.TOKEN_REGEX_;
  var nonPpTags = goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_;
  var blockTags = goog.format.HtmlPrettyPrinter.BLOCK_TAGS_;
  var breaksFlowTags = goog.format.HtmlPrettyPrinter.BREAKS_FLOW_TAGS_;
  var emptyTags = goog.format.HtmlPrettyPrinter.EMPTY_TAGS_;

  // The tokenizer is a shared, stateful global regex. If an earlier call was
  // aborted by an exception, its lastIndex may still point into the middle of
  // that call's input, so always start scanning from the beginning.
  tokenRegex.lastIndex = 0;

  // Used to verify we're making progress through our regex tokenization.
  var lastIndex = 0;

  // Use this to track non-pretty-printed tags and children.
  var nonPpTagStack = [];

  // Loop through each matched token.
  var match;
  while (match = tokenRegex.exec(html)) {
    // Get token.
    var token = match[0];

    // exec() always returns a slot for every capture group, so the match
    // length cannot distinguish tags from other tokens. The tag name group is
    // only filled in (and is never empty) when the tag alternative matched.
    var tagName = match[2] ? match[2].toUpperCase() : '';
    var isEndTag = match[1] == '/';

    // Comments, meta-tags, text, stray '<' and unknown tags get no breaks.
    var breakBefore = false;
    var breakAfter = false;

    if (tagName) {
      if (nonPpTags.hasOwnProperty(tagName)) {
        if (!isEndTag) {
          // Start of non-pretty-printed block. Line break before, unless we
          // are already nested inside another such block.
          breakBefore = !nonPpTagStack.length;
          nonPpTagStack.push(tagName);
        } else if (nonPpTagStack[nonPpTagStack.length - 1] == tagName) {
          // End of non-pretty-printed block. Line break after, once the
          // outermost block has been closed.
          nonPpTagStack.pop();
          breakAfter = !nonPpTagStack.length;
        }
        // Otherwise: end tag without matching start tag. Malformed HTML, no
        // line breaks.
      } else if (!nonPpTagStack.length) {
        // Only add breaks when we are not inside a non-pretty-printed block.
        if (blockTags.hasOwnProperty(tagName)) {
          // Put line break before start block and after end block tags.
          // Empty tags count as both.
          var isEmpty = emptyTags.hasOwnProperty(tagName);
          breakBefore = isEmpty || !isEndTag;
          breakAfter = isEmpty || isEndTag;
        } else if (breaksFlowTags.hasOwnProperty(tagName)) {
          // Put line break after end flow-breaking tags.
          breakAfter = isEndTag || emptyTags.hasOwnProperty(tagName);
        }
      }
    }

    buffer.pushToken(breakBefore, token, breakAfter);

    // Double check that we're making progress.
    var newLastIndex = tokenRegex.lastIndex;
    if (!token || newLastIndex <= lastIndex) {
      throw Error('Regex failed to make progress through source html.');
    }
    lastIndex = newLastIndex;

    // Out of time?
    if (timeOutMillis && goog.now() - startMillis > timeOutMillis) {
      // Push unprocessed data as one big token and reset regex object. Skip
      // the push when nothing is left: an empty token would consume the
      // pending line break without marking the buffer as being at the start
      // of a line, and the final lineBreak() below would then add a second
      // trailing newline.
      var remainder = html.substring(tokenRegex.lastIndex);
      if (remainder) {
        buffer.pushToken(false, remainder, false);
      }
      tokenRegex.lastIndex = 0;
      break;
    }
  }

  // Ensure we end in a line break.
  buffer.lineBreak();

  // Construct result string.
  var result = String(buffer);

  // Length should be original length plus # line breaks added.
  var expectedLength = html.length + buffer.breakCount;
  if (result.length != expectedLength) {
    throw Error('Lost data pretty printing html.');
  }

  return result;
};
273

274

275

276
/**
 * Output buffer for the pretty printer. It collects tokens and keeps track of
 * line breaks so that no unnecessary ones are added.
 * @constructor
 * @final
 */
goog.format.HtmlPrettyPrinter.Buffer = function() {
  /**
   * Accumulates the tokens and line breaks returned by #toString.
   * @type {goog.string.StringBuffer}
   * @private
   */
  this.out_ = new goog.string.StringBuffer();
};


/**
 * Number of line breaks this buffer has added to the output so far.
 * @type {number}
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.breakCount = 0;


/**
 * Whether the output is currently at the start of a new line. Starts out true
 * because nothing has been written yet.
 * @type {boolean}
 * @private
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.isBeginningOfNewLine_ = true;


/**
 * Whether a line break is still owed before the next token is written.
 * @type {boolean}
 * @private
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.needsNewLine_ = false;
313

314

315
/**
 * Adds token and necessary line breaks to output buffer.
 * @param {boolean} breakBefore If true, add line break before token if
 *     necessary.
 * @param {string} token Token to push.
 * @param {boolean} breakAfter If true, add line break after token if
 *     necessary. The break is deferred until the next token (or an explicit
 *     #lineBreak call) so it can be skipped when that token already starts
 *     with a line break.
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.pushToken = function(
    breakBefore, token, breakAfter) {
  var startsWithLineBreak = /^\r?\n/.test(token);

  // Due to FF3.0 bug with lists, we don't insert a \n right before </ul>.
  // See bug 1520665. Only an actual </ul> end tag qualifies: matching "/ul"
  // anywhere in the token would also hit things like <a href="/ul"> and
  // silently drop the line break owed before them.
  var isUlEndTag = /^<\/ul\b/i.test(token);

  // If this token needs a preceding line break, and
  // we haven't already added a line break, and
  // this token does not start with a line break,
  // then add line break.
  if ((this.needsNewLine_ || breakBefore) && !startsWithLineBreak &&
      !isUlEndTag) {
    this.lineBreak();
  }

  // Token.
  this.out_.append(token);

  // Remember if this string ended with a line break so we know we don't have to
  // insert another one before the next token.
  this.isBeginningOfNewLine_ = /\r?\n$/.test(token);

  // Remember if this token requires a line break after it. We don't insert it
  // here because we might not have to if the next token starts with a line
  // break.
  this.needsNewLine_ = breakAfter && !this.isBeginningOfNewLine_;
};
348

349

350
/**
 * Appends a line break unless the output is already at the start of a line.
 * Each break that is actually appended is counted in #breakCount. Calling
 * this repeatedly without pushing a token in between appends at most one
 * break.
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.lineBreak = function() {
  if (!this.isBeginningOfNewLine_) {
    this.out_.append('\n');
    ++this.breakCount;

    // Record that we are now at the start of a line so that a second call in
    // a row does not append a duplicate break, and so that no further break
    // is considered pending.
    this.isBeginningOfNewLine_ = true;
    this.needsNewLine_ = false;
  }
};
359

360

361
/**
 * @return {string} Everything pushed so far, including added line breaks, as
 *     a single string.
 * @override
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.toString = function() {
  var text = this.out_.toString();
  return text;
};
368

369
Product

Resources

Company