Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/format/htmlprettyprinter.js
2868 views
1
// Copyright 2008 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
16
* @fileoverview Provides functions to parse and pretty-print HTML strings.
17
*
18
*/
19
20
goog.provide('goog.format.HtmlPrettyPrinter');
21
goog.provide('goog.format.HtmlPrettyPrinter.Buffer');
22
23
goog.require('goog.dom.TagName');
24
goog.require('goog.object');
25
goog.require('goog.string.StringBuffer');
26
27
28
29
/**
 * Formatter that inserts line breaks into an HTML string so that it is easier
 * for a person to read.
 * TODO(user): Add hierarchical indentation.
 * @param {number=} opt_timeOutMillis Upper bound, in milliseconds, on the time
 *     a single #format call may spend tokenizing. Once the budget is used up,
 *     the rest of the input is emitted without further formatting. Omitted,
 *     zero, or negative values disable the timeout.
 * @constructor
 * @final
 */
goog.format.HtmlPrettyPrinter = function(opt_timeOutMillis) {
  // Collapse every "no timeout" spelling (undefined, 0, negatives) to 0.
  var budgetMillis = 0;
  if (opt_timeOutMillis && opt_timeOutMillis > 0) {
    budgetMillis = opt_timeOutMillis;
  }

  /**
   * Max # milliseconds to spend on #format; 0 means no limit.
   * @type {number}
   * @private
   */
  this.timeOutMillis_ = budgetMillis;
};
47
48
49
/**
 * Shared printer used by the static #format helper. Stays null until
 * #getInstance_ creates it on first use.
 * @private {goog.format.HtmlPrettyPrinter?}
 */
goog.format.HtmlPrettyPrinter.instance_ = null;
54
55
56
/**
 * Returns the shared printer, constructing it (with no timeout argument) the
 * first time it is requested.
 * @return {!goog.format.HtmlPrettyPrinter} Singleton.
 * @private
 */
goog.format.HtmlPrettyPrinter.getInstance_ = function() {
  var existing = goog.format.HtmlPrettyPrinter.instance_;
  if (existing) {
    return existing;
  }

  var created = new goog.format.HtmlPrettyPrinter();
  goog.format.HtmlPrettyPrinter.instance_ = created;
  return created;
};
68
69
70
/**
 * Static convenience wrapper: pretty prints with the shared singleton printer.
 * See prototype #format for the formatting rules.
 * @param {string} html The HTML text to pretty print.
 * @return {string} Formatted result.
 */
goog.format.HtmlPrettyPrinter.format = function(html) {
  var sharedPrinter = goog.format.HtmlPrettyPrinter.getInstance_();
  return sharedPrinter.format(html);
};
78
79
80
/**
 * Tokenizer pattern for pretty printing. The alternatives are tried in this
 * order:
 *   1. comment:   <!-- ... -->
 *   2. meta-tag:  <! ... >
 *   3. tag:       group 1 captures the optional leading '/', group 2 captures
 *                 the tag name
 *   4. text:      a run of characters containing no '<'
 *   5. a lone '<' that none of the alternatives above could consume
 *
 * The '.' in alternatives 1 and 2 does not match line terminators, so a
 * comment or meta-tag that spans several lines is not recognized as one token.
 * The pattern carries the global flag, so its lastIndex is stateful between
 * exec() calls.
 * @private {!RegExp}
 * @const
 */
goog.format.HtmlPrettyPrinter.TOKEN_REGEX_ =
    /(?:<!--.*?-->|<!.*?>|<(\/?)(\w+)[^<>]*>|[^<]+|<)/g;
89
90
91
/**
 * Tags whose contents are passed through untouched: no line breaks are added
 * between the start tag and its matching end tag.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_ = goog.object.createSet(
    goog.dom.TagName.SCRIPT,
    goog.dom.TagName.STYLE,
    goog.dom.TagName.PRE,
    'XMP');
99
100
101
/**
 * 'Block' tags. During pretty printing a line break is requested before the
 * start tag and after the end tag of each of these. The list is drawn mostly
 * from the HTML4 definitions of block and other non-inline tags, minus the
 * ones in #goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.BLOCK_TAGS_ = goog.object.createSet(
    // A
    goog.dom.TagName.ADDRESS, goog.dom.TagName.APPLET, goog.dom.TagName.AREA,
    // B
    goog.dom.TagName.BASE, goog.dom.TagName.BASEFONT,
    goog.dom.TagName.BLOCKQUOTE, goog.dom.TagName.BODY,
    // C
    goog.dom.TagName.CAPTION, goog.dom.TagName.CENTER,
    goog.dom.TagName.COL, goog.dom.TagName.COLGROUP,
    // D
    goog.dom.TagName.DIR, goog.dom.TagName.DIV, goog.dom.TagName.DL,
    // F
    goog.dom.TagName.FIELDSET, goog.dom.TagName.FORM,
    goog.dom.TagName.FRAME, goog.dom.TagName.FRAMESET,
    // H
    goog.dom.TagName.H1, goog.dom.TagName.H2, goog.dom.TagName.H3,
    goog.dom.TagName.H4, goog.dom.TagName.H5, goog.dom.TagName.H6,
    goog.dom.TagName.HEAD, goog.dom.TagName.HR, goog.dom.TagName.HTML,
    // I
    goog.dom.TagName.IFRAME, goog.dom.TagName.ISINDEX,
    // L
    goog.dom.TagName.LEGEND, goog.dom.TagName.LINK,
    // M
    goog.dom.TagName.MENU, goog.dom.TagName.META,
    // N
    goog.dom.TagName.NOFRAMES, goog.dom.TagName.NOSCRIPT,
    // O
    goog.dom.TagName.OL, goog.dom.TagName.OPTGROUP, goog.dom.TagName.OPTION,
    // P
    goog.dom.TagName.P, goog.dom.TagName.PARAM,
    // T
    goog.dom.TagName.TABLE, goog.dom.TagName.TBODY, goog.dom.TagName.TD,
    goog.dom.TagName.TFOOT, goog.dom.TagName.TH, goog.dom.TagName.THEAD,
    goog.dom.TagName.TITLE, goog.dom.TagName.TR,
    // U
    goog.dom.TagName.UL);
127
128
129
/**
 * Non-block tags that break flow. A line break is requested after these, but
 * not before them. Tags drawn from HTML4 definitions.
 *
 * NOFRAMES is also a member of BLOCK_TAGS_, and #format consults BLOCK_TAGS_
 * first, so the block-tag rule is the one that applies to it.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.BREAKS_FLOW_TAGS_ = goog.object.createSet(
    goog.dom.TagName.BR,
    goog.dom.TagName.DD,
    goog.dom.TagName.DT,
    goog.dom.TagName.LI,
    goog.dom.TagName.NOFRAMES);
138
139
140
/**
 * Empty tags. #format treats each occurrence as both a start tag and an end
 * tag when deciding where line breaks go.
 * @private {!Object}
 * @const
 */
goog.format.HtmlPrettyPrinter.EMPTY_TAGS_ = goog.object.createSet(
    goog.dom.TagName.BR,
    goog.dom.TagName.HR,
    goog.dom.TagName.ISINDEX);
147
148
149
/**
 * Breaks up HTML so it's easily readable by the user.
 *
 * The input is split into tokens with TOKEN_REGEX_ and every token is copied
 * to the output unchanged; the only characters ever added are line breaks:
 *   - before the start tag and after the matching end tag of an outermost
 *     non-pretty-printed element (SCRIPT, STYLE, PRE, XMP), with nothing
 *     added in between;
 *   - before start tags and after end tags of block elements;
 *   - after end tags of flow-breaking elements;
 *   - once at the very end, if the output does not already end in one.
 * Empty tags count as both start and end tags.
 *
 * @param {string} html The HTML text to pretty print.
 * @return {string} Formatted result.
 * @throws {Error} Regex error, data loss, or endless loop detected.
 */
goog.format.HtmlPrettyPrinter.prototype.format = function(html) {
  // Trim leading whitespace, but preserve first indent; in other words, keep
  // any spaces immediately before the first non-whitespace character (that's
  // what $1 is), but remove all other leading whitespace. This adjustment
  // historically had been made in Docs. The motivation is that some
  // browsers prepend several line breaks in designMode.
  html = html.replace(/^\s*?( *\S)/, '$1');

  // Trim trailing whitespace.
  html = html.replace(/\s+$/, '');

  // Keep track of how much time we've used. The clock is only read when a
  // timeout is configured.
  var timeOutMillis = this.timeOutMillis_;
  var startMillis = timeOutMillis ? goog.now() : 0;

  // Handles concatenation of the result and required line breaks.
  var buffer = new goog.format.HtmlPrettyPrinter.Buffer();

  // Declare these for efficiency since we access them in a loop.
  var tokenRegex = goog.format.HtmlPrettyPrinter.TOKEN_REGEX_;
  var nonPpTags = goog.format.HtmlPrettyPrinter.NON_PRETTY_PRINTED_TAGS_;
  var blockTags = goog.format.HtmlPrettyPrinter.BLOCK_TAGS_;
  var breaksFlowTags = goog.format.HtmlPrettyPrinter.BREAKS_FLOW_TAGS_;
  var emptyTags = goog.format.HtmlPrettyPrinter.EMPTY_TAGS_;

  // The tokenizer is a shared, stateful /g regex. Always start scanning from
  // the beginning of this input, even if an earlier call was interrupted by
  // an exception and left lastIndex pointing somewhere else.
  tokenRegex.lastIndex = 0;

  // Used to verify we're making progress through our regex tokenization.
  var lastIndex = 0;

  // Names of the currently open non-pretty-printed tags, innermost last.
  var nonPpTagStack = [];

  // Loop through each matched token.
  var match;
  while ((match = tokenRegex.exec(html))) {
    var token = match[0];

    // exec() always returns one slot per capture group, so the array length
    // cannot tell tags apart from other tokens. Only the tag alternative
    // fills in the tag-name group.
    var tagName = match[2] ? match[2].toUpperCase() : '';
    var isEndTag = match[1] == '/';

    // Comments, text and stray '<' characters never get line breaks.
    var breakBefore = false;
    var breakAfter = false;

    if (tagName) {
      if (nonPpTags.hasOwnProperty(tagName)) {
        if (!isEndTag) {
          // Start of non-pretty-printed block. Line break before, but only
          // for the outermost one.
          breakBefore = !nonPpTagStack.length;
          nonPpTagStack.push(tagName);
        } else if (
            nonPpTagStack.length &&
            nonPpTagStack[nonPpTagStack.length - 1] == tagName) {
          // End tag matches the innermost open start tag. Line break after,
          // but only once the outermost block has been closed.
          nonPpTagStack.pop();
          breakAfter = !nonPpTagStack.length;
        }
        // Otherwise: malformed HTML (unmatched end tag). No line breaks.
      } else if (!nonPpTagStack.length) {
        // Outside of any non-pretty-printed block.
        if (blockTags.hasOwnProperty(tagName)) {
          // Put line break before start block and after end block tags.
          var isEmpty = emptyTags.hasOwnProperty(tagName);
          breakBefore = isEmpty || !isEndTag;
          breakAfter = isEmpty || isEndTag;
        } else if (breaksFlowTags.hasOwnProperty(tagName)) {
          // Put line break after end flow-breaking tags.
          breakAfter = isEndTag || emptyTags.hasOwnProperty(tagName);
        }
        // All other tags, no line break.
      }
      // Inside a non-pretty-printed block: no new line breaks.
    }

    buffer.pushToken(breakBefore, token, breakAfter);

    // Double check that we're making progress.
    var newLastIndex = tokenRegex.lastIndex;
    if (!token || newLastIndex <= lastIndex) {
      throw Error('Regex failed to make progress through source html.');
    }
    lastIndex = newLastIndex;

    // Out of time?
    if (timeOutMillis && goog.now() - startMillis > timeOutMillis) {
      // Push unprocessed data as one big token and reset regex object. An
      // empty remainder is skipped: pushing '' would make the buffer forget
      // that it is at the start of a line and double the trailing break.
      var remainder = html.substring(tokenRegex.lastIndex);
      if (remainder) {
        buffer.pushToken(false, remainder, false);
      }
      tokenRegex.lastIndex = 0;
      break;
    }
  }

  // Ensure we end in a line break.
  buffer.lineBreak();

  // Construct result string.
  var result = String(buffer);

  // Length should be original length plus # line breaks added.
  var expectedLength = html.length + buffer.breakCount;
  if (result.length != expectedLength) {
    throw Error('Lost data pretty printing html.');
  }

  return result;
};
273
274
275
276
/**
 * Output sink for the pretty printer. Tokens are appended through #pushToken,
 * and the buffer keeps enough state about line breaks to avoid adding
 * unnecessary ones.
 * @constructor
 * @final
 */
goog.format.HtmlPrettyPrinter.Buffer = function() {
  /**
   * Accumulates the tokens and line breaks returned by #toString.
   * @type {goog.string.StringBuffer}
   * @private
   */
  this.out_ = new goog.string.StringBuffer();
};
290
291
292
/**
 * Number of line breaks this buffer has inserted itself (line breaks that
 * arrive inside pushed tokens are not counted).
 * @type {number}
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.breakCount = 0;
297
298
299
/**
 * Whether the output currently sits at the start of a line. True for a fresh
 * buffer; #pushToken updates it from the end of each pushed token.
 * @type {boolean}
 * @private
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.isBeginningOfNewLine_ = true;
305
306
307
/**
 * Whether the previously pushed token asked for a line break after it. The
 * break is deferred until the next token is pushed.
 * @type {boolean}
 * @private
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.needsNewLine_ = false;
313
314
315
/**
 * Adds token and necessary line breaks to output buffer.
 * @param {boolean} breakBefore If true, add line break before token if
 *     necessary.
 * @param {string} token Token to push. It is appended verbatim.
 * @param {boolean} breakAfter If true, add line break after token if
 *     necessary. The break is deferred until the next token arrives.
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.pushToken = function(
    breakBefore, token, breakAfter) {
  // A break is wanted if either the previous token asked for one after it or
  // this token asks for one before it.
  var wantsBreak = this.needsNewLine_ || breakBefore;

  // The token already supplies the break if it starts with one.
  var startsWithBreak = /^\r?\n/.test(token);

  // Due to FF3.0 bug with lists, we don't insert a \n right before </ul>.
  // See bug 1520665. The test is anchored to the start of the token so that
  // only a real closing UL tag is exempted, not any token that merely
  // contains the text "/ul" (for example an href or a piece of prose).
  var isClosingUl = /^<\/ul[\s>]/i.test(token);

  if (wantsBreak && !startsWithBreak && !isClosingUl) {
    this.lineBreak();
  }

  // Token.
  this.out_.append(token);

  // Remember if this string ended with a line break so we know we don't have to
  // insert another one before the next token.
  this.isBeginningOfNewLine_ = /\r?\n$/.test(token);

  // Remember if this token requires a line break after it. We don't insert it
  // here because we might not have to if the next token starts with a line
  // break.
  this.needsNewLine_ = breakAfter && !this.isBeginningOfNewLine_;
};
348
349
350
/**
 * Append line break if we need one, i.e. if the output is not already at the
 * start of a line. Each appended break is counted in #breakCount. Calling this
 * repeatedly without pushing a token in between appends at most one break.
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.lineBreak = function() {
  if (this.isBeginningOfNewLine_) {
    return;
  }

  this.out_.append('\n');
  ++this.breakCount;

  // Record the new position so that a second call in a row is a no-op instead
  // of appending a duplicate break.
  this.isBeginningOfNewLine_ = true;
};
359
360
361
/**
 * @return {string} Everything pushed so far, including the line breaks this
 *     buffer inserted, as a single string.
 * @override
 */
goog.format.HtmlPrettyPrinter.Buffer.prototype.toString = function() {
  var text = this.out_.toString();
  return text;
};
368
369