Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/html/utils.js
2868 views
// Copyright 2013 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15
/**
 * @fileoverview HTML processing utilities for HTML in string form.
 */
19
20
goog.provide('goog.html.utils');
21
22
goog.require('goog.string');
23
24
25
/**
 * Extracts plain text from HTML.
 *
 * This behaves similarly to extracting textContent from a hypothetical DOM
 * element containing the specified HTML. Every tag matched by
 * goog.html.utils.HTML_TAG_REGEX_ is removed: tags whose name matches
 * goog.html.utils.INLINE_HTML_TAG_REGEX_ are dropped without a replacement,
 * while all other tags (including span, which is often used as a container)
 * are replaced by a single space. Runs of tabs, newlines and spaces are then
 * collapsed into one space, the result is trimmed, and finally HTML entities
 * are unescaped.
 *
 * NOTE(review): the return contract below mentions HTML comments, but the tag
 * pattern requires an alphanumeric name directly after '<', '</' or '<!', so
 * '<!-- ... -->' does not appear to be matched. Confirm the intended behavior.
 *
 * @param {string} value The input HTML to have tags removed.
 * @return {string} The plain text of value without tags, HTML comments, or
 *     other non-text content. Does NOT return safe HTML!
 */
goog.html.utils.stripHtmlTags = function(value) {
  // TODO(user): Make a version that extracts text attributes such as alt.
  var withoutTags = value.replace(
      goog.html.utils.HTML_TAG_REGEX_, function(fullMatch, tagName) {
        // Inline tags vanish entirely; any other tag leaves a space behind so
        // that the text on either side of it does not run together.
        return goog.html.utils.INLINE_HTML_TAG_REGEX_.test(tagName) ? '' : ' ';
      });

  // Only tab, newline and the plain space character are compressed here.
  var collapsed = withoutTags.replace(/[\t\n ]+/g, ' ');

  // Entities are unescaped last, after tag removal, so escaped markup such as
  // "&lt;b&gt;" is never treated as a tag by the pattern above.
  return goog.string.unescapeEntities(goog.string.trim(collapsed));
};
53
54
55
/**
 * Matches the names of all tags that do not require extra space, i.e. tags
 * that goog.html.utils.stripHtmlTags removes without inserting a space in
 * their place. The pattern is anchored at both ends, so it has to be tested
 * against a bare tag name (e.g. 'b'), not against a complete tag. Matching is
 * case-insensitive.
 *
 * @private @const
 */
goog.html.utils.INLINE_HTML_TAG_REGEX_ =
    /^(?:abbr|acronym|address|b|em|i|small|strong|su[bp]|u)$/i;
62
63
64
/**
 * Matches tags in tag soup HTML: a '<', an optional '!' or '/', an
 * alphanumeric name (captured as group 1) and, optionally, everything from the
 * first '/' or space up to the closing '>' (group 2). This covers opening,
 * closing and self-closing tags as well as declarations such as DOCTYPEs.
 *
 * By removing these, and additionally replacing any remaining '<' or '>'
 * characters with entities, the result can be embedded into an attribute
 * without introducing a tag boundary. That escaping step is not performed by
 * this pattern itself.
 *
 * NOTE(review): because a name must directly follow '<!', HTML comments
 * ('<!-- ... -->') do not appear to be matched, although this constant was
 * previously documented as matching them. Confirm before relying on comment
 * removal.
 *
 * The pattern carries the global flag so that String#replace substitutes
 * every match; calling test() or exec() on it directly would advance its
 * lastIndex between calls.
 *
 * @private @const
 */
goog.html.utils.HTML_TAG_REGEX_ = /<[!\/]?([a-z0-9]+)([\/ ][^>]*)?>/gi;
73
74