Path: blob/trunk/third_party/closure/goog/html/utils.js
2868 views
// Copyright 2013 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


/**
 * @fileoverview HTML processing utilities for HTML in string form.
 */

goog.provide('goog.html.utils');

goog.require('goog.string');


/**
 * Extracts plain text from HTML.
 *
 * This behaves similarly to extracting textContent from a hypothetical DOM
 * element containing the specified HTML. Tags listed in
 * INLINE_HTML_TAG_REGEX_ are removed outright; every other matched tag
 * (block-level elements such as div, DOCTYPEs, unknown tags) is replaced by a
 * single space. Span is treated as a block level element because it is often
 * used as a container. Runs of tab, newline and space characters are then
 * compressed to one space, the result is trimmed, and finally HTML entities
 * are unescaped.
 *
 * Because entities are unescaped as the last step, the returned string may
 * contain literal '<', '>' and '&' characters.
 *
 * NOTE(review): HTML_TAG_REGEX_ requires an alphanumeric tag name directly
 * after '<', '<!' or '</', so HTML comments ('<!-- ... -->') are not matched
 * and pass through unchanged. Confirm whether callers rely on comment
 * removal before changing the pattern.
 *
 * @param {string} value The input HTML to have tags removed.
 * @return {string} The plain text of value with matched tags and DOCTYPEs
 *     removed. Does NOT return safe HTML!
 */
goog.html.utils.stripHtmlTags = function(value) {
  // TODO(user): Make a version that extracts text attributes such as alt.

  // Step 1: drop inline tags, turn every other tag into a separator space so
  // that adjacent blocks do not run together.
  var withoutTags = value.replace(
      goog.html.utils.HTML_TAG_REGEX_, function(fullMatch, tagName) {
        return goog.html.utils.INLINE_HTML_TAG_REGEX_.test(tagName) ? '' : ' ';
      });

  // Step 2: collapse whitespace runs. Only tab, newline and space are
  // collapsed here; other whitespace characters are left as they are.
  var collapsed = withoutTags.replace(/[\t\n ]+/g, ' ');

  // Step 3: trim first, then decode entities, so that whitespace produced by
  // decoding an entity is not trimmed or collapsed.
  return goog.string.unescapeEntities(goog.string.trim(collapsed));
};


/**
 * Matches the names of tags that do not require extra space, i.e. tags that
 * are removed without inserting a separator. Anchored and case-insensitive;
 * it is tested against the tag name captured by HTML_TAG_REGEX_, not against
 * the full tag text.
 *
 * @private @const {!RegExp}
 */
goog.html.utils.INLINE_HTML_TAG_REGEX_ =
    /^(?:abbr|acronym|address|b|em|i|small|strong|su[bp]|u)$/i;


/**
 * Matches opening tags, closing tags and DOCTYPE-style declarations in tag
 * soup HTML: a '<', an optional '!' or '/', an alphanumeric name (capture
 * group 1), optionally followed by '/' or a space and anything up to the
 * next '>'.
 *
 * Limitations visible from the pattern itself:
 *  - HTML comments ('<!-- ... -->') are not matched, because '-' is not part
 *    of the name character class.
 *  - Attributes must be separated from the tag name by a space or '/'; a tag
 *    name followed directly by a tab or newline is not matched.
 *
 * Removing these matches does not by itself make a string safe to embed in an
 * attribute or in markup; any remaining '<' or '>' characters must still be
 * escaped by the caller. This file performs no such escaping.
 *
 * The pattern carries the global flag, so it is meant to be used with
 * String.prototype.replace rather than with stateful test/exec calls.
 *
 * @private @const {!RegExp}
 */
goog.html.utils.HTML_TAG_REGEX_ = /<[!\/]?([a-z0-9]+)([\/ ][^>]*)?>/gi;