Path: blob/trunk/third_party/closure/goog/dom/annotate.js
2868 views
// Copyright 2006 The Closure Library Authors. All Rights Reserved.1//2// Licensed under the Apache License, Version 2.0 (the "License");3// you may not use this file except in compliance with the License.4// You may obtain a copy of the License at5//6// http://www.apache.org/licenses/LICENSE-2.07//8// Unless required by applicable law or agreed to in writing, software9// distributed under the License is distributed on an "AS-IS" BASIS,10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.11// See the License for the specific language governing permissions and12// limitations under the License.1314/**15* @fileoverview Methods for annotating occurrences of query terms in text or16* in a DOM tree. Adapted from Gmail code.17*18*/1920goog.provide('goog.dom.annotate');21goog.provide('goog.dom.annotate.AnnotateFn');2223goog.require('goog.array');24goog.require('goog.asserts');25goog.require('goog.dom');26goog.require('goog.dom.NodeType');27goog.require('goog.dom.TagName');28goog.require('goog.dom.safe');29goog.require('goog.html.SafeHtml');30goog.require('goog.object');313233/**34* A function that takes:35* (1) the number of the term that is "hit",36* (2) the HTML (search term) to be annotated,37* and returns the annotated term as an HTML.38* @typedef {function(number, !goog.html.SafeHtml): !goog.html.SafeHtml}39*/40goog.dom.annotate.AnnotateFn;414243/**44* Calls {@code annotateFn} for each occurrence of a search term in text nodes45* under {@code node}. Returns the number of hits.46*47* @param {Node} node A DOM node.48* @param {Array<!Array<string|boolean>>} terms49* An array of [searchTerm, matchWholeWordOnly] tuples.50* The matchWholeWordOnly value is a per-term attribute because some terms51* may be CJK, while others are not. (For correctness, matchWholeWordOnly52* should always be false for CJK terms.).53* @param {goog.dom.annotate.AnnotateFn} annotateFn54* @param {*=} opt_ignoreCase Whether to ignore the case of the query55* terms when looking for matches.56* @param {Array<string>=} opt_classesToSkip Nodes with one of these CSS class57* names (and its descendants) will be skipped.58* @param {number=} opt_maxMs Number of milliseconds after which this function,59* if still annotating, should stop and return.60*61* @return {boolean} Whether any terms were annotated.62*/63goog.dom.annotate.annotateTerms = function(64node, terms, annotateFn, opt_ignoreCase, opt_classesToSkip, opt_maxMs) {65if (opt_ignoreCase) {66terms = goog.dom.annotate.lowercaseTerms_(terms);67}68var stopTime = opt_maxMs > 0 ? goog.now() + opt_maxMs : 0;6970return goog.dom.annotate.annotateTermsInNode_(71node, terms, annotateFn, opt_ignoreCase, opt_classesToSkip || [],72stopTime, 0);73};747576/**77* The maximum recursion depth allowed. Any DOM nodes deeper than this are78* ignored.79* @type {number}80* @private81*/82goog.dom.annotate.MAX_RECURSION_ = 200;838485/**86* The node types whose descendants should not be affected by annotation.87* @private {!Object<string, boolean>}88*/89goog.dom.annotate.NODES_TO_SKIP_ = goog.object.createSet(90goog.dom.TagName.SCRIPT, goog.dom.TagName.STYLE, goog.dom.TagName.TEXTAREA);919293/**94* Recursive helper function.95*96* @param {Node} node A DOM node.97* @param {Array<!Array<string|boolean>>} terms98* An array of [searchTerm, matchWholeWordOnly] tuples.99* The matchWholeWordOnly value is a per-term attribute because some terms100* may be CJK, while others are not. (For correctness, matchWholeWordOnly101* should always be false for CJK terms.).102* @param {goog.dom.annotate.AnnotateFn} annotateFn103* @param {*} ignoreCase Whether to ignore the case of the query terms104* when looking for matches.105* @param {Array<string>} classesToSkip Nodes with one of these CSS class106* names will be skipped (as will their descendants).107* @param {number} stopTime Deadline for annotation operation (ignored if 0).108* @param {number} recursionLevel How deep this recursive call is; pass the109* value 0 in the initial call.110* @return {boolean} Whether any terms were annotated.111* @private112*/113goog.dom.annotate.annotateTermsInNode_ = function(114node, terms, annotateFn, ignoreCase, classesToSkip, stopTime,115recursionLevel) {116if ((stopTime > 0 && goog.now() >= stopTime) ||117recursionLevel > goog.dom.annotate.MAX_RECURSION_) {118return false;119}120121var annotated = false;122123if (node.nodeType == goog.dom.NodeType.TEXT) {124var html = goog.dom.annotate.helpAnnotateText_(125node.nodeValue, terms, annotateFn, ignoreCase);126if (html != null) {127// Replace the text with the annotated html. First we put the html into128// a temporary node, to get its DOM structure. To avoid adding a wrapper129// element as a side effect, we'll only actually use the temporary node's130// children.131var tempNode =132goog.dom.getDomHelper(node).createElement(goog.dom.TagName.SPAN);133goog.dom.safe.setInnerHtml(tempNode, html);134135var parentNode = node.parentNode;136var nodeToInsert;137while ((nodeToInsert = tempNode.firstChild) != null) {138// Each parentNode.insertBefore call removes the inserted node from139// tempNode's list of children.140parentNode.insertBefore(nodeToInsert, node);141}142143parentNode.removeChild(node);144annotated = true;145}146} else if (147node.hasChildNodes() &&148!goog.dom.annotate149.NODES_TO_SKIP_[/** @type {!Element} */ (node).tagName]) {150var classes = /** @type {!Element} */ (node).className.split(/\s+/);151var skip = goog.array.some(classes, function(className) {152return goog.array.contains(classesToSkip, className);153});154155if (!skip) {156++recursionLevel;157var curNode = node.firstChild;158while (curNode) {159var nextNode = curNode.nextSibling;160var curNodeAnnotated = goog.dom.annotate.annotateTermsInNode_(161curNode, terms, annotateFn, ignoreCase, classesToSkip, stopTime,162recursionLevel);163annotated = annotated || curNodeAnnotated;164curNode = nextNode;165}166}167}168169return annotated;170};171172173/**174* Regular expression that matches non-word characters.175*176* Performance note: Testing a one-character string using this regex is as fast177* as the equivalent string test ("a-zA-Z0-9_".indexOf(c) < 0), give or take a178* few percent. (The regex is about 5% faster in IE 6 and about 4% slower in179* Firefox 1.5.) If performance becomes critical, it may be better to convert180* the character to a numerical char code and check whether it falls in the181* word character ranges. A quick test suggests that could be 33% faster.182*183* @type {RegExp}184* @private185*/186goog.dom.annotate.NONWORD_RE_ = /\W/;187188189/**190* Annotates occurrences of query terms in plain text. This process consists of191* identifying all occurrences of all query terms, calling a provided function192* to get the appropriate replacement HTML for each occurrence, and193* HTML-escaping all the text.194*195* @param {string} text The plain text to be searched.196* @param {Array<Array<?>>} terms An array of197* [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.198* The matchWholeWordOnly value is a per-term attribute because some terms199* may be CJK, while others are not. (For correctness, matchWholeWordOnly200* should always be false for CJK terms.).201* @param {goog.dom.annotate.AnnotateFn} annotateFn202* @param {*=} opt_ignoreCase Whether to ignore the case of the query203* terms when looking for matches.204* @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms205* annotated, or null if the text did not contain any of the terms.206*/207goog.dom.annotate.annotateText = function(208text, terms, annotateFn, opt_ignoreCase) {209if (opt_ignoreCase) {210terms = goog.dom.annotate.lowercaseTerms_(terms);211}212return goog.dom.annotate.helpAnnotateText_(213text, terms, annotateFn, opt_ignoreCase);214};215216217/**218* Annotates occurrences of query terms in plain text. This process consists of219* identifying all occurrences of all query terms, calling a provided function220* to get the appropriate replacement HTML for each occurrence, and221* HTML-escaping all the text.222*223* @param {string} text The plain text to be searched.224* @param {Array<Array<?>>} terms An array of225* [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.226* If {@code ignoreCase} is true, each search term must already be lowercase.227* The matchWholeWordOnly value is a per-term attribute because some terms228* may be CJK, while others are not. (For correctness, matchWholeWordOnly229* should always be false for CJK terms.).230* @param {goog.dom.annotate.AnnotateFn} annotateFn231* @param {*} ignoreCase Whether to ignore the case of the query terms232* when looking for matches.233* @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms234* annotated, or null if the text did not contain any of the terms.235* @private236*/237goog.dom.annotate.helpAnnotateText_ = function(238text, terms, annotateFn, ignoreCase) {239var hit = false;240var textToSearch = ignoreCase ? text.toLowerCase() : text;241var textLen = textToSearch.length;242var numTerms = terms.length;243244// Each element will be an array of hit positions for the term.245var termHits = new Array(numTerms);246247// First collect all the hits into allHits.248for (var i = 0; i < numTerms; i++) {249var term = terms[i];250var hits = [];251var termText = term[0];252if (termText != '') {253var matchWholeWordOnly = term[1];254var termLen = termText.length;255var pos = 0;256// Find each hit for term t and append to termHits.257while (pos < textLen) {258var hitPos = textToSearch.indexOf(termText, pos);259if (hitPos == -1) {260break;261} else {262var prevCharPos = hitPos - 1;263var nextCharPos = hitPos + termLen;264if (!matchWholeWordOnly ||265((prevCharPos < 0 ||266goog.dom.annotate.NONWORD_RE_.test(267textToSearch.charAt(prevCharPos))) &&268(nextCharPos >= textLen ||269goog.dom.annotate.NONWORD_RE_.test(270textToSearch.charAt(nextCharPos))))) {271hits.push(hitPos);272hit = true;273}274pos = hitPos + termLen;275}276}277}278termHits[i] = hits;279}280281if (hit) {282var html = [];283var pos = 0;284285while (true) {286// First determine which of the n terms is the next hit.287var termIndexOfNextHit;288var posOfNextHit = -1;289290for (var i = 0; i < numTerms; i++) {291var hits = termHits[i];292// pull off the position of the next hit of term t293// (it's always the first in the array because we're shifting294// hits off the front of the array as we process them)295// this is the next candidate to consider for the next overall hit296if (!goog.array.isEmpty(hits)) {297var hitPos = hits[0];298299// Discard any hits embedded in the previous hit.300while (hitPos >= 0 && hitPos < pos) {301hits.shift();302hitPos = goog.array.isEmpty(hits) ? -1 : hits[0];303}304305if (hitPos >= 0 && (posOfNextHit < 0 || hitPos < posOfNextHit)) {306termIndexOfNextHit = i;307posOfNextHit = hitPos;308}309}310}311312// Quit if there are no more hits.313if (posOfNextHit < 0) break;314goog.asserts.assertNumber(termIndexOfNextHit);315316// Remove the next hit from our hit list.317termHits[termIndexOfNextHit].shift();318319// Append everything from the end of the last hit up to this one.320html.push(text.substr(pos, posOfNextHit - pos));321322// Append the annotated term.323var termLen = terms[termIndexOfNextHit][0].length;324var termHtml =325goog.html.SafeHtml.htmlEscape(text.substr(posOfNextHit, termLen));326html.push(327annotateFn(goog.asserts.assertNumber(termIndexOfNextHit), termHtml));328329pos = posOfNextHit + termLen;330}331332// Append everything after the last hit.333html.push(text.substr(pos));334return goog.html.SafeHtml.concat(html);335} else {336return null;337}338};339340341/**342* Converts terms to lowercase.343*344* @param {Array<Array<?>>} terms An array of345* [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.346* @return {!Array<Array<?>>} An array of347* [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.348* @private349*/350goog.dom.annotate.lowercaseTerms_ = function(terms) {351var lowercaseTerms = [];352for (var i = 0; i < terms.length; ++i) {353var term = terms[i];354lowercaseTerms[i] = [term[0].toLowerCase(), term[1]];355}356return lowercaseTerms;357};358359360