CoCalc -- annotate.js

GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/dom/annotate.js
²⁸⁶⁸ views
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14

/**
 * @fileoverview Methods for annotating occurrences of query terms in text or
 *   in a DOM tree. Adapted from Gmail code.
 *
 */
20

21
goog.provide('goog.dom.annotate');
22
goog.provide('goog.dom.annotate.AnnotateFn');
23

24
goog.require('goog.array');
25
goog.require('goog.asserts');
26
goog.require('goog.dom');
27
goog.require('goog.dom.NodeType');
28
goog.require('goog.dom.TagName');
29
goog.require('goog.dom.safe');
30
goog.require('goog.html.SafeHtml');
31
goog.require('goog.object');
32

33

34
/**
 * Callback that produces the markup for one matched search term. It takes:
 *   (1) the index, within the terms array, of the term that was hit,
 *   (2) the HTML-escaped matched text to be annotated,
 * and returns the annotated term as HTML.
 * @typedef {function(number, !goog.html.SafeHtml): !goog.html.SafeHtml}
 */
goog.dom.annotate.AnnotateFn;
42

43

44
/**
 * Calls {@code annotateFn} for each occurrence of a search term in text nodes
 * under {@code node}, replacing every matching text node with the annotated
 * HTML.
 *
 * @param {Node} node  A DOM node.
 * @param {Array<!Array<string|boolean>>} terms
 *   An array of [searchTerm, matchWholeWordOnly] tuples.
 *   The matchWholeWordOnly value is a per-term attribute because some terms
 *   may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *   should always be false for CJK terms.).
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*=} opt_ignoreCase  Whether to ignore the case of the query
 *   terms when looking for matches.
 * @param {Array<string>=} opt_classesToSkip  Nodes with one of these CSS class
 *   names (and its descendants) will be skipped.
 * @param {number=} opt_maxMs  Number of milliseconds after which this function,
 *   if still annotating, should stop and return. Omitted or non-positive
 *   values mean there is no time limit.
 *
 * @return {boolean} Whether any terms were annotated.
 */
goog.dom.annotate.annotateTerms = function(
    node, terms, annotateFn, opt_ignoreCase, opt_classesToSkip, opt_maxMs) {
  // The text helper lowercases the searched text but expects the terms to be
  // lowercase already, so normalize them once here without touching the
  // caller's array.
  var searchTerms =
      opt_ignoreCase ? goog.dom.annotate.lowercaseTerms_(terms) : terms;

  // A stop time of 0 tells the recursive helper that there is no deadline.
  var stopTime = opt_maxMs > 0 ? goog.now() + opt_maxMs : 0;

  return goog.dom.annotate.annotateTermsInNode_(
      node, searchTerms, annotateFn, opt_ignoreCase, opt_classesToSkip || [],
      stopTime, 0);
};
75

76

77
/**
 * The maximum recursion depth allowed. Any DOM nodes deeper than this are
 * ignored.
 * @type {number}
 * @private
 */
goog.dom.annotate.MAX_RECURSION_ = 200;


/**
 * Tag names of the elements whose descendants should not be affected by
 * annotation.
 * @private {!Object<string, boolean>}
 */
goog.dom.annotate.NODES_TO_SKIP_ = goog.object.createSet(
    goog.dom.TagName.SCRIPT, goog.dom.TagName.STYLE, goog.dom.TagName.TEXTAREA);
92

93

94
/**
 * Recursive helper function. Annotates {@code node} itself when it is a text
 * node; otherwise walks its children, skipping SCRIPT/STYLE/TEXTAREA subtrees
 * and subtrees rooted at a node carrying one of {@code classesToSkip}.
 *
 * @param {Node} node  A DOM node.
 * @param {Array<!Array<string|boolean>>} terms
 *     An array of [searchTerm, matchWholeWordOnly] tuples.
 *     The matchWholeWordOnly value is a per-term attribute because some terms
 *     may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *     should always be false for CJK terms.).
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*} ignoreCase  Whether to ignore the case of the query terms
 *     when looking for matches.
 * @param {Array<string>} classesToSkip  Nodes with one of these CSS class
 *     names will be skipped (as will their descendants).
 * @param {number} stopTime  Deadline for annotation operation (ignored if 0).
 * @param {number} recursionLevel  How deep this recursive call is; pass the
 *     value 0 in the initial call.
 * @return {boolean} Whether any terms were annotated.
 * @private
 */
goog.dom.annotate.annotateTermsInNode_ = function(
    node, terms, annotateFn, ignoreCase, classesToSkip, stopTime,
    recursionLevel) {
  // Give up on this subtree once the deadline has passed or the tree is
  // deeper than the recursion cap.
  if ((stopTime > 0 && goog.now() >= stopTime) ||
      recursionLevel > goog.dom.annotate.MAX_RECURSION_) {
    return false;
  }

  if (node.nodeType == goog.dom.NodeType.TEXT) {
    var parentNode = node.parentNode;
    if (!parentNode) {
      // A detached text node has nowhere to splice the annotated markup into.
      return false;
    }

    var html = goog.dom.annotate.helpAnnotateText_(
        node.nodeValue, terms, annotateFn, ignoreCase);
    if (html == null) {
      return false;
    }

    // Replace the text with the annotated html. First we put the html into
    // a temporary node, to get its DOM structure. To avoid adding a wrapper
    // element as a side effect, we'll only actually use the temporary node's
    // children.
    var tempNode =
        goog.dom.getDomHelper(node).createElement(goog.dom.TagName.SPAN);
    goog.dom.safe.setInnerHtml(tempNode, html);

    var nodeToInsert;
    while ((nodeToInsert = tempNode.firstChild) != null) {
      // Each parentNode.insertBefore call removes the inserted node from
      // tempNode's list of children.
      parentNode.insertBefore(nodeToInsert, node);
    }

    parentNode.removeChild(node);
    return true;
  }

  if (!node.hasChildNodes() ||
      goog.dom.annotate
          .NODES_TO_SKIP_[/** @type {!Element} */ (node).tagName]) {
    return false;
  }

  // className is a plain string on HTML elements, an SVGAnimatedString (with
  // the text in baseVal) on SVG elements, and absent on non-element parents
  // such as documents and document fragments. Treat anything else as "no
  // classes" instead of throwing on .split().
  var classAttr = /** @type {?} */ (node.className);
  var classText = '';
  if (typeof classAttr === 'string') {
    classText = classAttr;
  } else if (classAttr && typeof classAttr.baseVal === 'string') {
    classText = classAttr.baseVal;
  }

  var skip = goog.array.some(classText.split(/\s+/), function(className) {
    return goog.array.contains(classesToSkip, className);
  });
  if (skip) {
    return false;
  }

  var annotated = false;
  var childLevel = recursionLevel + 1;
  var curNode = node.firstChild;
  while (curNode) {
    // Capture the sibling first: annotating a text child removes it from the
    // tree, after which its nextSibling is no longer available.
    var nextNode = curNode.nextSibling;
    var curNodeAnnotated = goog.dom.annotate.annotateTermsInNode_(
        curNode, terms, annotateFn, ignoreCase, classesToSkip, stopTime,
        childLevel);
    annotated = annotated || curNodeAnnotated;
    curNode = nextNode;
  }

  return annotated;
};
172

173

174
/**
 * Regular expression that matches non-word characters. It is used to test the
 * single characters immediately before and after a candidate hit when a term
 * must match whole words only.
 *
 * Performance note: Testing a one-character string using this regex is as fast
 * as the equivalent string test ("a-zA-Z0-9_".indexOf(c) < 0), give or take a
 * few percent. (The regex is about 5% faster in IE 6 and about 4% slower in
 * Firefox 1.5.) If performance becomes critical, it may be better to convert
 * the character to a numerical char code and check whether it falls in the
 * word character ranges. A quick test suggests that could be 33% faster.
 *
 * @type {RegExp}
 * @private
 */
goog.dom.annotate.NONWORD_RE_ = /\W/;
188

189

190
/**
 * Annotates occurrences of query terms in plain text. Every occurrence of
 * every query term is located, the supplied function is asked for the
 * replacement HTML of each occurrence, and all of the text is HTML-escaped.
 *
 * @param {string} text  The plain text to be searched.
 * @param {Array<Array<?>>} terms  An array of
 *   [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 *   The matchWholeWordOnly value is a per-term attribute because some terms
 *   may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *   should always be false for CJK terms.).
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*=} opt_ignoreCase  Whether to ignore the case of the query
 *   terms when looking for matches.
 * @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms
 *   annotated, or null if the text did not contain any of the terms.
 */
goog.dom.annotate.annotateText = function(
    text, terms, annotateFn, opt_ignoreCase) {
  // The private helper requires already-lowercased terms when ignoring case.
  var searchTerms =
      opt_ignoreCase ? goog.dom.annotate.lowercaseTerms_(terms) : terms;
  return goog.dom.annotate.helpAnnotateText_(
      text, searchTerms, annotateFn, opt_ignoreCase);
};
216

217

218
/**
 * Annotates occurrences of query terms in plain text. This process consists of
 * identifying all occurrences of all query terms, calling a provided function
 * to get the appropriate replacement HTML for each occurrence, and
 * HTML-escaping all the text.
 *
 * When hits of different terms overlap, the hit that starts first wins (the
 * term listed first wins a tie) and any hit starting inside it is dropped.
 *
 * @param {string} text  The plain text to be searched.
 * @param {Array<Array<?>>} terms  An array of
 *   [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 *   If {@code ignoreCase} is true, each search term must already be lowercase.
 *   The matchWholeWordOnly value is a per-term attribute because some terms
 *   may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *   should always be false for CJK terms.).
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*} ignoreCase  Whether to ignore the case of the query terms
 *   when looking for matches.
 * @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms
 *   annotated, or null if the text did not contain any of the terms.
 * @private
 */
goog.dom.annotate.helpAnnotateText_ = function(
    text, terms, annotateFn, ignoreCase) {
  var hit = false;
  // NOTE(review): hit positions are found in textToSearch but applied to
  // text, which assumes lowercasing keeps the string length unchanged. A few
  // characters (e.g. U+0130) lowercase to more than one code unit -- confirm
  // whether such input needs handling.
  var textToSearch = ignoreCase ? text.toLowerCase() : text;
  var textLen = textToSearch.length;
  var numTerms = terms.length;
  var nonWordRe = goog.dom.annotate.NONWORD_RE_;

  // Each element will be an array of hit positions for the term, in
  // ascending order.
  var termHits = new Array(numTerms);

  // First collect all the hits into termHits.
  for (var i = 0; i < numTerms; i++) {
    var term = terms[i];
    var hits = [];
    var termText = term[0];
    if (termText != '') {
      var matchWholeWordOnly = term[1];
      var termLen = termText.length;
      var searchPos = 0;
      // Find each hit for this term and append it to hits.
      while (searchPos < textLen) {
        var hitPos = textToSearch.indexOf(termText, searchPos);
        if (hitPos == -1) {
          break;
        }
        var prevCharPos = hitPos - 1;
        var nextCharPos = hitPos + termLen;
        var accepted = !matchWholeWordOnly ||
            ((prevCharPos < 0 ||
              nonWordRe.test(textToSearch.charAt(prevCharPos))) &&
             (nextCharPos >= textLen ||
              nonWordRe.test(textToSearch.charAt(nextCharPos))));
        if (accepted) {
          hits.push(hitPos);
          hit = true;
          // Hits of the same term never overlap, so resume after this one.
          searchPos = nextCharPos;
        } else {
          // A rejected candidate must not hide a valid match that overlaps
          // it (e.g. term "a a" in "ba a a"), so only step one character.
          searchPos = hitPos + 1;
        }
      }
    }
    termHits[i] = hits;
  }

  if (!hit) {
    return null;
  }

  // Pieces of the result: plain strings for unmatched text (left for
  // SafeHtml.concat to escape) and SafeHtml returned by annotateFn.
  var html = [];
  // Index in text up to which output has already been emitted.
  var emittedUpTo = 0;

  while (true) {
    // First determine which of the n terms is the next hit.
    var termIndexOfNextHit;
    var posOfNextHit = -1;

    for (var t = 0; t < numTerms; t++) {
      var pendingHits = termHits[t];

      // The next candidate of a term is always the first entry, because hits
      // are shifted off the front as they are consumed. Discard any hits
      // embedded in the previous hit.
      while (pendingHits.length > 0 && pendingHits[0] < emittedUpTo) {
        pendingHits.shift();
      }

      if (pendingHits.length > 0 &&
          (posOfNextHit < 0 || pendingHits[0] < posOfNextHit)) {
        termIndexOfNextHit = t;
        posOfNextHit = pendingHits[0];
      }
    }

    // Quit if there are no more hits.
    if (posOfNextHit < 0) break;
    var termIndex = goog.asserts.assertNumber(termIndexOfNextHit);

    // Remove the next hit from our hit list.
    termHits[termIndex].shift();

    // Append everything from the end of the last hit up to this one.
    html.push(text.substring(emittedUpTo, posOfNextHit));

    // Append the annotated term, using the original (not lowercased) text.
    var hitEnd = posOfNextHit + terms[termIndex][0].length;
    var termHtml =
        goog.html.SafeHtml.htmlEscape(text.substring(posOfNextHit, hitEnd));
    html.push(annotateFn(termIndex, termHtml));

    emittedUpTo = hitEnd;
  }

  // Append everything after the last hit.
  html.push(text.substring(emittedUpTo));
  return goog.html.SafeHtml.concat(html);
};
340

341

342
/**
 * Builds a copy of the term list in which every search term is lowercased.
 * The input array and its tuples are left untouched.
 *
 * @param {Array<Array<?>>} terms  An array of
 *   [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 * @return {!Array<Array<?>>}  An array of
 *   [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 * @private
 */
goog.dom.annotate.lowercaseTerms_ = function(terms) {
  var result = [];
  for (var index = 0; index < terms.length; index++) {
    var tuple = terms[index];
    // Only the search text changes; the whole-word flag is carried over.
    result.push([tuple[0].toLowerCase(), tuple[1]]);
  }
  return result;
};
359

360
Product

Resources

Company