Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/dom/annotate.js
2868 views
1
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
 * @fileoverview Methods for annotating occurrences of query terms in text or
 * in a DOM tree. Adapted from Gmail code.
 */
20
21
goog.provide('goog.dom.annotate');
22
goog.provide('goog.dom.annotate.AnnotateFn');
23
24
goog.require('goog.array');
25
goog.require('goog.asserts');
26
goog.require('goog.dom');
27
goog.require('goog.dom.NodeType');
28
goog.require('goog.dom.TagName');
29
goog.require('goog.dom.safe');
30
goog.require('goog.html.SafeHtml');
31
goog.require('goog.object');
32
33
34
/**
 * A function that takes:
 *   (1) the index of the term that was "hit", and
 *   (2) the HTML (the matched search term) to be annotated,
 * and returns the annotated term as HTML.
 * @typedef {function(number, !goog.html.SafeHtml): !goog.html.SafeHtml}
 */
goog.dom.annotate.AnnotateFn;
42
43
44
/**
 * Calls {@code annotateFn} for each occurrence of a search term in text nodes
 * under {@code node}. Reports whether anything was annotated (not a hit
 * count).
 *
 * @param {Node} node A DOM node.
 * @param {Array<!Array<string|boolean>>} terms
 *     An array of [searchTerm, matchWholeWordOnly] tuples.
 *     The matchWholeWordOnly value is a per-term attribute because some terms
 *     may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *     should always be false for CJK terms.)
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*=} opt_ignoreCase Whether to ignore the case of the query
 *     terms when looking for matches. When truthy, a lowercased copy of
 *     {@code terms} is used for the search.
 * @param {Array<string>=} opt_classesToSkip Nodes with one of these CSS class
 *     names (and their descendants) will be skipped. Defaults to no classes.
 * @param {number=} opt_maxMs Number of milliseconds after which this function,
 *     if still annotating, should stop and return. If omitted or not
 *     positive, no deadline is applied.
 *
 * @return {boolean} Whether any terms were annotated.
 */
goog.dom.annotate.annotateTerms = function(
    node, terms, annotateFn, opt_ignoreCase, opt_classesToSkip, opt_maxMs) {
  if (opt_ignoreCase) {
    terms = goog.dom.annotate.lowercaseTerms_(terms);
  }
  // A stop time of 0 is passed on to mean "no deadline".
  var stopTime = opt_maxMs > 0 ? goog.now() + opt_maxMs : 0;

  return goog.dom.annotate.annotateTermsInNode_(
      node, terms, annotateFn, opt_ignoreCase, opt_classesToSkip || [],
      stopTime, 0);
};
75
76
77
/**
 * The maximum recursion depth allowed. Any DOM nodes deeper than this are
 * ignored.
 * @type {number}
 * @private
 */
goog.dom.annotate.MAX_RECURSION_ = 200;
84
85
86
/**
 * The node types whose descendants should not be affected by annotation,
 * keyed by tag name (SCRIPT, STYLE and TEXTAREA).
 * @private {!Object<string, boolean>}
 */
goog.dom.annotate.NODES_TO_SKIP_ = goog.object.createSet(
    goog.dom.TagName.SCRIPT, goog.dom.TagName.STYLE, goog.dom.TagName.TEXTAREA);
92
93
94
/**
 * Recursive helper function. Annotates {@code node} if it is a text node,
 * otherwise walks its children.
 *
 * @param {Node} node A DOM node. A null node is treated as having nothing to
 *     annotate.
 * @param {Array<!Array<string|boolean>>} terms
 *     An array of [searchTerm, matchWholeWordOnly] tuples.
 *     The matchWholeWordOnly value is a per-term attribute because some terms
 *     may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *     should always be false for CJK terms.)
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*} ignoreCase Whether to ignore the case of the query terms
 *     when looking for matches.
 * @param {Array<string>} classesToSkip Nodes with one of these CSS class
 *     names will be skipped (as will their descendants).
 * @param {number} stopTime Deadline for annotation operation (ignored if 0).
 * @param {number} recursionLevel How deep this recursive call is; pass the
 *     value 0 in the initial call.
 * @return {boolean} Whether any terms were annotated.
 * @private
 */
goog.dom.annotate.annotateTermsInNode_ = function(
    node, terms, annotateFn, ignoreCase, classesToSkip, stopTime,
    recursionLevel) {
  if (!node || (stopTime > 0 && goog.now() >= stopTime) ||
      recursionLevel > goog.dom.annotate.MAX_RECURSION_) {
    return false;
  }

  var annotated = false;

  if (node.nodeType == goog.dom.NodeType.TEXT) {
    var parentNode = node.parentNode;
    if (!parentNode) {
      // A detached text node cannot be replaced in place, so there is nothing
      // useful to do with it.
      return false;
    }

    var html = goog.dom.annotate.helpAnnotateText_(
        node.nodeValue, terms, annotateFn, ignoreCase);
    if (html != null) {
      // Replace the text with the annotated html. First we put the html into
      // a temporary node, to get its DOM structure. To avoid adding a wrapper
      // element as a side effect, we'll only actually use the temporary node's
      // children.
      var tempNode =
          goog.dom.getDomHelper(node).createElement(goog.dom.TagName.SPAN);
      goog.dom.safe.setInnerHtml(tempNode, html);

      var nodeToInsert;
      while ((nodeToInsert = tempNode.firstChild) != null) {
        // Each parentNode.insertBefore call removes the inserted node from
        // tempNode's list of children.
        parentNode.insertBefore(nodeToInsert, node);
      }

      parentNode.removeChild(node);
      annotated = true;
    }
  } else if (
      node.hasChildNodes() &&
      !goog.dom.annotate
           .NODES_TO_SKIP_[/** @type {!Element} */ (node).tagName]) {
    // className is only a string on HTML elements. SVG elements expose an
    // SVGAnimatedString (the text is in baseVal), and non-element containers
    // such as documents and fragments have no className at all.
    var classValue = /** @type {?} */ (node).className;
    if (classValue && typeof classValue.baseVal == 'string') {
      classValue = classValue.baseVal;
    }
    var classes = typeof classValue == 'string' ? classValue.split(/\s+/) : [];
    var skip = goog.array.some(classes, function(className) {
      return goog.array.contains(classesToSkip, className);
    });

    if (!skip) {
      ++recursionLevel;
      var curNode = node.firstChild;
      while (curNode) {
        // Remember the sibling first: annotating curNode may replace it with
        // new nodes and remove it from the tree.
        var nextNode = curNode.nextSibling;
        var curNodeAnnotated = goog.dom.annotate.annotateTermsInNode_(
            curNode, terms, annotateFn, ignoreCase, classesToSkip, stopTime,
            recursionLevel);
        annotated = annotated || curNodeAnnotated;
        curNode = nextNode;
      }
    }
  }

  return annotated;
};
172
173
174
/**
 * Regular expression that matches non-word characters. Note that \W is
 * ASCII-based: every character other than A-Z, a-z, 0-9 and underscore is
 * treated as a non-word character.
 *
 * Performance note: Testing a one-character string using this regex is as fast
 * as the equivalent string test ("a-zA-Z0-9_".indexOf(c) < 0), give or take a
 * few percent. (The regex is about 5% faster in IE 6 and about 4% slower in
 * Firefox 1.5.) If performance becomes critical, it may be better to convert
 * the character to a numerical char code and check whether it falls in the
 * word character ranges. A quick test suggests that could be 33% faster.
 *
 * @type {RegExp}
 * @private
 */
goog.dom.annotate.NONWORD_RE_ = /\W/;
188
189
190
/**
 * Annotates occurrences of query terms in plain text. This process consists of
 * identifying all occurrences of all query terms, calling a provided function
 * to get the appropriate replacement HTML for each occurrence, and
 * HTML-escaping all the text.
 *
 * @param {string} text The plain text to be searched.
 * @param {Array<Array<?>>} terms An array of
 *     [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 *     The matchWholeWordOnly value is a per-term attribute because some terms
 *     may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *     should always be false for CJK terms.)
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*=} opt_ignoreCase Whether to ignore the case of the query
 *     terms when looking for matches. When truthy, a lowercased copy of
 *     {@code terms} is used for the search.
 * @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms
 *     annotated, or null if the text did not contain any of the terms.
 */
goog.dom.annotate.annotateText = function(
    text, terms, annotateFn, opt_ignoreCase) {
  if (opt_ignoreCase) {
    terms = goog.dom.annotate.lowercaseTerms_(terms);
  }
  return goog.dom.annotate.helpAnnotateText_(
      text, terms, annotateFn, opt_ignoreCase);
};
216
217
218
/**
 * Annotates occurrences of query terms in plain text. This process consists of
 * identifying all occurrences of all query terms, calling a provided function
 * to get the appropriate replacement HTML for each occurrence, and
 * HTML-escaping all the text.
 *
 * Hits of a single term never overlap each other. When hits of different
 * terms overlap, the one that starts first wins (ties go to the term listed
 * first) and any hit starting inside it is discarded.
 *
 * @param {string} text The plain text to be searched.
 * @param {Array<Array<?>>} terms An array of
 *     [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 *     If {@code ignoreCase} is true, each search term must already be
 *     lowercase. Empty search terms are ignored.
 *     The matchWholeWordOnly value is a per-term attribute because some terms
 *     may be CJK, while others are not. (For correctness, matchWholeWordOnly
 *     should always be false for CJK terms.)
 * @param {goog.dom.annotate.AnnotateFn} annotateFn
 * @param {*} ignoreCase Whether to ignore the case of the query terms
 *     when looking for matches.
 * @return {goog.html.SafeHtml} The HTML equivalent of {@code text} with terms
 *     annotated, or null if the text did not contain any of the terms.
 * @private
 */
goog.dom.annotate.helpAnnotateText_ = function(
    text, terms, annotateFn, ignoreCase) {
  var hit = false;
  var textToSearch = text;
  if (ignoreCase) {
    textToSearch = text.toLowerCase();
    if (textToSearch.length != text.length) {
      // Lowercasing changed the length (e.g. U+0130 becomes two code units),
      // so hit positions would no longer line up with {@code text}. Lowercase
      // one code unit at a time instead and leave any length-changing
      // character as it is; such a character then only matches itself.
      var units = [];
      for (var u = 0; u < text.length; u++) {
        var unit = text.charAt(u);
        var lowerUnit = unit.toLowerCase();
        units.push(lowerUnit.length == 1 ? lowerUnit : unit);
      }
      textToSearch = units.join('');
    }
  }
  var textLen = textToSearch.length;
  var numTerms = terms.length;

  // Each element will be an ascending array of hit positions for the term.
  var termHits = new Array(numTerms);

  // First collect all the hits into termHits.
  for (var i = 0; i < numTerms; i++) {
    var term = terms[i];
    var hits = [];
    var termText = term[0];
    if (termText != '') {
      var matchWholeWordOnly = term[1];
      var termLen = termText.length;
      var pos = 0;
      // Find each hit for this term and append it to hits.
      while (pos < textLen) {
        var hitPos = textToSearch.indexOf(termText, pos);
        if (hitPos == -1) {
          break;
        }
        var prevCharPos = hitPos - 1;
        var nextCharPos = hitPos + termLen;
        if (!matchWholeWordOnly ||
            ((prevCharPos < 0 ||
              goog.dom.annotate.NONWORD_RE_.test(
                  textToSearch.charAt(prevCharPos))) &&
             (nextCharPos >= textLen ||
              goog.dom.annotate.NONWORD_RE_.test(
                  textToSearch.charAt(nextCharPos))))) {
          hits.push(hitPos);
          hit = true;
          // Accepted hits of the same term must not overlap.
          pos = nextCharPos;
        } else {
          // Rejected by the whole-word check: resume at the very next
          // character so that an overlapping occurrence which does sit on
          // word boundaries (possible when the term itself contains non-word
          // characters) is still found.
          pos = hitPos + 1;
        }
      }
    }
    termHits[i] = hits;
  }

  if (!hit) {
    return null;
  }

  // Text between hits is pushed as plain strings; escaping of those strings
  // is left to goog.html.SafeHtml.concat.
  var html = [];
  // Index into termHits[i] of the first hit of term i not yet consumed.
  var nextHitIndex = [];
  for (i = 0; i < numTerms; i++) {
    nextHitIndex[i] = 0;
  }
  // Position in text up to which output has already been produced.
  pos = 0;

  while (true) {
    // First determine which of the n terms is the next hit.
    var termIndexOfNextHit = -1;
    var posOfNextHit = -1;

    for (i = 0; i < numTerms; i++) {
      hits = termHits[i];
      var cursor = nextHitIndex[i];

      // Discard any hits embedded in the previous hit.
      while (cursor < hits.length && hits[cursor] < pos) {
        cursor++;
      }
      nextHitIndex[i] = cursor;

      if (cursor < hits.length &&
          (posOfNextHit < 0 || hits[cursor] < posOfNextHit)) {
        termIndexOfNextHit = i;
        posOfNextHit = hits[cursor];
      }
    }

    // Quit if there are no more hits.
    if (posOfNextHit < 0) break;
    goog.asserts.assert(termIndexOfNextHit >= 0);

    // Remove the next hit from our hit list.
    nextHitIndex[termIndexOfNextHit]++;

    // Append everything from the end of the last hit up to this one.
    html.push(text.substring(pos, posOfNextHit));

    // Append the annotated term, using the original (not lowercased) text.
    var hitEnd = posOfNextHit + terms[termIndexOfNextHit][0].length;
    var termHtml =
        goog.html.SafeHtml.htmlEscape(text.substring(posOfNextHit, hitEnd));
    html.push(annotateFn(termIndexOfNextHit, termHtml));

    pos = hitEnd;
  }

  // Append everything after the last hit.
  html.push(text.substring(pos));
  return goog.html.SafeHtml.concat(html);
};
340
341
342
/**
 * Converts terms to lowercase. Returns new tuples in a new array; the input
 * is not modified.
 *
 * @param {Array<Array<?>>} terms An array of
 *     [{string} searchTerm, {boolean} matchWholeWordOnly] tuples.
 * @return {!Array<Array<?>>} An array of
 *     [{string} searchTerm, {boolean} matchWholeWordOnly] tuples, with each
 *     searchTerm lowercased and matchWholeWordOnly carried over unchanged.
 * @private
 */
goog.dom.annotate.lowercaseTerms_ = function(terms) {
  var lowercaseTerms = [];
  for (var i = 0; i < terms.length; ++i) {
    var term = terms[i];
    lowercaseTerms[i] = [term[0].toLowerCase(), term[1]];
  }
  return lowercaseTerms;
};
359
360