Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
rapid7
GitHub Repository: rapid7/metasploit-framework
Path: blob/master/lib/anemone/page_store.rb
24269 views
1
require 'forwardable'
2
3
module Anemone
  #
  # A keyed collection of Page objects. Keys are always stored as Strings
  # (every key is passed through +to_s+), so a URI and its String form refer
  # to the same entry. The backing storage defaults to a Hash; any object
  # passed in must respond to the methods used below (+[]+, +[]=+, +delete+,
  # +has_key?+, +keys+, +size+, +each+, +merge!+).
  #
  class PageStore
    extend Forwardable

    # +values+ is deliberately not delegated: it is implemented below in terms
    # of +each+, and delegating it as well only produced a
    # "method redefined" warning.
    def_delegators :@storage, :keys, :size, :each

    # storage - the object that holds the pages (a Hash by default).
    def initialize(storage = {})
      @storage = storage
    end

    # We typically index the hash with a URI,
    # but convert it to a String for easier retrieval
    def [](index)
      @storage[index.to_s]
    end

    # Stores +other+ under the String form of +index+.
    def []=(index, other)
      @storage[index.to_s] = other
    end

    # Removes the entry stored under the String form of +key+.
    def delete(key)
      @storage.delete key.to_s
    end

    # Is there an entry stored under the String form of +key+?
    def has_key?(key)
      @storage.has_key? key.to_s
    end

    #
    # Yields every stored value. Without a block an Enumerator is returned
    # (previously this raised LocalJumpError on a non-empty store).
    #
    def each_value
      return enum_for(:each_value) unless block_given?

      each { |_key, value| yield value }
    end

    # Returns an Array of all stored values, built on top of +each+.
    def values
      result = []
      each { |_key, value| result << value }
      result
    end

    # Stores a fresh Page (built from +key+ alone) under +key+.
    def touch_key(key)
      self[key] = Page.new(key)
    end

    # Stores a fresh Page for each of +keys+ in a single merge! call.
    def touch_keys(keys)
      fresh = {}
      keys.each { |key| fresh[key.to_s] = Page.new(key) }
      @storage.merge! fresh
    end

    # Does this PageStore contain the specified URL?
    # HTTP and HTTPS versions of a URL are considered to be the same page.
    # +url+ must respond to +scheme+ (e.g. a URI).
    def has_page?(url)
      schemes = %w(http https)
      return has_key?(url) unless schemes.include?(url.scheme)

      # Work on a copy so the caller's URI keeps its original scheme.
      candidate = url.dup
      schemes.any? do |scheme|
        candidate.scheme = scheme
        has_key?(candidate)
      end
    end

    #
    # Use a breadth-first search to calculate the single-source
    # shortest paths from *root* to all pages in the PageStore
    #
    # root - a URI or a String (Strings are converted with URI()).
    #
    # Sets +depth+ and +visited+ on every reachable, fetched page and
    # returns self. Raises RuntimeError ("Root node not found") when the
    # root is not in the store.
    #
    def shortest_paths!(root)
      root = URI(root) if root.is_a?(String)
      raise "Root node not found" unless has_key?(root)

      root_page = self[root]
      root_page.depth = 0
      root_page.visited = true
      # Pages are written back after every change; presumably so storage
      # backends that hand out copies persist the update.
      self[root] = root_page

      queue = [root]
      until queue.empty?
        page = self[queue.shift]
        page.links.each do |u|
          # Follow a redirect chain starting at u. Every hop is marked with
          # the same depth; only the non-redirect page is queued.
          loop do
            link = self[u]
            break if link.nil? || !link.fetched? || link.visited

            queue << u unless link.redirect?
            link.visited = true
            link.depth = page.depth + 1
            self[u] = link

            break unless link.redirect?
            u = link.redirect_to
          end
        end
      end

      self
    end

    #
    # Removes all Pages from storage where redirect? is true
    #
    def uniq!
      # Collect first, then delete, so the storage is never modified while
      # it is being iterated.
      redirected = []
      each_value { |page| redirected << page.url if page.redirect? }
      redirected.each { |url| delete url }
      self
    end

    #
    # If given a single URL (as a String or URI), returns an Array of Pages which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [Page, Page...]) of Pages linking to those URLs
    #
    # URLs that cannot be parsed are skipped (a single unparseable URL yields
    # an empty Array), duplicates are collapsed, and the caller's Array is
    # left untouched.
    #
    def pages_linking_to(urls)
      single = !urls.is_a?(Array)
      targets = (single ? [urls] : urls).map { |url| to_uri(url) }.compact.uniq

      links = {}
      targets.each { |url| links[url] = [] }
      each_value do |page|
        targets.each { |url| links[url] << page if page.links.include?(url) }
      end

      single ? (links[targets.first] || []) : links
    end

    #
    # If given a single URL (as a String or URI), returns an Array of URLs which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [URI, URI...]) of URLs linking to those URLs
    #
    def urls_linking_to(urls)
      linking = pages_linking_to(urls)
      # A single URL yields an Array of Pages, an Array of URLs yields a Hash.
      return linking.map { |page| page.url } unless urls.is_a?(Array)

      result = {}
      linking.each { |url, pages| result[url] = pages.map { |page| page.url } }
      result
    end

    private

    # Returns +url+ as a URI, or nil when it cannot be converted.
    def to_uri(url)
      return url if url.is_a?(URI)

      URI(url)
    rescue URI::Error, ArgumentError
      nil
    end
  end
end
161
162