GitHub Repository: rapid7/metasploit-framework
Path: blob/master/modules/auxiliary/crawler/msfcrawler.rb
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres et [at] metasploit.com 2010
#
#

# Require openssl before rubygems (needed on Mac OS)
require 'English'
require 'openssl'
require 'pathname'
require 'uri'
require 'rinda/rinda'
require 'rinda/tuplespace'

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(
      update_info(
        info,
        'Name' => 'Metasploit Web Crawler',
        'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
        'Author' => 'et',
        'License' => MSF_LICENSE,
        'Notes' => {
          'Stability' => [CRASH_SAFE],
          'SideEffects' => [IOC_IN_LOGS],
          'Reliability' => []
        }
      )
    )

    register_options([
      OptString.new('PATH', [true, 'Starting crawling path', '/']),
      OptInt.new('RPORT', [true, 'Remote port', 80])
    ])

    register_advanced_options([
      OptPath.new(
        'CrawlerModulesDir',
        [
          true,
          'The base directory containing the crawler modules',
          File.join(Msf::Config.data_directory, 'msfcrawler')
        ]
      ),
      OptBool.new('EnableUl', [false, 'Enable maximum number of requests per URI', true]),
      OptBool.new('StoreDB', [false, 'Store requests in database', false]),
      OptInt.new('MaxUriLimit', [true, 'Maximum number of requests per URI', 10]),
      OptInt.new('SleepTime', [true, 'Sleep time (secs) between requests', 0]),
      OptInt.new('TakeTimeout', [true, 'Timeout (secs) waiting for new requests before ending the crawl', 15]),
      OptInt.new('ReadTimeout', [true, 'Read timeout (-1 forever)', 3]),
      OptInt.new('ThreadNum', [true, 'Number of threads', 20]),
      OptString.new('DontCrawl', [true, 'Filetypes not to crawl', '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end

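  # Illustrative msfconsole usage (the target host is a placeholder):
  #   use auxiliary/crawler/msfcrawler
  #   set RHOSTS www.example.com
  #   set PATH /
  #   run
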
  # cinipath (the starting path) is exposed so BaseParser#targetinipath works
  attr_accessor :ctarget, :cport, :cssl, :cinipath

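  # Main crawl loop: seed the Rinda tuplespace with the starting request, load
  # the parser modules from CrawlerModulesDir, then repeatedly take pending
  # requests, skip URIs already visited or over MaxUriLimit, and fetch each one
  # with sendreq. The loop ends once take() exceeds TakeTimeout and raises
  # Rinda::RequestExpiredError.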
  def run
    # i = 0
    # a = []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    self.cinipath = (inipath.nil? || inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost' => ctarget,
      'rport' => cport,
      'uri' => cinipath,
      'method' => 'GET',
      'ctype' => 'text/plain',
      'ssl' => cssl,
      'query' => nil,
      'data' => nil
    }

    @not_viewed_queue = ::Rinda::TupleSpace.new
    @viewed_queue = Hash.new
    @uri_limits = Hash.new
    @current_site = ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status('OK')

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per URI)")
    end

    print_status("Target: #{ctarget} Port: #{cport} Path: #{cinipath} SSL: #{cssl}")

    begin
      reqfilter = reqtemplate(ctarget, cport, cssl)

      # i = 0

      loop do
        ####
        # if i <= datastore['ThreadNum']
        # a.push(Thread.new {
        ####

        hashreq = @not_viewed_queue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @uri_limits.include?(hashreq['uri']) && datastore['EnableUl']
          # puts "Request #{@uri_limits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @uri_limits[hashreq['uri']] >= datastore['MaxUriLimit']
            # puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @uri_limits[hashreq['uri']] = 0
        end

        if !@viewed_queue.include?(hashsig(hashreq)) && !ul

          @viewed_queue[hashsig(hashreq)] = Time.now
          @uri_limits[hashreq['uri']] += 1

          if !File.extname(hashreq['uri']).empty? && datastore['DontCrawl'].include?(File.extname(hashreq['uri']))
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            # if self.useproxy
            # prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            # end

            c = Rex::Proto::Http::Client.new(
              ctarget,
              cport.to_i,
              {},
              cssl,
              nil,
              prx
            )

            sendreq(c, hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited."
        end

        ####
        # })

        # i += 1
        # else
        # sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        # i = 0
        # end
        ####
      end
    rescue ::Rinda::RequestExpiredError
      print_status('END.')
      return
    end

    print_status('Finished crawling')
  end

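  # Tuple template for this target; the nil fields act as wildcards, so the
  # take() call in run() matches any queued request for the same host/port/ssl.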
  def reqtemplate(target, port, ssl)
    hreq = {
      'rhost' => target,
      'rport' => port,
      'uri' => nil,
      'method' => nil,
      'ctype' => nil,
      'ssl' => ssl,
      'query' => nil,
      'data' => nil
    }

    return hreq
  end

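  # Persist a crawled page to the database via report_web_page; only called
  # from sendreq when StoreDB is enabled.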
  def storedb(hashreq, response)
    # Added host/port/ssl for report_web_page support
    info = {
      web_site: @current_site,
      path: hashreq['uri'],
      query: hashreq['query'],
      host: hashreq['rhost'],
      port: hashreq['rport'],
      ssl: !hashreq['ssl'].nil?,
      data: hashreq['data'],
      code: response.code,
      body: response.body,
      headers: response.headers
    }

    # if response['content-type']
    # info[:ctype] = response['content-type'][0]
    # end

    # if response['set-cookie']
    # info[:cookie] = page.headers['set-cookie'].join("\n")
    # end

    # if page.headers['authorization']
    # info[:auth] = page.headers['authorization'].join("\n")
    # end

    # if page.headers['location']
    # info[:location] = page.headers['location'][0]
    # end

    # if page.headers['last-modified']
    # info[:mtime] = page.headers['last-modified'][0]
    # end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <[email protected]>
  #

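  # Each .rb file in the directory is evaluated into an anonymous Module; every
  # constant matching Crawler* is instantiated and registered in
  # @crawlermodules, keyed by its lowercased name.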
  def load_modules(crawlermodulesdir)
    base = crawlermodulesdir
    if !File.directory?(base)
      raise 'The Crawler modules parameter is set to an invalid directory'
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = ::Regexp.last_match(1)
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue StandardError => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end

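  # Send a single request and handle the response: print the status code,
  # optionally store the page via storedb, hand 200 responses to every loaded
  # crawler module's parse(), and queue the Location target of 301-303 redirects.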
  def sendreq(nclient, reqopts = {})
    r = nclient.request_raw(reqopts)
    resp = nclient.send_recv(r, datastore['ReadTimeout'])

    unless resp
      print_status('No response')
      sleep(datastore['SleepTime'])
      return
    end

    #
    # Quickfix for bug packet.rb to_s line: 190
    # In case modules or the crawler call to_s on de-chunked responses
    #
    resp.transfer_chunked = false

    if datastore['StoreDB']
      storedb(reqopts, resp)
    end

    print_status ">> [#{resp.code}] #{reqopts['uri']}"

    if reqopts['query'] && !reqopts['query'].empty?
      print_status ">>> [Q] #{reqopts['query']}"
    end

    if reqopts['data']
      print_status ">>> [D] #{reqopts['data']}"
    end

    case resp.code
    when 200
      @crawlermodules.each_key do |k|
        @crawlermodules[k].parse(reqopts, resp)
      end
    when 301..303
      print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
      vprint_status urltohash('GET', resp['Location'], reqopts['uri'], nil)
      insertnewpath(urltohash('GET', resp['Location'], reqopts['uri'], nil))
    when 404
      print_status "[404] Invalid link #{reqopts['uri']}"
    else
      print_status "Unhandled #{resp.code}"
    end

    sleep(datastore['SleepTime'])
  rescue StandardError => e
    print_status("Error: #{e.message}")
    vprint_status("#{$ERROR_INFO}: #{$ERROR_INFO.backtrace}")
  end

  #
  # Add a new path (URI) to the not-viewed queue
  #

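  # The URI is canonicalized first; only requests for the configured
  # RHOSTS/RPORT that are neither visited nor already queued are written
  # to the tuplespace.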
  def insertnewpath(hashreq)
    hashreq['uri'] = canonicalize(hashreq['uri'])

    if (hashreq['rhost'] == datastore['RHOSTS']) && (hashreq['rport'] == datastore['RPORT'])
      if !@viewed_queue.include?(hashsig(hashreq))
        if !@not_viewed_queue.read_all(hashreq).empty?
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @not_viewed_queue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@viewed_queue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new request hash for a local path
  #

  def urltohash(method, url, basepath, dat)
    # method: HTTP method
    # url: uri?[query]
    # basepath: base path/uri used to resolve an absolute path when url is relative
    # dat: body data, nil if GET and query = uri.query

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == 'https') ? true : false

    uritargethost = uri.host
    if uri.host.nil? || uri.host.empty?
      uritargethost = ctarget
      uritargetssl = cssl
    end

    uritargetport = uri.port
    if uri.port.nil?
      uritargetport = cport
    end

    uritargetpath = uri.path
    if uri.path.nil? || uri.path.empty?
      uritargetpath = '/'
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1, 1] == '/'
        newp = oldp + newp
      elsif !newp.to_s.empty?
        newp = File.join(oldp.dirname, newp)
      end
    end

    hashreq = {
      'rhost' => uritargethost,
      'rport' => uritargetport,
      'uri' => newp.to_s,
      'method' => method,
      'ctype' => 'text/plain',
      'ssl' => uritargetssl,
      'query' => uri.query,
      'data' => nil
    }

    if (method == 'GET') && !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end

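  # Normalize the URI and resolve '.' and '..' path segments so that equivalent
  # paths produce the same signature in the visited queue.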
  def canonicalize(uri)
    uri = URI(uri) unless uri.is_a?(URI)
    uri.normalize!

    path = uri.path.dup
    segments = path.split('/')
    resolved = []

    segments.each do |segment|
      next if segment == '.' || segment.empty?

      if segment == '..'
        resolved.pop unless resolved.empty?
      else
        resolved << segment
      end
    end

    uri.path = '/' + resolved.join('/')
    uri.to_s
  end

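  # Deduplication key for a request: the string form of the request hash is
  # used as the key into @viewed_queue.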
  def hashsig(hashreq)
    hashreq.to_s
  end
end

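# Base class for the crawler modules loaded from CrawlerModulesDir.
# load_modules instantiates every class named Crawler* found there; subclasses
# override parse() and use the helpers below to queue newly discovered paths.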
class BaseParser
  attr_accessor :crawler

  def initialize(crawler)
    self.crawler = crawler
  end

  def parse(_request, _result)
    nil
  end

  #
  # Add a new path (URI) to the crawler's not-viewed queue
  #
  def insertnewpath(hashreq)
    crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    crawler.hashsig(hashreq)
  end

  def urltohash(method, url, basepath, dat)
    crawler.urltohash(method, url, basepath, dat)
  end

  def targetssl
    crawler.cssl
  end

  def targetport
    crawler.cport
  end

  def targethost
    crawler.ctarget
  end

  def targetinipath
    crawler.cinipath
  end
end