GitHub Repository: rapid7/metasploit-framework
Path: blob/master/modules/auxiliary/crawler/msfcrawler.rb
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres et [at] metasploit.com 2010
#
#

# Load openssl before rubygems to avoid issues on Mac OS
require 'openssl'
require 'pathname'
require 'rinda/tuplespace' # provides Rinda::TupleSpace, used as the crawl queue below
require 'uri'

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Metasploit Web Crawler',
      'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
      'Author'      => 'et',
      'License'     => MSF_LICENSE
    ))

    register_options([
      OptString.new('PATH', [true, "Starting crawling path", '/']),
      OptInt.new('RPORT', [true, "Remote port", 80 ])
    ])

    register_advanced_options([
      OptPath.new('CrawlerModulesDir', [true, 'The base directory containing the crawler modules',
        File.join(Msf::Config.data_directory, "msfcrawler")
      ]),
      OptBool.new('EnableUl', [ false, "Enforce the maximum number of requests per URI", true ]),
      OptBool.new('StoreDB', [ false, "Store requests in the database", false ]),
      OptInt.new('MaxUriLimit', [ true, "Maximum number of requests per URI", 10]),
      OptInt.new('SleepTime', [ true, "Sleep time (secs) between requests", 0]),
      OptInt.new('TakeTimeout', [ true, "Timeout (secs) waiting for new requests before ending the crawl", 15]),
      OptInt.new('ReadTimeout', [ true, "Read timeout (-1 forever)", 3]),
      OptInt.new('ThreadNum', [ true, "Number of threads", 20]),
      OptString.new('DontCrawl', [true, "File types not to crawl", '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end
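
  # Illustrative usage sketch (not part of the original source). PATH and
  # RPORT are registered above; RHOSTS is provided by the Scanner mixin and
  # is read directly from the datastore in run() below:
  #
  #   use auxiliary/crawler/msfcrawler
  #   set RHOSTS 192.0.2.10
  #   set RPORT 80
  #   set PATH /
  #   run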

  attr_accessor :ctarget, :cport, :cssl, :cinipath

  def run
    i, a = 0, []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    # Expose the initial path through an accessor so BaseParser#targetinipath
    # (defined at the bottom of this file) can read it
    self.cinipath = (inipath.nil? or inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost' => ctarget,
      'rport' => cport,
      'uri' => cinipath,
      'method' => 'GET',
      'ctype' => 'text/plain',
      'ssl' => cssl,
      'query' => nil,
      'data' => nil
    }

    @NotViewedQueue = Rinda::TupleSpace.new
    @ViewedQueue = Hash.new
    @UriLimits = Hash.new
    @current_site = self.ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status("OK")

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per URI)")
    end

    print_status("Target: #{self.ctarget} Port: #{self.cport} Path: #{cinipath} SSL: #{self.cssl}")

    begin
      reqfilter = reqtemplate(self.ctarget,self.cport,self.cssl)

      i = 0

      loop do

        ####
        #if i <= datastore['ThreadNum']
        # a.push(Thread.new {
        ####

        hashreq = @NotViewedQueue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @UriLimits.include?(hashreq['uri']) and datastore['EnableUl']
          #puts "Request #{@UriLimits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @UriLimits[hashreq['uri']] >= datastore['MaxUriLimit']
            #puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @UriLimits[hashreq['uri']] = 0
        end

        if !@ViewedQueue.include?(hashsig(hashreq)) and !ul

          @ViewedQueue[hashsig(hashreq)] = Time.now
          @UriLimits[hashreq['uri']] += 1

          if !File.extname(hashreq['uri']).empty? and datastore['DontCrawl'].include? File.extname(hashreq['uri'])
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            #if self.useproxy
            # prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            #end

            c = Rex::Proto::Http::Client.new(
              self.ctarget,
              self.cport.to_i,
              {},
              self.cssl,
              nil,
              prx
            )

            sendreq(c,hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited. "
        end

        ####
        #})

        #i += 1
        #else
        # sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        # i = 0
        #end
        ####

      end
    rescue Rinda::RequestExpiredError
      print_status("END.")
      return
    end

    print_status("Finished crawling")
  end
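
  # Note on the crawl loop above (explanatory comment, not in the original
  # source): @NotViewedQueue is a Rinda::TupleSpace used as a simple work
  # queue. insertnewpath() writes request hashes into it, the loop take()s
  # them back out filtered by reqtemplate(), and when no matching request
  # arrives within TakeTimeout seconds take() raises
  # Rinda::RequestExpiredError, which is what ends the crawl.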

  def reqtemplate(target,port,ssl)
    hreq = {
      'rhost' => target,
      'rport' => port,
      'uri' => nil,
      'method' => nil,
      'ctype' => nil,
      'ssl' => ssl,
      'query' => nil,
      'data' => nil
    }

    return hreq
  end
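
  # Explanatory note (not in the original source): the hash returned by
  # reqtemplate() is used as the Rinda take() template in run(). In Rinda
  # matching, nil values act as wildcards, so the template matches any queued
  # request for the current target/port/ssl regardless of its uri, method,
  # query or data fields. A hypothetical example:
  #
  #   reqfilter = reqtemplate('192.0.2.10', 80, false)
  #   # matches { 'rhost' => '192.0.2.10', 'rport' => 80, 'uri' => '/x', ... }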

  def storedb(hashreq,response,dbpath)

    # Added host/port/ssl for report_web_page support
    info = {
      :web_site => @current_site,
      :path => hashreq['uri'],
      :query => hashreq['query'],
      :host => hashreq['rhost'],
      :port => hashreq['rport'],
      :ssl => !hashreq['ssl'].nil?,
      :data => hashreq['data'],
      :code => response.code,
      :body => response.body,
      :headers => response.headers
    }

    #if response['content-type']
    # info[:ctype] = response['content-type'][0]
    #end

    #if response['set-cookie']
    # info[:cookie] = page.headers['set-cookie'].join("\n")
    #end

    #if page.headers['authorization']
    # info[:auth] = page.headers['authorization'].join("\n")
    #end

    #if page.headers['location']
    # info[:location] = page.headers['location'][0]
    #end

    #if page.headers['last-modified']
    # info[:mtime] = page.headers['last-modified'][0]
    #end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <[email protected]>
  #

  def load_modules(crawlermodulesdir)

    base = crawlermodulesdir
    if (not File.directory?(base))
      raise RuntimeError,"The Crawler modules parameter is set to an invalid directory"
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = $1
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue ::Exception => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end
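
  # Minimal sketch of a crawler module as load_modules() above expects to find
  # it under CrawlerModulesDir (illustrative only, not part of this file; the
  # class name "CrawlerExample" and the href regex are made up). The file must
  # define a constant whose name starts with "Crawler"; it is instantiated
  # with this module as its crawler, and its parse() is called for every 200
  # response. BaseParser is defined at the bottom of this file.
  #
  #   class CrawlerExample < BaseParser
  #     def parse(request, result)
  #       # Extract href targets and queue them for crawling
  #       result.body.to_s.scan(/href\s*=\s*["']([^"']+)["']/i).flatten.each do |link|
  #         insertnewpath(urltohash('GET', link, request['uri'], nil))
  #       end
  #     end
  #   end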

  def sendreq(nclient,reqopts={})

    begin
      r = nclient.request_raw(reqopts)
      resp = nclient.send_recv(r, datastore['ReadTimeout'])

      if resp
        #
        # Quickfix for bug packet.rb to_s line: 190
        # In case modules or crawler calls to_s on de-chunked responses
        #
        resp.transfer_chunked = false

        if datastore['StoreDB']
          storedb(reqopts,resp,$dbpathmsf)
        end

        print_status ">> [#{resp.code}] #{reqopts['uri']}"

        if reqopts['query'] and !reqopts['query'].empty?
          print_status ">>> [Q] #{reqopts['query']}"
        end

        if reqopts['data']
          print_status ">>> [D] #{reqopts['data']}"
        end

        case resp.code
        when 200
          @crawlermodules.each_key do |k|
            @crawlermodules[k].parse(reqopts,resp)
          end
        when 301..303
          print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
          vprint_status urltohash('GET',resp['Location'],reqopts['uri'],nil)
          insertnewpath(urltohash('GET',resp['Location'],reqopts['uri'],nil))
        when 404
          print_status "[404] Invalid link #{reqopts['uri']}"
        else
          print_status "Unhandled #{resp.code}"
        end

      else
        print_status "No response"
      end
      sleep(datastore['SleepTime'])
    rescue
      print_status "ERROR"
      vprint_status "#{$!}: #{$!.backtrace}"
    end
  end

  #
  # Add a new path (uri) to the non-viewed queue
  #

  def insertnewpath(hashreq)

    hashreq['uri'] = canonicalize(hashreq['uri'])

    if hashreq['rhost'] == datastore['RHOSTS'] and hashreq['rport'] == datastore['RPORT']
      if !@ViewedQueue.include?(hashsig(hashreq))
        if @NotViewedQueue.read_all(hashreq).size > 0
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @NotViewedQueue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new request hash for a path on the current target
  #

  def urltohash(m,url,basepath,dat)

    # m: method
    # url: uri?[query]
    # basepath: base path/uri used to resolve the absolute path when url is relative
    # dat: body data; for a GET request with data it is stored as the query
    #      string instead (otherwise query = uri.query)

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == "https") ? true : false

    uritargethost = uri.host
    if (uri.host.nil? or uri.host.empty?)
      uritargethost = self.ctarget
      uritargetssl = self.cssl
    end

    uritargetport = uri.port
    if (uri.port.nil?)
      uritargetport = self.cport
    end

    uritargetpath = uri.path
    if (uri.path.nil? or uri.path.empty?)
      uritargetpath = "/"
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1,1] == '/'
        newp = oldp+newp
      else
        if !newp.to_s.empty?
          newp = File.join(oldp.dirname,newp)
        end
      end
    end

    hashreq = {
      'rhost' => uritargethost,
      'rport' => uritargetport,
      'uri' => newp.to_s,
      'method' => m,
      'ctype' => 'text/plain',
      'ssl' => uritargetssl,
      'query' => uri.query,
      'data' => nil
    }

    if m == 'GET' and !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end
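
  # Worked example (explanatory comment, not in the original source; the
  # paths are hypothetical): with ctarget/cport/cssl already set, a relative
  # link found on /dir/page.html resolves against the directory of the base
  # path:
  #
  #   urltohash('GET', 'next.html', '/dir/page.html', nil)
  #   # => { 'rhost' => ctarget, 'rport' => cport, 'ssl' => cssl,
  #   #      'uri' => '/dir/next.html', 'method' => 'GET', 'query' => nil, ... }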

  # Taken from http://www.ruby-forum.com/topic/140101 by Rob Biedenharn
  def canonicalize(uri)

    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
    u.normalize!
    newpath = u.path
    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    } do end
    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
    u.path = newpath
    # Ugly fix
    u.path = u.path.gsub("/../", "/")
    u.to_s
  end
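
  # Example of the normalization performed above (explanatory comment, not in
  # the original source; the path is hypothetical):
  #
  #   canonicalize('/a/b/../c/./d.html')  # => "/a/c/d.html"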

  def hashsig(hashreq)
    hashreq.to_s
  end
end


class BaseParser
  attr_accessor :crawler

  def initialize(c)
    self.crawler = c
  end

  def parse(request,result)
    nil
  end

  #
  # Add a new path (uri) to the crawler's non-viewed queue
  #
  def insertnewpath(hashreq)
    self.crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    self.crawler.hashsig(hashreq)
  end

  def urltohash(m,url,basepath,dat)
    self.crawler.urltohash(m,url,basepath,dat)
  end

  def targetssl
    self.crawler.cssl
  end

  def targetport
    self.crawler.cport
  end

  def targethost
    self.crawler.ctarget
  end

  def targetinipath
    self.crawler.cinipath
  end
end