Path: blob/master/modules/auxiliary/crawler/msfcrawler.rb
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres et [at] metasploit.com 2010
#
#

# openssl before rubygems mac os
require 'English'
require 'openssl'
require 'pathname'
require 'uri'
require 'rinda/rinda'
require 'rinda/tuplespace'

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(
      update_info(
        info,
        'Name' => 'Metasploit Web Crawler',
        'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
        'Author' => 'et',
        'License' => MSF_LICENSE,
        'Notes' => {
          'Stability' => [CRASH_SAFE],
          'SideEffects' => [IOC_IN_LOGS],
          'Reliability' => []
        }
      )
    )

    register_options([
      OptString.new('PATH', [true, 'Starting crawling path', '/']),
      OptInt.new('RPORT', [true, 'Remote port', 80 ])
    ])

    register_advanced_options([
      OptPath.new(
        'CrawlerModulesDir',
        [
          true,
          'The base directory containing the crawler modules',
          File.join(Msf::Config.data_directory, 'msfcrawler')
        ]
      ),
      OptBool.new('EnableUl', [ false, 'Enable maximum number of requests per URI', true ]),
      OptBool.new('StoreDB', [ false, 'Store requests in database', false ]),
      OptInt.new('MaxUriLimit', [ true, 'Maximum number of requests per URI', 10]),
      OptInt.new('SleepTime', [ true, 'Sleep time (secs) between requests', 0]),
      OptInt.new('TakeTimeout', [ true, 'Timeout for loop ending', 15]),
      OptInt.new('ReadTimeout', [ true, 'Read timeout (-1 forever)', 3]),
      OptInt.new('ThreadNum', [ true, 'Number of threads', 20]),
      OptString.new('DontCrawl', [true, 'Filetypes not to crawl', '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end

  attr_accessor :ctarget, :cport, :cssl

  def run
    # i = 0
    # a = []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    cinipath = (inipath.nil? || inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost' => ctarget,
      'rport' => cport,
      'uri' => cinipath,
      'method' => 'GET',
      'ctype' => 'text/plain',
      'ssl' => cssl,
      'query' => nil,
      'data' => nil
    }

    @not_viewed_queue = ::Rinda::TupleSpace.new
    @viewed_queue = Hash.new
    @uri_limits = Hash.new
    @current_site = ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status('OK')

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")
    end

    print_status("Target: #{ctarget} Port: #{cport} Path: #{cinipath} SSL: #{cssl}")

    begin
      reqfilter = reqtemplate(ctarget, cport, cssl)

      # i = 0

      loop do
        ####
        # if i <= datastore['ThreadNum']
        #   a.push(Thread.new {
        ####

        hashreq = @not_viewed_queue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @uri_limits.include?(hashreq['uri']) && datastore['EnableUl']
          # puts "Request #{@uri_limits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @uri_limits[hashreq['uri']] >= datastore['MaxUriLimit']
            # puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @uri_limits[hashreq['uri']] = 0
        end

        if !@viewed_queue.include?(hashsig(hashreq)) && !ul

          @viewed_queue[hashsig(hashreq)] = Time.now
          @uri_limits[hashreq['uri']] += 1

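          # Skip URIs whose file extension is on the DontCrawl list; anything
          # else is fetched below with a fresh Rex HTTP client.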
          if !File.extname(hashreq['uri']).empty? && datastore['DontCrawl'].include?(File.extname(hashreq['uri']))
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            # if self.useproxy
            #   prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            # end

            c = Rex::Proto::Http::Client.new(
              ctarget,
              cport.to_i,
              {},
              cssl,
              nil,
              prx
            )

            sendreq(c, hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited. "
        end

        ####
        # })

        # i += 1
        # else
        #   sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        #   i = 0
        # end
        ####
      end
    rescue ::Rinda::RequestExpiredError
      print_status('END.')
      return
    end

    print_status('Finished crawling')
  end

  def reqtemplate(target, port, ssl)
    hreq = {
      'rhost' => target,
      'rport' => port,
      'uri' => nil,
      'method' => nil,
      'ctype' => nil,
      'ssl' => ssl,
      'query' => nil,
      'data' => nil
    }

    return hreq
  end

  def storedb(hashreq, response)
    # Added host/port/ssl for report_web_page support
    info = {
      web_site: @current_site,
      path: hashreq['uri'],
      query: hashreq['query'],
      host: hashreq['rhost'],
      port: hashreq['rport'],
      ssl: !hashreq['ssl'].nil?,
      data: hashreq['data'],
      code: response.code,
      body: response.body,
      headers: response.headers
    }

    # if response['content-type']
    #   info[:ctype] = response['content-type'][0]
    # end

    # if response['set-cookie']
    #   info[:cookie] = page.headers['set-cookie'].join("\n")
    # end

    # if page.headers['authorization']
    #   info[:auth] = page.headers['authorization'].join("\n")
    # end

    # if page.headers['location']
    #   info[:location] = page.headers['location'][0]
    # end

    # if page.headers['last-modified']
    #   info[:mtime] = page.headers['last-modified'][0]
    # end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <[email protected]>
  #

  def load_modules(crawlermodulesdir)
    base = crawlermodulesdir
    if !File.directory?(base)
      raise 'The Crawler modules parameter is set to an invalid directory'
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = ::Regexp.last_match(1)
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue StandardError => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end

  def sendreq(nclient, reqopts = {})
    r = nclient.request_raw(reqopts)
    resp = nclient.send_recv(r, datastore['ReadTimeout'])

    unless resp
      print_status('No response')
      sleep(datastore['SleepTime'])
      return
    end

    #
    # Quickfix for bug packet.rb to_s line: 190
    # In case modules or crawler calls to_s on de-chunked responses
    #
    resp.transfer_chunked = false

    if datastore['StoreDB']
      storedb(reqopts, resp)
    end

    print_status ">> [#{resp.code}] #{reqopts['uri']}"

    if reqopts['query'] && !reqopts['query'].empty?
      print_status ">>> [Q] #{reqopts['query']}"
    end

    if reqopts['data']
      print_status ">>> [D] #{reqopts['data']}"
    end

    case resp.code
    when 200
      @crawlermodules.each_key do |k|
        @crawlermodules[k].parse(reqopts, resp)
      end
    when 301..303
      print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
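      # Follow the redirect: convert the Location header into a new request
      # hash relative to the current URI and queue it for crawling.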
      vprint_status urltohash('GET', resp['Location'], reqopts['uri'], nil)
      insertnewpath(urltohash('GET', resp['Location'], reqopts['uri'], nil))
    when 404
      print_status "[404] Invalid link #{reqopts['uri']}"
    else
      print_status "Unhandled #{resp.code}"
    end

    sleep(datastore['SleepTime'])
  rescue StandardError => e
    print_status("Error: #{e.message}")
    vprint_status("#{$ERROR_INFO}: #{$ERROR_INFO.backtrace}")
  end

  #
  # Add new path (uri) to test non-viewed queue
  #

  def insertnewpath(hashreq)
    hashreq['uri'] = canonicalize(hashreq['uri'])

    if (hashreq['rhost'] == datastore['RHOSTS']) && (hashreq['rport'] == datastore['RPORT'])
      if !@viewed_queue.include?(hashsig(hashreq))
        if !@not_viewed_queue.read_all(hashreq).empty?
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @not_viewed_queue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@viewed_queue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new hash for a local path
  #

  def urltohash(method, url, basepath, dat)
    # method: HTTP method
    # url: uri?[query]
    # basepath: base path/uri to determine absolute path when relative
    # data: body data, nil if GET and query = uri.query

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == 'https') ? true : false

    uritargethost = uri.host
    if uri.host.nil? || uri.host.empty?
      uritargethost = ctarget
      uritargetssl = cssl
    end

    uritargetport = uri.port
    if uri.port.nil?
      uritargetport = cport
    end

    uritargetpath = uri.path
    if uri.path.nil? || uri.path.empty?
      uritargetpath = '/'
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1, 1] == '/'
        newp = oldp + newp
      elsif !newp.to_s.empty?
        newp = File.join(oldp.dirname, newp)
      end
    end

    hashreq = {
      'rhost' => uritargethost,
      'rport' => uritargetport,
      'uri' => newp.to_s,
      'method' => method,
      'ctype' => 'text/plain',
      'ssl' => uritargetssl,
      'query' => uri.query,
      'data' => nil
    }

    if (method == 'GET') && !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end

  def canonicalize(uri)
    uri = URI(uri) unless uri.is_a?(URI)
    uri.normalize!

    path = uri.path.dup
    segments = path.split('/')
    resolved = []

    segments.each do |segment|
      next if segment == '.' || segment.empty?

      if segment == '..'
        resolved.pop unless resolved.empty?
      else
        resolved << segment
      end
    end

    uri.path = '/' + resolved.join('/')
    uri.to_s
  end

  def hashsig(hashreq)
    hashreq.to_s
  end
end

class BaseParser
  attr_accessor :crawler

  def initialize(crawler)
    self.crawler = crawler
  end

  def parse(_request, _result)
    nil
  end

  #
  # Add new path (uri) to test hash queue
  #
  def insertnewpath(hashreq)
    crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    crawler.hashsig(hashreq)
  end

  def urltohash(method, url, basepath, dat)
    crawler.urltohash(method, url, basepath, dat)
  end

  def targetssl
    crawler.cssl
  end

  def targetport
    crawler.cport
  end

  def targethost
    crawler.ctarget
  end

  def targetinipath
    crawler.cinipath
  end
end
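
# ---------------------------------------------------------------------------
# Illustrative sketch only; not shipped with the module above. Crawler parser
# modules normally live as separate .rb files under CrawlerModulesDir
# (data/msfcrawler by default): load_modules picks up any class whose name
# starts with "Crawler", instantiates it with the crawler, and sendreq hands
# every 200 response to its #parse method. The class name CrawlerExampleHref
# and the href regex below are assumptions made for this example; to try it,
# save the class to its own file under CrawlerModulesDir and re-run the module.
# ---------------------------------------------------------------------------
class CrawlerExampleHref < BaseParser
  def parse(request, result)
    # Only parse HTML responses.
    return unless result['Content-Type'].to_s.downcase.include?('text/html')

    # Queue every href found in the body, resolved against the current URI.
    result.body.to_s.scan(/href\s*=\s*["']([^"'#]+)["']/i).flatten.each do |link|
      insertnewpath(urltohash('GET', link, request['uri'], nil))
    end
  end
end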