Path: blob/master/modules/auxiliary/crawler/msfcrawler.rb
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres et [at] metasploit.com 2010
#
#

# openssl before rubygems mac os
require 'English'
require 'openssl'
require 'pathname'
require 'uri'
require 'rinda/rinda'
require 'rinda/tuplespace'

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(
      update_info(
        info,
        'Name' => 'Metasploit Web Crawler',
        'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
        'Author' => 'et',
        'License' => MSF_LICENSE,
        'Notes' => {
          'Stability' => [CRASH_SAFE],
          'SideEffects' => [IOC_IN_LOGS],
          'Reliability' => []
        }
      )
    )

    register_options([
      OptString.new('PATH', [true, 'Starting crawling path', '/']),
      OptInt.new('RPORT', [true, 'Remote port', 80 ])
    ])

    register_advanced_options([
      OptPath.new(
        'CrawlerModulesDir',
        [
          true,
          'The base directory containing the crawler modules',
          File.join(Msf::Config.data_directory, 'msfcrawler')
        ]
      ),
      OptBool.new('EnableUl', [ false, 'Enable maximum number of requests per URI', true ]),
      OptBool.new('StoreDB', [ false, 'Store requests in database', false ]),
      OptInt.new('MaxUriLimit', [ true, 'Maximum number of requests per URI', 10]),
      OptInt.new('SleepTime', [ true, 'Sleep time (secs) between requests', 0]),
      OptInt.new('TakeTimeout', [ true, 'Timeout for loop ending', 15]),
      OptInt.new('ReadTimeout', [ true, 'Read timeout (-1 forever)', 3]),
      OptInt.new('ThreadNum', [ true, 'Number of threads', 20]),
      OptString.new('DontCrawl', [true, 'Filetypes not to crawl', '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end

  attr_accessor :ctarget, :cport, :cssl

  def run
    # i = 0
    # a = []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    cinipath = (inipath.nil? || inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost' => ctarget,
      'rport' => cport,
      'uri' => cinipath,
      'method' => 'GET',
      'ctype' => 'text/plain',
      'ssl' => cssl,
      'query' => nil,
      'data' => nil
    }

    @not_viewed_queue = ::Rinda::TupleSpace.new
    @viewed_queue = Hash.new
    @uri_limits = Hash.new
    @current_site = ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status('OK')

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")
    end

    print_status("Target: #{ctarget} Port: #{cport} Path: #{cinipath} SSL: #{cssl}")

    begin
      reqfilter = reqtemplate(ctarget, cport, cssl)

      # i = 0

      loop do
        ####
        # if i <= datastore['ThreadNum']
        #   a.push(Thread.new {
        ####

        hashreq = @not_viewed_queue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @uri_limits.include?(hashreq['uri']) && datastore['EnableUl']
          # puts "Request #{@uri_limits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @uri_limits[hashreq['uri']] >= datastore['MaxUriLimit']
            # puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @uri_limits[hashreq['uri']] = 0
        end

        if !@viewed_queue.include?(hashsig(hashreq)) && !ul

          @viewed_queue[hashsig(hashreq)] = Time.now
          @uri_limits[hashreq['uri']] += 1

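          # Skip URIs whose file extension is on the DontCrawl list; anything
          # else is fetched below with a fresh Rex HTTP client.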
          if !File.extname(hashreq['uri']).empty? && datastore['DontCrawl'].include?(File.extname(hashreq['uri']))
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            # if self.useproxy
            #   prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            # end

            c = Rex::Proto::Http::Client.new(
              ctarget,
              cport.to_i,
              {},
              cssl,
              nil,
              prx
            )

            sendreq(c, hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited. "
        end

        ####
        # })

        # i += 1
        # else
        #   sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        #   i = 0
        # end
        ####
      end
    rescue ::Rinda::RequestExpiredError
      print_status('END.')
      return
    end

    print_status('Finished crawling')
  end

  def reqtemplate(target, port, ssl)
    hreq = {
      'rhost' => target,
      'rport' => port,
      'uri' => nil,
      'method' => nil,
      'ctype' => nil,
      'ssl' => ssl,
      'query' => nil,
      'data' => nil
    }

    return hreq
  end

  def storedb(hashreq, response)
    # Added host/port/ssl for report_web_page support
    info = {
      web_site: @current_site,
      path: hashreq['uri'],
      query: hashreq['query'],
      host: hashreq['rhost'],
      port: hashreq['rport'],
      ssl: !hashreq['ssl'].nil?,
      data: hashreq['data'],
      code: response.code,
      body: response.body,
      headers: response.headers
    }

    # if response['content-type']
    #   info[:ctype] = response['content-type'][0]
    # end

    # if response['set-cookie']
    #   info[:cookie] = page.headers['set-cookie'].join("\n")
    # end

    # if page.headers['authorization']
    #   info[:auth] = page.headers['authorization'].join("\n")
    # end

    # if page.headers['location']
    #   info[:location] = page.headers['location'][0]
    # end

    # if page.headers['last-modified']
    #   info[:mtime] = page.headers['last-modified'][0]
    # end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <[email protected]>
  #

  def load_modules(crawlermodulesdir)
    base = crawlermodulesdir
    if !File.directory?(base)
      raise 'The Crawler modules parameter is set to an invalid directory'
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = ::Regexp.last_match(1)
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue StandardError => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end

  def sendreq(nclient, reqopts = {})
    r = nclient.request_raw(reqopts)
    resp = nclient.send_recv(r, datastore['ReadTimeout'])

    unless resp
      print_status('No response')
      sleep(datastore['SleepTime'])
      return
    end

    #
    # Quickfix for bug packet.rb to_s line: 190
    # In case modules or crawler calls to_s on de-chunked responses
    #
    resp.transfer_chunked = false

    if datastore['StoreDB']
      storedb(reqopts, resp)
    end

    print_status ">> [#{resp.code}] #{reqopts['uri']}"

    if reqopts['query'] && !reqopts['query'].empty?
      print_status ">>> [Q] #{reqopts['query']}"
    end

    if reqopts['data']
      print_status ">>> [D] #{reqopts['data']}"
    end

    case resp.code
    when 200
      @crawlermodules.each_key do |k|
        @crawlermodules[k].parse(reqopts, resp)
      end
    when 301..303
      print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
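      # Follow the redirect: convert the Location header into a new request
      # hash relative to the current URI and queue it for crawling.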
      vprint_status urltohash('GET', resp['Location'], reqopts['uri'], nil)
      insertnewpath(urltohash('GET', resp['Location'], reqopts['uri'], nil))
    when 404
      print_status "[404] Invalid link #{reqopts['uri']}"
    else
      print_status "Unhandled #{resp.code}"
    end

    sleep(datastore['SleepTime'])
  rescue StandardError => e
    print_status("Error: #{e.message}")
    vprint_status("#{$ERROR_INFO}: #{$ERROR_INFO.backtrace}")
  end

  #
  # Add new path (uri) to test non-viewed queue
  #

  def insertnewpath(hashreq)
    hashreq['uri'] = canonicalize(hashreq['uri'])

    if (hashreq['rhost'] == datastore['RHOSTS']) && (hashreq['rport'] == datastore['RPORT'])
      if !@viewed_queue.include?(hashsig(hashreq))
        if !@not_viewed_queue.read_all(hashreq).empty?
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @not_viewed_queue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@viewed_queue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new hash for a local path
  #

  def urltohash(method, url, basepath, dat)
    # method: HTTP method
    # url: uri?[query]
    # basepath: base path/uri to determine absolute path when relative
    # data: body data, nil if GET and query = uri.query

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == 'https') ? true : false

    uritargethost = uri.host
    if uri.host.nil? || uri.host.empty?
      uritargethost = ctarget
      uritargetssl = cssl
    end

    uritargetport = uri.port
    if uri.port.nil?
      uritargetport = cport
    end

    uritargetpath = uri.path
    if uri.path.nil? || uri.path.empty?
      uritargetpath = '/'
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1, 1] == '/'
        newp = oldp + newp
      elsif !newp.to_s.empty?
        newp = File.join(oldp.dirname, newp)
      end
    end

    hashreq = {
      'rhost' => uritargethost,
      'rport' => uritargetport,
      'uri' => newp.to_s,
      'method' => method,
      'ctype' => 'text/plain',
      'ssl' => uritargetssl,
      'query' => uri.query,
      'data' => nil
    }

    if (method == 'GET') && !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end

  def canonicalize(uri)
    uri = URI(uri) unless uri.is_a?(URI)
    uri.normalize!

    path = uri.path.dup
    segments = path.split('/')
    resolved = []

    segments.each do |segment|
      next if segment == '.' || segment.empty?

      if segment == '..'
        resolved.pop unless resolved.empty?
      else
        resolved << segment
      end
    end

    uri.path = '/' + resolved.join('/')
    uri.to_s
  end

  def hashsig(hashreq)
    hashreq.to_s
  end
end

class BaseParser
  attr_accessor :crawler

  def initialize(crawler)
    self.crawler = crawler
  end

  def parse(_request, _result)
    nil
  end

  #
  # Add new path (uri) to test hash queue
  #
  def insertnewpath(hashreq)
    crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    crawler.hashsig(hashreq)
  end

  def urltohash(method, url, basepath, dat)
    crawler.urltohash(method, url, basepath, dat)
  end

  def targetssl
    crawler.cssl
  end

  def targetport
    crawler.cport
  end

  def targethost
    crawler.ctarget
  end

  def targetinipath
    crawler.cinipath
  end
end
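
# ---------------------------------------------------------------------------
# Illustrative sketch only; not shipped with the module above. Crawler parser
# modules normally live as separate .rb files under CrawlerModulesDir
# (data/msfcrawler by default): load_modules picks up any class whose name
# starts with "Crawler", instantiates it with the crawler, and sendreq hands
# every 200 response to its #parse method. The class name CrawlerExampleHref
# and the href regex below are assumptions made for this example; to try it,
# save the class to its own file under CrawlerModulesDir and re-run the module.
# ---------------------------------------------------------------------------
class CrawlerExampleHref < BaseParser
  def parse(request, result)
    # Only parse HTML responses.
    return unless result['Content-Type'].to_s.downcase.include?('text/html')

    # Queue every href found in the body, resolved against the current URI.
    result.body.to_s.scan(/href\s*=\s*["']([^"'#]+)["']/i).flatten.each do |link|
      insertnewpath(urltohash('GET', link, request['uri'], nil))
    end
  end
end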