Path: blob/master/modules/auxiliary/crawler/msfcrawler.rb
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

#
# Web Crawler.
#
# Author: Efrain Torres   et [at] metasploit.com 2010
#
#

# openssl before rubygems mac os
require 'openssl'
require 'pathname'
require 'uri'
require 'rinda/tuplespace' # provides Rinda::TupleSpace, used below for the crawl queue

class MetasploitModule < Msf::Auxiliary
  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Metasploit Web Crawler',
      'Description' => 'This auxiliary module is a modular web crawler, to be used in conjunction with wmap (someday) or standalone.',
      'Author'      => 'et',
      'License'     => MSF_LICENSE
    ))

    register_options([
      OptString.new('PATH', [true, "Starting crawling path", '/']),
      OptInt.new('RPORT', [true, "Remote port", 80 ])
    ])

    register_advanced_options([
      OptPath.new('CrawlerModulesDir', [true, 'The base directory containing the crawler modules',
        File.join(Msf::Config.data_directory, "msfcrawler")
      ]),
      OptBool.new('EnableUl', [ false, "Enable maximum number of request per URI", true ]),
      OptBool.new('StoreDB', [ false, "Store requests in database", false ]),
      OptInt.new('MaxUriLimit', [ true, "Number max. request per URI", 10]),
      OptInt.new('SleepTime', [ true, "Sleep time (secs) between requests", 0]),
      OptInt.new('TakeTimeout', [ true, "Timeout for loop ending", 15]),
      OptInt.new('ReadTimeout', [ true, "Read timeout (-1 forever)", 3]),
      OptInt.new('ThreadNum', [ true, "Threads number", 20]),
      OptString.new('DontCrawl', [true, "Filestypes not to crawl", '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
    ])
  end

  attr_accessor :ctarget, :cport, :cssl

  def run
    i, a = 0, []

    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']
    inipath = datastore['PATH']

    cinipath = (inipath.nil? or inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost'  => ctarget,
      'rport'  => cport,
      'uri'    => cinipath,
      'method' => 'GET',
      'ctype'  => 'text/plain',
      'ssl'    => cssl,
      'query'  => nil,
      'data'   => nil
    }

    @NotViewedQueue = Rinda::TupleSpace.new
    @ViewedQueue = Hash.new
    @UriLimits = Hash.new
    @current_site = self.ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status("OK")

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")
    end

    print_status("Target: #{self.ctarget} Port: #{self.cport} Path: #{cinipath} SSL: #{self.cssl}")


    begin
      reqfilter = reqtemplate(self.ctarget,self.cport,self.cssl)

      i = 0

      loop do

        ####
        #if i <= datastore['ThreadNum']
        #  a.push(Thread.new {
        ####

        hashreq = @NotViewedQueue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @UriLimits.include?(hashreq['uri']) and datastore['EnableUl']
          #puts "Request #{@UriLimits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @UriLimits[hashreq['uri']] >= datastore['MaxUriLimit']
            #puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @UriLimits[hashreq['uri']] = 0
        end

        if !@ViewedQueue.include?(hashsig(hashreq)) and !ul

          @ViewedQueue[hashsig(hashreq)] = Time.now
          @UriLimits[hashreq['uri']] += 1

          if !File.extname(hashreq['uri']).empty? and datastore['DontCrawl'].include? File.extname(hashreq['uri'])
            vprint_status "URI not crawled #{hashreq['uri']}"
          else
            prx = nil
            #if self.useproxy
            #  prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            #end

            c = Rex::Proto::Http::Client.new(
              self.ctarget,
              self.cport.to_i,
              {},
              self.cssl,
              nil,
              prx
            )

            sendreq(c,hashreq)
          end
        else
          vprint_line "#{hashreq['uri']} already visited. "
        end

        ####
        #})

        #i += 1
        #else
        #  sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        #  i = 0
        #end
        ####

      end
    rescue Rinda::RequestExpiredError
      print_status("END.")
      return
    end

    print_status("Finished crawling")
  end

  def reqtemplate(target,port,ssl)
    hreq = {
      'rhost'  => target,
      'rport'  => port,
      'uri'    => nil,
      'method' => nil,
      'ctype'  => nil,
      'ssl'    => ssl,
      'query'  => nil,
      'data'   => nil
    }

    return hreq
  end

  def storedb(hashreq,response,dbpath)

    # Added host/port/ssl for report_web_page support
    info = {
      :web_site => @current_site,
      :path     => hashreq['uri'],
      :query    => hashreq['query'],
      :host     => hashreq['rhost'],
      :port     => hashreq['rport'],
      :ssl      => !hashreq['ssl'].nil?,
      :data     => hashreq['data'],
      :code     => response.code,
      :body     => response.body,
      :headers  => response.headers
    }

    #if response['content-type']
    #  info[:ctype] = response['content-type'][0]
    #end

    #if response['set-cookie']
    #  info[:cookie] = page.headers['set-cookie'].join("\n")
    #end

    #if page.headers['authorization']
    #  info[:auth] = page.headers['authorization'].join("\n")
    #end

    #if page.headers['location']
    #  info[:location] = page.headers['location'][0]
    #end

    #if page.headers['last-modified']
    #  info[:mtime] = page.headers['last-modified'][0]
    #end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser <[email protected]>
  #

  def load_modules(crawlermodulesdir)

    base = crawlermodulesdir
    if (not File.directory?(base))
      raise RuntimeError,"The Crawler modules parameter is set to an invalid directory"
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = $1
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue ::Exception => e
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end

  def sendreq(nclient,reqopts={})

    begin
      r = nclient.request_raw(reqopts)
      resp = nclient.send_recv(r, datastore['ReadTimeout'])

      if resp
        #
        # Quickfix for bug packet.rb to_s line: 190
        # In case modules or crawler calls to_s on de-chunked responses
        #
        resp.transfer_chunked = false

        if datastore['StoreDB']
          storedb(reqopts,resp,$dbpathmsf)
        end

        print_status ">> [#{resp.code}] #{reqopts['uri']}"

        if reqopts['query'] and !reqopts['query'].empty?
          print_status ">>> [Q] #{reqopts['query']}"
        end

        if reqopts['data']
          print_status ">>> [D] #{reqopts['data']}"
        end

        case resp.code
        when 200
          @crawlermodules.each_key do |k|
            @crawlermodules[k].parse(reqopts,resp)
          end
        when 301..303
          print_line("[#{resp.code}] Redirection to: #{resp['Location']}")
          vprint_status urltohash('GET',resp['Location'],reqopts['uri'],nil)
          insertnewpath(urltohash('GET',resp['Location'],reqopts['uri'],nil))
        when 404
          print_status "[404] Invalid link #{reqopts['uri']}"
        else
          print_status "Unhandled #{resp.code}"
        end

      else
        print_status "No response"
      end
      sleep(datastore['SleepTime'])
    rescue
      print_status "ERROR"
      vprint_status "#{$!}: #{$!.backtrace}"
    end
  end

  #
  # Add new path (uri) to test non-viewed queue
  #

  def insertnewpath(hashreq)

    hashreq['uri'] = canonicalize(hashreq['uri'])

    if hashreq['rhost'] == datastore['RHOSTS'] and hashreq['rport'] == datastore['RPORT']
      if !@ViewedQueue.include?(hashsig(hashreq))
        if @NotViewedQueue.read_all(hashreq).size > 0
          vprint_status "Already in queue to be viewed: #{hashreq['uri']}"
        else
          vprint_status "Inserted: #{hashreq['uri']}"

          @NotViewedQueue.write(hashreq)
        end
      else
        vprint_status "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
      end
    end
  end

  #
  # Build a new hash for a local path
  #

  def urltohash(m,url,basepath,dat)

    # m:        method
    # url:      uri?[query]
    # basepath: base path/uri to determine absolute path when relative
    # data:     body data, nil if GET and query = uri.query

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == "https") ? true : false

    uritargethost = uri.host
    if (uri.host.nil? or uri.host.empty?)
      uritargethost = self.ctarget
      uritargetssl = self.cssl
    end

    uritargetport = uri.port
    if (uri.port.nil?)
      uritargetport = self.cport
    end

    uritargetpath = uri.path
    if (uri.path.nil? or uri.path.empty?)
      uritargetpath = "/"
    end

    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1,1] == '/'
        newp = oldp+newp
      else
        if !newp.to_s.empty?
          newp = File.join(oldp.dirname,newp)
        end
      end
    end

    hashreq = {
      'rhost'  => uritargethost,
      'rport'  => uritargetport,
      'uri'    => newp.to_s,
      'method' => m,
      'ctype'  => 'text/plain',
      'ssl'    => uritargetssl,
      'query'  => uri.query,
      'data'   => nil
    }

    if m == 'GET' and !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end

  # Taken from http://www.ruby-forum.com/topic/140101 by Rob Biedenharn
  def canonicalize(uri)

    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
    u.normalize!
    newpath = u.path
    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    } do end
    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
    u.path = newpath
    # Ugly fix
    u.path = u.path.gsub("\/..\/","\/")
    u.to_s
  end

  def hashsig(hashreq)
    hashreq.to_s
  end
end

class BaseParser
  attr_accessor :crawler

  def initialize(c)
    self.crawler = c
  end

  def parse(request,result)
    nil
  end

  #
  # Add new path (uri) to test hash queue
  #
  def insertnewpath(hashreq)
    self.crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    self.crawler.hashsig(hashreq)
  end

  def urltohash(m,url,basepath,dat)
    self.crawler.urltohash(m,url,basepath,dat)
  end

  def targetssl
    self.crawler.cssl
  end

  def targetport
    self.crawler.cport
  end

  def targethost
    self.crawler.ctarget
  end

  def targetinipath
    self.crawler.cinipath
  end
end
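
All of the actual page-parsing logic lives in the small plug-in classes loaded by load_modules: every .rb file under CrawlerModulesDir is evaluated in an anonymous module, any constant whose name starts with "Crawler" is instantiated with the auxiliary module as its crawler, and its parse(request, result) method is invoked for every 200 response. The following is a minimal sketch of what such a plug-in could look like; the filename, the class name CrawlerHrefLinks, and the naive href regex are illustrative assumptions, not code shipped with the framework.

# Hypothetical plug-in, e.g. saved as hreflinks.rb inside CrawlerModulesDir.
# load_modules only requires that the constant be named Crawler<Something>
# and that the instance respond to parse(request, result).
class CrawlerHrefLinks < BaseParser

  def parse(request, result)
    # Only scan HTML bodies for links; reading the header via
    # result['Content-Type'] is an assumption about the Rex response object.
    return nil unless result['Content-Type'].to_s =~ /text\/html/i

    # Naive href extraction, sufficient for a sketch.
    result.body.to_s.scan(/href\s*=\s*["']([^"'#]+)["']/i).flatten.uniq.each do |link|
      begin
        # Resolve the link against the current URI and queue it; the crawler
        # itself de-duplicates via @ViewedQueue/@NotViewedQueue.
        insertnewpath(urltohash('GET', link, request['uri'], nil))
      rescue ::URI::InvalidURIError
        # Skip links that URI.parse cannot handle.
      end
    end
    nil
  end
end

If a file along these lines sits in the directory pointed to by CrawlerModulesDir, the module would announce it at startup ("Loaded crawler module HrefLinks from ...") and run every successfully fetched page through its parse method.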