CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
rapid7

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: rapid7/metasploit-framework
Path: blob/master/lib/rex/parser/nmap_xml.rb
Views: 11623
1
# -*- coding: binary -*-
2
3
require 'rexml/document'
4
5
module Rex
6
module Parser
7
8
#
9
# Stream parser for nmap -oX xml output
10
#
11
# Yields a hash representing each host found in the xml stream. Each host
12
# will look something like the following:
13
# {
14
# "status" => "up",
15
# "addrs" => { "ipv4" => "192.168.0.1", "mac" => "00:0d:87:a1:df:72" },
16
# "ports" => [
17
# { "portid" => "22", "state" => "closed", ... },
18
# { "portid" => "80", "state" => "open", ... },
19
# ...
20
# ]
21
# }
22
#
23
# Usage:
24
# parser = NmapXMLStreamParser.new { |host|
25
# # do stuff with the host
26
# }
27
# REXML::Document.parse_stream(File.new(nmap_xml), parser)
28
# -- or --
29
# parser = NmapXMLStreamParser.new
30
# parser.on_found_host = Proc.new { |host|
31
# # do stuff with the host
32
# }
33
# REXML::Document.parse_stream(File.new(nmap_xml), parser)
34
#
35
# This parser does not maintain state as well as a tree parser, so malformed
36
# xml will trip it up. Nmap shouldn't ever output malformed xml, so it's not
37
# a big deal.
38
#
39
class NmapXMLStreamParser

  #
  # Callback for processing each found host. Anything responding to +call+
  # works; it is invoked with the host hash when a </host> tag is closed.
  #
  attr_accessor :on_found_host

  #
  # Create a new stream parser for NMAP XML output
  #
  # If given a block, it will be stored in +on_found_host+, otherwise you
  # need to set it explicitly, e.g.:
  #     parser = NmapXMLStreamParser.new
  #     parser.on_found_host = Proc.new { |host|
  #       # do stuff with the host
  #     }
  #     REXML::Document.parse_stream(File.new(nmap_xml), parser)
  #
  def initialize(&block)
    reset_state
    # The explicit receiver is required: a bare `on_found_host = block`
    # only creates a local variable and the callback would be lost.
    self.on_found_host = block if block
  end

  #
  # Discard everything collected so far and start a fresh, empty host.
  # Called on construction and after every </host>.
  #
  def reset_state
    @host = { "status" => nil, "addrs" => {}, "ports" => [], "scripts" => {} }
    @state = nil
  end

  #
  # Stream callback for an opening tag.
  #
  # name::       the element name, e.g. "address" or "port"
  # attributes:: hash of the element's attributes (string keys)
  #
  # Folds the interesting attributes into the host currently being built.
  # Unknown tags are ignored. Errors caused by calling a method on nil are
  # swallowed; any other NoMethodError is re-raised.
  #
  def tag_start(name, attributes)
    case name
    when "address"
      @host["addrs"][attributes["addrtype"]] = attributes["addr"]
      if attributes["addrtype"] =~ /ipv[46]/
        @host["addr"] = attributes["addr"]
      end
    when "osclass"
      # If there is more than one, take the highest accuracy. In case of
      # a tie, this will have the effect of taking the last one in the
      # list. Last is really no better than first but nmap appears to
      # put OSes in chronological order, at least for Windows.
      # Accordingly, this will report XP instead of 2000, 7 instead of
      # Vista, etc, when each has the same accuracy.
      if @host["os_accuracy"].to_i <= attributes["accuracy"].to_i
        @host["os_vendor"]   = attributes["vendor"]
        @host["os_family"]   = attributes["osfamily"]
        @host["os_version"]  = attributes["osgen"]
        @host["os_accuracy"] = attributes["accuracy"]
      end
    when "osmatch"
      # Only a 100% accurate match is recorded.
      if attributes["accuracy"].to_i == 100
        @host["os_match"] = attributes["name"]
      end
    when "uptime"
      @host["last_boot"] = attributes["lastboot"]
    when "hostname"
      if attributes["type"] == "PTR"
        @host["reverse_dns"] = attributes["name"]
      end
    when "status"
      # <status> refers to the liveness of the host; values are "up" or "down"
      @host["status"] = attributes["state"]
      @host["status_reason"] = attributes["reason"]
    when "port"
      # Remember that we are inside a <port> element so that nested
      # <script> tags get attached to this port; cleared in tag_end.
      @state = :in_port_tag
      @host["ports"].push(attributes)
    when "state"
      # <state> refers to the state of a port; values are "open", "closed", or "filtered"
      port = @host["ports"].last
      port["state"] = attributes["state"] if port
    when "service"
      # Store any service and script info with the associated port. There shouldn't
      # be any collisions on attribute names here, so just merge them.
      port = @host["ports"].last
      port.merge!(attributes) if port
    when "script"
      # Associate scripts under a port tag with the appropriate port.
      # Everything else is recorded against the host being built.
      port = @host["ports"].last
      if @state == :in_port_tag && port
        port["scripts"] ||= {}
        port["scripts"][attributes["id"]] = attributes["output"]
      else
        # NOTE(review): @host is always a Hash here, so scripts that appear
        # outside any <host> element also land in this hash. If that
        # happens after the last </host> the hash is never handed to the
        # callback.
        @host["scripts"] ||= {}
        @host["scripts"][attributes["id"]] = attributes["output"]
      end
    when "trace"
      @host["trace"] = { "port" => attributes["port"], "proto" => attributes["proto"], "hops" => [] }
    when "hop"
      if @host["trace"]
        @host["trace"]["hops"].push(attributes)
      end
    end
  rescue NoMethodError => err
    # Tolerate "method called on nil" only. Older Rubies word the message
    # "... for nil:NilClass", newer ones "... for nil", so accept both.
    raise err unless err.message =~ /NilClass|for nil\b/
  end

  #
  # Stream callback for a closing tag.
  #
  # </port> leaves port context; </host> hands the finished host hash to
  # +on_found_host+ (when one is set) and starts over with a fresh host.
  #
  def tag_end(name)
    case name
    when "port"
      @state = nil
    when "host"
      on_found_host.call(@host) if on_found_host
      reset_state
    end
  end

  # We don't need these methods, but they're necessary to keep REXML happy
  def text(str) # :nodoc:
  end

  def xmldecl(version, encoding, standalone) # :nodoc:
  end

  # The argument is optional so existing zero-argument callers keep
  # working, while a stream parser that passes the CDATA content is
  # accepted too.
  def cdata(content = nil) # :nodoc:
  end

  def comment(str) # :nodoc:
  end

  def instruction(name, instruction) # :nodoc:
  end

  def attlist # :nodoc:
  end

  # DOCTYPE events are accepted and ignored, whatever arguments the stream
  # parser supplies.
  def doctype(*_args) # :nodoc:
  end

  def doctype_end # :nodoc:
  end
end
163
164
end
165
end
166
167
168