CoCalc -- shikata_ga

GitHub Repository: rapid7/metasploit-framework
Path: blob/master/modules/encoders/x86/shikata_ga_nai.rb
¹⁹⁸⁵² views
1
##
2
# This module requires Metasploit: https://metasploit.com/download
3
# Current source: https://github.com/rapid7/metasploit-framework
4
##
5

6
require 'rex/poly'
7

8
class MetasploitModule < Msf::Encoder::XorAdditiveFeedback
9

10
  # The shikata encoder has an excellent ranking because it is polymorphic.
11
  # Party time, excellent!
12
  Rank = ExcellentRanking
13

14
  # Passes this encoder's metadata to the superclass constructor.
  #
  # The 'Decoder' entry declares a 4-byte key and a 4-byte block size.
  def initialize
    super(
      'Name' => 'Polymorphic XOR Additive Feedback Encoder',
      'Description' => %q{
        This encoder implements a polymorphic XOR additive feedback encoder.
        The decoder stub is generated based on dynamic instruction
        substitution and dynamic block ordering.  Registers are also
        selected dynamically.
      },
      'Author' => 'spoonm',
      'Arch' => ARCH_X86,
      'License' => MSF_LICENSE,
      'Decoder' => {
        'KeySize' => 4,
        'BlockSize' => 4
      }
    )
  end
31

32
  #
33
  # Generates the shikata decoder stub.
34
  #
35
  # Returns the decoder stub for +state+, generating and caching it on the
  # first call.
  #
  # On first use this also sets +state.decoder_key_offset+ to the position of
  # the 'XORK' placeholder and prepends the trailing bytes of the stub to
  # +state.buf+.
  #
  # Raises BadGenerateError if SaveRegisters overlaps the registers this
  # encoder always modifies, or if no block was generated.
  def decoder_stub(state)
    # The stub may be requested more than once for one state and must be the
    # same every time, so it is generated once and cached on the state.
    return state.decoder_stub unless state.decoder_stub.nil?

    # Registers the user asked to preserve must not be ones we always clobber.
    raise BadGenerateError unless (modified_registers & saved_registers).empty?

    # Shikata will only cut off the last 1-4 bytes of its own end,
    # depending on the alignment of the original buffer.
    cutoff = 4 - (state.buf.length & 3)
    block = generate_shikata_block(state, state.buf.length + cutoff, cutoff)
    raise BadGenerateError unless block

    # Set the state specific key offset to wherever the XORK ended up.
    state.decoder_key_offset = block.index('XORK')

    # Move the last +cutoff+ bytes of the stub to the front of the buffer
    # that is going to be encoded so that it aligns on a 4-byte boundary.
    state.buf = block.slice!(block.length - cutoff, cutoff) + state.buf

    state.decoder_stub = block
    state.decoder_stub
  end
65

66
  # Indicate that this module can preserve some registers
67
  # @return [true] unconditionally
  def can_preserve_registers?
    true
  end
70

71
  # A list of registers always touched by this encoder
72
  # @return [Array] the Rex::Arch::X86 constants ECX, EAX and EDX, in that order
  def modified_registers
    # ESP is assumed and is handled through preserves_stack?
    [
      # The counter register is hardcoded
      Rex::Arch::X86::ECX,
      # These are modified by div and mul operations
      Rex::Arch::X86::EAX,
      Rex::Arch::X86::EDX
    ]
  end
81

82
  # Always blacklist these registers in our block generation
83
  # @return [Array] ESP and ECX followed by any saved registers not already
  #   listed (set union, so duplicates are dropped)
  def block_generator_register_blacklist
    [Rex::Arch::X86::ESP, Rex::Arch::X86::ECX] | saved_registers
  end
86

87
  protected
88

89
  #
90
  # Returns the set of FPU instructions that can be used for the FPU block of
91
  # the decoder stub.
92
  #
93
  # @return [Array<String>] two-byte FPU instruction encodings, as binary
  #   strings, in a fixed order
  def fpu_instructions
    # Each entry is [first byte, lowest second byte, highest second byte].
    ranges = [
      [0xd9, 0xe8, 0xee],
      [0xd9, 0xc0, 0xcf],
      [0xda, 0xc0, 0xdf],
      [0xdb, 0xc0, 0xdf],
      [0xdd, 0xc0, 0xc7]
    ]

    # Bytes are packed rather than concatenated from "\x.." literals so the
    # result does not depend on the encoding the source file was parsed with.
    fpus = ranges.flat_map do |opcode, low, high|
      (low..high).map { |operand| [opcode, operand].pack('CC') }
    end

    # Individual \xd9-prefixed instructions outside the ranges above.
    [0xd0, 0xe1, 0xf6, 0xf7, 0xe5].each do |operand|
      fpus << [0xd9, operand].pack('CC')
    end

    # "\xdb\xe1" is deliberately left out: this FPU instruction seems to fail
    # consistently on Linux.
    fpus
  end
113

114
  #
115
  # Returns a polymorphic decoder stub that is capable of decoding a buffer
116
  # of the supplied length and encodes the last cutoff bytes of itself.
117
  #
118
  # Builds a polymorphic decoder stub that can decode +length+ bytes and whose
  # own last +cutoff+ bytes are part of the encoded data.
  #
  # Reads +state.context_encoding+ and +state.badchars+, and the
  # 'BufferRegister' / 'BufferOffset' datastore options.
  #
  # Returns whatever the final Rex::Poly block's +generate+ returns (the
  # caller treats it as the stub string).
  #
  # Raises EncodingError if generation fails, or if a BufferOffset outside
  # -255..255 is requested while NULL is a bad character.
  def generate_shikata_block(state, length, cutoff)
    # Declare logical registers.  The counter is pinned to ECX; the returned
    # object is not used again in this method.
    Rex::Poly::LogicalRegister::X86.new('count', 'ecx')
    addr_reg = Rex::Poly::LogicalRegister::X86.new('addr')

    # With context encoding the key register is pinned to EAX.
    key_reg =
      if state.context_encoding
        Rex::Poly::LogicalRegister::X86.new('key', 'eax')
      else
        Rex::Poly::LogicalRegister::X86.new('key')
      end

    # Declare individual blocks
    endb = Rex::Poly::SymbolicBlock::End.new

    # Clear the counter register
    clear_register = Rex::Poly::LogicalBlock.new(
      'clear_register',
      "\x31\xc9",  # xor ecx,ecx
      "\x29\xc9",  # sub ecx,ecx
      "\x33\xc9",  # xor ecx,ecx
      "\x2b\xc9"   # sub ecx,ecx
    )

    # Initialize the counter after zeroing it
    init_counter = Rex::Poly::LogicalBlock.new('init_counter')

    # Round the length up to the 4-byte block size, then convert it to a
    # block count.  This is the same value the previous un-parenthesised
    # expression produced ('+' binds tighter than '&'), written explicitly.
    length += (4 - (length & 3)) & 3
    length /= 4

    if length <= 0xff
      # mov cl, imm8
      init_counter.add_perm("\xb1" + [ length ].pack('C'))
    elsif length <= 0xffff
      # mov cx, imm16 -- 0xffff is the largest count 16 bits can hold;
      # 65536 would be packed as zero.
      init_counter.add_perm("\x66\xb9" + [ length ].pack('v'))
    else
      # mov ecx, imm32
      init_counter.add_perm("\xb9" + [ length ].pack('V'))
    end

    # Key initialization block.  With context encoding we use a
    # mov reg, [addr]; otherwise a direct mov reg, val.  'XORK' is the
    # placeholder the caller later locates.
    key_opcode = state.context_encoding ? 0xa1 : 0xb8
    init_key = Rex::Poly::LogicalBlock.new(
      'init_key',
      proc { |b| (key_opcode + b.regnum_of(key_reg)).chr + 'XORK' }
    )

    xor = proc { |b| "\x31" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }
    add = proc { |b| "\x03" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }

    sub4 = proc { |b| sub_immediate(b.regnum_of(addr_reg), -4) }
    add4 = proc { |b| add_immediate(b.regnum_of(addr_reg), 4) }

    if datastore['BufferRegister']
      buff_reg = Rex::Poly::LogicalRegister::X86.new('buff', datastore['BufferRegister'])
      offset = (datastore['BufferOffset'] ? datastore['BufferOffset'].to_i : 0)
      if ((offset < -255) || (offset > 255)) && state.badchars.include?("\x00")
        raise EncodingError, "Can't generate NULL-free decoder with a BufferOffset bigger than one byte"
      end

      mov = proc do |b|
        # mov <buff_reg>, <addr_reg>
        "\x89" + (0xc0 + b.regnum_of(addr_reg) + (8 * b.regnum_of(buff_reg))).chr
      end
      add_offset = proc { |b| add_immediate(b.regnum_of(addr_reg), offset) }
      sub_offset = proc { |b| sub_immediate(b.regnum_of(addr_reg), -offset) }

      getpc = Rex::Poly::LogicalBlock.new('getpc')
      getpc.add_perm(proc { |b| mov.call(b) + add_offset.call(b) })
      getpc.add_perm(proc { |b| mov.call(b) + sub_offset.call(b) })

      # With an offset of less than four, inc is smaller than or the same size as add
      if (offset > 0) && (offset < 4)
        getpc.add_perm(proc { |b| mov.call(b) + inc(b.regnum_of(addr_reg)) * offset })
      elsif (offset < 0) && (offset > -4)
        getpc.add_perm(proc { |b| mov.call(b) + dec(b.regnum_of(addr_reg)) * -offset })
      end

      # NOTE: Adding a perm with possibly different sizes is normally
      # wrong since it will change the SymbolicBlock::End offset during
      # various stages of generation.  In this case, though, offset is
      # constant throughout the whole process, so it isn't a problem.
      getpc.add_perm(proc do |b|
        modrm = b.regnum_of(buff_reg) + (8 * b.regnum_of(addr_reg))
        if offset == 0
          # lea addr_reg, [buff_reg]
          "\x8d" + modrm.chr
        elsif (offset >= -128) && (offset <= 127)
          # lea addr_reg, [buff_reg + byte offset]
          # The byte displacement is sign-extended, so only -128..127 fits.
          "\x8d" + (0x40 + modrm).chr + [offset].pack('c')
        else
          # lea addr_reg, [buff_reg + DWORD offset]
          # NOTE: This will generate NULL bytes for small positive offsets!
          "\x8d" + (0x80 + modrm).chr + [offset].pack('V')
        end
      end)

      # BufferReg+BufferOffset points right at the beginning of our
      # buffer, so in contrast to the fnstenv technique, we don't have to
      # sub off any other offsets.
      xor1 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
      xor2 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
      add1 = proc { |b| add.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
      add2 = proc { |b| add.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
    else
      # FPU blocks
      fpu = Rex::Poly::LogicalBlock.new('fpu', *fpu_instructions)

      fnstenv = Rex::Poly::LogicalBlock.new('fnstenv', "\xd9\x74\x24\xf4")
      fnstenv.depends_on(fpu)

      # Get EIP off the stack
      getpc = Rex::Poly::LogicalBlock.new(
        'getpc',
        proc { |b| (0x58 + b.regnum_of(addr_reg)).chr }
      )
      getpc.depends_on(fnstenv)

      # Subtract the offset of the fpu instruction since that's where eip points after fnstenv
      xor1 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
      xor2 = proc { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
      add1 = proc { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
      add2 = proc { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
    end

    # Decoder loop block
    loop_block = Rex::Poly::LogicalBlock.new('loop_block')

    loop_block.add_perm(
      proc { |b| xor1.call(b) + add1.call(b) + sub4.call(b) },
      proc { |b| xor1.call(b) + sub4.call(b) + add2.call(b) },
      proc { |b| sub4.call(b) + xor2.call(b) + add2.call(b) },
      proc { |b| xor1.call(b) + add1.call(b) + add4.call(b) },
      proc { |b| xor1.call(b) + add4.call(b) + add2.call(b) },
      proc { |b| add4.call(b) + xor2.call(b) + add2.call(b) }
    )

    # Loop instruction block
    loop_inst = Rex::Poly::LogicalBlock.new('loop_inst', "\xe2\xf5")
    # In the current implementation the loop block is a constant size,
    # so really no need for a fancy calculation.  Nevertheless, here's
    # one way to do it:
    # Proc.new { |b|
    #   # loop <loop_block label>
    #   # -2 to account for the size of this instruction
    #   "\xe2" + [ -2 - b.size_of(loop_block) ].pack('c')
    # })

    # Define block dependencies
    clear_register.depends_on(getpc)
    init_counter.depends_on(clear_register)
    loop_block.depends_on(init_counter, init_key)
    loop_inst.depends_on(loop_block)

    begin
      # Generate a permutation saving the ECX, ESP, and user defined registers
      loop_inst.generate(block_generator_register_blacklist, nil, state.badchars)
    rescue RuntimeError, EncodingError => e
      # The Rex::Poly block generator can raise RuntimeError variants
      raise EncodingError, e.to_s
    end
  end
290

291
  # Convert the SaveRegisters to an array of x86 register constants
292
  # @return the result of passing the 'SaveRegisters' datastore option to
  #   Rex::Arch::X86.register_names_to_ids
  def saved_registers
    Rex::Arch::X86.register_names_to_ids(datastore['SaveRegisters'])
  end
295

296
  # Encodes `sub <reg>, imm` for the register numbered +regnum+.
  #
  # @param regnum [Integer] x86 register number, added to the 0xe8 ModR/M base
  # @param imm [Integer, nil] value to subtract
  # @return [String] binary instruction bytes, or '' when +imm+ is nil or zero
  def sub_immediate(regnum, imm)
    return '' if imm.nil? || imm.zero?

    modrm = 0xe8 + regnum
    if imm.between?(-128, 127)
      # The one-byte immediate form (opcode 0x83) is sign-extended, so it can
      # only represent -128..127.
      [0x83, modrm, imm].pack('CCc')
    else
      # Everything else needs the four-byte immediate form (opcode 0x81).
      [0x81, modrm, imm].pack('CCV')
    end
  end
305

306
  # Encodes `add <reg>, imm` for the register numbered +regnum+.
  #
  # @param regnum [Integer] x86 register number, added to the 0xc0 ModR/M base
  # @param imm [Integer, nil] value to add
  # @return [String] binary instruction bytes, or '' when +imm+ is nil or zero
  def add_immediate(regnum, imm)
    return '' if imm.nil? || imm.zero?

    modrm = 0xc0 + regnum
    if imm.between?(-128, 127)
      # The one-byte immediate form (opcode 0x83) is sign-extended, so it can
      # only represent -128..127.
      [0x83, modrm, imm].pack('CCc')
    else
      # Everything else needs the four-byte immediate form (opcode 0x81).
      [0x81, modrm, imm].pack('CCV')
    end
  end
315

316
  # @param regnum [Integer] x86 register number
  # @return [String] the single byte 0x40 + regnum (one-byte `inc <reg>`)
  def inc(regnum)
    [0x40 + regnum].pack('C')
  end
319

320
  # @param regnum [Integer] x86 register number
  # @return [String] the single byte 0x48 + regnum (one-byte `dec <reg>`)
  def dec(regnum)
    [0x48 + regnum].pack('C')
  end
323
end
324

325
Product

Resources

Company