Skip to content

Memory Read/Write and Comparison

This case extends the plugin's basic memory read and write functions and wraps its API to implement ShellCode reading and writing, memory swapping, memory region comparison, disk and memory image comparison, and memory signature (feature code) searching. Used flexibly, these helper functions make subsequent reverse-engineering analysis far more efficient.

Implement ShellCode writing to memory

When conducting vulnerability testing, it is usually necessary to use ShellCode to verify the security of the system. ShellCode is a special type of binary code that does not have an executable file header and can only be executed in memory. Attackers can inject ShellCode into the memory of the target system by exploiting vulnerabilities, thereby performing malicious operations.

For example, suppose we use the Metasploit tool to generate the following ShellCode:

c
"\xfc\xe8\x8f\x00\x00\x00\x60\x31\xd2\x89\xe5\x64\x8b\x52\x30"
"\x8b\x52\x0c\x8b\x52\x14\x0f\xb7\x4a\x26\x31\xff\x8b\x72\x28"
"\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\xc1\xcf\x0d\x01\xc7\x49"
"\x75\xef\x52\x57\x8b\x52\x10\x8b\x42\x3c\x01\xd0\x8b\x40\x78"
"\x85\xc0\x74\x4c\x01\xd0\x8b\x58\x20\x01\xd3\x50\x8b\x48\x18"
"\x85\xc9\x74\x3c\x31\xff\x49\x8b\x34\x8b\x01\xd6\x31\xc0\xac"

To inject the code into the target process's memory and run it, three steps are needed: first, allocate heap space in the target process by calling the create_alloc function; then call the set_memory_protect function to make that memory readable, writable, and executable; and finally write the ShellCode into memory byte by byte by calling the set_memory_byte function and set the EIP register to point to its entry. The code is summarized as follows:

python
from x32dbg import Debugger

# Read shellcode into memory
def read_shellcode(path):
    r"""Parse a text file of ``\xNN`` escapes into a list of hex strings.

    Each line is stripped of whitespace and double quotes, then every
    ``\x`` escape followed by one or two hex digits is collected. Anything
    else on the line (a language tag such as ``c``, a C declaration,
    trailing ``;`` or ``,``) is ignored instead of being turned into a
    bogus token.

    Args:
        path: Path of the UTF-8 text file holding the shellcode.

    Returns:
        A list of strings such as ``["0xfc", "0xe8"]`` in file order; each
        entry can be converted with ``int(entry, 16)``.
    """
    # Local import so this snippet stays self-contained.
    import re

    escape_pattern = re.compile(r"\\x([0-9A-Fa-f]{1,2})")
    shellcode_list = []
    with open(path, "r", encoding="utf-8") as fp:
        for line in fp:
            # Drop all whitespace and quotes first so an escape that was
            # split by stray spaces is still recognised.
            cleaned = "".join(line.split()).replace('"', "")
            shellcode_list.extend(
                "0x" + digits for digits in escape_pattern.findall(cleaned)
            )
    return shellcode_list

if __name__ == "__main__":
    # Size in bytes of the scratch buffer allocated in the debuggee.
    ALLOC_SIZE = 1024

    dbg = Debugger(address="127.0.0.1",port=6589)
    if not dbg.connect():
        exit()

    try:
        # Save the current EIP address so it can be restored afterwards
        local_eip = dbg.get_eip()
        print("Current EIP address = {}".format(hex(local_eip)))

        # Opening up heap space; validate the result before using it
        address = dbg.create_alloc(0, ALLOC_SIZE)
        if not address:
            print("Failed to open up heap space")
            exit()
        print("Opening up heap space = {}".format(hex(address)))

        try:
            # Set memory read/write execution properties.
            # NOTE(review): 32 == 0x20, which is PAGE_EXECUTE_READ in the
            # Win32 protection constants (read/write/execute is 0x40) --
            # confirm how the plugin interprets this value.
            dbg.set_memory_protect(address, 32, ALLOC_SIZE)

            # Read shellcode from text
            shellcode = read_shellcode("d://shellcode.txt")
            if len(shellcode) > ALLOC_SIZE:
                # Writing past the allocation would corrupt the debuggee.
                print("ShellCode is {} bytes, larger than the {} byte buffer"
                      .format(len(shellcode), ALLOC_SIZE))
                exit()

            # Loop write to memory
            for offset, code in enumerate(shellcode):
                dbg.set_memory_byte(address + offset, int(code, 16))

            # Set EIP location
            dbg.set_eip(address)
            input("Click Enter to restore")
        finally:
            # Move EIP off the buffer before freeing it, and do both even
            # if one of the steps above raised or exited.
            dbg.set_eip(local_eip)
            dbg.delete_alloc(address)
            print("Restore EIP address = {}".format(hex(local_eip)))
    finally:
        # Always release the debugger connection.
        dbg.close()

Implement ShellCode writing to a file

To save a specific memory region to a file in ShellCode text format, call the get_memory_byte function to read the machine code from memory byte by byte and write it out in the required format.

The following is an example code that demonstrates how to implement this process:

python
from x32dbg import Debugger

# Save specific memory regions in ShellCode format to text
def write_shellcode(dbg, address, size, path):
    r"""Dump ``size`` bytes of debuggee memory as ``\xNN`` text.

    Bytes are read one at a time with ``dbg.get_memory_byte`` and emitted
    sixteen per row, both to stdout and to ``path``. Every row, including a
    final partial one, ends with a newline.

    Args:
        dbg: Connected debugger object providing ``get_memory_byte``.
        address: Address of the first byte to dump.
        size: Number of bytes to dump.
        path: Output text file (overwritten, UTF-8).
    """
    row = []
    with open(path, "w", encoding="utf-8") as fp:
        for index in range(size):
            # Read machine code; a None result is written as 00
            read_code = dbg.get_memory_byte(address + index)
            if read_code is None:
                read_code = 0

            row.append("\\x" + format(read_code, "02x"))

            # Flush a full row, or whatever is left after the last byte,
            # so the output never ends in the middle of a line.
            if len(row) == 16 or index == size - 1:
                line = "".join(row)
                print(line)
                fp.write(line + "\n")
                row = []

if __name__ == "__main__":
    dbg = Debugger(address="127.0.0.1", port=6589)
    connected = dbg.connect()
    if not connected:
        exit()

    # The dump starts at the instruction pointer's current location
    start_address = dbg.get_eip()
    print("Current EIP address = {}".format(hex(start_address)))

    # Export 128 bytes in ShellCode text form
    write_shellcode(dbg, start_address, 128, "d://shellcode.txt")

    dbg.close()

Implement memory area swapping

To swap two memory regions, the memory read and write functions can be called repeatedly to move the data byte by byte.

Here is a simple example that shows how to encapsulate a memory_xchage function to exchange data between two memory regions:

python
from x32dbg import Debugger

# Swap two memory regions
def memory_xchage(dbg,memory_ptr_x,memory_ptr_y,bytes):
    """Swap ``bytes`` bytes between two memory regions, one byte at a time.

    Args:
        dbg: Connected debugger object providing ``get_memory_byte`` and
            ``set_memory_byte``.
        memory_ptr_x: Start address of the first region.
        memory_ptr_y: Start address of the second region.
        bytes: Number of bytes to exchange.

    Returns:
        True only if every write reported success; False if any write
        failed or if ``bytes`` is zero or negative.
        NOTE(review): assumes ``set_memory_byte`` returns a truthy value on
        success -- confirm against the plugin API.
    """
    if bytes <= 0:
        return False

    all_written = True
    for index in range(bytes):
        addr_x = memory_ptr_x + index
        addr_y = memory_ptr_y + index

        # Read both bytes before overwriting either of them
        read_byte_x = dbg.get_memory_byte(addr_x)
        read_byte_y = dbg.get_memory_byte(addr_y)

        # Record every write result so an early failure is not hidden by
        # a later success.
        wrote_x = dbg.set_memory_byte(addr_x, read_byte_y)
        wrote_y = dbg.set_memory_byte(addr_y, read_byte_x)
        if not (wrote_x and wrote_y):
            all_written = False
    return all_written

if __name__ == "__main__":
    dbg = Debugger(address="127.0.0.1", port=6589)
    connected = dbg.connect()
    if not connected:
        exit()

    # The two example regions to exchange, four bytes each
    region_x = 0x5B0010  # 5963792
    region_y = 0x5B0070  # 5963888
    flag = memory_xchage(dbg, region_x, region_y, 4)
    print("Memory swap status = {}".format(flag))

    dbg.close()

Comparison between memory and disk

Sometimes we need to compare a memory region with the corresponding file on disk. The get_memory_hex_ascii helper defined below reads the machine code from memory, while get_file_hex_ascii reads the machine code from the file on disk. By comparing the two, we can detect where the in-memory image differs from the original file.

python
from x32dbg import Debugger

# Obtain the machine code in the memory image of the program
def get_memory_hex_ascii(address,offset,len,debugger=None):
    """Read ``len`` bytes of debuggee memory as two-digit hex strings.

    Bytes are read one at a time starting at ``address + offset``. They are
    also printed as a hex dump, sixteen per row.

    Args:
        address: Base address of the region.
        offset: Offset from ``address`` of the first byte to read.
        len: Number of bytes to read (same meaning as in
            ``get_file_hex_ascii``).
        debugger: Object providing ``get_memory_byte``. Defaults to the
            module-level ``dbg`` created by the script.

    Returns:
        A list of lowercase two-character hex strings, one per byte.
    """
    if debugger is None:
        # Fall back to the connection the script created at module level.
        debugger = dbg

    ref_memory_list = []
    for count, index in enumerate(range(offset, offset + len), start=1):
        hex_byte = format(debugger.get_memory_byte(address + index), "02x")

        # End the row after every sixteenth byte, otherwise keep appending
        # to the current row.
        if count % 16 == 0:
            print(hex_byte)
        else:
            print(hex_byte + " ", end="")
        ref_memory_list.append(hex_byte)
    return ref_memory_list

# Read the machine code from the disk image in the program
def get_file_hex_ascii(path,offset,len):
    """Read up to ``len`` bytes of a file as two-digit hex strings.

    The bytes are also printed as a hex dump, sixteen per row.

    Args:
        path: File to read (opened in binary mode).
        offset: File offset of the first byte to read.
        len: Maximum number of bytes to read.

    Returns:
        A list of lowercase two-character hex strings, one per byte read.
        The list is shorter than ``len`` when the file ends early, instead
        of raising on the empty read.
    """
    with open(path, "rb") as fp:
        fp.seek(offset)
        # One buffered read rather than a read(1) call per byte; a negative
        # length reads nothing.
        data = fp.read(max(len, 0))

    ref_file_list = []
    for count, value in enumerate(data, start=1):
        hex_byte = format(value, "02x")

        # End the row after every sixteenth byte
        if count % 16 == 0:
            print(hex_byte)
        else:
            print(hex_byte + " ", end="")
        ref_file_list.append(hex_byte)
    return ref_file_list

if __name__ == "__main__":
    dbg = Debugger(address="127.0.0.1", port=6589)
    if not dbg.connect():
        exit()

    module_base = dbg.get_base_from_address(dbg.get_memory_localbase())
    print("Module base address = {}".format(hex(module_base)))

    # Obtain memory machine code
    memory_hex_byte = get_memory_hex_ascii(module_base,0,96)

    # Obtain disk machine code
    file_hex_byte = get_file_hex_ascii("d://lyshark.exe",0,96)

    # Loop comparison machine code; zip stops at the shorter list, so a
    # short read on either side cannot raise IndexError
    for index, (memory_byte, file_byte) in enumerate(zip(memory_hex_byte, file_hex_byte)):
        # Compare disk and memory for differences
        if memory_byte != file_byte:
            # Output if there are differences; the disk value goes in the
            # "Disk Bytes" slot and the memory value in "Memory Bytes"
            print("relative position: [{}] --> Disk Bytes: 0x{} --> Memory Bytes: 0x{}".
                  format(index, file_byte, memory_byte))
    dbg.close()

Search for memory signature codes

Searching for machine code in memory can be done with the find_memory series of functions, or you can implement it yourself: use the get_memory_byte function to read the machine code in a specific region, and encapsulate a SearchHexCode function to search it for a signature. When the specified machine code is found, it returns True; otherwise, it returns False.

python
from x32dbg import Debugger

# Obtain machine codes in specific areas
def get_memory_hex(address,offset,len,debugger=None):
    """Read ``len`` bytes of debuggee memory as a list of integers.

    Args:
        address: Base address of the region.
        offset: Offset from ``address`` of the first byte to read.
        len: Number of bytes to read.
        debugger: Object providing ``get_memory_byte``. Defaults to the
            module-level ``dbg`` created by the script.

    Returns:
        A list with one ``int`` per byte, in address order.
    """
    if debugger is None:
        # Fall back to the connection the script created at module level.
        debugger = dbg

    return [
        int(debugger.get_memory_byte(address + index))
        for index in range(offset, offset + len)
    ]

# Convert decimal integers to hexadecimal strings
def IntToHexCode(code):
    """Render each integer in ``code`` as a hexadecimal string.

    Values from 0 to 15 get a leading zero so they are two characters
    wide; every other value is ``hex()`` output with the ``0x`` removed.

    Args:
        code: Iterable of integers.

    Returns:
        A list of hex strings in the same order as ``code``.
    """
    def _as_hex(value):
        digits = hex(value).replace("0x", "")
        return "0" + str(digits) if 0 <= value <= 15 else digits

    return [_as_hex(value) for value in code]

# Match in byte array to ensure consistency with signature
def SearchHexCode(Code,SearchCode,ReadByte):
    """Report whether the signature ``SearchCode`` occurs inside ``Code``.

    A window the length of the signature is moved over ``Code`` and
    compared element by element.

    Args:
        Code: Sequence of byte values (for example hex strings) to search.
        SearchCode: Sequence holding the signature to look for.
        ReadByte: Number of start positions to try, counted from index 0.

    Returns:
        True if the signature matches at one of the tried positions,
        otherwise False. An empty signature never matches.
    """
    SearchCount = len(SearchCode)
    if SearchCount == 0:
        return False

    # A window starting after this index would be shorter than the
    # signature, so it can never match; stopping here replaces the old
    # catch-all exception handler.
    last_start = len(Code) - SearchCount
    for item in range(0, min(ReadByte, last_start + 1)):
        OpCode = Code[item:item + SearchCount]
        if all(found == wanted for found, wanted in zip(OpCode, SearchCode)):
            return True
    return False

if __name__ == "__main__":
    # The name dbg is read as a global by get_memory_hex, so it must stay
    dbg = Debugger(address="127.0.0.1", port=6589)
    connected = dbg.connect()
    if not connected:
        exit()

    # The scan begins at the current instruction pointer
    eip = dbg.get_eip()
    print("Eip = {}".format(hex(eip)))

    # 256 bytes of memory, rendered as hexadecimal strings
    hex_code = IntToHexCode(get_memory_hex(eip,0,256))

    # Signature to look for
    search = ['68', 'b0', 'a3', '07', '01', 'e8', 'e5', '08']

    # Arguments: byte strings, signature, number of positions to try
    found = SearchHexCode(hex_code, search, len(hex_code))

    report = "Feature {} exists" if found == True else "Feature {} non-existent"
    print(report.format(search))

    dbg.close()

When you need to search disassembled code, you can read a specified number of instructions downward with the get_disassembly_count function and call the SearchDisassemblyCode function to compare them against an instruction sequence.

python
from x32dbg import Debugger

# Retrieve whether a specific assembly code fragment exists
def SearchDisassemblyCode(OpCodeList,SearchCode,ReadByte):
    """Find the first place where an instruction sequence occurs.

    A window the length of ``SearchCode`` is moved over ``OpCodeList`` and
    each entry's ``"Assembly"`` value is compared with the corresponding
    wanted instruction text.

    Args:
        OpCodeList: Sequence of dict-like entries; the ``"Assembly"`` and
            ``"Address"`` keys are read with ``.get``.
        SearchCode: Sequence of instruction strings to match in order.
        ReadByte: Number of start positions to try, counted from index 0.

    Returns:
        The ``"Address"`` value of the first entry of the first matching
        window, or None if there is no match or ``SearchCode`` is empty.
    """
    SearchCount = len(SearchCode)
    if SearchCount == 0:
        return None

    # A window starting after this index would be shorter than the wanted
    # sequence; stopping here replaces the old catch-all exception handler.
    last_start = len(OpCodeList) - SearchCount
    for item in range(0, min(ReadByte, last_start + 1)):
        OpCode_Dic = OpCodeList[item:item + SearchCount]
        if all(entry.get("Assembly") == wanted
               for entry, wanted in zip(OpCode_Dic, SearchCode)):
            return OpCode_Dic[0].get("Address")
    return None

if __name__ == "__main__":
    dbg = Debugger(address="127.0.0.1", port=6589)
    connected = dbg.connect()
    if not connected:
        exit()

    # The listing begins at the current instruction pointer
    eip = dbg.get_eip()
    print("Eip = {}".format(hex(eip)))

    # Request 1024 disassembled instructions
    disasm_dict = dbg.get_disassembly_count(eip,1024)

    # Instruction sequences to look for
    SearchCode = [
        ["ret", "push ebp", "mov ebp,esp"],
        ["push ecx", "push ebx"],
        ["push esi","push eax"]
    ]

    # Look up each sequence in turn and report only the ones that occur
    for Search in SearchCode:
        # Arguments: disassembly listing, wanted sequence, positions to try
        ret = SearchDisassemblyCode(disasm_dict,Search,1024)
        if ret is None:
            continue
        print("Instruction set: {} | First appearance address: {}".format(Search,hex(ret)))

    dbg.close()