# Page-capture residue (not program text): "Spaces: Sleeping"
import os
import shlex
import subprocess
import tempfile

import editdistance
import gradio as gr
from hexdump2 import hexdump
# Text passed to the Gradio interface as its description (see `run`).
description = """This is a space testing a method for evaluating the quality of decompilation.
Currently unhandled features:
* PIC stuff
* Global references
* Function calls
* Wildcards in target function?
* How to extract compilable decompilation from decompilers?
"""
def trim(str, n):
    """Return *str* with its first *n* lines removed.

    The text is split with ``splitlines()`` and the remaining lines are
    re-joined with ``"\\n"``, so original line terminators (including a
    trailing newline) are not preserved. If the text has *n* lines or fewer,
    the result is the empty string.

    The first parameter keeps its original name (which shadows the builtin
    ``str`` inside this function) so keyword callers keep working.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str, header_lines=7):
    """Drop the leading lines of objdump output and return the rest.

    Args:
        str: Text captured from an objdump invocation. The name shadows the
            builtin inside this function; it is kept for compatibility.
        header_lines: Number of leading lines to discard. Defaults to 7,
            the value this file has always used.
            NOTE(review): 7 presumably covers objdump's banner, section
            heading and first label line -- confirm against the binutils
            version deployed.

    Returns:
        The remaining lines joined with ``"\\n"``.
    """
    return trim(str, header_lines)
def disassemble_bytes(byte_data, architecture):
    """Disassemble raw machine code with objdump and return the listing.

    The bytes are written to a scratch file and handed to
    ``objdump -D -b binary -m <architecture>``. Only objdump's stdout is
    used; if objdump fails, the result is whatever (possibly empty) text it
    printed there.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value passed to objdump's ``-m`` option.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
    """
    # Use a scratch directory so the input file is always removed. The
    # previous NamedTemporaryFile(delete=False) left one file behind per call.
    with tempfile.TemporaryDirectory() as work_dir:
        bin_path = os.path.join(work_dir, "target.bin")
        with open(bin_path, "wb") as bin_file:
            bin_file.write(byte_data)

        listing = subprocess.run(
            ["objdump", "-D", "-b", "binary", "-m", architecture, bin_path],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(listing)
def compile(compiler, flags, source):
    """Compile C source and return its code bytes, compiler log and disassembly.

    The source is compiled to an object file with
    ``<compiler> -c <src> <flags...> -o <obj>``. The ``.text`` / ``.text.*``
    sections are then extracted as raw bytes with objcopy, and the object is
    disassembled with ``objdump -d``.

    NOTE: ``compiler`` is executed as given and ``flags`` are passed through
    after ``shlex.split``; both come straight from the caller.

    Args:
        compiler: Name or path of the compiler executable.
        flags: Extra command-line flags as a single shell-style string.
        source: The C source text.

    Returns:
        A tuple ``(compiled_bytes, compile_output, disassembly)``.
        ``compiled_bytes`` is ``None`` when the compiler exits non-zero.
        ``compile_output`` is the compiler's stdout followed by its stderr.
        ``disassembly`` is the trimmed objdump listing (empty if no object
        file was produced).
    """
    # All intermediates live in one scratch directory that is removed on
    # exit. The previous delete=False temp files (.c and .o) were leaked.
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, "input.c")
        obj_path = os.path.join(work_dir, "output.o")
        raw_path = os.path.join(work_dir, "text.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file.
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", obj_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        compiled_bytes = b""
        disassembly = ""

        # Only run the binutils steps when there is an object to inspect.
        have_object = os.path.isfile(obj_path) and os.path.getsize(obj_path) > 0
        if have_object:
            subprocess.run(
                [
                    "objcopy",
                    "--only-section",
                    ".text",
                    # XXX in reality we should probably look at the sections
                    "--only-section",
                    ".text.*",
                    "-O",
                    "binary",
                    obj_path,
                    raw_path,
                ]
            )
            # Read by path after objcopy has finished. A handle opened
            # beforehand would see nothing if objcopy replaced the file
            # instead of writing into it.
            try:
                with open(raw_path, "rb") as raw_file:
                    compiled_bytes = raw_file.read()
            except FileNotFoundError:
                # objcopy produced no output file; treat as "no code bytes".
                compiled_bytes = b""

            # Disassemble the object file.
            listing = subprocess.run(
                ["objdump", "-d", obj_path],
                capture_output=True,
                text=True,
            ).stdout
            disassembly = trim_objdump(listing)

    if result.returncode != 0:
        return None, compile_output, disassembly
    return compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, architecture):
    """Compile *source* and compare the resulting bytes with the target bytes.

    Args:
        target_bytes: Hex string of the target function's bytes; parsed with
            ``bytes.fromhex`` (raises ``ValueError`` on malformed hex).
        source: C source text to compile.
        compiler: Compiler executable, forwarded to ``compile``.
        flags: Compiler flags string, forwarded to ``compile``.
        architecture: Architecture name, forwarded to ``disassemble_bytes``.

    Returns:
        A 6-tuple in the order the Gradio outputs are declared in ``run``:
        compiled-bytes hexdump (or ``"Compilation failed"``), target-bytes
        hexdump, edit distance between the two byte strings (``-1`` when
        compilation failed), compiler output, compiled disassembly, and
        target disassembly.
    """
    target_bytes = bytes.fromhex(target_bytes)
    compiled_bytes, compile_output, compiled_disassembly = compile(
        compiler, flags, source
    )
    target_disassembly = disassemble_bytes(target_bytes, architecture)
    target_dump = hexdump(target_bytes, result="return")

    # compile() signals a failed build by returning None for the bytes.
    if compiled_bytes is None:
        return (
            "Compilation failed",
            target_dump,
            -1,
            compile_output,
            compiled_disassembly,
            target_disassembly,
        )

    return (
        hexdump(compiled_bytes, result="return"),
        target_dump,
        editdistance.eval(compiled_bytes, target_bytes),
        compile_output,
        compiled_disassembly,
        target_disassembly,
    )
def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The five input widgets map, in order, to ``predict``'s parameters, and
    the six output widgets map to the tuple it returns. The server is
    launched on all interfaces (``0.0.0.0``) at port 7860 with
    ``show_error=True``.
    """
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int foo() { return 0; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (for disassembler)", value="i386"),
    ]
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.Textbox(label="Target Disassembly"),
    ]

    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Start the app when this module is executed (kept unguarded, as before).
run()