Python 字节码反汇编器#

参考:

先给出一个函数示例:

def myfunc(alist):
    return len(alist)

下面的命令可以用来显示 myfunc() 的反汇编:

import dis

dis.dis(myfunc)
  1           0 RESUME                   0

  2           2 LOAD_GLOBAL              1 (NULL + len)
             12 LOAD_FAST                0 (alist)
             14 CALL                     1
             22 RETURN_VALUE

最左侧一列的数字(如 1、2)是源代码的行号。

字节码分析#

字节码分析 API 允许将 Python 代码片段包装在 Bytecode 对象中,以便轻松访问已编译代码的详细信息。

bytecode = dis.Bytecode(myfunc)
for instr in bytecode:
    print(instr.opname)
RESUME
LOAD_GLOBAL
LOAD_FAST
CALL
RETURN_VALUE

字节码#

使用第三方库 bytecode

安装:

pip install bytecode

抽象字节码#

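下面使用抽象字节码执行 print('Hello World!')。注意:示例中的 CALL_FUNCTION 指令自 Python 3.11 起已被移除(改用 CALL 指令),因此在本文所用的 Python 3.12 环境中,构造 Instr 时会抛出如下所示的 ValueError: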

from bytecode import Instr, Bytecode

bytecode = Bytecode([Instr("LOAD_NAME", 'print'),
                     Instr("LOAD_CONST", 'Hello World!'),
                     Instr("CALL_FUNCTION", 1),
                     Instr("POP_TOP"),
                     Instr("LOAD_CONST", None),
                     Instr("RETURN_VALUE")])
code = bytecode.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
    768 try:
--> 769     opcode = _opcode.opmap[name]
    770 except KeyError:

KeyError: 'CALL_FUNCTION'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[4], line 5
      1 from bytecode import Instr, Bytecode
      3 bytecode = Bytecode([Instr("LOAD_NAME", 'print'),
      4                      Instr("LOAD_CONST", 'Hello World!'),
----> 5                      Instr("CALL_FUNCTION", 1),
      6                      Instr("POP_TOP"),
      7                      Instr("LOAD_CONST", None),
      8                      Instr("RETURN_VALUE")])
      9 code = bytecode.to_code()
     10 exec(code)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
    562 def __init__(
    563     self,
    564     name: str,
   (...)
    568     location: Optional[InstrLocation] = None,
    569 ) -> None:
--> 570     self._set(name, arg)
    571     if location:
    572         self._location = location

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
    769     opcode = _opcode.opmap[name]
    770 except KeyError:
--> 771     raise ValueError(f"invalid operation name: {name}")  # noqa
    773 if opcode >= MIN_INSTRUMENTED_OPCODE:
    774     raise ValueError(
    775         f"operation {name} is an instrumented or pseudo opcode. "
    776         "Only base opcodes are supported"
    777     )

ValueError: invalid operation name: CALL_FUNCTION

具体字节码#

使用具体字节码执行 print('Hello World!') 的示例:

from bytecode import ConcreteInstr, ConcreteBytecode

bytecode = ConcreteBytecode()
bytecode.names = ['print']
bytecode.consts = ['Hello World!', None]
bytecode.extend([ConcreteInstr("LOAD_NAME", 0),
                 ConcreteInstr("LOAD_CONST", 0),
                 ConcreteInstr("CALL_FUNCTION", 1),
                 ConcreteInstr("POP_TOP"),
                 ConcreteInstr("LOAD_CONST", 1),
                 ConcreteInstr("RETURN_VALUE")])
code = bytecode.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
    768 try:
--> 769     opcode = _opcode.opmap[name]
    770 except KeyError:

KeyError: 'CALL_FUNCTION'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[5], line 8
      4 bytecode.names = ['print']
      5 bytecode.consts = ['Hello World!', None]
      6 bytecode.extend([ConcreteInstr("LOAD_NAME", 0),
      7                  ConcreteInstr("LOAD_CONST", 0),
----> 8                  ConcreteInstr("CALL_FUNCTION", 1),
      9                  ConcreteInstr("POP_TOP"),
     10                  ConcreteInstr("LOAD_CONST", 1),
     11                  ConcreteInstr("RETURN_VALUE")])
     12 code = bytecode.to_code()
     13 exec(code)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/concrete.py:101, in ConcreteInstr.__init__(self, name, arg, lineno, location, extended_args)
     88 def __init__(
     89     self,
     90     name: str,
   (...)
     98     # Python to properly compute the size and avoid messing up the jump
     99     # targets
    100     self._extended_args = extended_args
--> 101     super().__init__(name, arg, lineno=lineno, location=location)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
    562 def __init__(
    563     self,
    564     name: str,
   (...)
    568     location: Optional[InstrLocation] = None,
    569 ) -> None:
--> 570     self._set(name, arg)
    571     if location:
    572         self._location = location

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/concrete.py:122, in ConcreteInstr._set(self, name, arg)
    117 def _set(
    118     self,
    119     name: str,
    120     arg: int,
    121 ) -> None:
--> 122     super()._set(name, arg)
    123     size = 2
    124     if arg is not UNSET:

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
    769     opcode = _opcode.opmap[name]
    770 except KeyError:
--> 771     raise ValueError(f"invalid operation name: {name}")  # noqa
    773 if opcode >= MIN_INSTRUMENTED_OPCODE:
    774     raise ValueError(
    775         f"operation {name} is an instrumented or pseudo opcode. "
    776         "Only base opcodes are supported"
    777     )

ValueError: invalid operation name: CALL_FUNCTION

设置编译器标志#

Bytecode、ConcreteBytecode 和 ControlFlowGraph 实例都有 flags 属性,它是 CompilerFlags 枚举的实例。可以像操作任何二进制标志一样操作该值。

设置 OPTIMIZED flags:

from bytecode import Bytecode, CompilerFlags

bytecode = Bytecode()
bytecode.flags |= CompilerFlags.OPTIMIZED

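清除 OPTIMIZED flags(注意:`^=` 是按位异或,实际效果是翻转该标志位——只有在标志已被设置时才会清除它,若标志尚未设置则反而会将其置位;如需无条件清除,应使用 `bytecode.flags &= ~CompilerFlags.OPTIMIZED`):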

from bytecode import Bytecode, CompilerFlags

bytecode = Bytecode()
bytecode.flags ^= CompilerFlags.OPTIMIZED

可以使用 update_flags 方法根据存储在代码对象中的指令更新 flags。

简单的循环#

for x in (1, 2, 3): print(x)

from bytecode import Label, Instr, Bytecode

loop_start = Label()
loop_done = Label()
loop_exit = Label()
code = Bytecode(
    [
        # Python 3.8 removed SETUP_LOOP
        Instr("LOAD_CONST", (1, 2, 3)),
        Instr("GET_ITER"),
        loop_start,
            Instr("FOR_ITER", loop_exit),
            Instr("STORE_NAME", "x"),
            Instr("LOAD_NAME", "print"),
            Instr("LOAD_NAME", "x"),
            Instr("CALL_FUNCTION", 1),
            Instr("POP_TOP"),
            Instr("JUMP_ABSOLUTE", loop_start),
        # Python 3.8 removed the need to manually manage blocks in loops
        # This is now handled internally by the interpreter
        loop_exit,
            Instr("LOAD_CONST", None),
            Instr("RETURN_VALUE"),
    ]
)

# The conversion to Python code object resolve jump targets:
# abstract labels are replaced with concrete offsets
code = code.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
    768 try:
--> 769     opcode = _opcode.opmap[name]
    770 except KeyError:

KeyError: 'CALL_FUNCTION'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[8], line 16
      4 loop_done = Label()
      5 loop_exit = Label()
      6 code = Bytecode(
      7     [
      8         # Python 3.8 removed SETUP_LOOP
      9         Instr("LOAD_CONST", (1, 2, 3)),
     10         Instr("GET_ITER"),
     11         loop_start,
     12             Instr("FOR_ITER", loop_exit),
     13             Instr("STORE_NAME", "x"),
     14             Instr("LOAD_NAME", "print"),
     15             Instr("LOAD_NAME", "x"),
---> 16             Instr("CALL_FUNCTION", 1),
     17             Instr("POP_TOP"),
     18             Instr("JUMP_ABSOLUTE", loop_start),
     19         # Python 3.8 removed the need to manually manage blocks in loops
     20         # This is now handled internally by the interpreter
     21         loop_exit,
     22             Instr("LOAD_CONST", None),
     23             Instr("RETURN_VALUE"),
     24     ]
     25 )
     27 # The conversion to Python code object resolve jump targets:
     28 # abstract labels are replaced with concrete offsets
     29 code = code.to_code()

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
    562 def __init__(
    563     self,
    564     name: str,
   (...)
    568     location: Optional[InstrLocation] = None,
    569 ) -> None:
--> 570     self._set(name, arg)
    571     if location:
    572         self._location = location

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
    769     opcode = _opcode.opmap[name]
    770 except KeyError:
--> 771     raise ValueError(f"invalid operation name: {name}")  # noqa
    773 if opcode >= MIN_INSTRUMENTED_OPCODE:
    774     raise ValueError(
    775         f"operation {name} is an instrumented or pseudo opcode. "
    776         "Only base opcodes are supported"
    777     )

ValueError: invalid operation name: CALL_FUNCTION

条件跳转#

print('yes' if test else 'no')

from bytecode import Label, Instr, Bytecode

label_else = Label()
label_print = Label()
bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
                     Instr('LOAD_NAME', 'test'),
                     Instr('POP_JUMP_IF_FALSE', label_else),
                         Instr('LOAD_CONST', 'yes'),
                         Instr('JUMP_FORWARD', label_print),
                     label_else,
                         Instr('LOAD_CONST', 'no'),
                     label_print,
                         Instr('CALL_FUNCTION', 1),
                     Instr('LOAD_CONST', None),
                     Instr('RETURN_VALUE')])
code = bytecode.to_code()

test = 0
exec(code)

test = 1
exec(code)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
    768 try:
--> 769     opcode = _opcode.opmap[name]
    770 except KeyError:

KeyError: 'CALL_FUNCTION'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[9], line 13
      3 label_else = Label()
      4 label_print = Label()
      5 bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
      6                      Instr('LOAD_NAME', 'test'),
      7                      Instr('POP_JUMP_IF_FALSE', label_else),
      8                          Instr('LOAD_CONST', 'yes'),
      9                          Instr('JUMP_FORWARD', label_print),
     10                      label_else,
     11                          Instr('LOAD_CONST', 'no'),
     12                      label_print,
---> 13                          Instr('CALL_FUNCTION', 1),
     14                      Instr('LOAD_CONST', None),
     15                      Instr('RETURN_VALUE')])
     16 code = bytecode.to_code()
     18 test = 0

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
    562 def __init__(
    563     self,
    564     name: str,
   (...)
    568     location: Optional[InstrLocation] = None,
    569 ) -> None:
--> 570     self._set(name, arg)
    571     if location:
    572         self._location = location

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
    769     opcode = _opcode.opmap[name]
    770 except KeyError:
--> 771     raise ValueError(f"invalid operation name: {name}")  # noqa
    773 if opcode >= MIN_INSTRUMENTED_OPCODE:
    774     raise ValueError(
    775         f"operation {name} is an instrumented or pseudo opcode. "
    776         "Only base opcodes are supported"
    777     )

ValueError: invalid operation name: CALL_FUNCTION

Control Flow Graph (CFG)#

为了分析或优化现有的代码,bytecode 提供了 ControlFlowGraph 类,它是控制流图(CFG)。

控制流图用于在转换为代码时进行堆栈深度分析。因为它比 CPython 更擅长识别死代码,所以它可以减少堆栈大小。

转储条件跳转示例的控制流图:

from bytecode import Label, Instr, Bytecode, ControlFlowGraph, dump_bytecode

label_else = Label()
label_print = Label()
bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
                     Instr('LOAD_NAME', 'test'),
                     Instr('POP_JUMP_IF_FALSE', label_else),
                         Instr('LOAD_CONST', 'yes'),
                         Instr('JUMP_FORWARD', label_print),
                     label_else,
                         Instr('LOAD_CONST', 'no'),
                     label_print,
                         Instr('CALL_FUNCTION', 1),
                     Instr('LOAD_CONST', None),
                     Instr('RETURN_VALUE')])

blocks = ControlFlowGraph.from_bytecode(bytecode)
dump_bytecode(blocks)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
    768 try:
--> 769     opcode = _opcode.opmap[name]
    770 except KeyError:

KeyError: 'CALL_FUNCTION'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[10], line 13
      3 label_else = Label()
      4 label_print = Label()
      5 bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
      6                      Instr('LOAD_NAME', 'test'),
      7                      Instr('POP_JUMP_IF_FALSE', label_else),
      8                          Instr('LOAD_CONST', 'yes'),
      9                          Instr('JUMP_FORWARD', label_print),
     10                      label_else,
     11                          Instr('LOAD_CONST', 'no'),
     12                      label_print,
---> 13                          Instr('CALL_FUNCTION', 1),
     14                      Instr('LOAD_CONST', None),
     15                      Instr('RETURN_VALUE')])
     17 blocks = ControlFlowGraph.from_bytecode(bytecode)
     18 dump_bytecode(blocks)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
    562 def __init__(
    563     self,
    564     name: str,
   (...)
    568     location: Optional[InstrLocation] = None,
    569 ) -> None:
--> 570     self._set(name, arg)
    571     if location:
    572         self._location = location

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
    769     opcode = _opcode.opmap[name]
    770 except KeyError:
--> 771     raise ValueError(f"invalid operation name: {name}")  # noqa
    773 if opcode >= MIN_INSTRUMENTED_OPCODE:
    774     raise ValueError(
    775         f"operation {name} is an instrumented or pseudo opcode. "
    776         "Only base opcodes are supported"
    777     )

ValueError: invalid operation name: CALL_FUNCTION

备注(以下说明针对的是该示例成功运行时 dump_bytecode 输出的各个块):

  • block #1 是开始块,以 POP_JUMP_IF_FALSE 条件跳转结束,跟着的是 block #2

  • block #2 以 JUMP_FORWARD 无条件跳转结束

  • block #3 不包含 jump,后面跟着 block #4

  • block #4 是最终的块