Python 字节码反汇编器#
参考:
先给出一个示例函数:
def myfunc(alist):
return len(alist)
下面的命令可以用来显示 myfunc()
的反汇编:
import dis
dis.dis(myfunc)
1 0 RESUME 0
2 2 LOAD_GLOBAL 1 (NULL + len)
12 LOAD_FAST 0 (alist)
14 CALL 1
22 RETURN_VALUE
最左侧一列的数字(例如 2)
是源代码的行号。
字节码分析#
字节码分析 API 允许将 Python 代码片段包装在 Bytecode
对象中,以便轻松访问已编译代码的详细信息。
bytecode = dis.Bytecode(myfunc)
for instr in bytecode:
print(instr.opname)
RESUME
LOAD_GLOBAL
LOAD_FAST
CALL
RETURN_VALUE
字节码#
使用三方库 bytecode
。
安装:
pip install bytecode
抽象字节码#
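下面使用抽象字节码执行 print('Hello World!')
(注意:CALL_FUNCTION 指令已在 Python 3.11 中被移除,因此该示例在 Python 3.12 上会抛出下文所示的 ValueError;在新版本中需要改用 PUSH_NULL 与 CALL 等指令):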
from bytecode import Instr, Bytecode
bytecode = Bytecode([Instr("LOAD_NAME", 'print'),
Instr("LOAD_CONST", 'Hello World!'),
Instr("CALL_FUNCTION", 1),
Instr("POP_TOP"),
Instr("LOAD_CONST", None),
Instr("RETURN_VALUE")])
code = bytecode.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
768 try:
--> 769 opcode = _opcode.opmap[name]
770 except KeyError:
KeyError: 'CALL_FUNCTION'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[4], line 5
1 from bytecode import Instr, Bytecode
3 bytecode = Bytecode([Instr("LOAD_NAME", 'print'),
4 Instr("LOAD_CONST", 'Hello World!'),
----> 5 Instr("CALL_FUNCTION", 1),
6 Instr("POP_TOP"),
7 Instr("LOAD_CONST", None),
8 Instr("RETURN_VALUE")])
9 code = bytecode.to_code()
10 exec(code)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
562 def __init__(
563 self,
564 name: str,
(...)
568 location: Optional[InstrLocation] = None,
569 ) -> None:
--> 570 self._set(name, arg)
571 if location:
572 self._location = location
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
769 opcode = _opcode.opmap[name]
770 except KeyError:
--> 771 raise ValueError(f"invalid operation name: {name}") # noqa
773 if opcode >= MIN_INSTRUMENTED_OPCODE:
774 raise ValueError(
775 f"operation {name} is an instrumented or pseudo opcode. "
776 "Only base opcodes are supported"
777 )
ValueError: invalid operation name: CALL_FUNCTION
具体字节码#
使用具体字节码执行 print('Hello World!')
的示例:
from bytecode import ConcreteInstr, ConcreteBytecode
bytecode = ConcreteBytecode()
bytecode.names = ['print']
bytecode.consts = ['Hello World!', None]
bytecode.extend([ConcreteInstr("LOAD_NAME", 0),
ConcreteInstr("LOAD_CONST", 0),
ConcreteInstr("CALL_FUNCTION", 1),
ConcreteInstr("POP_TOP"),
ConcreteInstr("LOAD_CONST", 1),
ConcreteInstr("RETURN_VALUE")])
code = bytecode.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
768 try:
--> 769 opcode = _opcode.opmap[name]
770 except KeyError:
KeyError: 'CALL_FUNCTION'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[5], line 8
4 bytecode.names = ['print']
5 bytecode.consts = ['Hello World!', None]
6 bytecode.extend([ConcreteInstr("LOAD_NAME", 0),
7 ConcreteInstr("LOAD_CONST", 0),
----> 8 ConcreteInstr("CALL_FUNCTION", 1),
9 ConcreteInstr("POP_TOP"),
10 ConcreteInstr("LOAD_CONST", 1),
11 ConcreteInstr("RETURN_VALUE")])
12 code = bytecode.to_code()
13 exec(code)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/concrete.py:101, in ConcreteInstr.__init__(self, name, arg, lineno, location, extended_args)
88 def __init__(
89 self,
90 name: str,
(...)
98 # Python to properly compute the size and avoid messing up the jump
99 # targets
100 self._extended_args = extended_args
--> 101 super().__init__(name, arg, lineno=lineno, location=location)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
562 def __init__(
563 self,
564 name: str,
(...)
568 location: Optional[InstrLocation] = None,
569 ) -> None:
--> 570 self._set(name, arg)
571 if location:
572 self._location = location
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/concrete.py:122, in ConcreteInstr._set(self, name, arg)
117 def _set(
118 self,
119 name: str,
120 arg: int,
121 ) -> None:
--> 122 super()._set(name, arg)
123 size = 2
124 if arg is not UNSET:
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
769 opcode = _opcode.opmap[name]
770 except KeyError:
--> 771 raise ValueError(f"invalid operation name: {name}") # noqa
773 if opcode >= MIN_INSTRUMENTED_OPCODE:
774 raise ValueError(
775 f"operation {name} is an instrumented or pseudo opcode. "
776 "Only base opcodes are supported"
777 )
ValueError: invalid operation name: CALL_FUNCTION
设置编译器标志#
Bytecode
、ConcreteBytecode
和 ControlFlowGraph
实例都有 flags
属性,它是 CompilerFlags
枚举的实例。可以像操作任何二进制标志一样操作该值。
设置 OPTIMIZED
flags:
from bytecode import Bytecode, CompilerFlags
bytecode = Bytecode()
bytecode.flags |= CompilerFlags.OPTIMIZED
清除 OPTIMIZED
flags:
from bytecode import Bytecode, CompilerFlags
bytecode = Bytecode()
bytecode.flags &= ~CompilerFlags.OPTIMIZED
可以使用 update_flags
方法根据存储在代码对象中的指令更新 flags。
简单的循环#
for x in (1, 2, 3): print(x)
:
from bytecode import Label, Instr, Bytecode
loop_start = Label()
loop_done = Label()
loop_exit = Label()
code = Bytecode(
[
# Python 3.8 removed SETUP_LOOP
Instr("LOAD_CONST", (1, 2, 3)),
Instr("GET_ITER"),
loop_start,
Instr("FOR_ITER", loop_exit),
Instr("STORE_NAME", "x"),
Instr("LOAD_NAME", "print"),
Instr("LOAD_NAME", "x"),
Instr("CALL_FUNCTION", 1),
Instr("POP_TOP"),
Instr("JUMP_ABSOLUTE", loop_start),
# Python 3.8 removed the need to manually manage blocks in loops
# This is now handled internally by the interpreter
loop_exit,
Instr("LOAD_CONST", None),
Instr("RETURN_VALUE"),
]
)
# The conversion to Python code object resolve jump targets:
# abstract labels are replaced with concrete offsets
code = code.to_code()
exec(code)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
768 try:
--> 769 opcode = _opcode.opmap[name]
770 except KeyError:
KeyError: 'CALL_FUNCTION'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[8], line 16
4 loop_done = Label()
5 loop_exit = Label()
6 code = Bytecode(
7 [
8 # Python 3.8 removed SETUP_LOOP
9 Instr("LOAD_CONST", (1, 2, 3)),
10 Instr("GET_ITER"),
11 loop_start,
12 Instr("FOR_ITER", loop_exit),
13 Instr("STORE_NAME", "x"),
14 Instr("LOAD_NAME", "print"),
15 Instr("LOAD_NAME", "x"),
---> 16 Instr("CALL_FUNCTION", 1),
17 Instr("POP_TOP"),
18 Instr("JUMP_ABSOLUTE", loop_start),
19 # Python 3.8 removed the need to manually manage blocks in loops
20 # This is now handled internally by the interpreter
21 loop_exit,
22 Instr("LOAD_CONST", None),
23 Instr("RETURN_VALUE"),
24 ]
25 )
27 # The conversion to Python code object resolve jump targets:
28 # abstract labels are replaced with concrete offsets
29 code = code.to_code()
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
562 def __init__(
563 self,
564 name: str,
(...)
568 location: Optional[InstrLocation] = None,
569 ) -> None:
--> 570 self._set(name, arg)
571 if location:
572 self._location = location
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
769 opcode = _opcode.opmap[name]
770 except KeyError:
--> 771 raise ValueError(f"invalid operation name: {name}") # noqa
773 if opcode >= MIN_INSTRUMENTED_OPCODE:
774 raise ValueError(
775 f"operation {name} is an instrumented or pseudo opcode. "
776 "Only base opcodes are supported"
777 )
ValueError: invalid operation name: CALL_FUNCTION
条件跳转#
print('yes' if test else 'no')
:
from bytecode import Label, Instr, Bytecode
label_else = Label()
label_print = Label()
bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
Instr('LOAD_NAME', 'test'),
Instr('POP_JUMP_IF_FALSE', label_else),
Instr('LOAD_CONST', 'yes'),
Instr('JUMP_FORWARD', label_print),
label_else,
Instr('LOAD_CONST', 'no'),
label_print,
Instr('CALL_FUNCTION', 1),
Instr('LOAD_CONST', None),
Instr('RETURN_VALUE')])
code = bytecode.to_code()
test = 0
exec(code)
test = 1
exec(code)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
768 try:
--> 769 opcode = _opcode.opmap[name]
770 except KeyError:
KeyError: 'CALL_FUNCTION'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[9], line 13
3 label_else = Label()
4 label_print = Label()
5 bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
6 Instr('LOAD_NAME', 'test'),
7 Instr('POP_JUMP_IF_FALSE', label_else),
8 Instr('LOAD_CONST', 'yes'),
9 Instr('JUMP_FORWARD', label_print),
10 label_else,
11 Instr('LOAD_CONST', 'no'),
12 label_print,
---> 13 Instr('CALL_FUNCTION', 1),
14 Instr('LOAD_CONST', None),
15 Instr('RETURN_VALUE')])
16 code = bytecode.to_code()
18 test = 0
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
562 def __init__(
563 self,
564 name: str,
(...)
568 location: Optional[InstrLocation] = None,
569 ) -> None:
--> 570 self._set(name, arg)
571 if location:
572 self._location = location
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
769 opcode = _opcode.opmap[name]
770 except KeyError:
--> 771 raise ValueError(f"invalid operation name: {name}") # noqa
773 if opcode >= MIN_INSTRUMENTED_OPCODE:
774 raise ValueError(
775 f"operation {name} is an instrumented or pseudo opcode. "
776 "Only base opcodes are supported"
777 )
ValueError: invalid operation name: CALL_FUNCTION
Control Flow Graph (CFG)#
为了分析或优化现有的代码,bytecode
提供了 ControlFlowGraph
类,它是控制流图(CFG)。
控制流图用于在转换为代码时进行堆栈深度分析。因为它比 CPython 更擅长识别死代码,所以它可以减少堆栈大小。
转储条件跳转示例的控制流图:
from bytecode import Label, Instr, Bytecode, ControlFlowGraph, dump_bytecode
label_else = Label()
label_print = Label()
bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
Instr('LOAD_NAME', 'test'),
Instr('POP_JUMP_IF_FALSE', label_else),
Instr('LOAD_CONST', 'yes'),
Instr('JUMP_FORWARD', label_print),
label_else,
Instr('LOAD_CONST', 'no'),
label_print,
Instr('CALL_FUNCTION', 1),
Instr('LOAD_CONST', None),
Instr('RETURN_VALUE')])
blocks = ControlFlowGraph.from_bytecode(bytecode)
dump_bytecode(blocks)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:769, in BaseInstr._set(self, name, arg)
768 try:
--> 769 opcode = _opcode.opmap[name]
770 except KeyError:
KeyError: 'CALL_FUNCTION'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[10], line 13
3 label_else = Label()
4 label_print = Label()
5 bytecode = Bytecode([Instr('LOAD_NAME', 'print'),
6 Instr('LOAD_NAME', 'test'),
7 Instr('POP_JUMP_IF_FALSE', label_else),
8 Instr('LOAD_CONST', 'yes'),
9 Instr('JUMP_FORWARD', label_print),
10 label_else,
11 Instr('LOAD_CONST', 'no'),
12 label_print,
---> 13 Instr('CALL_FUNCTION', 1),
14 Instr('LOAD_CONST', None),
15 Instr('RETURN_VALUE')])
17 blocks = ControlFlowGraph.from_bytecode(bytecode)
18 dump_bytecode(blocks)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:570, in BaseInstr.__init__(self, name, arg, lineno, location)
562 def __init__(
563 self,
564 name: str,
(...)
568 location: Optional[InstrLocation] = None,
569 ) -> None:
--> 570 self._set(name, arg)
571 if location:
572 self._location = location
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/bytecode/instr.py:771, in BaseInstr._set(self, name, arg)
769 opcode = _opcode.opmap[name]
770 except KeyError:
--> 771 raise ValueError(f"invalid operation name: {name}") # noqa
773 if opcode >= MIN_INSTRUMENTED_OPCODE:
774 raise ValueError(
775 f"operation {name} is an instrumented or pseudo opcode. "
776 "Only base opcodes are supported"
777 )
ValueError: invalid operation name: CALL_FUNCTION
备注
block #1
是开始块,以 POP_JUMP_IF_FALSE
条件跳转结束,后面跟着 block #2
block #2
以 JUMP_FORWARD
无条件跳转结束
block #3
不包含 jump,后面跟着 block #4
block #4
是最终的块