🌐 AI搜索 & 代理 主页
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[submodule "submodules/cpython-v3.13.9"]
path = submodules/cpython-v3.13.9
url = https://github.com/python/cpython.git
branch = 3.13
shallow = true
276 changes: 276 additions & 0 deletions crates/compiler-core/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
#!/usr/bin/env python
from __future__ import annotations

import dataclasses
import enum
import functools
import io
import pathlib
import subprocess
import sys
import typing

import tomllib

if typing.TYPE_CHECKING:
from collections.abc import Iterator

# Version string used to locate the CPython checkout under `submodules/`.
CPYTHON_VERSION = "v3.13.9"


# Directory containing this script, plus the config it reads and the Rust
# file it generates.
CRATE_ROOT = pathlib.Path(__file__).parent
CONF_FILE = CRATE_ROOT / "instructions.toml"
OUT_FILE = CRATE_ROOT / "src" / "bytecode" / "instruction.rs"

# Two directory levels above `CRATE_ROOT`; the CPython sources are expected at
# `submodules/cpython-<CPYTHON_VERSION>` beneath it.
ROOT = CRATE_ROOT.parents[1]
SUBMODULES = ROOT / "submodules"
CPYTHON_DIR = SUBMODULES / f"cpython-{CPYTHON_VERSION}"
CPYTHON_TOOLS_DIR = CPYTHON_DIR / "Tools" / "cases_generator"
DIS_DOC = CPYTHON_DIR / "Doc" / "library" / "dis.rst"

# Extend the import path before the imports below; presumably `analyzer` and
# `generators_common` live in `CPYTHON_TOOLS_DIR` (verify against the checkout).
sys.path.append(CPYTHON_TOOLS_DIR.as_posix())

import analyzer
from generators_common import DEFAULT_INPUT

# Largest opcode value that fits in a Rust `u8`.
U8_MAX = 255


@dataclasses.dataclass(frozen=True, slots=True)
class OpargMetadata:
    """Optional overrides for an instruction's oparg field name and type."""

    # Field name to use for the oparg; `None` means "no override".
    name: str | None = None
    # Type name to use for the oparg; `None` means "no override".
    typ: str | None = None


@dataclasses.dataclass(slots=True)
class InstructionOverride:
    """Per-instruction settings that replace values derived from the analysis.

    ``oparg`` and ``properties`` may be passed as plain dicts (for example,
    straight from parsed configuration); they are converted after
    initialisation.
    """

    # Whether the instruction should be emitted at all.
    enabled: bool = True
    # Replacement for the derived Rust variant name.
    name: str | None = None
    # Replacement oparg field name / type.
    oparg: OpargMetadata = dataclasses.field(default_factory=OpargMetadata)
    # Replacement analyzer properties.
    properties: analyzer.Properties | None = None

    def __post_init__(self):
        """Promote dict-valued ``oparg`` / ``properties`` to their real types."""
        raw_oparg = self.oparg
        if isinstance(raw_oparg, dict):
            self.oparg = OpargMetadata(**raw_oparg)

        raw_properties = self.properties
        if isinstance(raw_properties, dict):
            # Start from `analyzer.SKIP_PROPERTIES` and apply the given fields.
            self.properties = dataclasses.replace(
                analyzer.SKIP_PROPERTIES, **raw_properties
            )


@dataclasses.dataclass(slots=True)
class Instruction:
    """An analyzed CPython instruction paired with its configured override.

    Wraps an ``analyzer.Instruction`` or ``analyzer.PseudoInstruction`` and
    exposes the pieces needed to render it as a variant of the generated Rust
    ``Instruction`` enum.
    """

    # TODO: Maybe add a post_init hook to show a warning in case of oparg being
    # set for instructions with no oparg?
    instruction: analyzer.Instruction | analyzer.PseudoInstruction
    override: InstructionOverride = dataclasses.field(
        default_factory=InstructionOverride
    )

    @property
    def rust_name(self) -> str:
        """Variant name: the override's name, else the PascalCased opcode name."""
        return self.override.name or snake_case_to_pascal_case(self.instruction.name)

    @property
    def rust_enum_variant(self) -> str:
        """Rust source for this variant, including its explicit discriminant."""
        if self.properties.oparg:
            fields = f"{{ {self.oparg_name}: Arg<{self.oparg_typ}> }}"
        else:
            fields = ""

        return f"{self.rust_name} {fields} = {self.instruction.opcode}"

    @property
    def properties(self) -> analyzer.Properties:
        """Effective properties: the override's if set, else the analyzed ones."""
        return self.override.properties or self.instruction.properties

    @property
    def oparg_name(self) -> str | None:
        """Field name for the oparg, or ``None`` if the instruction has none.

        Resolution order: explicit override, then the name found in the
        ``dis`` documentation, then the default for the detected oparg kind.

        Raises:
            ValueError: if none of the above yields a name.
        """
        if name := self.override.oparg.name:
            return name

        if not self.properties.oparg:
            return None

        oparg_names_map = build_oparg_names_map()
        if name := oparg_names_map.get(self.instruction.name):
            return name

        return self._oparg.field_name

    @property
    def oparg_typ(self) -> str | None:
        """Type name for the oparg, or ``None`` if the instruction has none.

        Uses the explicit override when set; otherwise the detected oparg
        kind, falling back to ``"u32"`` when no kind can be detected.
        """
        if typ := self.override.oparg.typ:
            return typ

        if not self.properties.oparg:
            return None

        try:
            return self._oparg.name
        except ValueError:
            return "u32"  # Fallback

    @property
    def _oparg(self) -> Oparg:
        """Detected oparg kind; failures are annotated with the opcode name."""
        try:
            return Oparg.try_from_properties(self.properties)
        except ValueError as err:
            err.add_note(self.instruction.name)
            raise

    @classmethod
    def from_analysis(
        cls, analysis: analyzer.Analysis, overrides: dict[str, dict]
    ) -> Iterator[typing.Self]:
        """Yield an ``Instruction`` for every enabled instruction in *analysis*.

        Args:
            analysis: analyzer result; its ``instructions`` mapping and
                ``have_arg`` threshold are used.
            overrides: maps an instruction name to keyword arguments for
                ``InstructionOverride``.

        Instructions whose opcode exceeds ``U8_MAX`` are reassigned (in place,
        on the wrapped analyzer object) to the lowest free opcode on the
        matching side of ``analysis.have_arg``.

        Raises:
            RuntimeError: if no free opcode is left for such an instruction.
        """
        insts = {}
        for name, inst in analysis.instructions.items():
            override = InstructionOverride(**overrides.get(name, {}))
            if not override.enabled:
                continue

            insts[inst.opcode] = cls(inst, override)

        # Because we are treating pseudos like real opcodes,
        # we need to find an alternative opcode for them (they go over u8::MAX)
        occupied = set(insts)
        for opcode, inst in insts.items():
            if opcode <= U8_MAX:
                continue

            # Preserve `HAVE_ARG` semantics.
            if inst.properties.oparg:
                rang = range(analysis.have_arg, U8_MAX + 1)
            else:
                rang = range(0, analysis.have_arg)

            # A bare `next()` would raise StopIteration here, which a generator
            # turns into an uninformative RuntimeError; report the cause instead.
            new_opcode = next((i for i in rang if i not in occupied), None)
            if new_opcode is None:
                raise RuntimeError(
                    f"No free opcode left in {rang} for "
                    f"{inst.instruction.name!r} (original opcode {opcode})"
                )

            occupied.add(new_opcode)
            inst.instruction.opcode = new_opcode

        yield from insts.values()


@enum.unique
class Oparg(enum.Enum):
    """Kinds of instruction argument that can be inferred from properties."""

    Label = enum.auto()
    NameIdx = enum.auto()

    @property
    def field_name(self) -> str:
        """Default field name used for an argument of this kind."""
        if self is Oparg.Label:
            return "target"
        if self is Oparg.NameIdx:
            return "namei"

    @classmethod
    def try_from_properties(cls, properties: analyzer.Properties) -> typing.Self:
        """Pick the oparg kind implied by *properties*.

        ``uses_co_names`` takes precedence over ``jumps``.

        Raises:
            ValueError: if neither flag is set.
        """
        # TODO: `properties.uses_co_consts` -> `ConstIdx`
        # TODO: `properties.uses_locals` -> `LocalIdx`

        if properties.uses_co_names:
            return cls.NameIdx
        if properties.jumps:
            return cls.Label
        raise ValueError(f"Could not detect oparg type of {properties}")


@functools.cache
def build_oparg_names_map() -> dict[str, str]:
    """Map CPython opcode names to the oparg names given in ``DIS_DOC``.

    Only ``.. opcode::`` directives that carry a parenthesised argument name
    contribute an entry. The result is cached, so the file is read once.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding.
    doc = DIS_DOC.read_text(encoding="utf-8")

    out = {}
    for line in doc.splitlines():
        if not line.startswith(".. opcode:: "):
            continue

        # At this point `line` would look something like:
        #
        # `.. opcode:: OPCODE_NAME`
        # or
        # `.. opcode:: OPCODE_NAME (oparg_name)`
        #
        # We only care about the latter.

        parts = line.split()
        if len(parts) != 4:
            continue

        _, _, cpython_name, oparg = parts
        out[cpython_name] = oparg.removeprefix("(").removesuffix(")")

    return out


def snake_case_to_pascal_case(name: str) -> str:
    """Convert an underscore-separated name to PascalCase.

    Each underscore-delimited segment is title-cased and the segments are
    concatenated, e.g. ``"LOAD_FAST"`` becomes ``"LoadFast"``.
    """
    segments = name.split("_")
    return "".join(segment.title() for segment in segments)


def rustfmt(code: str) -> str:
    """Return *code* formatted by the ``rustfmt`` executable.

    The source is piped to ``rustfmt`` on stdin and the formatted result is
    read from its stdout.

    Raises:
        subprocess.CalledProcessError: if ``rustfmt`` exits with a non-zero
            status.
    """
    completed = subprocess.run(
        ["rustfmt", "--emit=stdout"],
        input=code,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout


def get_analysis() -> analyzer.Analysis:
    """Run the CPython cases-generator analysis over ``DEFAULT_INPUT``."""
    analysis = analyzer.analyze_files([DEFAULT_INPUT])
Comment on lines +218 to +219
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Typo in type annotation: analyser should be analyzer.

The function's return type annotation uses analyser.Analysis but the import on line 29 is analyzer. This will cause a NameError at runtime if type hints are evaluated.

-def get_analysis() -> analyser.Analysis:
+def get_analysis() -> analyzer.Analysis:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def get_analysis() -> analyser.Analysis:
analysis = analyzer.analyze_files([DEFAULT_INPUT])
def get_analysis() -> analyzer.Analysis:
analysis = analyzer.analyze_files([DEFAULT_INPUT])
🧰 Tools
🪛 Flake8 (7.3.0)

[error] 137-137: undefined name 'analyser'

(F821)

🤖 Prompt for AI Agents
In crates/compiler-core/generate.py around lines 137 to 138, the return type
annotation mistakenly references analyser.Analysis while the module is imported
as analyzer; update the annotation to analyzer.Analysis to match the import (or
change the import to analyser if that was intended) so the type name resolves
correctly; ensure any other occurrences use the same identifier to avoid
NameError.


# We don't differentiate between real and pseudos yet
analysis.instructions |= analysis.pseudos
return analysis
Comment on lines +221 to +223
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

#[repr(u8)] incompatible with pseudo-instruction IDs.

Line 141 merges analysis.pseudos into analysis.instructions, but pseudo-instructions have IDs > 255 (e.g., Jump = 256). Combined with the hardcoded #[repr(u8)] on line 150, this produces an enum that won't compile.

Either:

  1. Filter out instructions with IDs > 255:
if opcodes.get(name, {}).get("enabled", True) and analysis.opmap.get(name, 0) <= 255
  2. Or change the repr based on max ID:
max_id = max(inst.id for inst in instructions)
repr_type = "u16" if max_id > 255 else "u8"
# Then use {repr_type} in the template

Also applies to: 148-151

🤖 Prompt for AI Agents
crates/compiler-core/generate.py around lines 140-142 and 148-151: after merging
analysis.pseudos into analysis.instructions the enum is generated with a
hardcoded #[repr(u8)] which fails if any instruction id > 255 (pseudo IDs like
256); fix by computing the maximum instruction id after the merge (e.g., max_id
= max(inst.id for inst in analysis.instructions)) and choose repr_type = "u16"
if max_id > 255 else "u8", then inject {repr_type} into the template instead of
the hardcoded u8; alternatively, if you prefer to exclude pseudos from the enum,
filter analysis.pseudos when merging (only include ops with enabled and op id <=
255) before generating the enum.



def write_enum(outfile: typing.IO, instructions: list[Instruction]) -> None:
    """Write the Rust ``Instruction`` enum definition to *outfile*.

    Each element of *instructions* contributes its ``rust_enum_variant``
    text; variants are separated by a comma and a newline.
    """
    rendered = [inst.rust_enum_variant for inst in instructions]
    variants = ",\n".join(rendered)
    enum_source = f"""
/// A Single bytecode instruction.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Instruction {{
{variants}
}}
"""
    outfile.write(enum_source)


def main():
    """Generate ``OUT_FILE`` from the CPython analysis and ``CONF_FILE``.

    Reads the ``overrides`` table from the TOML config, builds the enabled
    instructions sorted by Rust name, renders the enum, formats the result
    with ``rustfmt`` and writes it to ``OUT_FILE``.
    """
    analysis = get_analysis()
    # TOML documents are UTF-8 by specification; don't rely on the locale.
    conf = tomllib.loads(CONF_FILE.read_text(encoding="utf-8"))
    overrides = conf["overrides"]

    instructions = sorted(
        Instruction.from_analysis(analysis, overrides), key=lambda inst: inst.rust_name
    )

    outfile = io.StringIO()
    write_enum(outfile, instructions)

    generated = outfile.getvalue()

    # Every oparg type other than the builtin `u32` must be imported by the
    # generated module. Sort the names so the emitted `use` list does not
    # depend on set iteration order.
    imports = ",".join(
        sorted(
            {
                typ
                for inst in instructions
                if (typ := inst.oparg_typ) is not None and typ != "u32"
            }
        )
    )
    script_path = pathlib.Path(__file__).resolve().relative_to(ROOT).as_posix()
    output = rustfmt(
        f"""
// This file is generated by {script_path}
// Do not edit!

use crate::bytecode::{{Arg, {imports}}};

{generated}
"""
    )
    OUT_FILE.write_text(output, encoding="utf-8")


# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading
Loading