diff --git a/.gitignore b/.gitignore index 7ebd0ef65..f921e89ca 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,13 @@ TECHNICAL_BRIEF.md /compare-baseline Tools/bytecodes_gen/bytecodes_gen /bytecodes_gen + +# test_zipimport scratch artifacts +junk*.zip +junk*/ + +ziptestmodule +ziptestmodule.py + +# test_module_with_large_stack writes this into the cwd +longlist.py diff --git a/builtins/compile.go b/builtins/compile.go index 6eb198678..2388516c0 100644 --- a/builtins/compile.go +++ b/builtins/compile.go @@ -109,7 +109,7 @@ func parseCompileArgs(args []objects.Object, kwargs map[string]objects.Object) ( return compileArgs{}, err } } - filename, err := stringArg(bound[1], "filename") + filename, err := compileFilenameArg(bound[1]) if err != nil { return compileArgs{}, err } @@ -150,6 +150,31 @@ func parseCompileArgs(args []objects.Object, kwargs map[string]objects.Object) ( }, nil } +// compileFilenameArg decodes the filename argument. compile() runs it +// through PyUnicode_FSDecoder, which accepts str, bytes, or any +// os.PathLike (pathlib.Path) by invoking __fspath__. importlib's source +// loaders pass a pathlib.Path here, so a bare str check is too strict. +// +// CPython: Python/bltinmodule.c builtin_compile (filename: object, +// +// PyUnicode_FSDecoder) and Objects/unicodeobject.c PyOS_FSPath +func compileFilenameArg(o objects.Object) (string, error) { + switch v := o.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + if fspath, err := objects.GetAttr(o, objects.NewStr("__fspath__")); err == nil { + result, callErr := objects.CallNoArgs(fspath) + if callErr != nil { + return "", callErr + } + return compileFilenameArg(result) + } + return "", fmt.Errorf("TypeError: compile() filename must be str, bytes or os.PathLike, not %s", o.Type().Name) +} + // compileSourceArg accepts the first positional argument to compile(). // str routes through ParseString. bytes / bytearray route through // ParseBytes so the PEP 263 coding cookie controls the decode. AST diff --git a/builtins/eval.go b/builtins/eval.go index 7e23c1dd0..d6c28351d 100644 --- a/builtins/eval.go +++ b/builtins/eval.go @@ -329,3 +329,23 @@ func runCode(code *objects.Code, globals, locals, closure objects.Object) (objec } return currentEvaluator(code, globals, locals, closure) } + +// RunInFreshNamespace compiles and runs source in a brand-new __main__ +// namespace and returns PyRun_SimpleStringFlags's result code: 0 when the +// code runs to completion, -1 when it raises. It backs the subinterpreter +// test entries (_testcapi.run_in_subinterp and +// _testinternalcapi.run_in_subinterp_with_config). Every gopy extension is +// a Go builtin compiled into the runtime (multi-phase by construction), so +// importing one inside a subinterpreter behaves exactly like a fresh- +// namespace exec in the current process; the only observable result the +// callers read is the integer status. +// +// CPython: Python/pythonrun.c:592 PyRun_SimpleStringFlags +func RunInFreshNamespace(source string) int { + ns := objects.NewDict() + _ = ns.SetItem(objects.NewStr("__name__"), objects.NewStr("__main__")) + if _, err := Exec([]objects.Object{objects.NewStr(source), ns}, nil); err != nil { + return -1 + } + return 0 +} diff --git a/builtins/import.go b/builtins/import.go index 5b6d08e8a..332a680c2 100644 --- a/builtins/import.go +++ b/builtins/import.go @@ -20,15 +20,22 @@ import ( ) // Importer resolves a module by name, with pkgname as the anchor for -// relative imports and level as the dot-count. fromlist is empty for -// `import a.b.c` and non-empty for `from a.b import c, d`. The hook -// returns the resolved module along with the same chain CPython hands -// back: when fromlist is empty the caller wants the top-level package, -// when fromlist is non-empty the caller wants the deepest module so +// relative imports and level as the dot-count. fromlist is the raw +// object the caller passed (None for `import a.b.c`, a sequence for +// `from a.b import c, d`); it is handed to _handle_fromlist unchanged, +// so a non-str entry surfaces as the TypeError _handle_fromlist raises +// rather than an early gopy-only rejection, and an arbitrary iterable +// is iterated the same way CPython iterates it. globals is the dict the +// caller handed to __import__ (or nil); the live importlib re-derives +// the package anchor from it via _calc___package__, so it must be the +// caller's explicit globals, not the running frame's. The hook returns +// the resolved module along with the same chain CPython hands back: +// when fromlist is empty the caller wants the top-level package, when +// fromlist is non-empty the caller wants the deepest module so // IMPORT_FROM can grab attributes off it. // // CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject -type Importer func(name, pkgname string, level int, fromlist []string) (objects.Object, error) +type Importer func(name, pkgname string, level int, fromlist objects.Object, globals objects.Object) (objects.Object, error) var currentImporter Importer @@ -73,13 +80,13 @@ func Import(args []objects.Object, kwargs map[string]objects.Object) (objects.Ob } } pkgname := pkgnameFromGlobals(parsed.globals) - return currentImporter(parsed.name, pkgname, parsed.level, parsed.fromlist) + return currentImporter(parsed.name, pkgname, parsed.level, parsed.fromlist, parsed.globals) } type importArgs struct { name string globals objects.Object - fromlist []string + fromlist objects.Object level int } @@ -130,9 +137,15 @@ func parseImportArgs(args []objects.Object, kwargs map[string]objects.Object) (i return importArgs{}, fmt.Errorf("ValueError: level must be >= 0") } } - fromlist, err := fromlistArg(bound[3]) - if err != nil { - return importArgs{}, err + // fromlist reaches the import machinery untouched. CPython's + // builtin___import___impl performs no type or element check; an empty + // tuple stands in for a missing argument, and _handle_fromlist raises + // the TypeError for any non-str entry or iterates a custom iterable. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + fromlist := bound[3] + if fromlist == nil { + fromlist = objects.NewTuple(nil) } return importArgs{ name: name, @@ -142,42 +155,6 @@ func parseImportArgs(args []objects.Object, kwargs map[string]objects.Object) (i }, nil } -// fromlistArg unpacks the fromlist argument into a flat []string. -// None and missing both mean "empty"; a tuple or list is iterated; any -// other type is a TypeError. The element check matches CPython's -// import.c which rejects non-str entries before lookup. -// -// CPython: Python/import.c:1726 import_from -func fromlistArg(o objects.Object) ([]string, error) { - if o == nil || objects.IsNone(o) { - return nil, nil - } - var raw []objects.Object - switch v := o.(type) { - case *objects.Tuple: - raw = make([]objects.Object, v.Len()) - for i := range raw { - raw[i] = v.Item(i) - } - case *objects.List: - raw = make([]objects.Object, v.Len()) - for i := range raw { - raw[i] = v.Item(i) - } - default: - return nil, fmt.Errorf("TypeError: fromlist must be a tuple or list") - } - out := make([]string, 0, len(raw)) - for _, item := range raw { - s, err := stringArg(item, "fromlist item") - if err != nil { - return nil, err - } - out = append(out, s) - } - return out, nil -} - // stringArg coerces o to a Go string, raising TypeError when o isn't a // Python str. The label is the argument name used in the error. func stringArg(o objects.Object, label string) (string, error) { diff --git a/builtins/import_test.go b/builtins/import_test.go index f01b80b48..2bc5a292a 100644 --- a/builtins/import_test.go +++ b/builtins/import_test.go @@ -15,17 +15,40 @@ type importCall struct { fromlist []string } +// fromlistStrings flattens the raw fromlist object the hook now +// receives into the []string the assertions below compare against. It +// mirrors how _handle_fromlist iterates the object, stopping at the +// first non-str entry (none of these tests pass one). +func fromlistStrings(o objects.Object) []string { + var out []string + switch v := o.(type) { + case *objects.Tuple: + for i := 0; i < v.Len(); i++ { + if u, ok := v.Item(i).(*objects.Unicode); ok { + out = append(out, u.Value()) + } + } + case *objects.List: + for i := 0; i < v.Len(); i++ { + if u, ok := v.Item(i).(*objects.Unicode); ok { + out = append(out, u.Value()) + } + } + } + return out +} + func captureImporter(t *testing.T, mod objects.Object, returnErr error) *importCall { t.Helper() prev := currentImporter t.Cleanup(func() { SetImporter(prev) }) got := &importCall{} - SetImporter(func(name, pkgname string, level int, fromlist []string) (objects.Object, error) { + SetImporter(func(name, pkgname string, level int, fromlist objects.Object, _ objects.Object) (objects.Object, error) { got.name = name got.pkgname = pkgname got.level = level - got.fromlist = fromlist + got.fromlist = fromlistStrings(fromlist) return mod, returnErr }) return got @@ -198,16 +221,26 @@ func TestImportNegativeLevel(t *testing.T) { } } -func TestImportFromlistRejectsString(t *testing.T) { - captureImporter(t, nil, nil) - _, err := Import([]objects.Object{ +func TestImportFromlistPassesThroughRawObject(t *testing.T) { + // CPython's builtin___import__ never type-checks fromlist; it hands the + // object straight to _handle_fromlist, which iterates it. A str is a + // valid (if unusual) fromlist, so __import__ must not reject it early. + mod := objects.NewModule("a") + got := captureImporter(t, mod, nil) + out, err := Import([]objects.Object{ objects.NewStr("a"), objects.None(), objects.None(), - objects.NewStr("notalist"), + objects.NewStr("xy"), }, nil) - if err == nil || !strings.Contains(err.Error(), "fromlist must be a tuple or list") { - t.Fatalf("__import__: err=%v, want fromlist TypeError", err) + if err != nil { + t.Fatalf("__import__: %v", err) + } + if out != mod { + t.Fatalf("__import__ returned %v, want %v", out, mod) + } + if got.name != "a" { + t.Fatalf("hook name = %q, want a", got.name) } } diff --git a/builtins/init.go b/builtins/init.go index 87576b9c2..034997686 100644 --- a/builtins/init.go +++ b/builtins/init.go @@ -23,6 +23,13 @@ import ( var wireOnce sync.Once +// DefaultImport holds the interpreter's original __import__ builtin so the +// IMPORT_NAME fast path can recognize it by identity even after user code +// rebinds builtins.__import__. +// +// CPython: pycore_interp.h interp->imports.import_func +var DefaultImport objects.Object + // Init constructs the builtins dict and stamps the v0.6 surface into // it: None / True / False / NotImplemented as named constants, and // print as the single callable. defaultFile is the io.Writer the @@ -151,6 +158,13 @@ func Init(defaultFile io.Writer) (*objects.Dict, error) { if err := setBuiltin(dict, "__import__", importFn); err != nil { return nil, err } + // Capture the interpreter's original __import__ so the IMPORT_NAME fast + // path can compare against it by identity. Re-reading the builtins + // module is wrong: a test that swaps builtins.__import__ would make the + // swapped callable compare equal to "the default" and never get called. + // + // CPython: pycore_interp.h interp->imports.import_func (captured at init) + DefaultImport = importFn // breakpoint() forwards to sys.breakpointhook. Register the builtin // here and hand the default hook to sys so sys.breakpointhook and diff --git a/builtins/iters_objects.go b/builtins/iters_objects.go index 2897b20bc..f7ae548bc 100644 --- a/builtins/iters_objects.go +++ b/builtins/iters_objects.go @@ -390,12 +390,50 @@ type zipIter struct { var ZipType = objects.NewType("zip", []*objects.Type{objects.ObjectType()}) func init() { - ZipType.Iter = func(o objects.Object) (objects.Object, error) { return o, nil } + ZipType.Iter = objects.SelfIter ZipType.IterNext = zipIterNext + ZipType.Dealloc = zipDealloc + ZipType.TpTraverse = zipTraverse objects.SetTypeDescr(ZipType, "__reduce__", objects.NewMethodDescrConv(ZipType, "__reduce__", objects.MethNoArgs, zipReduce)) objects.SetTypeDescr(ZipType, "__setstate__", objects.NewMethodDescrConv(ZipType, "__setstate__", objects.MethO, zipSetState)) } +// zipDealloc releases the owned reference on every source iterator. +// Mirrors zip_dealloc's ittuple release. +// +// CPython: Objects/enumobject.c:3201 zip_dealloc +func zipDealloc(o objects.Object) { + z, ok := o.(*zipIter) + if !ok { + return + } + for _, it := range z.iters { + if it != nil { + objects.Decref(it) + } + } +} + +// zipTraverse lets the cyclic collector trace through the source +// iterators. Mirrors zip_traverse. +// +// CPython: Objects/enumobject.c:3213 zip_traverse +func zipTraverse(o objects.Object, visit objects.Visitor) error { + z, ok := o.(*zipIter) + if !ok { + return nil + } + for _, it := range z.iters { + if it == nil { + continue + } + if err := visit(it); err != nil { + return err + } + } + return nil +} + // CPython: Python/bltinmodule.c:3222 zip_next func zipIterNext(o objects.Object) (objects.Object, error) { z, ok := o.(*zipIter) diff --git a/cmd/gopy/main.go b/cmd/gopy/main.go index 1c79b6dfc..e784900fc 100644 --- a/cmd/gopy/main.go +++ b/cmd/gopy/main.go @@ -6,6 +6,7 @@ package main import ( + "archive/zip" "bytes" "fmt" "os" @@ -17,6 +18,7 @@ import ( "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/codecs" "github.com/tamnd/gopy/compile" + pyerrors "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/getopt" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/module/gc" @@ -58,7 +60,16 @@ func mainWithProfile() int { _ = f.Close() }() } - return run(os.Args[1:], os.Stdout, os.Stderr) + exitcode := run(os.Args[1:], os.Stdout, os.Stderr) + // bpo-1054041: if a KeyboardInterrupt went unhandled, exit through + // the default SIGINT handler so a calling shell sees the ^C and the + // process reports death-by-signal rather than a plain error code. + // + // CPython: Modules/main.c:786 Py_RunMain (unhandled_keyboard_interrupt) + if pyerrors.UnhandledKeyboardInterrupt() { + exitcode = exitSigint() + } + return exitcode } // run drives _PyOS_GetOpt the same way pymain_init walks argv before @@ -93,6 +104,7 @@ func run(args []string, stdout, stderr *os.File) int { modName string hasC, hasM bool xOptions []string + safePath bool ) opts: @@ -118,6 +130,18 @@ opts: break opts case 'X': xOptions = append(xOptions, st.OptArg) + case 'P': + // -P sets safe_path: the script directory / cwd / '' is not + // prepended to sys.path[0]. + // + // CPython: Python/initconfig.c:2098 config_parse_cmdline ('P') + safePath = true + case 'I': + // -I (isolated) implies -P plus -E/-s; the safe_path effect on + // sys.path[0] is what the shadowing tests exercise. + // + // CPython: Python/initconfig.c:2080 config_parse_cmdline ('I') + safePath = true default: // Other CPython flags (-b, -B, -O, -W, ...) are accepted for // option-set parity. Wiring each to the runtime config lands @@ -134,6 +158,17 @@ opts: codecs.SetDevMode(true) } + // safe_path: -P, -I, or PYTHONSAFEPATH suppresses prepending the + // script directory / cwd / "" to sys.path[0], and is exposed as + // sys.flags.safe_path. installPathFinder reads safePathMode to skip + // the unsafe leading entry. + // + // CPython: Python/initconfig.c:1828 config_init_safe_path + if safePath || os.Getenv("PYTHONSAFEPATH") != "" { + safePathMode = true + sys.SetSafePath(true) + } + switch { case showVersion: fmt.Fprintln(stdout, build.VersionString()) @@ -146,14 +181,49 @@ opts: } if st.OptInd < len(argv) { - scriptPath := argv[st.OptInd] - sys.SetArgv(append([]string{scriptPath}, argv[st.OptInd+1:]...)) - return runFile(scriptPath, stdout, stderr) + return runPositional(argv[st.OptInd], argv[st.OptInd+1:], stdout, stderr) } sys.SetArgv([]string{""}) return runInteractive(stdout, stderr) } +// runPositional dispatches the trailing positional argument. If it names +// a package (a directory or a ZIP archive) it is an import-path entry, +// not a source file: prepend it to sys.path and run its __main__ +// submodule. CPython detects this in pymain_get_importer +// (PyImport_GetImporter runs the path hooks) and runs +// pymain_run_module(L"__main__", 0); set_argv0=0 leaves argv[0] as the +// archive path. A plain .py file falls through to pymain_run_file. +// +// CPython: Modules/main.c:127 pymain_get_importer +// CPython: Modules/main.c:691 pymain_run_python (main_importer_path branch) +func runPositional(scriptPath string, rest []string, stdout, stderr *os.File) int { + sys.SetArgv(append([]string{scriptPath}, rest...)) + if isImporterPath(scriptPath) { + return runImporterMain(scriptPath, stdout, stderr) + } + return runFile(scriptPath, stdout, stderr) +} + +// safePathMode records whether -P / -I / PYTHONSAFEPATH was supplied, so +// installPathFinder omits the unsafe leading sys.path[0] entry. +// +// CPython: Python/initconfig.c:1828 config_init_safe_path +var safePathMode bool + +// sysPath0Entry / sysPath0Present hold the leading sys.path entry +// (script directory, or "" for -c / -m / interactive). CPython prepends +// config->sys_path_0 to sys.path AFTER site.main() runs, so +// site.removeduppaths() never rewrites a "" entry into an absolute cwd. +// gopy mirrors that: installPathFinder records the entry here, and +// prependSysPath0 inserts it once site has run. +// +// CPython: Modules/main.c:pymain_run_python (sys.path[0] insertion) +var ( + sysPath0Entry string + sysPath0Present bool +) + // hasXOption reports whether the -X option named key was supplied, // matching against the part before any '=' so "-X dev" and "-X dev=1" // both count. @@ -179,12 +249,37 @@ func hasXOption(xOptions []string, key string) bool { // CPython: Python/initconfig.c:1734 _PyConfig_InitPathConfig // CPython: Lib/importlib/_bootstrap_external.py:1196 PathFinder func installPathFinder(scriptPath string) { + // The leading sys.path entry (script dir, or "" for -c / -m / + // interactive) is NOT placed in `paths` here: CPython inserts + // config->sys_path_0 after site.main() runs, so site.removeduppaths() + // does not rewrite a "" entry into an absolute cwd. prependSysPath0 + // adds it once site has run. var paths []string switch { + case safePathMode: + // safe_path drops the leading script-dir / cwd / "" entry so an + // importable name is resolved only from PYTHONPATH and the stdlib. + // CPython leaves config->sys_path_0 unset, which disables the + // module-shadowing heuristic. + // + // CPython: Python/initconfig.c:1828 config_init_safe_path + sysPath0Present = false + imp.SetConfigSysPath0("", false) case scriptPath != "": - paths = append(paths, filepath.Dir(scriptPath)) + // config->sys_path_0 for a script is the ABSOLUTE directory of the + // script file (CPython resolves it), so the shadowing check and the + // live sys.path[0] both use an absolute path. + dir := filepath.Dir(scriptPath) + if abs, err := filepath.Abs(dir); err == nil { + dir = abs + } + sysPath0Entry = dir + sysPath0Present = true + imp.SetConfigSysPath0(dir, true) default: - paths = append(paths, "") + sysPath0Entry = "" + sysPath0Present = true + imp.SetConfigSysPath0("", true) } if env := os.Getenv("PYTHONPATH"); env != "" { for _, p := range strings.Split(env, string(os.PathListSeparator)) { @@ -195,11 +290,49 @@ func installPathFinder(scriptPath string) { } if root := findStdlibRoot(); root != "" { paths = append(paths, root) + // Materialize the compiled-in extension modules as stub files in a + // lib-dynload directory and add it to sys.path, the gopy analog of + // CPython's /lib-dynload. The real PathFinder -> FileFinder + // discovers them by suffix and routes them through ExtensionFileLoader + // -> _imp.create_dynamic, so module.__spec__.loader is an + // ExtensionFileLoader exactly as for a CPython .so. The stub lives + // outside the vendored stdlib tree so that tree stays pristine. + // + // CPython: Modules/getpath.py (lib-dynload on sys.path) + dynload := filepath.Join(os.TempDir(), "gopy-lib-dynload") + if err := imp.MaterializeExtensions(dynload); err == nil { + paths = append(paths, dynload) + } + // Expose the resolved stdlib root as sys._stdlib_dir so + // FrozenImporter._resolve_filename can compute __file__ and a + // frozen package's __path__ against the on-disk Lib copy, letting + // an unfrozen submodule (e.g. __phello__.spam from disk) be found + // when its parent was loaded frozen. + // + // CPython: Lib/importlib/_bootstrap.py:1108 _resolve_filename + sys.SetStdlibDir(root) + // Pin the resolved root into the environment so any subprocess + // this interpreter spawns through sys.executable bootstraps from + // the same stdlib, even when it runs in an unrelated cwd (e.g. + // subprocess.run(cwd=tmpdir)). CPython's child interpreters + // self-locate from the executable's prefix; gopy carries it + // explicitly via GOPY_STDLIB. + // + // CPython: Modules/getpath.py:550 calculate_path (prefix inherited) + if os.Getenv("GOPY_STDLIB") == "" { + _ = os.Setenv("GOPY_STDLIB", root) + } } imp.SetPathFinder(&imp.PathFinder{ Paths: paths, Compiler: gopyCompile, }) + // Frozen test modules (__hello__, __phello__ and friends) keep their + // source verbatim and compile lazily through the same compiler the + // path finder uses. + // + // CPython: Python/frozen.c _PyImport_FrozenModules + imp.FrozenCompiler = gopyCompile sys.SetPath(paths) // Wire the meta-path finder to consult the live sys.path so // `sys.path.insert(0, x)` from user code is honored on the next @@ -210,6 +343,20 @@ func installPathFinder(scriptPath string) { imp.SetLivePathHook(sys.LivePath) } +// prependSysPath0 inserts the leading sys.path entry (config->sys_path_0) +// recorded by installPathFinder. CPython does this after site.main() +// runs, so the entry (notably "" for -c) is never absolutized by +// site.removeduppaths(). Call it once the site bootstrap has completed. +// +// CPython: Modules/main.c:pymain_run_python (sys.path[0] insertion) +func prependSysPath0() { + if !sysPath0Present { + return + } + cur := sys.LivePath() + sys.SetPath(append([]string{sysPath0Entry}, cur...)) +} + // bootstrapEncodings imports the encodings package so its // search_function lands in the codec search path. CPython does this // from _PyCodec_Init at the tail of interpreter startup, after the @@ -225,6 +372,86 @@ func installPathFinder(scriptPath string) { // // CPython: Python/codecs.c:1690 _PyCodec_Init (PyImport_ImportModule "encodings") func bootstrapEncodings(ts *state.Thread, globals *objects.Dict, stderr *os.File) int { + // Initialize the importlib bootstrap before any Python-level import. + // CPython freezes importlib._bootstrap / _bootstrap_external and runs + // init_importlib well before _PyCodec_Init. gopy loads them as regular + // .py modules on first reference; the encodings preload below pulls + // _bootstrap_external in transitively (encodings -> codecs -> + // importlib.util -> _bootstrap_external). Importing it here first means + // it is fully cached before encodings runs, so its own load does not + // re-enter the import system while the encodings package is still + // half-initialized (which would strand `from . import aliases`). + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + // Two-phase importlib install, mirroring init_importlib / + // init_importlib_external. importlib/__init__.py self-bootstraps via + // its `except ImportError` branch (gopy has no frozen _frozen_importlib), + // which runs _bootstrap._setup(sys, _imp) and binds _bootstrap_external. + // Phase 2 then calls _bootstrap_external._install(_bootstrap) directly + // (CPython's _install_external_importers imports _frozen_importlib_external, + // which gopy lacks), appending PathFinder to sys.meta_path and the + // FileFinder path hook to sys.path_hooks. + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + // CPython runs init_importlib exactly once, against a fresh per-process + // interpreter. gopy reuses one process-wide sys.modules across every run() + // invocation (the cmd/gopy tests call run() several times in a single + // binary), so the install must be idempotent. Once the import system is + // live, sys.modules already holds _frozen_importlib aliased to the source + // _bootstrap module, which carries no __origname__; re-running + // _bootstrap._install would make _setup re-scan sys.modules and trip the + // frozen fix-up assert on it. Guard the whole install on the first run. + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + install := "import sys\n" + + "if '_frozen_importlib' not in sys.modules:\n" + + " import importlib, _imp\n" + + " from importlib import _bootstrap, _bootstrap_external\n" + + " _bootstrap._install(sys, _imp)\n" + + " _bootstrap_external._install(_bootstrap)\n" + + // CPython's C bootstrap freezes _bootstrap / _bootstrap_external and + // publishes them under the _frozen_importlib* names; importlib then + // aliases those exact objects to importlib._bootstrap[_external]. gopy + // loads them as plain .py modules, so re-publish the same objects + // under the frozen names to keep sys.modules['_frozen_importlib'] and + // importlib._bootstrap identical (issue #15386 / bootstrap tests). + // + // CPython: Lib/importlib/__init__.py:50 (_bootstrap aliasing) + " sys.modules['_frozen_importlib'] = _bootstrap\n" + + " sys.modules['_frozen_importlib_external'] = _bootstrap_external\n" + + // CPython registers the zipimporter path hook ahead of FileFinder + // (C-side, _PyImportZip_Init) so a sys.path entry pointing at a zip + // archive is claimed before the directory finder rejects it. + // CPython: Python/pylifecycle.c init_importlib_external (zipimport) + " try:\n" + + " import zipimport\n" + + " sys.path_hooks.insert(0, zipimport.zipimporter)\n" + + " except ImportError:\n" + + " pass\n" + + // CPython freezes importlib._bootstrap[_external] and the importlib + // package, so _setup gives them a __spec__ via the frozen loader + // before any user import runs. gopy loads these as plain .py files + // through the Go-side driver during this bootstrap, before the + // machinery is live, so they reach sys.modules without __spec__. + // Rebuild a SourceFileLoader spec for every still-spec-less module + // that carries a __file__ (importlib, importlib._bootstrap, + // _bootstrap_external, importlib.util), matching the spec PathFinder + // would have produced. Without __spec__ on the importlib package, + // `import importlib.util` raises AttributeError at _bootstrap.py:1325. + // + // CPython: Lib/importlib/_bootstrap.py:1517 _setup (spec fix-up loop) + " for _n in list(sys.modules):\n" + + " _m = sys.modules[_n]\n" + + " if getattr(_m, '__spec__', None) is None and getattr(_m, '__file__', None):\n" + + " try:\n" + + " _sp = _bootstrap_external.spec_from_file_location(_n, _m.__file__)\n" + + " _bootstrap._init_module_attrs(_sp, _m, override=True)\n" + + " except Exception:\n" + + " pass\n" + if _, err := pythonrun.RunString(ts, install, "", parser.ModeFile, globals, nil); err != nil { + fmt.Fprintln(stderr, "preload importlib:", err) + return 1 + } if _, err := pythonrun.RunString(ts, "import encodings", "", parser.ModeFile, globals, nil); err != nil { fmt.Fprintln(stderr, "preload encodings:", err) return 1 @@ -232,6 +459,24 @@ func bootstrapEncodings(ts *state.Thread, globals *objects.Dict, stderr *os.File return 0 } +// bootstrapSite imports the site module, which runs site.main() at import +// time (the no_site flag is clear) to install the interpreter builtins +// exit / quit / help / copyright / credits / license via setquit / +// setcopyright / sethelper. CPython drives this from init_import_site +// during Py_Initialize after the import system is online; without it +// sys.flags.no_site reads 0 (claiming site loaded) while the builtins it +// installs are missing, so code.InteractiveConsole(local_exit=True) and +// other site-dependent paths diverge. +// +// CPython: Python/pylifecycle.c:1255 init_import_site (PyImport_ImportModule "site") +func bootstrapSite(ts *state.Thread, globals *objects.Dict, stderr *os.File) int { + if _, err := pythonrun.RunString(ts, "import site", "", parser.ModeFile, globals, nil); err != nil { + fmt.Fprintln(stderr, "preload site:", err) + return 1 + } + return 0 +} + // findStdlibRoot locates the vendored gopy stdlib tree. CPython's // equivalent is Modules/getpath.py's prefix discovery; the gopy port // (pathconfig/) targets the CPython install layout, not the gopy @@ -293,10 +538,34 @@ func isDir(p string) bool { } func isFile(p string) bool { - info, err := os.Stat(p) + info, err := os.Stat(p) //nolint:gosec // p is os.Executable/os.Getwd/$GOPY_STDLIB or the argv script path the user asked us to run. return err == nil && info.Mode().IsRegular() } +// isImporterPath reports whether p is an import-path entry rather than a +// source file: a directory (which the FileFinder path hook always claims) +// or a ZIP archive (which the zipimporter path hook claims). CPython makes +// the same determination by running PyImport_GetImporter over sys.path_hooks +// in pymain_get_importer; a regular .py file yields None and is run as a +// plain script instead. +// +// CPython: Modules/main.c:127 pymain_get_importer +// CPython: Lib/zipimport.py zipimporter.__init__ (ZIP end-of-central-directory probe) +func isImporterPath(p string) bool { + if isDir(p) { + return true + } + if !isFile(p) { + return false + } + zr, err := zip.OpenReader(p) + if err != nil { + return false + } + _ = zr.Close() + return true +} + // gopyCompile is the SourceCompiler injected into PathFinder. It is // the parser + compiler chain that pythonrun.RunString runs. // @@ -311,6 +580,15 @@ func gopyCompile(src []byte, filename string) (*objects.Code, error) { if len(src) == 0 || src[len(src)-1] != '\n' { src = append(src, '\n') } + // CPython freezes importlib._bootstrap[_external], so the code objects of + // the import machinery carry the synthetic co_filename + // "" rather than a source path. gopy loads + // them from source; stamp the same frozen name so tracebacks that pass + // through the machinery read identically (test_import_bug) and + // remove_importlib_frames can recognize them. + // + // CPython: Python/pylifecycle.c:1041 init_importlib (frozen modules) + filename = frozenImportlibName(filename) mod, err := parser.ParseBytes(src, filename, parser.ModeFile) if err != nil { return nil, err @@ -345,6 +623,21 @@ func gopyCompile(src []byte, filename string) (*objects.Code, error) { return out, nil } +// frozenImportlibName maps the source paths of the two importlib bootstrap +// modules to the synthetic co_filename CPython gives their frozen code +// objects. Any other path is returned unchanged. +// +// CPython: Python/import.c:3501 remove_importlib_frames (frozen names) +func frozenImportlibName(filename string) string { + switch { + case strings.HasSuffix(filename, "importlib/_bootstrap_external.py"): + return "" + case strings.HasSuffix(filename, "importlib/_bootstrap.py"): + return "" + } + return filename +} + // runSource is the gopy -c entry. It dispatches to // pythonrun.RunSimpleString, the port of CPython's // PyRun_SimpleStringFlags. @@ -362,6 +655,10 @@ func runSource(src string, stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() rc := pythonrun.RunSimpleString(ts, src, mainGlobals, stderr) gc.RunShutdownFinalizers() pythonrun.FlushStdFiles() @@ -388,6 +685,10 @@ func runModule(modName string, modArgs []string, stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() // Equivalent of CPython's pymain_run_module which calls // runpy._run_module_as_main(modName) on the Python side. src := fmt.Sprintf("import runpy\nrunpy._run_module_as_main(%q)\n", modName) @@ -397,6 +698,57 @@ func runModule(modName string, modArgs []string, stdout, stderr *os.File) int { return rc } +// runImporterMain is the gopy entry. When the positional +// argument is an import-path entry (directory or ZIP archive), CPython +// prepends it to sys.path and runs its __main__ submodule via +// pymain_run_module(L"__main__", 0). The set_argv0=0 argument tells +// runpy._run_module_as_main NOT to rewrite argv[0]: it stays the archive +// path, which the dispatch already placed there. The module loads through +// the path hook that claims the entry (FileFinder for a directory, +// zipimporter for a ZIP), so its code object carries the loader's +// co_filename (e.g. "app.zip/__main__.py") and tracebacks resolve source +// through the loader's get_source rather than reading the raw archive. +// +// CPython: Modules/main.c:691 pymain_run_python (main_importer_path branch) +// CPython: Modules/main.c:326 pymain_run_module (set_argv0=0) +func runImporterMain(importerPath string, stdout, stderr *os.File) int { + g, err := bootstrapBuiltins(stdout, stderr) + if err != nil { + fmt.Fprintln(stderr, "builtins:", err) + return 1 + } + // sys.path[0] is the import-path entry itself (the dir/zip), not its + // parent directory. CPython sets path0 = main_importer_path directly, + // even under -P / -I (the safe_path branch is skipped once an importer + // claims the entry). + // + // CPython: Modules/main.c:647 path0 = Py_NewRef(main_importer_path) + installPathFinder("") + abs, absErr := filepath.Abs(importerPath) + if absErr != nil { + abs = importerPath + } + sysPath0Entry = abs + sysPath0Present = true + imp.SetConfigSysPath0(abs, true) + mainGlobals := newMainGlobals(g, "__main__") + ts := state.NewThread() + if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { + return rc + } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() + // pymain_run_module(L"__main__", 0): run __main__ from the import-path + // entry without altering argv[0]. + src := "import runpy\nrunpy._run_module_as_main(\"__main__\", False)\n" + rc := pythonrun.RunSimpleString(ts, src, mainGlobals, stderr) + gc.RunShutdownFinalizers() + pythonrun.FlushStdFiles() + return rc +} + // runFile is the gopy entry. Mirrors pymain_run_file in // the file-positional branch. // @@ -416,11 +768,70 @@ func runFile(path string, stdout, stderr *os.File) int { return 1 } installPathFinder(path) - mainGlobals := newMainGlobals(g, mainModuleName(path)) + modName := mainModuleName(path) + mainGlobals := newMainGlobals(g, modName) + // Anchor relative imports inside a vendored test package. CPython's + // import machinery stamps __package__ when it loads the module; a + // synthesized main module would otherwise have no anchor and any + // `from . import x` inside it would raise "no known parent package". + // + // CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ + if pkg := mainPackageName(path, modName); pkg != "" { + _ = mainGlobals.SetItem(objects.NewStr("__package__"), objects.NewStr(pkg)) + // A package's __init__ also carries __path__ pointing at its dir, + // so submodule imports (`from .data import x`) find sibling files. + if filepath.Base(path) == "__init__.py" { + if abs, absErr := filepath.Abs(filepath.Dir(path)); absErr == nil { + _ = mainGlobals.SetItem(objects.NewStr("__path__"), + objects.NewList([]objects.Object{objects.NewStr(abs)})) + } + } + } ts := state.NewThread() if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() + // A vendored test runs under "test."; regrtest imports it as a + // normal module, so its __spec__ is a real ModuleSpec. Build the same + // file-location spec here so code that resolves the module by name and + // reads __spec__ (pyclbr, runpy, inspect) matches CPython. The plain + // "__main__" run keeps __spec__ None, like `python script.py`. The + // snippet runs through pythonrun so importlib.util loads under a real + // Executor, the same way bootstrapEncodings drives the encodings import. + // + // CPython: Lib/test/libregrtest/runtest.py (imports test.) + if modName != "__main__" { + abs, absErr := filepath.Abs(path) + if absErr != nil { + abs = path + } + // regrtest imports the test under "test." the normal way, so + // the import machinery runs setattr(parent_package, child, module): + // the `test` package ends up with a `test_import` attribute. gopy + // pre-injects the gate module into sys.modules without that parent + // binding, so a test like data/circular_imports/.../child.py that + // evaluates `test.test_import.<...>` as an expression would fail its + // first hop getattr(test, 'test_import'). Import the parent package + // and bind the leaf to mirror what _find_and_load does. + // + // CPython: Lib/importlib/_bootstrap.py:1350 setattr(parent_module, child, module) + src := fmt.Sprintf("import importlib, importlib.util as _u, sys as _s\n"+ + "_m = _s.modules.get(%q)\n"+ + "if _m is not None and getattr(_m, '__spec__', None) is None:\n"+ + " _m.__spec__ = _u.spec_from_file_location(%q, %q)\n"+ + " _m.__loader__ = _m.__spec__.loader\n"+ + "_parent, _, _child = %q.rpartition('.')\n"+ + "if _m is not None and _parent:\n"+ + " setattr(importlib.import_module(_parent), _child, _m)\n"+ + "del importlib, _u, _s, _m, _parent, _child\n", modName, modName, abs, modName) + if _, err := pythonrun.RunString(ts, src, "", parser.ModeFile, mainGlobals, nil); err != nil { + fmt.Fprintln(stderr, "attach main spec:", err) + } + } var rc int if suffix, ok := unittestRunnerSuffix(path); ok { src, readErr := os.ReadFile(path) //nolint:gosec // reading a caller-supplied test file path is the entire contract @@ -451,7 +862,12 @@ func runFile(path string, stdout, stderr *os.File) int { // CPython: Lib/test/libregrtest/runtest.py unittest.main func unittestRunnerSuffix(path string) (string, bool) { base := filepath.Base(path) - if !strings.HasPrefix(base, "test_") || !strings.HasSuffix(base, ".py") { + // A package test is laid out as test_xxx/__init__.py; accept it too so + // the runner fires even though its basename is not test_*.py. The + // module runs under "test.test_xxx" (not "__main__"), so its own + // `if __name__ == '__main__'` guard never triggers the suite. + isPkgInit := base == "__init__.py" && strings.HasPrefix(filepath.Base(filepath.Dir(path)), "test_") + if !isPkgInit && (!strings.HasPrefix(base, "test_") || !strings.HasSuffix(base, ".py")) { return "", false } src, err := os.ReadFile(path) //nolint:gosec // reading a caller-supplied test file path is the entire contract @@ -485,12 +901,56 @@ func unittestRunnerSuffix(path string) (string, bool) { // CPython: Lib/test/libregrtest/runtest.py (imports test.) func mainModuleName(path string) string { base := filepath.Base(path) + // A package laid out as test_xxx/__init__.py runs under the dotted + // name "test.test_xxx": regrtest imports the directory as a package, so + // the __init__ body sees __name__ == "test.test_xxx" and relative + // imports inside it resolve against that anchor. + if base == "__init__.py" { + parent := filepath.Base(filepath.Dir(path)) + if strings.HasPrefix(parent, "test_") { + return "test." + parent + } + return "__main__" + } if strings.HasPrefix(base, "test_") && strings.HasSuffix(base, ".py") { - return "test." + strings.TrimSuffix(base, ".py") + stem := strings.TrimSuffix(base, ".py") + // A module that lives inside a vendored test package (the parent + // directory is a test_xxx/ with an __init__.py) runs under the + // package-qualified name "test.test_xxx.test_yyy", not the bare + // "test.test_yyy". Otherwise it would shadow the package object in + // sys.modules, and a sibling import like + // `from test.test_doctest.decorator_mod import ...` would find the + // module instead of the package and raise "not a package". + // + // CPython: Lib/test/libregrtest/runtest.py (imports test..) + parent := filepath.Base(filepath.Dir(path)) + if strings.HasPrefix(parent, "test_") && isFile(filepath.Join(filepath.Dir(path), "__init__.py")) { + return "test." + parent + "." + stem + } + return "test." + stem } return "__main__" } +// mainPackageName returns the __package__ anchor for the main module at +// path. A package __init__ anchors at its own dotted name; a plain module +// anchors at its parent package. Relative imports inside the file resolve +// against this value. +// +// CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ +func mainPackageName(path, modName string) string { + if modName == "__main__" { + return "" + } + if filepath.Base(path) == "__init__.py" { + return modName + } + if dot := strings.LastIndex(modName, "."); dot >= 0 { + return modName[:dot] + } + return "" +} + // runInteractive is the gopy bare-invocation entry: print the banner // and hand control to pythonrun.InteractiveLoop. Mirrors // pymain_run_stdin. @@ -509,6 +969,10 @@ func runInteractive(stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() rc := pythonrun.InteractiveLoop(ts, os.Stdin, stdout, stderr, mainGlobals) pythonrun.FlushStdFiles() if rc != 0 { @@ -556,7 +1020,35 @@ func bootstrapBuiltins(stdout, stderr *os.File) (*objects.Dict, error) { func newMainGlobals(builtinsDict *objects.Dict, name string) *objects.Dict { mainDict := objects.NewDict() _ = mainDict.SetItem(objects.NewStr("__name__"), objects.NewStr(name)) - _ = mainDict.SetItem(objects.NewStr("__builtins__"), builtinsDict) + // CPython binds __main__.__builtins__ to the builtins *module* object; + // every other module receives the builtins dict instead. The frame + // builder unwraps the module back to its dict for LOAD_GLOBAL, so the + // only observable difference is that `del __builtins__.__import__` + // reaches a module attribute, after which the import machinery raises + // ImportError (test_import.test_delete_builtins_import). + // + // CPython: Python/pylifecycle.c init_interp_main (binds __main__.__builtins__) + var builtinsBinding objects.Object = builtinsDict + if bm, ok := imp.GetModule("builtins"); ok { + builtinsBinding = bm + } + _ = mainDict.SetItem(objects.NewStr("__builtins__"), builtinsBinding) + // add_main_module gives __main__ a __loader__ of BuiltinImporter when the + // dict has none yet. imp.is_builtin("__main__") is False, but CPython picks + // BuiltinImporter as the most appropriate initial loader so that every + // module in sys.modules answers hasattr(m, '__loader__') + // (test_importlib test_everyone_has___loader__). + // + // CPython: Python/pylifecycle.c add_main_module (sets __loader__) + if has, _ := mainDict.Contains(objects.NewStr("__loader__")); !has { + _ = mainDict.SetItem(objects.NewStr("__loader__"), imp.BuiltinImporterLoader()) + } + // CPython always binds __main__.__spec__: None for `-c`/script runs, + // a real ModuleSpec under `-m`. runFile overwrites this with a + // file-location spec for vendored "test." runs. + // + // CPython: Python/pylifecycle.c init_interp_main (sets __main__.__spec__) + _ = mainDict.SetItem(objects.NewStr("__spec__"), objects.None()) mod := objects.NewModuleWithDict(name, mainDict) if _, ok := imp.GetModule(name); !ok { imp.AddModule(name, mod) diff --git a/cmd/gopy/sigint_unix.go b/cmd/gopy/sigint_unix.go new file mode 100644 index 000000000..b71976c69 --- /dev/null +++ b/cmd/gopy/sigint_unix.go @@ -0,0 +1,24 @@ +//go:build !windows + +package main + +import ( + "os/signal" + "syscall" +) + +// exitSigint resets SIGINT to its default disposition and delivers it +// to this process, so an unhandled KeyboardInterrupt terminates the +// interpreter by signal (exit status -SIGINT / 128+SIGINT). +// +// CPython: Modules/main.c:730 exit_sigint +func exitSigint() int { + signal.Reset(syscall.SIGINT) + if err := syscall.Kill(syscall.Getpid(), syscall.SIGINT); err != nil { + // Impossible in normal environments; fall back to the code + // CPython returns when the signal could not be delivered. + return int(syscall.SIGINT) + 128 + } + // Give the signal a moment to be delivered before falling through. + select {} +} diff --git a/cmd/gopy/sigint_windows.go b/cmd/gopy/sigint_windows.go new file mode 100644 index 000000000..a2f7209b9 --- /dev/null +++ b/cmd/gopy/sigint_windows.go @@ -0,0 +1,13 @@ +//go:build windows + +package main + +// exitSigint mirrors the Windows branch of CPython's exit_sigint: there is +// no POSIX kill(getpid, SIGINT), so the interpreter exits with SIGINT+128 +// (the value CPython returns when raise(SIGINT) does not abort the process). +// +// CPython: Modules/main.c:730 exit_sigint +func exitSigint() int { + // SIGINT is 2 on Windows; 2 + 128 = 130. + return 2 + 128 +} diff --git a/errors/api.go b/errors/api.go index 86994b350..aa932215c 100644 --- a/errors/api.go +++ b/errors/api.go @@ -24,6 +24,68 @@ func SetString(ts *state.Thread, t *objects.Type, msg string) { Set(ts, t, args) } +// MakeModuleNotFound builds (without raising) a ModuleNotFoundError +// instance carrying the `name` member, so a caller that returns it as a +// Go error preserves the attribute through synthesizeException. +// +// CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) +func MakeModuleNotFound(name string) *Exception { + msg := "No module named '" + name + "'" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + return exc +} + +// SetModuleNotFound raises ModuleNotFoundError("No module named %r", +// name=name), stamping the `name` member the import machinery promises +// on every miss so callers like runpy can read exc.name. +// +// CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) +func SetModuleNotFound(ts *state.Thread, name string) { + msg := "No module named '" + name + "'" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + Raise(ts, exc) +} + +// SetModuleNotFoundHalted raises ModuleNotFoundError(f'import of {name} +// halted; None in sys.modules', name=name), the exact exception CPython's +// _bootstrap._find_and_load produces when sys.modules[name] is None. The +// `name` member is what importlib/abc.py reads to recognize a blocked +// _frozen_importlib import. +// +// CPython: Lib/importlib/_bootstrap.py:1387 _find_and_load (None sentinel) +func SetModuleNotFoundHalted(ts *state.Thread, name string) { + msg := "import of " + name + " halted; None in sys.modules" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + Raise(ts, exc) +} + +// SetImportErrorWithNameFrom raises ImportError(msg, name=modName, +// path=origin, name_from=nameFrom), stamping the three members the +// IMPORT_FROM diagnostic promises so a caught exception exposes +// exc.name / exc.path / exc.name_from. Empty modName/origin leave the +// corresponding member unset (read back as None), matching the NULL +// arguments _PyErr_SetImportErrorWithNameFrom forwards to new_importerror. +// +// CPython: Python/errors.c:1152 _PyErr_SetImportErrorWithNameFrom +func SetImportErrorWithNameFrom(ts *state.Thread, msg, modName, origin, nameFrom string) { + exc := New(PyExc_ImportError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + d := exc.EnsureAttrDict() + _ = d.SetItem(objects.NewStr("msg"), objects.NewStr(msg)) + if modName != "" { + _ = d.SetItem(objects.NewStr("name"), objects.NewStr(modName)) + } + if origin != "" { + _ = d.SetItem(objects.NewStr("path"), objects.NewStr(origin)) + } + if nameFrom != "" { + _ = d.SetItem(objects.NewStr("name_from"), objects.NewStr(nameFrom)) + } + Raise(ts, exc) +} + // Format raises an exception built from a printf-style template. // Returns nil so callers can `return errors.Format(ts, ...)`. // diff --git a/errors/builtins.go b/errors/builtins.go index 7332c428a..75c1cdb89 100644 --- a/errors/builtins.go +++ b/errors/builtins.go @@ -117,6 +117,11 @@ func init() { // CPython: Objects/exceptions.c:684 StopIteration_init objects.SetTypeDescr(PyExc_StopIteration, "value", objects.NewGetSetDescr("value", stopIterValueGet, stopIterValueSet)) + // SystemExit exposes a dedicated `code` member seeded by SystemExit_init. + // + // CPython: Objects/exceptions.c:880 SystemExit_members + objects.SetTypeDescr(PyExc_SystemExit, "code", + objects.NewGetSetDescr("code", sysExitCodeGet, sysExitCodeSet)) // AsyncGenStopIterationHook lets objects/async_gen.go raise a typed // StopIteration(value) without importing this package. Mirrors // _PyGen_SetStopIterationValue in the async_gen_unwrap_value path. @@ -270,7 +275,15 @@ func newExcType(name string, bases []*objects.Type) *objects.Type { // CPython: Objects/exceptions.c:2503 NameError_init // CPython: Objects/exceptions.c:2586 AttributeError_init func excTpNew(cls *objects.Type, args []objects.Object, kwargs map[string]objects.Object) (objects.Object, error) { - exc := New(cls, objects.NewTuple(args)) + // Allocate with empty args, mirroring BaseException_new's behavior when + // the real positional arguments are about to be installed by tp_init + // (typeCallViaTpNew always runs __init__, which lands on baseExceptionInit + // and stores the args via setArgsSteal). Building the real tuple here too + // would leave it orphaned the moment tp_init replaces it, stranding every + // positional argument at a phantom reference the collector cannot reclaim. + // + // CPython: Objects/exceptions.c:48 BaseException_new (empty args; BaseException_init sets them) + exc := New(cls, nil) if len(kwargs) > 0 { d := exc.EnsureAttrDict() for k, v := range kwargs { @@ -280,6 +293,50 @@ func excTpNew(cls *objects.Type, args []objects.Object, kwargs map[string]object return exc, nil } +// sysExitCodeGet returns SystemExit's `code`. An explicit assignment is +// preserved in the dedicated slot; otherwise the value is derived from the +// constructor args exactly as SystemExit_init seeds it (args[0] for one +// arg, the args tuple for several, None for none). +// +// CPython: Objects/exceptions.c:866 SystemExit_init +// CPython: Objects/exceptions.c:880 SystemExit_members (code) +func sysExitCodeGet(owner objects.Object) (objects.Object, error) { + e, ok := owner.(*Exception) + if !ok { + return objects.None(), nil + } + if e.SysExitCode != nil { + return e.SysExitCode, nil + } + if e.Args == nil { + return objects.None(), nil + } + switch e.Args.Len() { + case 0: + return objects.None(), nil + case 1: + return e.Args.Item(0), nil + default: + return e.Args, nil + } +} + +// sysExitCodeSet writes only the dedicated SysExitCode slot, leaving +// args untouched. Mirrors the _Py_T_OBJECT member on PySystemExitObject. +// +// CPython: Objects/exceptions.c:880 SystemExit_members (code) +func sysExitCodeSet(owner objects.Object, value objects.Object) error { + e, ok := owner.(*Exception) + if !ok { + return stderrors.New("TypeError: descriptor 'code' requires SystemExit") + } + if value == nil { + value = objects.None() + } + e.SysExitCode = value + return nil +} + // excStr ports BaseException_str: empty for no args, str(args[0]) for // a single arg, repr(args) otherwise. // diff --git a/errors/errnocodes_other.go b/errors/errnocodes_other.go new file mode 100644 index 000000000..1ed571010 --- /dev/null +++ b/errors/errnocodes_other.go @@ -0,0 +1,33 @@ +//go:build !windows + +package errors + +import "syscall" + +// errno codes for the errnomap promotion table. On non-Windows +// platforms Go's syscall package carries the real POSIX values, which +// vary across systems (ETIMEDOUT is 110 on Linux, 60 on macOS), so we +// take them from syscall rather than hard-coding. +// +// CPython: Objects/exceptions.c:4470 _PyExc_InitState ADD_ERRNO panel +var ( + errEAGAIN = int(syscall.EAGAIN) + errEALREADY = int(syscall.EALREADY) + errEINPROGRESS = int(syscall.EINPROGRESS) + errEWOULDBLOCK = int(syscall.EWOULDBLOCK) + errEPIPE = int(syscall.EPIPE) + errESHUTDOWN = int(syscall.ESHUTDOWN) + errECHILD = int(syscall.ECHILD) + errECONNABORTED = int(syscall.ECONNABORTED) + errECONNREFUSED = int(syscall.ECONNREFUSED) + errECONNRESET = int(syscall.ECONNRESET) + errEEXIST = int(syscall.EEXIST) + errENOENT = int(syscall.ENOENT) + errEISDIR = int(syscall.EISDIR) + errENOTDIR = int(syscall.ENOTDIR) + errEINTR = int(syscall.EINTR) + errEACCES = int(syscall.EACCES) + errEPERM = int(syscall.EPERM) + errESRCH = int(syscall.ESRCH) + errETIMEDOUT = int(syscall.ETIMEDOUT) +) diff --git a/errors/errnocodes_windows.go b/errors/errnocodes_windows.go new file mode 100644 index 000000000..5d5b5a08f --- /dev/null +++ b/errors/errnocodes_windows.go @@ -0,0 +1,35 @@ +//go:build windows + +package errors + +// errno codes for the errnomap promotion table on Windows. Go's +// syscall package fabricates E* constants as 1<<29+iota there, so we +// hard-code the Universal CRT values CPython actually uses +// (EEXIST == 17). These line up with the winerror->errno translation +// the VM applies before promotion and with the errno module's table. +// ESHUTDOWN has no ucrt definition, so CPython omits it on Windows; the +// negative sentinel makes errnomap skip it. +// +// CPython: Objects/exceptions.c:4470 _PyExc_InitState ADD_ERRNO panel +// (values from ucrt ) +const ( + errEAGAIN = 11 + errEALREADY = 103 + errEINPROGRESS = 112 + errEWOULDBLOCK = 140 + errEPIPE = 32 + errESHUTDOWN = -1 + errECHILD = 10 + errECONNABORTED = 106 + errECONNREFUSED = 107 + errECONNRESET = 108 + errEEXIST = 17 + errENOENT = 2 + errEISDIR = 21 + errENOTDIR = 20 + errEINTR = 4 + errEACCES = 13 + errEPERM = 1 + errESRCH = 3 + errETIMEDOUT = 138 +) diff --git a/errors/exc_import_init.go b/errors/exc_import_init.go index 2e22359fa..c3e59026c 100644 --- a/errors/exc_import_init.go +++ b/errors/exc_import_init.go @@ -21,6 +21,58 @@ func init() { objects.SetTypeDescr(PyExc_ImportError, "__init__", objects.NewMethodDescr(PyExc_ImportError, "__init__", importErrorInit). WithKwParams("ImportError", importErrorKwlist, len(importErrorKwlist))) + + // msg / name / path / name_from are Py_T_OBJECT members on + // PyImportErrorObject: reading a member that was never set yields + // None rather than raising AttributeError, and writing stores the + // value. runpy/importlib both read e.name on a caught ImportError, + // so the attribute must always exist. msg is set from the single + // positional arg by ImportError_init rather than via a keyword. + // + // CPython: Objects/exceptions.c:1932 ImportError_members + for _, name := range append([]string{"msg"}, importErrorKwlist...) { + field := name + objects.SetTypeDescr(PyExc_ImportError, field, objects.NewGetSetDescr(field, + func(o objects.Object) (objects.Object, error) { return importErrorMember(o, field) }, + func(o, v objects.Object) error { return importErrorMemberSet(o, field, v) })) + } +} + +// importErrorMember reads an ImportError member from the instance attr +// dict, returning None when unset to mirror Py_T_OBJECT's NULL->None. +// +// CPython: Include/descrobject.h Py_T_OBJECT (member_get NULL -> None) +func importErrorMember(o objects.Object, field string) (objects.Object, error) { + e, ok := o.(*Exception) + if !ok { + return objects.None(), nil + } + d := e.AttrDict() + if d == nil { + return objects.None(), nil + } + // A missing member reads back as None (Py_T_OBJECT NULL->None), so a + // lookup miss is not an error here; discard it deliberately. + v, _ := d.GetItem(objects.NewStr(field)) + if v == nil { + return objects.None(), nil + } + return v, nil +} + +// importErrorMemberSet writes an ImportError member through the +// instance attr dict, allocating it lazily. +// +// CPython: Objects/exceptions.c:1893 ImportError_members (member_set) +func importErrorMemberSet(o objects.Object, field string, v objects.Object) error { + e, ok := o.(*Exception) + if !ok { + return nil + } + if v == nil { + v = objects.None() + } + return e.EnsureAttrDict().SetItem(objects.NewStr(field), v) } // importErrorInit ports ImportError_init: it runs BaseException_init over @@ -46,6 +98,14 @@ func importErrorInit(args []objects.Object, kwargs map[string]objects.Object) (o return objects.None(), nil } + // msg is set from the lone positional argument: PyTuple_GET_SIZE(args) + // counts the exception args tuple, which here is args[1:]. + // + // CPython: Objects/exceptions.c:1836 ImportError_init (self->msg) + if len(args) == 2 { + _ = e.EnsureAttrDict().SetItem(objects.NewStr("msg"), args[1]) + } + if len(kwargs) > 0 { // PyArg_ParseTupleAndKeywords("|$OOO") with an empty positional // tuple: the surplus check counts every keyword against the three diff --git a/errors/exc_os.go b/errors/exc_os.go index af61fe3a3..8630c1d64 100644 --- a/errors/exc_os.go +++ b/errors/exc_os.go @@ -1,8 +1,6 @@ package errors import ( - "syscall" - "github.com/tamnd/gopy/objects" ) @@ -43,29 +41,35 @@ var errnomap = map[int]*objects.Type{} func init() { add := func(code int, t *objects.Type) { + // Codes a platform does not define arrive as a negative + // sentinel from the errnocodes table; skip them so they never + // collide with a real errno (errnos are always positive). + if code < 0 { + return + } if _, dup := errnomap[code]; !dup { errnomap[code] = t } } - add(int(syscall.EAGAIN), PyExc_BlockingIOError) - add(int(syscall.EALREADY), PyExc_BlockingIOError) - add(int(syscall.EINPROGRESS), PyExc_BlockingIOError) - add(int(syscall.EWOULDBLOCK), PyExc_BlockingIOError) - add(int(syscall.EPIPE), PyExc_BrokenPipeError) - add(int(syscall.ESHUTDOWN), PyExc_BrokenPipeError) - add(int(syscall.ECHILD), PyExc_ChildProcessError) - add(int(syscall.ECONNABORTED), PyExc_ConnectionAbortedError) - add(int(syscall.ECONNREFUSED), PyExc_ConnectionRefusedError) - add(int(syscall.ECONNRESET), PyExc_ConnectionResetError) - add(int(syscall.EEXIST), PyExc_FileExistsError) - add(int(syscall.ENOENT), PyExc_FileNotFoundError) - add(int(syscall.EISDIR), PyExc_IsADirectoryError) - add(int(syscall.ENOTDIR), PyExc_NotADirectoryError) - add(int(syscall.EINTR), PyExc_InterruptedError) - add(int(syscall.EACCES), PyExc_PermissionError) - add(int(syscall.EPERM), PyExc_PermissionError) - add(int(syscall.ESRCH), PyExc_ProcessLookupError) - add(int(syscall.ETIMEDOUT), PyExc_TimeoutError) + add(errEAGAIN, PyExc_BlockingIOError) + add(errEALREADY, PyExc_BlockingIOError) + add(errEINPROGRESS, PyExc_BlockingIOError) + add(errEWOULDBLOCK, PyExc_BlockingIOError) + add(errEPIPE, PyExc_BrokenPipeError) + add(errESHUTDOWN, PyExc_BrokenPipeError) + add(errECHILD, PyExc_ChildProcessError) + add(errECONNABORTED, PyExc_ConnectionAbortedError) + add(errECONNREFUSED, PyExc_ConnectionRefusedError) + add(errECONNRESET, PyExc_ConnectionResetError) + add(errEEXIST, PyExc_FileExistsError) + add(errENOENT, PyExc_FileNotFoundError) + add(errEISDIR, PyExc_IsADirectoryError) + add(errENOTDIR, PyExc_NotADirectoryError) + add(errEINTR, PyExc_InterruptedError) + add(errEACCES, PyExc_PermissionError) + add(errEPERM, PyExc_PermissionError) + add(errESRCH, PyExc_ProcessLookupError) + add(errETIMEDOUT, PyExc_TimeoutError) } // ErrnoSubclass returns the OSError subclass that CPython would pick diff --git a/errors/exc_os_init.go b/errors/exc_os_init.go index 0d6108954..1cadb98f0 100644 --- a/errors/exc_os_init.go +++ b/errors/exc_os_init.go @@ -126,7 +126,7 @@ func osErrorInit(e *Exception, cls *objects.Type, args []objects.Object, myerrno if nargs >= 2 && nargs <= 5 { // filename, filename2 and winerror are stripped from args // for compatibility with old in-place unpacking code. - e.setArgs(objects.NewTuple([]objects.Object{args[0], args[1]})) + e.setArgsSteal(objects.NewTuple([]objects.Object{args[0], args[1]})) } } errnoStore: diff --git a/errors/exc_os_internal_test.go b/errors/exc_os_internal_test.go new file mode 100644 index 000000000..dd9763f12 --- /dev/null +++ b/errors/exc_os_internal_test.go @@ -0,0 +1,40 @@ +package errors + +import ( + "testing" + + "github.com/tamnd/gopy/objects" +) + +// TestErrnoSubclass drives ErrnoSubclass off the same platform errno +// codes errnomap is built from (errEEXIST and friends), so the mapping +// is exercised with the values that actually reach it at runtime: real +// POSIX numbers on Unix, ucrt numbers on Windows. +func TestErrnoSubclass(t *testing.T) { + cases := []struct { + errno int + want *objects.Type + }{ + {errENOENT, PyExc_FileNotFoundError}, + {errEEXIST, PyExc_FileExistsError}, + {errEACCES, PyExc_PermissionError}, + {errEPERM, PyExc_PermissionError}, + {errEINTR, PyExc_InterruptedError}, + {errEPIPE, PyExc_BrokenPipeError}, + {errECHILD, PyExc_ChildProcessError}, + {errEISDIR, PyExc_IsADirectoryError}, + {errENOTDIR, PyExc_NotADirectoryError}, + {errECONNREFUSED, PyExc_ConnectionRefusedError}, + {errECONNRESET, PyExc_ConnectionResetError}, + {errECONNABORTED, PyExc_ConnectionAbortedError}, + {errESRCH, PyExc_ProcessLookupError}, + {errETIMEDOUT, PyExc_TimeoutError}, + {0, PyExc_OSError}, + {99999, PyExc_OSError}, + } + for _, c := range cases { + if got := ErrnoSubclass(c.errno); got != c.want { + t.Errorf("ErrnoSubclass(%d) = %v, want %v", c.errno, got, c.want) + } + } +} diff --git a/errors/exc_os_test.go b/errors/exc_os_test.go index b2e79c9df..6fb6a9ba4 100644 --- a/errors/exc_os_test.go +++ b/errors/exc_os_test.go @@ -1,7 +1,6 @@ package errors_test import ( - "syscall" "testing" "github.com/tamnd/gopy/errors" @@ -39,32 +38,3 @@ func TestOSErrorHierarchy(t *testing.T) { t.Fatal("BrokenPipeError must inherit from ConnectionError") } } - -func TestErrnoSubclass(t *testing.T) { - cases := []struct { - errno int - want *objects.Type - }{ - {int(syscall.ENOENT), errors.PyExc_FileNotFoundError}, - {int(syscall.EEXIST), errors.PyExc_FileExistsError}, - {int(syscall.EACCES), errors.PyExc_PermissionError}, - {int(syscall.EPERM), errors.PyExc_PermissionError}, - {int(syscall.EINTR), errors.PyExc_InterruptedError}, - {int(syscall.EPIPE), errors.PyExc_BrokenPipeError}, - {int(syscall.ECHILD), errors.PyExc_ChildProcessError}, - {int(syscall.EISDIR), errors.PyExc_IsADirectoryError}, - {int(syscall.ENOTDIR), errors.PyExc_NotADirectoryError}, - {int(syscall.ECONNREFUSED), errors.PyExc_ConnectionRefusedError}, - {int(syscall.ECONNRESET), errors.PyExc_ConnectionResetError}, - {int(syscall.ECONNABORTED), errors.PyExc_ConnectionAbortedError}, - {int(syscall.ESRCH), errors.PyExc_ProcessLookupError}, - {int(syscall.ETIMEDOUT), errors.PyExc_TimeoutError}, - {0, errors.PyExc_OSError}, - {99999, errors.PyExc_OSError}, - } - for _, c := range cases { - if got := errors.ErrnoSubclass(c.errno); got != c.want { - t.Errorf("ErrnoSubclass(%d) = %v, want %v", c.errno, got, c.want) - } - } -} diff --git a/errors/exception.go b/errors/exception.go index 32d58b111..bf9615121 100644 --- a/errors/exception.go +++ b/errors/exception.go @@ -62,6 +62,16 @@ type Exception struct { // CPython: Objects/exceptions.c:867 PyBaseExceptionGroupObject EG *ExceptionGroupState + // SysExitCode stores SystemExit's separate `code` member per + // PySystemExitObject. SystemExit_init seeds it from the positional + // args (args[0] for one arg, the args tuple for several, None for + // none); assigning exc.code rewrites only this slot, leaving args + // untouched. Meaningful only when ExcType is SystemExit or a subclass. + // + // CPython: Objects/exceptions.c:854 PySystemExitObject + // CPython: Objects/exceptions.c:866 SystemExit_init + SysExitCode objects.Object + // NotesObj holds a __notes__ value that is not a plain list. CPython // stores __notes__ as an ordinary instance attribute that may hold any // object; add_note only requires a list when it appends. The common @@ -135,6 +145,23 @@ func (e *Exception) setArgs(t *objects.Tuple) { } } +// setArgsSteal stores a freshly built args tuple whose single counted +// reference is transferred to the exception, releasing the previous one. The +// inits all build their args with NewTuple (refcount 1, no other owner), so +// taking another reference the way setArgs does for a borrowed value would +// strand the tuple, and through it the positional arguments, at a phantom +1 +// after the exception is gone. +// +// CPython: Objects/exceptions.c:60 BaseException_init (self->args = Py_NewRef(args), +// balanced by the caller's DECREF of the freshly built args tuple) +func (e *Exception) setArgsSteal(t *objects.Tuple) { + old := e.Args + e.Args = t + if old != nil { + objects.Decref(old) + } +} + // New constructs an exception with the given type and args. Mirrors // BaseException_new + BaseException_init. // @@ -143,14 +170,15 @@ func New(t *objects.Type, args *objects.Tuple) *Exception { if args == nil { args = objects.NewTuple(nil) } - // BaseException owns its args tuple: excTraverse visits Args, so the - // cyclic collector accounts for an exception->args edge during trial - // deletion. The edge must correspond to a real counted reference or - // subtractRefs drives the tuple to zero and tuple_dealloc nils its - // items while the live exception still points at it (e.args goes - // empty). Take that reference here. + // BaseException owns a counted reference on its args tuple: excTraverse + // visits Args, and the raise/normalize machinery decrefs the args tuple + // once as it consumes the Go-level error, so the exception needs its own + // reference to keep args[0] alive in the handler. The fresh-tuple inits + // (baseExceptionInit and friends) use setArgsSteal to avoid stacking a + // second reference on a tuple no caller else owns; the type-call path + // allocates with empty args here and lets tp_init install the real tuple. // - // CPython: Objects/exceptions.c:60 BaseException_init (self->args = Py_NewRef(args)) + // CPython: Objects/exceptions.c:48 BaseException_new (self->args = Py_NewRef(args)) objects.Incref(args) e := &Exception{ExcType: t, Args: args} if t != nil { diff --git a/errors/exception_attrs.go b/errors/exception_attrs.go index d3d049495..a48a82939 100644 --- a/errors/exception_attrs.go +++ b/errors/exception_attrs.go @@ -207,7 +207,7 @@ func baseExceptionInit(args []objects.Object, _ map[string]objects.Object) (obje if !ok { return objects.None(), nil } - e.setArgs(objects.NewTuple(args[1:])) + e.setArgsSteal(objects.NewTuple(args[1:])) return objects.None(), nil } @@ -251,7 +251,7 @@ func argsSet(owner objects.Object, value objects.Object) error { } items = append(items, next) } - e.setArgs(objects.NewTuple(items)) + e.setArgsSteal(objects.NewTuple(items)) return nil } diff --git a/errors/systemexit.go b/errors/systemexit.go index 9915b7b0c..0448dc17b 100644 --- a/errors/systemexit.go +++ b/errors/systemexit.go @@ -8,6 +8,23 @@ import ( "github.com/tamnd/gopy/state" ) +// unhandledKeyboardInterrupt records that a KeyboardInterrupt reached +// the top-level print path. Modules/main.c reads the matching runtime +// flag after Py_RunMain and re-raises SIGINT so the process dies by +// signal (exit status -SIGINT) rather than a plain non-zero code. +// +// CPython: Python/pythonrun.c:625 unhandled_keyboard_interrupt store +var unhandledKeyboardInterrupt bool + +// UnhandledKeyboardInterrupt reports whether a KeyboardInterrupt was +// surfaced to the top-level handler since the last reset. The CLI +// entry point consults it to decide whether to exit via SIGINT. +// +// CPython: Modules/main.c:786 _PyRuntime.signals.unhandled_keyboard_interrupt +func UnhandledKeyboardInterrupt() bool { + return unhandledKeyboardInterrupt +} + // HandleSystemExit inspects the current exception. If it is a // SystemExit, the exit code is read off the args and the exception // is cleared; the caller propagates the code. KeyboardInterrupt is @@ -26,6 +43,7 @@ func HandleSystemExit(ts *state.Thread) (code int, handled bool) { return 0, false } if Match(exc, PyExc_KeyboardInterrupt) { + unhandledKeyboardInterrupt = true return 0, false } if !Match(exc, PyExc_SystemExit) { diff --git a/frame/frame.go b/frame/frame.go index 5900d6fdb..c2ef827c7 100644 --- a/frame/frame.go +++ b/frame/frame.go @@ -266,6 +266,17 @@ func (f *Frame) Init(co *objects.Code, globals, builtins objects.Object, fn obje f.Globals = globals f.Builtins = builtins f.Locals = nil + // The frame owns a counted reference on its function object for the + // duration of the call; _PyEvalFramePushAndInit transfers/holds + // f_funcobj and clear_thread_frame drops it. Without this the CALL + // that consumed the callable's only stack reference would let the + // function reach refcount zero (and run func_dealloc, clearing its + // closure) while its own frame is still executing. + // + // CPython: Python/ceval.c:1860 _PyEval_BuildFrame sets f_funcobj + if fn != nil { + objects.Incref(fn) + } f.Func = fn f.Previous = prev f.InstrPtr = 0 @@ -377,6 +388,12 @@ func (f *Frame) Clear() { f.Globals = nil f.Builtins = nil f.Locals = nil + // Release the frame's reference on f_funcobj acquired in Init. + // + // CPython: Python/frame.c clear_thread_frame Py_DECREF(frame->f_funcobj) + if f.Func != nil { + objects.Decref(f.Func) + } f.Func = nil f.Previous = nil } diff --git a/hamt/api.go b/hamt/api.go index 72368e811..7b01552ce 100644 --- a/hamt/api.go +++ b/hamt/api.go @@ -25,6 +25,7 @@ type Hamt struct { func New() *Hamt { h := &Hamt{root: emptyBitmap} h.Init(HamtType) + objects.Incref(emptyBitmap) // h owns +1 on its root (no-op: immortal) return h } @@ -43,9 +44,11 @@ func (h *Hamt) Assoc(key, val objects.Object) (*Hamt, error) { return nil, err } if newRoot == h.root { + objects.Decref(newRoot) // drop the Incref(self) from the unchanged path + objects.Incref(h) // _PyHamt_Assoc returns a new reference return h, nil } - out := &Hamt{root: newRoot, count: h.count} + out := &Hamt{root: newRoot, count: h.count} // adopt owned newRoot if addedLeaf { out.count++ } @@ -67,11 +70,12 @@ func (h *Hamt) Without(key objects.Object) (*Hamt, bool, error) { case wError: return nil, false, err case wNotFound: + objects.Incref(h) // _PyHamt_Without returns a new reference return h, false, nil case wEmpty: return New(), true, nil case wNewNode: - out := &Hamt{root: newRoot, count: h.count - 1} + out := &Hamt{root: newRoot, count: h.count - 1} // adopt owned newRoot out.Init(HamtType) return out, true, nil default: diff --git a/hamt/hamt.go b/hamt/hamt.go index aac1a70d5..1ce228c6b 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -5,6 +5,14 @@ // maximum non-collision depth is 7. A level-7 collision node lifts the // total maximum depth to 8 (MaxTreeDepth). // +// Nodes are refcounted objects.Object values: each node owns one +// reference on every key, value, and child it stores. assoc/without +// return an owned reference (the caller must Decref it); the unchanged +// path returns Incref(self). This mirrors CPython's tp_dealloc / +// Py_NewRef / Py_SETREF discipline 1:1 so that a value stored only in a +// ContextVar (via the HAMT) carries an honest refcount and does not get +// torn down while it is still reachable. +// // CPython: Python/hamt.c package hamt @@ -47,8 +55,12 @@ const ( ) // node is the internal interface for the three node shapes. The -// dispatch matches hamt_node_assoc / without / find in CPython. +// dispatch matches hamt_node_assoc / without / find in CPython. Every +// node embeds objects.Header so a node value is a full objects.Object: +// it can be stored in a bitmap's value slot, incref'd, and decref'd +// uniformly with the keys and values it sits beside. type node interface { + objects.Object assoc(shift uint32, hash int32, key, val objects.Object) (newNode node, addedLeaf bool, err error) without(shift uint32, hash int32, key objects.Object) (res withoutResult, newNode node, err error) find(shift uint32, hash int32, key objects.Object) (val objects.Object, found bool, err error) @@ -61,6 +73,7 @@ type node interface { // // CPython: Python/hamt.c:316 PyHamtNode_Bitmap type bitmapNode struct { + objects.Header bitmap uint32 array []objects.Object } @@ -71,6 +84,7 @@ type bitmapNode struct { // // CPython: Python/hamt.c:316 PyHamtNode_Array type arrayNode struct { + objects.Header count int children [arrayNodeSize]node } @@ -81,17 +95,69 @@ type arrayNode struct { // // CPython: Python/hamt.c:325 PyHamtNode_Collision type collisionNode struct { + objects.Header hash int32 array []objects.Object } +// Node type objects. They exist so a node satisfies objects.Object and +// so Decref can reach the per-shape Dealloc that releases the stored +// references. HAMT is private to the runtime, so the bare type name is +// all we register. +// +// CPython: Python/hamt.c:2843 _PyHamt_BitmapNode_Type / _PyHamt_ArrayNode_Type / _PyHamt_CollisionNode_Type +var ( + bitmapNodeType = objects.NewType("hamt_bitmap_node", []*objects.Type{objects.ObjectType()}) + arrayNodeType = objects.NewType("hamt_array_node", []*objects.Type{objects.ObjectType()}) + collisionNodeType = objects.NewType("hamt_collision_node", []*objects.Type{objects.ObjectType()}) +) + // emptyBitmap is the singleton empty bitmap node. CPython caches the -// same instance; we cache to keep New() allocation-free for empty -// HAMTs. +// same instance and statically allocates it (immortal); we stamp it +// immortal so the Incref/Decref it sees as the root of every empty +// Hamt and as the working node in assoc are all no-ops and it never +// deallocs. // // CPython: Python/hamt.c:498 _Py_SINGLETON(hamt_bitmap_node_empty) var emptyBitmap = &bitmapNode{} +func init() { + bitmapNodeType.Dealloc = bitmapNodeDealloc + bitmapNodeType.TpTraverse = bitmapNodeTraverse + arrayNodeType.Dealloc = arrayNodeDealloc + arrayNodeType.TpTraverse = arrayNodeTraverse + collisionNodeType.Dealloc = collisionNodeDealloc + collisionNodeType.TpTraverse = collisionNodeTraverse + + emptyBitmap.Init(bitmapNodeType) + emptyBitmap.MakeImmortal() +} + +// xIncref / xDecref mirror Py_XINCREF / Py_XDECREF: a nil operand is a +// no-op. A nil bitmap key slot is the common case (it marks a child +// node in the value slot), so the guard earns its keep. +func xIncref(o objects.Object) { + if o != nil { + objects.Incref(o) + } +} + +func xDecref(o objects.Object) { + if o != nil { + objects.Decref(o) + } +} + +// setref stores v into arr[i] and drops the reference the slot held +// before. Mirrors Py_SETREF / Py_XSETREF: the store happens before the +// decref so a self-referential value cannot be freed mid-swap. v is +// already an owned reference (or nil); the slot adopts it. +func setref(arr []objects.Object, i int, v objects.Object) { + old := arr[i] + arr[i] = v + xDecref(old) +} + // hamtHash reduces a Python hash to 32 bits via XOR-fold. CPython // pins this exact reducer so test fixtures can target specific tree // shapes; do not change the formula. @@ -135,14 +201,17 @@ func hamtBitindex(bitmap, bit uint32) uint32 { ///////////////////////////////// Bitmap node ///////////////////////// // newBitmap returns a bitmap node with `size` empty slots. size==0 -// reuses the empty singleton. +// reuses the immortal empty singleton (CPython returns Py_NewRef of +// the statically allocated singleton; here the incref is a no-op). // // CPython: Python/hamt.c:489 hamt_node_bitmap_new func newBitmap(size int) *bitmapNode { if size == 0 { return emptyBitmap } - return &bitmapNode{array: make([]objects.Object, size)} + b := &bitmapNode{array: make([]objects.Object, size)} + b.Init(bitmapNodeType) + return b } // count returns the number of (k, v) pairs in the bitmap node. CPython @@ -153,12 +222,16 @@ func (b *bitmapNode) count() int { return len(b.array) / 2 } -// clone copies the bitmap and the slot array. +// clone copies the bitmap and the slot array, taking a reference on +// every copied entry. // // CPython: Python/hamt.c:533 hamt_node_bitmap_clone func (b *bitmapNode) clone() *bitmapNode { c := newBitmap(len(b.array)) - copy(c.array, b.array) + for i := range b.array { + c.array[i] = b.array[i] + xIncref(c.array[i]) + } c.bitmap = b.bitmap return c } @@ -174,9 +247,11 @@ func (b *bitmapNode) cloneWithout(bit uint32) *bitmapNode { valIdx := keyIdx + 1 for i := uint32(0); i < keyIdx; i++ { c.array[i] = b.array[i] + xIncref(c.array[i]) } for i := valIdx + 1; i < uint32(len(b.array)); i++ { c.array[i-2] = b.array[i] + xIncref(b.array[i]) } c.bitmap = b.bitmap & ^bit return c @@ -184,7 +259,8 @@ func (b *bitmapNode) cloneWithout(bit uint32) *bitmapNode { // newBitmapOrCollision returns a node holding two key/value pairs // that collided in the parent bitmap node. It promotes to a collision -// node only when the full 32-bit hashes are identical. +// node only when the full 32-bit hashes are identical. The returned +// node is an owned reference. // // CPython: Python/hamt.c:584 hamt_node_new_bitmap_or_collision func newBitmapOrCollision(shift uint32, key1 objects.Object, val1 objects.Object, key2Hash int32, key2, val2 objects.Object) (node, error) { @@ -193,17 +269,25 @@ func newBitmapOrCollision(shift uint32, key1 objects.Object, val1 objects.Object return nil, err } if key1Hash == key2Hash { - return &collisionNode{ - hash: key1Hash, - array: []objects.Object{key1, val1, key2, val2}, - }, nil + n := newCollision(key1Hash, 4) + n.array[0] = key1 + objects.Incref(key1) + n.array[1] = val1 + objects.Incref(val1) + n.array[2] = key2 + objects.Incref(key2) + n.array[3] = val2 + objects.Incref(val2) + return n, nil } n := newBitmap(0) n2, _, err := n.assoc(shift, key1Hash, key1, val1) + objects.Decref(n) if err != nil { return nil, err } n3, _, err := n2.assoc(shift, key2Hash, key2, val2) + objects.Decref(n2) if err != nil { return nil, err } @@ -212,7 +296,8 @@ func newBitmapOrCollision(shift uint32, key1 objects.Object, val1 objects.Object // assoc on a bitmap node. The four code paths (sub-node descent, // equal-key replace, collision promotion, and the new-key insert / -// promote-to-array branch) line up with CPython 1:1. +// promote-to-array branch) line up with CPython 1:1. The returned +// node is an owned reference. // // CPython: Python/hamt.c:642 hamt_node_bitmap_assoc func (b *bitmapNode) assoc(shift uint32, hash int32, key, val objects.Object) (node, bool, error) { @@ -227,14 +312,16 @@ func (b *bitmapNode) assoc(shift uint32, hash int32, key, val objects.Object) (n n := bits.OnesCount32(b.bitmap) if n >= bitmapPromoteThreshold { jdx := hamtMask(hash, shift) - newArr := &arrayNode{count: n + 1} + newArr := newArray(n + 1) empty := newBitmap(0) child, _, err := empty.assoc(shift+5, hash, key, val) if err != nil { + objects.Decref(empty) + objects.Decref(newArr) return nil, false, err } - newArr.children[jdx] = child + newArr.children[jdx] = child // borrow: adopt the owned ref // Re-distribute existing entries. j := 0 @@ -243,20 +330,27 @@ func (b *bitmapNode) assoc(shift uint32, hash int32, key, val objects.Object) (n continue } if b.array[j] == nil { - newArr.children[i] = b.array[j+1].(node) + cn := b.array[j+1].(node) + objects.Incref(cn) + newArr.children[i] = cn } else { rehash, err := hamtHash(b.array[j]) if err != nil { + objects.Decref(empty) + objects.Decref(newArr) return nil, false, err } child, _, err := empty.assoc(shift+5, rehash, b.array[j], b.array[j+1]) if err != nil { + objects.Decref(empty) + objects.Decref(newArr) return nil, false, err } - newArr.children[i] = child + newArr.children[i] = child // borrow } j += 2 } + objects.Decref(empty) return newArr, true, nil } @@ -266,11 +360,15 @@ func (b *bitmapNode) assoc(shift uint32, hash int32, key, val objects.Object) (n out := newBitmap(2 * (n + 1)) for i := uint32(0); i < keyIdx; i++ { out.array[i] = b.array[i] + xIncref(b.array[i]) } out.array[keyIdx] = key + objects.Incref(key) out.array[valIdx] = val + objects.Incref(val) for i := keyIdx; i < uint32(len(b.array)); i++ { out.array[i+2] = b.array[i] + xIncref(b.array[i]) } out.bitmap = b.bitmap | bit return out, true, nil @@ -294,10 +392,12 @@ func (b *bitmapNode) assocFilled(shift uint32, hash int32, idx uint32, key, val return nil, false, err } if subNode == oldSub { + objects.Decref(subNode) + objects.Incref(b) return b, addedLeaf, nil } ret := b.clone() - ret.array[valIdx] = subNode.(objects.Object) + setref(ret.array, int(valIdx), subNode) // adopt owned subNode return ret, addedLeaf, nil } @@ -307,10 +407,12 @@ func (b *bitmapNode) assocFilled(shift uint32, hash int32, idx uint32, key, val } if eq { if val == valOrNode { + objects.Incref(b) return b, false, nil } ret := b.clone() - ret.array[valIdx] = val + objects.Incref(val) + setref(ret.array, int(valIdx), val) return ret, false, nil } @@ -319,12 +421,13 @@ func (b *bitmapNode) assocFilled(shift uint32, hash int32, idx uint32, key, val return nil, false, err } ret := b.clone() - ret.array[keyIdx] = nil - ret.array[valIdx] = subNode.(objects.Object) + setref(ret.array, int(keyIdx), nil) // drop the old key + setref(ret.array, int(valIdx), subNode) // adopt owned subNode return ret, true, nil } -// without on a bitmap node. +// without on a bitmap node. On wNewNode the returned node is an owned +// reference. // // CPython: Python/hamt.c:902 hamt_node_bitmap_without func (b *bitmapNode) without(shift uint32, hash int32, key objects.Object) (withoutResult, node, error) { @@ -350,13 +453,16 @@ func (b *bitmapNode) without(shift uint32, hash int32, key objects.Object) (with if sb.count() == 1 && sb.array[0] != nil { // Inline a single-entry bitmap into the parent. clone := b.clone() - clone.array[keyIdx] = sb.array[0] - clone.array[valIdx] = sb.array[1] + objects.Incref(sb.array[0]) + setref(clone.array, int(keyIdx), sb.array[0]) + objects.Incref(sb.array[1]) + setref(clone.array, int(valIdx), sb.array[1]) + objects.Decref(sub) return wNewNode, clone, nil } } clone := b.clone() - clone.array[valIdx] = sub.(objects.Object) + setref(clone.array, int(valIdx), sub) // adopt owned sub return wNewNode, clone, nil case wError, wNotFound: return res, nil, err @@ -378,7 +484,8 @@ func (b *bitmapNode) without(shift uint32, hash int32, key objects.Object) (with return wNewNode, b.cloneWithout(bit), nil } -// find on a bitmap node. +// find on a bitmap node. The returned value is borrowed (CPython +// returns a borrowed reference from hamt_node_bitmap_find). // // CPython: Python/hamt.c:1040 hamt_node_bitmap_find func (b *bitmapNode) find(shift uint32, hash int32, key objects.Object) (objects.Object, bool, error) { @@ -404,8 +511,49 @@ func (b *bitmapNode) find(shift uint32, hash int32, key objects.Object) (objects return nil, false, nil } +// bitmapNodeDealloc releases the reference the node holds on every +// slot. The empty singleton is immortal and never reaches here. +// +// CPython: Python/hamt.c:1102 hamt_node_bitmap_dealloc +func bitmapNodeDealloc(o objects.Object) { + b := o.(*bitmapNode) + if b == emptyBitmap { + return + } + for i := len(b.array) - 1; i >= 0; i-- { + xDecref(b.array[i]) + b.array[i] = nil + } +} + +// bitmapNodeTraverse visits every slot for the cyclic collector. +// +// CPython: Python/hamt.c:1085 hamt_node_bitmap_traverse +func bitmapNodeTraverse(o objects.Object, visit objects.Visitor) error { + b := o.(*bitmapNode) + for i := len(b.array) - 1; i >= 0; i-- { + if b.array[i] == nil { + continue + } + if err := visit(b.array[i]); err != nil { + return err + } + } + return nil +} + ///////////////////////////////// Collision node ////////////////////// +// newCollision allocates a collision node with `size` slots and the +// given shared hash. +// +// CPython: Python/hamt.c:1192 hamt_node_collision_new +func newCollision(hash int32, size int) *collisionNode { + c := &collisionNode{hash: hash, array: make([]objects.Object, size)} + c.Init(collisionNodeType) + return c +} + // findIndex linearly scans for `key`. Returns the index of the key or // -1 if absent. // @@ -423,7 +571,7 @@ func (c *collisionNode) findIndex(key objects.Object) (int, error) { return -1, nil } -// assoc on a collision node. +// assoc on a collision node. The returned node is an owned reference. // // CPython: Python/hamt.c:1268 hamt_node_collision_assoc func (c *collisionNode) assoc(shift uint32, hash int32, key, val objects.Object) (node, bool, error) { @@ -434,19 +582,29 @@ func (c *collisionNode) assoc(shift uint32, hash int32, key, val objects.Object) } if idx < 0 { // Append the new pair. - out := &collisionNode{hash: c.hash, array: make([]objects.Object, len(c.array)+2)} - copy(out.array, c.array) + out := newCollision(c.hash, len(c.array)+2) + for i := range c.array { + out.array[i] = c.array[i] + objects.Incref(c.array[i]) + } out.array[len(c.array)] = key + objects.Incref(key) out.array[len(c.array)+1] = val + objects.Incref(val) return out, true, nil } // Replace value. if c.array[idx+1] == val { + objects.Incref(c) return c, false, nil } - out := &collisionNode{hash: c.hash, array: make([]objects.Object, len(c.array))} - copy(out.array, c.array) - out.array[idx+1] = val + out := newCollision(c.hash, len(c.array)) + for i := range c.array { + out.array[i] = c.array[i] + objects.Incref(c.array[i]) + } + objects.Incref(val) + setref(out.array, idx+1, val) return out, false, nil } // Different 32-bit hash: lift into a bitmap node containing the @@ -454,10 +612,14 @@ func (c *collisionNode) assoc(shift uint32, hash int32, key, val objects.Object) wrap := newBitmap(2) wrap.bitmap = hamtBitpos(c.hash, shift) wrap.array[1] = c - return wrap.assoc(shift, hash, key, val) + objects.Incref(c) + res, addedLeaf, err := wrap.assoc(shift, hash, key, val) + objects.Decref(wrap) + return res, addedLeaf, err } -// without on a collision node. +// without on a collision node. On wNewNode the returned node is an +// owned reference. // // CPython: Python/hamt.c:1378 hamt_node_collision_without func (c *collisionNode) without(shift uint32, hash int32, key objects.Object) (withoutResult, node, error) { @@ -480,25 +642,31 @@ func (c *collisionNode) without(shift uint32, hash int32, key objects.Object) (w out := newBitmap(2) if idx == 0 { out.array[0] = c.array[2] + objects.Incref(c.array[2]) out.array[1] = c.array[3] + objects.Incref(c.array[3]) } else { out.array[0] = c.array[0] + objects.Incref(c.array[0]) out.array[1] = c.array[1] + objects.Incref(c.array[1]) } out.bitmap = hamtBitpos(hash, shift) return wNewNode, out, nil } - out := &collisionNode{hash: c.hash, array: make([]objects.Object, len(c.array)-2)} + out := newCollision(c.hash, len(c.array)-2) for i := 0; i < idx; i++ { out.array[i] = c.array[i] + objects.Incref(c.array[i]) } for i := idx + 2; i < len(c.array); i++ { out.array[i-2] = c.array[i] + objects.Incref(c.array[i]) } return wNewNode, out, nil } -// find on a collision node. +// find on a collision node. The returned value is borrowed. // // CPython: Python/hamt.c:1466 hamt_node_collision_find func (c *collisionNode) find(shift uint32, hash int32, key objects.Object) (objects.Object, bool, error) { @@ -512,19 +680,59 @@ func (c *collisionNode) find(shift uint32, hash int32, key objects.Object) (obje return c.array[idx+1], true, nil } +// collisionNodeDealloc releases every stored reference. +// +// CPython: Python/hamt.c:1503 hamt_node_collision_dealloc +func collisionNodeDealloc(o objects.Object) { + c := o.(*collisionNode) + for i := len(c.array) - 1; i >= 0; i-- { + xDecref(c.array[i]) + c.array[i] = nil + } +} + +// collisionNodeTraverse visits every stored value. +// +// CPython: Python/hamt.c:1489 hamt_node_collision_traverse +func collisionNodeTraverse(o objects.Object, visit objects.Visitor) error { + c := o.(*collisionNode) + for i := len(c.array) - 1; i >= 0; i-- { + if c.array[i] == nil { + continue + } + if err := visit(c.array[i]); err != nil { + return err + } + } + return nil +} + ///////////////////////////////// Array node ////////////////////////// -// clone deep-copies the children slice. Cheap because every entry is -// itself a pointer. +// newArray allocates an array node with the given non-nil child count. +// +// CPython: Python/hamt.c:1557 hamt_node_array_new +func newArray(count int) *arrayNode { + a := &arrayNode{count: count} + a.Init(arrayNodeType) + return a +} + +// clone copies the children, taking a reference on each non-nil child. // // CPython: Python/hamt.c:1581 hamt_node_array_clone func (a *arrayNode) clone() *arrayNode { - out := &arrayNode{count: a.count} - out.children = a.children + out := newArray(a.count) + for i := 0; i < arrayNodeSize; i++ { + out.children[i] = a.children[i] + if a.children[i] != nil { + objects.Incref(a.children[i]) + } + } return out } -// assoc on an array node. +// assoc on an array node. The returned node is an owned reference. // // CPython: Python/hamt.c:1604 hamt_node_array_assoc func (a *arrayNode) assoc(shift uint32, hash int32, key, val objects.Object) (node, bool, error) { @@ -532,28 +740,41 @@ func (a *arrayNode) assoc(shift uint32, hash int32, key, val objects.Object) (no child := a.children[idx] if child == nil { empty := newBitmap(0) - newChild, _, err := empty.assoc(shift+5, hash, key, val) + newChild, addedLeaf, err := empty.assoc(shift+5, hash, key, val) + objects.Decref(empty) if err != nil { return nil, false, err } - out := &arrayNode{count: a.count + 1} - out.children = a.children - out.children[idx] = newChild - return out, true, nil + out := newArray(a.count + 1) + for i := 0; i < arrayNodeSize; i++ { + out.children[i] = a.children[i] + if a.children[i] != nil { + objects.Incref(a.children[i]) + } + } + out.children[idx] = newChild // borrow: slot was nil, adopt owned ref + return out, addedLeaf, nil } newChild, addedLeaf, err := child.assoc(shift+5, hash, key, val) if err != nil { return nil, false, err } if newChild == child { + objects.Decref(newChild) + objects.Incref(a) return a, addedLeaf, nil } out := a.clone() - out.children[idx] = newChild + old := out.children[idx] + out.children[idx] = newChild // adopt owned ref + if old != nil { + objects.Decref(old) + } return out, addedLeaf, nil } -// without on an array node. +// without on an array node. On wNewNode the returned node is an owned +// reference. // // CPython: Python/hamt.c:1687 hamt_node_array_without func (a *arrayNode) without(shift uint32, hash int32, key objects.Object) (withoutResult, node, error) { @@ -568,7 +789,11 @@ func (a *arrayNode) without(shift uint32, hash int32, key objects.Object) (witho return res, nil, err case wNewNode: clone := a.clone() - clone.children[idx] = sub + old := clone.children[idx] + clone.children[idx] = sub // adopt owned sub + if old != nil { + objects.Decref(old) + } return wNewNode, clone, nil case wEmpty: newCount := a.count - 1 @@ -578,7 +803,10 @@ func (a *arrayNode) without(shift uint32, hash int32, key objects.Object) (witho if newCount >= bitmapPromoteThreshold { out := a.clone() out.count = newCount - out.children[idx] = nil + if out.children[idx] != nil { + objects.Decref(out.children[idx]) + out.children[idx] = nil + } return wNewNode, out, nil } // Demote to a bitmap node. @@ -597,10 +825,13 @@ func (a *arrayNode) without(shift uint32, hash int32, key objects.Object) (witho bitmap |= 1 << i if bn, ok := n.(*bitmapNode); ok && bn.count() == 1 && bn.array[0] != nil { out.array[newI] = bn.array[0] + objects.Incref(bn.array[0]) out.array[newI+1] = bn.array[1] + objects.Incref(bn.array[1]) } else { out.array[newI] = nil - out.array[newI+1] = n.(objects.Object) + out.array[newI+1] = n + objects.Incref(n) } newI += 2 } @@ -611,7 +842,7 @@ func (a *arrayNode) without(shift uint32, hash int32, key objects.Object) (witho } } -// find on an array node. +// find on an array node. The returned value is borrowed. // // CPython: Python/hamt.c:1841 hamt_node_array_find func (a *arrayNode) find(shift uint32, hash int32, key objects.Object) (objects.Object, bool, error) { @@ -623,14 +854,31 @@ func (a *arrayNode) find(shift uint32, hash int32, key objects.Object) (objects. return child.find(shift+5, hash, key) } -// arrayNode and collisionNode satisfy objects.Object so bitmap slots -// can carry them in the value position. The Hdr/Type slots are not -// needed by the runtime (HAMT is private to the runtime), but we -// implement them so the value can flow through any code that expects -// an Object. -func (a *arrayNode) Type() *objects.Type { return nil } -func (a *arrayNode) Hdr() *objects.Header { return nil } -func (b *bitmapNode) Type() *objects.Type { return nil } -func (b *bitmapNode) Hdr() *objects.Header { return nil } -func (c *collisionNode) Type() *objects.Type { return nil } -func (c *collisionNode) Hdr() *objects.Header { return nil } +// arrayNodeDealloc releases the reference held on every non-nil child. +// +// CPython: Python/hamt.c:1872 hamt_node_array_dealloc +func arrayNodeDealloc(o objects.Object) { + a := o.(*arrayNode) + for i := 0; i < arrayNodeSize; i++ { + if a.children[i] != nil { + objects.Decref(a.children[i]) + a.children[i] = nil + } + } +} + +// arrayNodeTraverse visits every non-nil child. +// +// CPython: Python/hamt.c:1857 hamt_node_array_traverse +func arrayNodeTraverse(o objects.Object, visit objects.Visitor) error { + a := o.(*arrayNode) + for i := 0; i < arrayNodeSize; i++ { + if a.children[i] == nil { + continue + } + if err := visit(a.children[i]); err != nil { + return err + } + } + return nil +} diff --git a/hamt/types.go b/hamt/types.go index c0b537bfa..4b27db1ff 100644 --- a/hamt/types.go +++ b/hamt/types.go @@ -15,6 +15,33 @@ import "github.com/tamnd/gopy/objects" // CPython: Python/hamt.c:2814 _PyHamt_Type var HamtType = objects.NewType("hamt", []*objects.Type{objects.ObjectType()}) +func init() { + HamtType.Dealloc = hamtDealloc + HamtType.TpTraverse = hamtTraverse +} + +// hamtDealloc releases the reference the Hamt holds on its root node. +// +// CPython: Python/hamt.c:2420 hamt_tp_clear / hamt_dealloc +func hamtDealloc(o objects.Object) { + h := o.(*Hamt) + if h.root != nil { + objects.Decref(h.root) + h.root = nil + } +} + +// hamtTraverse visits the root node for the cyclic collector. +// +// CPython: Python/hamt.c:2410 hamt_tp_traverse +func hamtTraverse(o objects.Object, visit objects.Visitor) error { + h := o.(*Hamt) + if h.root != nil { + return visit(h.root) + } + return nil +} + // HamtKeysType, HamtValuesType, HamtItemsType wrap the three iterator // shapes CPython exposes through _PyHamt_NewIterKeys etc. They are // runtime-private so we register only the bare type name. diff --git a/imp/extension.go b/imp/extension.go new file mode 100644 index 000000000..c4fd20839 --- /dev/null +++ b/imp/extension.go @@ -0,0 +1,1058 @@ +package imp + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "sort" + "sync" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/objects" +) + +// This file ports the slice of CPython's extension-module import machinery +// the standard-library test suite drives through _testmultiphase / +// _testsinglephase and the SubinterpImportTests: the PEP 489 "multiple +// interpreters" / per-interpreter-GIL compatibility check and the +// subinterpreter interpreter-state that check consults. +// +// gopy cannot dlopen a compiled C extension, so the extensions are ported +// as Go builtins and registered here keyed by module name, each carrying +// the PEP 489 slot metadata its PyModuleDef declares. _imp.create_dynamic +// dispatches to this registry, applying CheckExtSubinterpCompat exactly the +// way Objects/moduleobject.c:359 PyModule_FromDefAndSpec2 and +// Python/import.c:1555 _PyImport_CheckSubinterpIncompatibleExtensionAllowed +// do before the module body runs. + +// Multiple-interpreters support levels, the Py_mod_multiple_interpreters +// slot values an extension's PyModuleDef may carry. +// +// CPython: Include/moduleobject.h:90 Py_MOD_MULTIPLE_INTERPRETERS_* +const ( + // MultiInterpNotSupported is Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED. + MultiInterpNotSupported = iota + // MultiInterpSupported is Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED, the + // default when a multi-phase module declares no slot. + MultiInterpSupported + // MultiInterpPerInterpreterGIL is Py_MOD_PER_INTERPRETER_GIL_SUPPORTED. + MultiInterpPerInterpreterGIL +) + +// PEP 489 slot IDs, mirroring the Py_mod_* values an extension's +// PyModuleDef_Slot table carries. ExtSlotLast is _Py_mod_LAST_SLOT, the +// largest valid ID; a table entry outside [1, ExtSlotLast] is the +// "unknown slot ID" error PyModule_FromDefAndSpec2 reports. +// +// CPython: Include/moduleobject.h:74 Py_mod_create / Py_mod_exec / ... +const ( + ExtSlotCreate = 1 // Py_mod_create + ExtSlotExec = 2 // Py_mod_exec + ExtSlotMultipleInterpreters = 3 // Py_mod_multiple_interpreters + ExtSlotGIL = 4 // Py_mod_gil + ExtSlotLast = 4 // _Py_mod_LAST_SLOT +) + +// PyInit result kinds the export_* test variants reproduce, the +// classification _PyImport_RunModInitFunc assigns to a PyInit_* return +// before PyModule_FromDefAndSpec2 ever runs. ExtInitNormal is the only +// kind a real multi-phase module produces. +// +// CPython: Python/importdl.c:416 _PyImport_RunModInitFunc +const ( + // ExtInitNormal: PyInit returned an initialized PyModuleDef. Proceed + // to the create/exec protocol (export_unreported_exception also lands + // here, with InitRaised set, so the ERR_UNREPORTED_EXC branch fires). + ExtInitNormal = iota + // ExtInitReturnedNil: PyInit returned NULL (export_null / export_raise). + ExtInitReturnedNil + // ExtInitUninitialized: PyInit returned a PyModuleDef that was never + // passed through PyModuleDef_Init (export_uninitialized). + ExtInitUninitialized +) + +// ExtMethod is one PyMethodDef the module def carries in m_methods: a +// module-level function added to the module (or non-module create result) +// during the create phase by _add_methods_to_object. +// +// CPython: Objects/moduleobject.c:_add_methods_to_object +type ExtMethod struct { + Name string + Fn func([]objects.Object, map[string]objects.Object) (objects.Object, error) +} + +// ExtSlot is one PyModuleDef_Slot. ID is the raw slot identifier (so the +// bad_slot_large / bad_slot_negative variants can carry out-of-range IDs). +// Create / Exec are set for Py_mod_create / Py_mod_exec; Value carries the +// Py_mod_multiple_interpreters / Py_mod_gil integer. +// +// Create returns (object, raised): a nil object models a create function +// that returned NULL, and a non-nil raised models PyErr_Occurred() after +// the call (the create_* error variants). Exec returns (ret, raised) the +// way a C execfunc returns an int and may leave an exception set: ret != 0 +// is failure, raised is the exception it set (if any). +// +// CPython: Include/moduleobject.h:74 PyModuleDef_Slot +type ExtSlot struct { + ID int + Create func() (objects.Object, *pyerrors.Exception) + Exec func(m objects.Object) (int, *pyerrors.Exception) + Value int +} + +// ExtModuleDef is gopy's analog of a C extension's PyModuleDef plus the +// PEP 489 slot table the loader reads. Init builds the fully populated +// module body (gopy has no separate create / exec phase for builtins, so +// the create_dynamic step runs Init and exec_dynamic is a no-op). +// +// CPython: Include/moduleobject.h:74 PyModuleDef_Slot +type ExtModuleDef struct { + Name string + // SinglePhase marks a legacy single-phase-init module. Such modules + // never support loading under multiple interpreters, so the compat + // check rejects them in any non-main interpreter that enforces it. + SinglePhase bool + // HasMultiInterpSlot records whether the def declared a + // Py_mod_multiple_interpreters slot. When false a multi-phase module + // defaults to MultiInterpSupported. + HasMultiInterpSlot bool + // MultiInterp is the Py_mod_multiple_interpreters slot value. + MultiInterp int + // MSize is the PyModuleDef.m_size of a single-phase module: -1 for a + // "basic" module with no per-module state that does not support repeated + // initialization (its __dict__ is cached in m_copy and copied on reload), + // 0 for a "reinit" module, and >0 for a module that carries its own state. + // Only -1 modules are reloaded from the cached dict; the others re-run + // their init function on every load. + // + // CPython: Python/import.c:920 single-phase init module kinds + MSize int + // DefName is the def's m_name, the module's __name__. It defaults to Name + // but differs for an "indirect" variant whose init function builds a + // module under another def's name (PyInit__testsinglephase_basic_wrapper). + DefName string + // ShareDefWith names a registered module whose def (and thus its + // modules_by_index slot and cached m_copy) this entry reuses, the gopy + // analog of one init function calling another's. + // + // CPython: Python/import.c:960 "two or more modules share a PyModuleDef" + ShareDefWith string + // CheckCacheFirst marks the *_check_cache_first variants, whose init + // returns PyState_FindModule(def) before creating a fresh module and which + // are never recorded in the extensions cache. + // + // CPython: Modules/_testsinglephase.c:690 _check_cache_first modules + CheckCacheFirst bool + // Init builds the module. A non-nil error models a PyInit function + // that raised before returning its def. Used by the legacy single-phase + // path and by the simple multi-phase mains that have no slot table. + Init func() (*objects.Module, error) + + // MultiPhase marks a PEP 489 multi-phase def driven through the + // create/exec slot protocol (PyModule_FromDefAndSpec2 + + // PyModule_ExecDef) rather than a single Init. When set, the fields + // below describe the def the loader reads. + MultiPhase bool + // Doc is m_doc, set on the module by PyModule_SetDocString after create. + Doc string + // Methods is m_methods, added to the module by _add_methods_to_object. + Methods []ExtMethod + // Slots is the m_slots table, scanned in declaration order. + Slots []ExtSlot + // Variant marks a def reachable only by an explicit ExtensionFileLoader + // (the test loads it by name against the main extension's origin); it is + // not materialized as a discoverable stub on the path. The PyInit_x and + // pkg.* / non-ASCII test variants live in the one _testmultiphase + // extension, so they have no file of their own. + Variant bool + // InitKind classifies the PyInit_* return for the export_* edge cases. + // ExtInitNormal for a real multi-phase def. + InitKind int + // InitRaised, when non-nil, is the exception PyInit_* set before + // returning. With InitKind ExtInitReturnedNil it is the EXCEPTION re-raised + // as-is (export_raise); with ExtInitNormal it is the ERR_UNREPORTED_EXC the + // loader chains a SystemError onto (export_unreported_exception). + InitRaised func() *pyerrors.Exception +} + +var ( + extMu sync.Mutex + extRegistry = map[string]*ExtModuleDef{} +) + +// RegisterExtModule records an extension module by name. Test-extension +// packages call it from their package init, the gopy stand-in for the +// inittab entry a compiled extension would expose. +func RegisterExtModule(def *ExtModuleDef) { + extMu.Lock() + extRegistry[def.Name] = def + extMu.Unlock() +} + +// FindExtModule returns the registered extension def for name, or nil. +func FindExtModule(name string) *ExtModuleDef { + extMu.Lock() + def := extRegistry[name] + extMu.Unlock() + return def +} + +// ExtModuleNames returns the registered extension-module names, sorted. +func ExtModuleNames() []string { + extMu.Lock() + names := make([]string, 0, len(extRegistry)) + for n := range extRegistry { + names = append(names, n) + } + extMu.Unlock() + sort.Strings(names) + return names +} + +// interpState models the slice of PyInterpreterState the extension compat +// check reads: whether this is the main interpreter, whether it runs with +// its own GIL, and the check_multi_interp_extensions config flag (plus the +// _imp._override_multi_interp_extensions_check override). +// +// CPython: Include/internal/pycore_interp.h PyInterpreterState (ceval.own_gil, +// feature flags) +type interpState struct { + isMain bool + ownGil bool + checkMulti bool + // override is the _imp._override_multi_interp_extensions_check value: + // <0 force-disable, 0 use config, >0 force-enable. + override int + // id is the interpreter id; the main interpreter is 0. It tags the + // extensions-cache entries a single-phase module records so a reload only + // reuses a dict the same interpreter owns. + id int64 + // modByIndex is the interpreter's modules_by_index cache: m_index -> + // module, the table PyState_FindModule / look_up_self consults. + // + // CPython: Include/internal/pycore_interp.h modules_by_index + modByIndex map[int]*objects.Module + // hiddenExt holds the registered extension-module sys.modules entries + // this subinterpreter shadowed on entry. CPython gives every interpreter + // its own sys.modules, so a subinterpreter re-imports an extension through + // import_find_extension (firing the compat gate) even when the main + // interpreter already cached it. gopy shares one sys.modules dict, so a + // push removes those entries (forcing the re-import) and the matching pop + // restores them. nil on the main interpreter. + // + // CPython: Include/internal/pycore_interp.h imports.modules + hiddenExt map[string]objects.Object +} + +var ( + interpMu sync.Mutex + interpStack = []*interpState{{isMain: true, id: 0, modByIndex: map[int]*objects.Module{}}} + nextInterpID int64 +) + +// currentInterp returns the interpreter state on top of the stack. gopy +// runs subinterpreter scripts synchronously on the calling goroutine, so a +// single push/pop stack tracks the active interpreter for the duration of a +// run_in_subinterp_with_config / _interpreters.run_string call. +func currentInterp() *interpState { + interpMu.Lock() + defer interpMu.Unlock() + return interpStack[len(interpStack)-1] +} + +// PushSubinterp pushes a fresh non-main interpreter state for the duration +// of a subinterpreter run. ownGil reflects the config gil ('own' -> true, +// 'shared'/'default' -> false); checkMulti is config.check_multi_interp_extensions. +// +// CPython: Python/pylifecycle.c:586 init_interp_create_gil (own_gil) and +// Python/interpconfig.c:262 check_multi_interp_extensions feature flag. +func PushSubinterp(ownGil, checkMulti bool) { + s := &interpState{ + ownGil: ownGil, + checkMulti: checkMulti, + modByIndex: map[int]*objects.Module{}, + hiddenExt: hideExtModules(), + } + interpMu.Lock() + nextInterpID++ + s.id = nextInterpID + interpStack = append(interpStack, s) + interpMu.Unlock() +} + +// PopSubinterp pops the interpreter state pushed by PushSubinterp. The main +// interpreter at the bottom of the stack is never popped. +func PopSubinterp() { + interpMu.Lock() + var popped *interpState + if len(interpStack) > 1 { + popped = interpStack[len(interpStack)-1] + interpStack = interpStack[:len(interpStack)-1] + } + interpMu.Unlock() + if popped != nil { + restoreExtModules(popped.hiddenExt) + } +} + +// hideExtModules removes every registered extension module's sys.modules +// entry, returning the removed entries so PopSubinterp can restore them. A +// fresh subinterpreter has an empty sys.modules, so its first `import name` +// of an extension misses and re-runs the import (firing the PEP 489 compat +// gate through import_find_extension) instead of returning the main +// interpreter's cached module. gopy shares the one sys.modules dict, so the +// removal models the per-interpreter cache for the duration of the run. +// +// CPython: Python/import.c:1964 import_find_extension +func hideExtModules() map[string]objects.Object { + hidden := map[string]objects.Object{} + for _, name := range ExtModuleNames() { + if v, ok := GetModuleRaw(name); ok { + hidden[name] = v + RemoveModule(name) + } + } + return hidden +} + +// restoreExtModules undoes hideExtModules when a subinterpreter run ends: it +// drops any extension entry the subinterpreter left behind and reinstates the +// main interpreter's originals, so the shared sys.modules looks untouched. +func restoreExtModules(hidden map[string]objects.Object) { + for _, name := range ExtModuleNames() { + RemoveModule(name) + } + for name, v := range hidden { + sysModulesMu.Lock() + _ = sysModules.SetItem(objects.NewStr(name), v) + sysModulesMu.Unlock() + } +} + +// SetMultiInterpOverride sets the current interpreter's +// check_multi_interp_extensions override and returns the previous value. +// +// CPython: Python/import.c:5052 _imp__override_multi_interp_extensions_check_impl +func SetMultiInterpOverride(override int) int { + interpMu.Lock() + defer interpMu.Unlock() + s := interpStack[len(interpStack)-1] + old := s.override + s.override = override + return old +} + +// checkMultiInterpExtensions reports whether the current interpreter +// enforces the subinterpreter-incompatible-extension check. +// +// CPython: Python/import.c:1538 check_multi_interp_extensions +func checkMultiInterpExtensions(s *interpState) bool { + if s.override < 0 { + return false + } + if s.override > 0 { + return true + } + return s.checkMulti +} + +// CheckExtSubinterpCompat applies the PEP 489 multiple-interpreters / +// per-interpreter-GIL compatibility check to def against the active +// interpreter. It returns an ImportError-tagged error when the module may +// not be loaded in the current subinterpreter, and nil otherwise. +// +// CPython: Objects/moduleobject.c:359 PyModule_FromDefAndSpec2 (slot gate) +// CPython: Python/import.c:1555 _PyImport_CheckSubinterpIncompatibleExtensionAllowed +func CheckExtSubinterpCompat(def *ExtModuleDef) error { + s := currentInterp() + if s.isMain { + return nil + } + // Single-phase-init modules never support multiple interpreters; the + // fresh-import and cached-reload paths both call the check directly. + // + // CPython: Python/import.c:1983 import_find_extension / 2198 import_run_extension + if def.SinglePhase { + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + return nil + } + + multi := MultiInterpSupported + if def.HasMultiInterpSlot { + multi = def.MultiInterp + } + switch { + case multi == MultiInterpNotSupported: + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + case multi != MultiInterpPerInterpreterGIL && s.ownGil: + // Supported-but-not-per-interpreter-GIL: only rejected when the + // subinterpreter runs with its own GIL. + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + } + return nil +} + +// subinterpIncompatible builds the ImportError the compat check raises. The +// message matches CPython byte-for-byte so the SubinterpImportTests' +// equality assertions on str(exc) pass. +// +// CPython: Python/import.c:1560 PyErr_Format(PyExc_ImportError, ...) +func subinterpIncompatible(name string) error { + msg := fmt.Sprintf("module %s does not support loading in subinterpreters", name) + exc := pyerrors.New(pyerrors.PyExc_ImportError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + return objects.NewRaisedError(exc, "ImportError: "+msg) +} + +// extDef is gopy's analog of a single PyModuleDef instance: the unit the +// extensions cache and the modules_by_index table key on. Two registry +// entries that share a def (an init function that calls another's) point at +// the same extDef, so they share an m_index (PyState_FindModule / look_up_self) +// and, for a basic module, the cached m_copy. +// +// CPython: Include/internal/pycore_moduleobject.h PyModuleDef_Base +type extDef struct { + name string // m_name; the module's __name__, may differ from the import name + mSize int // PyModuleDef.m_size + index int // m_index into modules_by_index; 0 until assigned on first load +} + +// extCacheKey keys the extensions cache by (filename, name), exactly as +// _PyRuntime.imports.extensions does. +// +// CPython: Python/import.c:1379 _extensions_cache_set +type extCacheKey struct{ path, name string } + +// extCacheValue is the cached single-phase module record: its def, a shallow +// copy of the module __dict__ after the first load (m_copy, basic modules +// only), and the interpreter that owns the copy. +// +// CPython: Python/import.c:1024 struct extensions_cache_value +type extCacheValue struct { + def *extDef + mCopy *objects.Dict + interpid int64 +} + +var ( + extCacheMu sync.Mutex + extCache = map[extCacheKey]*extCacheValue{} + extDefs = map[string]*extDef{} // def name -> shared def + nextModIdx = 0 + modToDef = map[*objects.Module]*extDef{} // built module -> its def +) + +// defFor returns the shared extDef for a registered single-phase module, +// creating it on first use. Entries that name a ShareDefWith reuse the +// referenced module's def so they land in the same modules_by_index slot. +func defFor(def *ExtModuleDef) *extDef { + name := def.DefName + if name == "" { + name = def.Name + } + if def.ShareDefWith != "" { + if shared := FindExtModule(def.ShareDefWith); shared != nil { + sn := shared.DefName + if sn == "" { + sn = shared.Name + } + name = sn + } + } + if ed, ok := extDefs[name]; ok { + return ed + } + ed := &extDef{name: name, mSize: def.MSize} + extDefs[name] = ed + return ed +} + +// CreateExtModule dispatches _imp.create_dynamic to the extension registry. +// It mirrors Python/import.c import_run_extension: a cached single-phase +// module is reloaded from the cache, otherwise the init runs fresh behind the +// PEP 489 compat gate and (for single-phase modules) its result is recorded +// in the extensions cache. path is spec.origin, the extensions-cache key +// alongside name. The caller attaches __file__ / __spec__ / __loader__. +// +// found is false when name is not a registered gopy extension, letting the +// caller fall back to the "gopy cannot dlopen" ImportError. +// +// CPython: Python/import.c:2001 import_run_extension +func CreateExtModule(name, path string) (mod objects.Object, found bool, err error) { + def := FindExtModule(name) + if def == nil { + return nil, false, nil + } + if def.MultiPhase { + // PEP 489 multi-phase: run _PyImport_RunModInitFunc result validation + // then PyModule_FromDefAndSpec2 (the create step). The exec slots run + // later, from exec_dynamic -> ExecExtModule. The result may be a + // non-module object (the nonmodule create variants), which the + // dynamic-loader contract surfaces verbatim. + m, cerr := createMultiPhase(def, name) + if cerr != nil { + return nil, true, cerr + } + return m, true, nil + } + if !def.SinglePhase { + // Legacy multi-phase main with an Init closure and no slot table: the + // compat gate stands in for PyModule_FromDefAndSpec2's slot check. + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, true, cerr + } + m, ierr := def.Init() + if ierr != nil { + return nil, true, ierr + } + return m, true, nil + } + + ed := func() *extDef { + extCacheMu.Lock() + defer extCacheMu.Unlock() + return defFor(def) + }() + + // import_find_extension: a cached single-phase module is reloaded without + // re-running its init. The *_check_cache_first variants are never cached. + // + // CPython: Python/import.c:1964 import_find_extension + if !def.CheckCacheFirst { + extCacheMu.Lock() + cached, ok := extCache[extCacheKey{path, name}] + extCacheMu.Unlock() + if ok { + mod, rerr := reloadSinglephase(def, ed, cached, name) + return mod, true, rerr + } + } + mod, rerr := runSinglephase(def, ed, name, path) + return mod, true, rerr +} + +// sysErr surfaces a fresh SystemError(msg) as a Go error so it raises the +// exact exception type the C loader's PyErr_Format(PyExc_SystemError, ...) +// would. +// +// CPython: Python/errors.c PyErr_Format +func sysErr(msg string) error { + exc := pyerrors.New(pyerrors.PyExc_SystemError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + return objects.NewRaisedError(exc, "SystemError: "+msg) +} + +// raiseExc surfaces an existing exception object verbatim, the analog of the +// C loader returning after a slot left PyErr_Occurred() set (it leaves the +// pending exception in place rather than chaining onto it). +func raiseExc(exc *pyerrors.Exception) error { + msg := exc.ExcType.Name + if exc.Args != nil && exc.Args.Len() > 0 { + if s, ok := exc.Args.Item(0).(*objects.Unicode); ok { + msg = exc.ExcType.Name + ": " + s.Value() + } + } + return objects.NewRaisedError(exc, msg) +} + +// chainedSysErr builds the SystemError(msg) the loader raises through +// _PyErr_FormatFromCause: __cause__ and __context__ point at the offending +// exception and __suppress_context__ is set, so `raise ... from cause` +// chaining is preserved (the test asserts cm.exception.__cause__ is not None). +// +// CPython: Python/errors.c:1438 _PyErr_FormatFromCause +func chainedSysErr(cause *pyerrors.Exception, msg string) error { + exc := pyerrors.New(pyerrors.PyExc_SystemError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + exc.Cause = cause + exc.Context = cause + exc.ContextSet = true + exc.Suppress = true + return objects.NewRaisedError(exc, "SystemError: "+msg) +} + +var ( + pendingExecMu sync.Mutex + pendingExec = map[*objects.Module]*ExtModuleDef{} +) + +// registerPendingExec records the def whose exec slots ExecExtModule should +// run when exec_dynamic reaches this module, the gopy stand-in for the +// md_def the C module carries from create through exec. +func registerPendingExec(mod *objects.Module, def *ExtModuleDef) { + pendingExecMu.Lock() + pendingExec[mod] = def + pendingExecMu.Unlock() +} + +// takePendingExec returns and clears the pending exec def for mod. +func takePendingExec(mod *objects.Module) *ExtModuleDef { + pendingExecMu.Lock() + def := pendingExec[mod] + delete(pendingExec, mod) + pendingExecMu.Unlock() + return def +} + +// createMultiPhase ports the create half of the PEP 489 multi-phase load: +// _PyImport_RunModInitFunc's classification of the PyInit_* return, then +// PyModule_FromDefAndSpec2 (the create step). The exec slots run later, from +// exec_dynamic -> ExecExtModule. +// +// CPython: Python/importdl.c:118 _PyImport_RunModInitFunc (apply_error) +// CPython: Objects/moduleobject.c:269 PyModule_FromDefAndSpec2 +func createMultiPhase(def *ExtModuleDef, name string) (objects.Object, error) { + switch def.InitKind { + case ExtInitReturnedNil: + // PyInit returned NULL. If it set an exception (export_raise) re-raise + // it as-is; otherwise it is the "without raising an exception" case. + // + // CPython: Python/importdl.c apply_error (ERR_MISSING) + if def.InitRaised != nil { + return nil, raiseExc(def.InitRaised()) + } + return nil, sysErr(fmt.Sprintf("initialization of %s failed without raising an exception", name)) + case ExtInitUninitialized: + // PyInit returned a def that never went through PyModuleDef_Init. + // + // CPython: Python/importdl.c apply_error (ERR_UNINITIALIZED) + return nil, sysErr(fmt.Sprintf("init function of %s returned uninitialized object", name)) + } + // ExtInitNormal. export_unreported_exception returns a real def but leaves + // an exception set, which the loader chains a SystemError onto. + // + // CPython: Python/importdl.c apply_error (ERR_UNREPORTED_EXC) + if def.InitRaised != nil { + return nil, chainedSysErr(def.InitRaised(), fmt.Sprintf("initialization of %s raised unreported exception", name)) + } + return fromDefAndSpec(def, name) +} + +// fromDefAndSpec ports PyModule_FromDefAndSpec2: the m_size guard, the slot +// scan (validating IDs and rejecting duplicate create / multiple-interpreters +// slots), the subinterpreter compat gate, the create slot (or PyModule_New), +// the non-module state / exec-slot checks, and the methods / doc population. +// +// scanExtSlots walks the def's slot table once, validating slot IDs and +// rejecting a duplicate create slot or repeated multiple-interpreters / gil +// slots. It returns the lone create slot (nil when absent) and whether any +// exec slot is present, the two facts fromDefAndSpec needs downstream. +// +// CPython: Objects/moduleobject.c:269 PyModule_FromDefAndSpec2 (slot scan) +func scanExtSlots(def *ExtModuleDef, name string) (createSlot *ExtSlot, hasExec bool, err error) { + sawCreate := false + sawMultiInterp := false + sawGIL := false + for i := range def.Slots { + s := &def.Slots[i] + switch s.ID { + case ExtSlotCreate: + if sawCreate { + return nil, false, sysErr(fmt.Sprintf("module %s has multiple create slots", name)) + } + sawCreate = true + createSlot = s + case ExtSlotExec: + hasExec = true + case ExtSlotMultipleInterpreters: + if sawMultiInterp { + return nil, false, sysErr(fmt.Sprintf("module %s has more than one 'multiple interpreters' slots", name)) + } + sawMultiInterp = true + case ExtSlotGIL: + if sawGIL { + return nil, false, sysErr(fmt.Sprintf("module %s has more than one 'gil' slot", name)) + } + sawGIL = true + default: + return nil, false, sysErr(fmt.Sprintf("module %s uses unknown slot ID %d", name, s.ID)) + } + } + return createSlot, hasExec, nil +} + +// CPython: Objects/moduleobject.c:269 PyModule_FromDefAndSpec2 +func fromDefAndSpec(def *ExtModuleDef, name string) (objects.Object, error) { + if def.MSize < 0 { + return nil, sysErr(fmt.Sprintf("module %s: m_size may not be negative for multi-phase initialization", name)) + } + + createSlot, hasExec, serr := scanExtSlots(def, name) + if serr != nil { + return nil, serr + } + + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, cerr + } + + var m objects.Object + if createSlot != nil { + obj, raised := createSlot.Create() + if obj == nil { + if raised != nil { + return nil, raiseExc(raised) + } + return nil, sysErr(fmt.Sprintf("creation of module %s failed without setting an exception", name)) + } + if raised != nil { + return nil, chainedSysErr(raised, fmt.Sprintf("creation of module %s raised unreported exception", name)) + } + m = obj + } else { + defName := def.DefName + if defName == "" { + defName = name + } + m = objects.NewModule(defName) + } + + mod, isModule := m.(*objects.Module) + if !isModule { + // A non-module create result may not carry module state or exec slots. + if def.MSize > 0 { + return nil, sysErr(fmt.Sprintf("module %s is not a module object, but requests module state", name)) + } + if hasExec { + return nil, sysErr(fmt.Sprintf("module %s specifies execution slots, but did not create a ModuleType instance", name)) + } + } + + if err := addExtMethods(m, def.Methods); err != nil { + return nil, err + } + if def.Doc != "" { + if err := objects.SetAttr(m, objects.NewStr("__doc__"), objects.NewStr(def.Doc)); err != nil { + return nil, err + } + } + if isModule { + registerPendingExec(mod, def) + } + return m, nil +} + +// addExtMethods adds each m_methods entry to the create result as a bound +// module function, the gopy analog of _add_methods_to_object building a +// PyCFunction with the module as self and SetAttr-ing it. +// +// CPython: Objects/moduleobject.c:176 _add_methods_to_object +func addExtMethods(m objects.Object, methods []ExtMethod) error { + for _, meth := range methods { + fn := objects.NewBuiltinFunction(meth.Name, meth.Fn) + if err := objects.SetAttr(m, objects.NewStr(meth.Name), fn); err != nil { + return err + } + } + return nil +} + +// ExecExtModule ports PyModule_ExecDef: it runs the def's Py_mod_exec slots in +// declaration order, mapping a non-zero return or a left-set exception to the +// SystemError the C loader raises. It backs _imp.exec_dynamic. +// +// CPython: Objects/moduleobject.c:463 PyModule_ExecDef +func ExecExtModule(m objects.Object) error { + mod, ok := m.(*objects.Module) + if !ok { + return nil + } + def := takePendingExec(mod) + if def == nil { + return nil + } + name := def.Name + for i := range def.Slots { + s := &def.Slots[i] + if s.ID != ExtSlotExec || s.Exec == nil { + continue + } + ret, raised := s.Exec(mod) + if ret != 0 { + if raised != nil { + return raiseExc(raised) + } + return sysErr(fmt.Sprintf("execution of module %s failed without setting an exception", name)) + } + if raised != nil { + return chainedSysErr(raised, fmt.Sprintf("execution of module %s raised unreported exception", name)) + } + } + return nil +} + +// runSinglephase ports the fresh-load path: it runs the init (on the "main +// interpreter", before the compat gate), applies the subinterpreter compat +// check, then records the module in modules_by_index and the extensions +// cache. A failing init inside a subinterpreter takes the gh-144601 path. +// +// CPython: Python/import.c:2078 import_run_extension +func runSinglephase(def *ExtModuleDef, ed *extDef, name, path string) (*objects.Module, error) { + inSubinterp := !currentInterp().isMain + mod, initErr := def.Init() + if initErr != nil { + if inSubinterp { + // gh-144601: the exception object can't be transferred across + // interpreters. Print it as an unraisable exception, then raise + // a different exception for the calling interpreter. + // + // CPython: Python/import.c:2156 PyErr_FormatUnraisable + if objects.WriteUnraisableHook != nil { + objects.WriteUnraisableHook(nil, "Exception while importing from subinterpreter", initErr) + } + // CPython: Python/import.c:2168 PyErr_SetString(PyExc_ImportError, ...) + return nil, fmt.Errorf("ImportError: failed to import from subinterpreter due to exception") + } + return nil, initErr + } + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, cerr + } + + s := currentInterp() + extCacheMu.Lock() + if ed.index == 0 { + nextModIdx++ + ed.index = nextModIdx + } + modToDef[mod] = ed + // update_global_state_for_extension caches the def under the main + // interpreter or for any m_size == -1 module; a basic module also stores + // a shallow copy of its dict for later reloads. The *_check_cache_first + // variants are deliberately not cached. + // + // CPython: Python/import.c:1761 update_global_state_for_extension + if !def.CheckCacheFirst && (s.isMain || ed.mSize == -1) { + var mCopy *objects.Dict + if ed.mSize == -1 { + mCopy = snapshotDict(mod.Dict()) + } + extCache[extCacheKey{path, name}] = &extCacheValue{def: ed, mCopy: mCopy, interpid: s.id} + } + extCacheMu.Unlock() + + setModuleByIndex(s, ed.index, mod) + return mod, nil +} + +// reloadSinglephase ports reload_singlephase_extension: a basic module +// (m_size == -1) is rebuilt by copying its cached dict into a fresh module +// without re-running init (so its global initialized_count is unchanged); a +// module with state re-runs its init function. +// +// CPython: Python/import.c:1869 reload_singlephase_extension +func reloadSinglephase(def *ExtModuleDef, ed *extDef, cached *extCacheValue, name string) (*objects.Module, error) { + // It may have been imported before in an interpreter that allows legacy + // modules but is barred in the current one. + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, cerr + } + s := currentInterp() + if ed.mSize == -1 { + // import_add_module: reuse the existing sys.modules entry so the + // reloaded module is the same object, then PyDict_Update its dict + // from the cached copy without re-running init. + // + // CPython: Python/import.c:1884 import_add_module / PyDict_Update + mod, ok := GetModule(name) + if !ok { + mod = objects.NewModule(ed.name) + AddModule(name, mod) + } + dst := mod.Dict() + for _, k := range cached.mCopy.Keys() { + v, gerr := cached.mCopy.GetItem(k) + if gerr != nil { + return nil, gerr + } + if serr := dst.SetItem(k, v); serr != nil { + return nil, serr + } + } + extCacheMu.Lock() + modToDef[mod] = ed + extCacheMu.Unlock() + setModuleByIndex(s, ed.index, mod) + return mod, nil + } + // m_size >= 0: re-run the init function. + mod, err := def.Init() + if err != nil { + return nil, err + } + extCacheMu.Lock() + modToDef[mod] = ed + extCacheMu.Unlock() + setModuleByIndex(s, ed.index, mod) + return mod, nil +} + +// snapshotDict returns a shallow copy of d, the gopy analog of the m_copy +// the import machinery saves after a basic module is first loaded. +// +// CPython: Python/import.c:1140 fixup_cached_def (def->m_base.m_copy) +func snapshotDict(d *objects.Dict) *objects.Dict { + out := objects.NewDict() + for _, k := range d.Keys() { + if v, err := d.GetItem(k); err == nil { + _ = out.SetItem(k, v) + } + } + return out +} + +// setModuleByIndex records mod in the interpreter's modules_by_index table, +// the slot PyState_FindModule / look_up_self reads. +// +// CPython: Python/import.c:651 _modules_by_index_set +func setModuleByIndex(s *interpState, index int, mod *objects.Module) { + if index <= 0 { + return + } + interpMu.Lock() + if s.modByIndex == nil { + s.modByIndex = map[int]*objects.Module{} + } + s.modByIndex[index] = mod + interpMu.Unlock() +} + +// ModuleSelf returns the module currently cached in modules_by_index for the +// def mod belongs to, the value PyState_FindModule(def) yields. It backs the +// test extension's look_up_self() method. +// +// CPython: Modules/_testsinglephase.c:374 common_look_up_self (PyState_FindModule) +func ModuleSelf(mod *objects.Module) objects.Object { + extCacheMu.Lock() + ed := modToDef[mod] + extCacheMu.Unlock() + if ed == nil || ed.index == 0 { + return objects.None() + } + s := currentInterp() + interpMu.Lock() + found := s.modByIndex[ed.index] + interpMu.Unlock() + if found == nil { + return objects.None() + } + return found +} + +// ClearExtension clears the internally cached data for a single-phase +// extension: its modules_by_index slot, the cached def's m_index/m_copy, and +// the extensions-cache entry. It backs _testinternalcapi.clear_extension. +// +// CPython: Python/import.c:903 _PyImport_ClearExtension +// +// (Python/import.c:2241 clear_singlephase_extension) +func ClearExtension(name, path string) error { + extCacheMu.Lock() + cached, ok := extCache[extCacheKey{path, name}] + if !ok { + extCacheMu.Unlock() + return nil + } + ed := cached.def + index := ed.index + ed.index = 0 + delete(extCache, extCacheKey{path, name}) + extCacheMu.Unlock() + + if index > 0 { + s := currentInterp() + interpMu.Lock() + delete(s.modByIndex, index) + interpMu.Unlock() + } + return nil +} + +// extensionSuffix is the file suffix gopy advertises for its +// (Go-implemented) extension modules. CPython derives it from the ABI tag +// and platform triple; gopy keeps the shape ("..so") so __file__ reads +// like a real extension path and the ExtensionFileLoader path hook matches. +// +// CPython: Lib/importlib/_bootstrap_external.py:_get_supported_file_loaders +func extensionSuffix() string { + return fmt.Sprintf(".gopy-314-%s-%s.so", runtime.GOOS, runtime.GOARCH) +} + +// ExtensionSuffixes returns the extension-module suffixes _imp.extension_suffixes +// reports. A single gopy suffix is enough for the test extensions. +// +// CPython: Python/import.c:4807 _imp_extension_suffixes_impl +func ExtensionSuffixes() []string { + return []string{extensionSuffix()} +} + +var ( + extDirMu sync.Mutex + extDirVal string +) + +// SetExtensionDir records the directory the materialized extension stub +// files live in (the gopy analog of CPython's lib-dynload). The path +// finder discovers the stubs there and ExtensionOrigin reports __file__ +// against it. +func SetExtensionDir(dir string) { + extDirMu.Lock() + extDirVal = dir + extDirMu.Unlock() +} + +func extensionDir() string { + extDirMu.Lock() + defer extDirMu.Unlock() + return extDirVal +} + +// ExtensionOrigin synthesizes the __file__ path for a Go-implemented +// extension: /, the location a compiled extension +// would occupy. When the extension dir is unset the bare filename is +// returned. +func ExtensionOrigin(name string) string { + suffix := extensionSuffix() + if dir := extensionDir(); dir != "" { + return filepath.Join(dir, name+suffix) + } + return name + suffix +} + +// MaterializeExtensions writes an empty stub file into dir +// for every registered extension module, the gopy stand-in for the +// compiled .so files CPython ships in lib-dynload. The real Python +// PathFinder -> FileFinder discovers these by suffix and hands them to +// ExtensionFileLoader, whose create_module calls _imp.create_dynamic -> +// CreateExtModule. The stub bytes are never read; the Go registry holds +// the actual module body. dir is recorded as the extension dir. +func MaterializeExtensions(dir string) error { + if err := os.MkdirAll(dir, 0o750); err != nil { + return err + } + suffix := extensionSuffix() + for _, name := range ExtModuleNames() { + // Variant defs (PyInit_x, pkg.*, the non-ASCII names) live inside the + // one _testmultiphase extension and are reached only by an explicit + // ExtensionFileLoader against that origin, so they never get a + // discoverable stub of their own. Skip them, matching CPython's + // single-.so-many-symbols model. + if def := FindExtModule(name); def != nil && def.Variant { + continue + } + p := filepath.Join(dir, name+suffix) + if _, err := os.Stat(p); err == nil { + continue + } + if err := os.WriteFile(p, nil, 0o600); err != nil { + return err + } + } + SetExtensionDir(dir) + return nil +} diff --git a/imp/frozen.go b/imp/frozen.go index 134d338f6..add356b42 100644 --- a/imp/frozen.go +++ b/imp/frozen.go @@ -9,7 +9,9 @@ package imp import ( + "errors" "sync" + "sync/atomic" "github.com/tamnd/gopy/objects" ) @@ -22,17 +24,137 @@ import ( type FrozenModule struct { // Name is the dotted module name, e.g. "importlib._bootstrap". Name string - // Code is the precompiled code object. nil for placeholder entries. + // Code is the precompiled code object. nil for placeholder entries + // and for source-backed entries (compiled lazily from Source). Code *objects.Code + // Source is the canonical .py source for entries whose bytecode is + // produced lazily by FrozenCompiler rather than pre-embedded. This + // stands in for CPython's marshaled frozen blob: gopy stores the + // source text (vendored verbatim) and compiles it on first use. + Source string + // OrigName is the name find_frozen reports for the entry. Frozen + // aliases (e.g. __phello_alias__ -> __hello__) point at a different + // source module; FrozenImporter._resolve_filename keys the on-disk + // __file__ off this. Empty means the entry is its own origin. + // + // CPython: Python/frozen.c _PyImport_FrozenAliases + OrigName string + // OrigNone marks an alias entry whose alias target is NULL, so + // find_frozen reports origname None (e.g. __hello_only__). It + // overrides OrigName/Name when reporting the origin. + // + // CPython: Python/frozen.c:123 aliases {"__hello_only__", NULL} + OrigNone bool + // Embedded marks a genuinely frozen entry that always yields a code + // object, even when Source is empty (e.g. the empty __phello__.ham + // package __init__). CPython freezes these as real, non-empty + // marshaled code; gopy compiles the (possibly empty) Source on demand. + Embedded bool // IsPackage is true when the frozen module is a package (has __path__). IsPackage bool + + compileMu sync.Mutex + compiled *objects.Code + compileErr error + didCompile bool +} + +// FrozenCompiler turns frozen module source into a code object. It is +// installed once at interpreter startup (cmd/gopy wires gopyCompile) +// so the imp package need not depend on parser/compile directly, +// mirroring the SourceCompiler indirection used for path imports. +// +// CPython: Python/pythonrun.c:1102 Py_CompileStringExFlags +var FrozenCompiler func(src []byte, filename string) (*objects.Code, error) + +// CodeObject returns the entry's code object, compiling Source on first +// use. It returns (nil, nil) for a pure placeholder (no Code, no +// Source). The compiled result is cached so repeated imports reuse one +// code object, matching CPython's single marshaled blob per entry. +func (m *FrozenModule) CodeObject() (*objects.Code, error) { + if m.Code != nil { + return m.Code, nil + } + if m.Source == "" && !m.Embedded { + return nil, nil + } + m.compileMu.Lock() + defer m.compileMu.Unlock() + if m.didCompile { + return m.compiled, m.compileErr + } + m.didCompile = true + if FrozenCompiler == nil { + m.compileErr = errors.New("imp: frozen compiler not installed") + return nil, m.compileErr + } + m.compiled, m.compileErr = FrozenCompiler([]byte(m.Source), "") + return m.compiled, m.compileErr +} + +// HasCode reports whether the entry can yield a code object, either +// pre-embedded or compilable from Source. Placeholder entries (the +// importlib bootstrap stubs, which gopy loads from disk) return false. +func (m *FrozenModule) HasCode() bool { + return m.Code != nil || m.Source != "" || m.Embedded +} + +// Origin returns the name find_frozen reports for the entry and whether +// that origin is None. CPython seeds origname with the entry's own name, +// then resolve_module_alias overrides it for alias entries (possibly to +// NULL). _imp.find_frozen reports None when the resolved origname is +// NULL or empty. +// +// CPython: Python/import.c:3052 find_frozen (origname seed + alias) +// CPython: Python/import.c:4533 _imp_find_frozen_impl (NULL/empty -> None) +func (m *FrozenModule) Origin() (string, bool) { + if m.OrigNone { + return "", true + } + if m.OrigName != "" { + return m.OrigName, false + } + return m.Name, false } var ( frozenMu sync.RWMutex frozenModules = map[string]*FrozenModule{} + + // frozenOverride mirrors PyConfig.use_frozen_modules under the test + // override: >0 forces frozen on, <0 forces it off, 0 uses the + // default. test.support.import_helper toggles it via + // _imp._override_frozen_modules_for_tests. + // + // CPython: Python/import.c:2821 use_frozen + frozenOverride atomic.Int32 ) +// SetFrozenOverride records the test override for frozen-module lookup +// and returns the previous value. +// +// CPython: Python/import.c:5034 _imp__override_frozen_modules_for_tests_impl +func SetFrozenOverride(v int) int { + return int(frozenOverride.Swap(int32(v))) +} + +// UseFrozen reports whether frozen-module lookup is currently enabled. +// gopy's default (override 0) is on, matching CPython's release-build +// PyConfig.use_frozen_modules default; entries without embedded code +// still fall through to the path finder via HasCode. +// +// CPython: Python/import.c:2821 use_frozen +func UseFrozen() bool { + switch v := frozenOverride.Load(); { + case v > 0: + return true + case v < 0: + return false + default: + return true + } +} + // RegisterFrozen adds or replaces a frozen module in the table. It is // safe to call from multiple goroutines and from init(). // diff --git a/imp/frozen_bootstrap.go b/imp/frozen_bootstrap.go index 0990133c4..a92deb468 100644 --- a/imp/frozen_bootstrap.go +++ b/imp/frozen_bootstrap.go @@ -11,19 +11,26 @@ package imp func init() { - // _frozen_importlib — Lib/importlib/_bootstrap.py - // CPython: Python/frozen.c:L56 + // _frozen_importlib — Lib/importlib/_bootstrap.py. gopy loads the + // bootstrap from disk at startup and caches it in sys.modules, so this + // frozen code is never executed; it exists so FrozenImporter.find_spec + // reports the module with origname "importlib._bootstrap", matching + // the build-time frozen alias. + // + // CPython: Python/frozen.c:70 bootstrap_modules / :116 aliases RegisterFrozen(&FrozenModule{ Name: "_frozen_importlib", - Code: nil, + Embedded: true, + OrigName: "importlib._bootstrap", IsPackage: false, }) // _frozen_importlib_external — Lib/importlib/_bootstrap_external.py - // CPython: Python/frozen.c:L63 + // CPython: Python/frozen.c:71 bootstrap_modules / :117 aliases RegisterFrozen(&FrozenModule{ Name: "_frozen_importlib_external", - Code: nil, + Embedded: true, + OrigName: "importlib._bootstrap_external", IsPackage: false, }) diff --git a/imp/frozen_test_modules.go b/imp/frozen_test_modules.go new file mode 100644 index 000000000..2f4c0daca --- /dev/null +++ b/imp/frozen_test_modules.go @@ -0,0 +1,86 @@ +// Frozen test-module registrations. CPython compiles a handful of toy +// modules (__hello__, __phello__ and friends) into the interpreter so +// the import machinery has frozen targets to exercise without touching +// the filesystem. test_frozen and the importlib frozen tests import +// them through FrozenImporter. +// +// gopy keeps the source text (vendored verbatim from CPython's Lib/) +// rather than a marshaled blob and compiles it lazily via +// FrozenCompiler. The same modules are also vendored on disk under the +// stdlib root so the "frozen disabled" code paths can load them through +// the path finder, exactly as CPython ships Lib/__hello__.py alongside +// the frozen copy. +// +// CPython: Python/frozen.c:98 _PyImport_FrozenModules test entries +package imp + +// Canonical source for the frozen test modules. These mirror +// Lib/__hello__.py and the Lib/__phello__/ package byte-for-byte. +// +// CPython: Lib/__hello__.py +const frozenHelloSource = `initialized = True + +class TestFrozenUtf8_1: + """\u00b6""" + +class TestFrozenUtf8_2: + """\u03c0""" + +class TestFrozenUtf8_4: + """\U0001f600""" + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() +` + +// CPython: Lib/__phello__/__init__.py and Lib/__phello__/spam.py (same body) +const frozenPhelloSource = `initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() +` + +// frozenOnlySource is the body frozen as __hello_only__. CPython freezes +// it from Tools/freeze/flag.py, which has no on-disk stdlib copy, so the +// alias table records a NULL origin (loader_state.filename stays None). +// +// CPython: Tools/freeze/flag.py +const frozenOnlySource = `initialized = True +print("Hello world!") +` + +func init() { + // __hello__ and its aliases share one source module; the alias + // entries report __hello__ as their origin so FrozenImporter resolves + // the on-disk __file__ against Lib/__hello__.py. + // + // CPython: Python/frozen.c:96 test_modules / :114 aliases + RegisterFrozen(&FrozenModule{Name: "__hello__", Source: frozenHelloSource}) + RegisterFrozen(&FrozenModule{Name: "__hello_alias__", Source: frozenHelloSource, OrigName: "__hello__"}) + RegisterFrozen(&FrozenModule{Name: "__phello_alias__", Source: frozenHelloSource, OrigName: "__hello__", IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello_alias__.spam", Source: frozenHelloSource, OrigName: "__hello__"}) + + // __phello__ is a real frozen package. Its __init__ alias reports + // the synthetic "<__phello__" origin (the leading "<" tells + // FrozenImporter._resolve_filename to map it to the package __init__). + // + // CPython: Python/frozen.c:100-107 test_modules / :121 aliases + RegisterFrozen(&FrozenModule{Name: "__phello__", Source: frozenPhelloSource, IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.__init__", Source: frozenPhelloSource, OrigName: "<__phello__"}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham", Embedded: true, IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham.__init__", Embedded: true, OrigName: "<__phello__.ham"}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham.eggs", Embedded: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.spam", Source: frozenPhelloSource}) + + // __hello_only__ is frozen-only (no stdlib source), so its alias + // origin is NULL and find_frozen reports origname None. + // + // CPython: Python/frozen.c:108 test_modules / :123 aliases + RegisterFrozen(&FrozenModule{Name: "__hello_only__", Source: frozenOnlySource, OrigNone: true}) +} diff --git a/imp/import.go b/imp/import.go index 27faf852c..c688c3c68 100644 --- a/imp/import.go +++ b/imp/import.go @@ -18,6 +18,34 @@ import ( // ErrModuleNotFound is returned when no finder can locate the named module. var ErrModuleNotFound = fmt.Errorf("imp: ModuleNotFoundError") +// ErrBlockedNone tags the case where sys.modules[name] is None, the +// sentinel test.support.import_helper.import_fresh_module installs to block +// a module. CPython's _bootstrap raises ModuleNotFoundError(f'import of +// {name} halted; None in sys.modules', name=name); the `name` member is what +// importlib/abc.py inspects (`except ImportError as exc: if exc.name != ...`), +// so the VM must synthesize a typed error carrying it rather than a bare +// ImportError. It wraps ErrModuleNotFound so existing not-found checks match. +// +// CPython: Lib/importlib/_bootstrap.py:1387 _find_and_load (None sentinel) +var ErrBlockedNone = fmt.Errorf("%w: blocked None in sys.modules", ErrModuleNotFound) + +// ImportWarnHook routes an ImportWarning through the live _warnings +// machinery so it walks the filter list and any recording context +// manager (catch_warnings / assertWarns). It is nil until module +// _warnings wires it during init; the imp package cannot import +// _warnings directly because _warnings imports imp. +// +// CPython: Lib/importlib/_bootstrap.py:1353 _warnings.warn(msg, ImportWarning) +var ImportWarnHook func(message string) error + +// ErrModuleExecFailed tags a load failure that happened while executing a +// located module's body (rather than failing to locate it). The real Python +// exception is already live on the thread state with its own traceback, so +// the import opcode must propagate it instead of synthesizing a fresh +// ModuleNotFoundError. A nested `import missing` inside the body wraps +// ErrModuleNotFound, so callers check this sentinel first. +var ErrModuleExecFailed = fmt.Errorf("imp: module body raised") + // ImportModule performs an absolute import of name. It is the // zero-level convenience wrapper around ImportModuleLevel. // @@ -26,6 +54,32 @@ func ImportModule(exec Executor, name string) (*objects.Module, error) { return ImportModuleLevel(exec, name, "", 0) } +// ImportModuleLevelObject imports name relative to pkgname at the given +// level and returns whatever sys.modules holds, which need not be a +// module: a test (or pathological code) can inject an arbitrary object +// under a name, and CPython's import returns it unchanged so the +// IMPORT_FROM / _handle_fromlist that follows operates through plain +// attribute access. Normal imports always yield a real module, in which +// case this behaves exactly like ImportModuleLevel. +// +// CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject +func ImportModuleLevelObject(exec Executor, name, pkgname string, level int) (objects.Object, error) { + absName, err := resolveAbsName(name, pkgname, level) + if err != nil { + return nil, err + } + if raw, present := GetModuleRaw(absName); present { + if objects.IsNone(raw) { + return nil, fmt.Errorf("%w: %q", ErrBlockedNone, absName) + } + if _, ok := raw.(*objects.Module); !ok { + // A non-module cached entry: return it verbatim. + return raw, nil + } + } + return ImportModuleLevel(exec, name, pkgname, level) +} + // ImportModuleLevel imports name relative to pkgname at the given // level. level=0 is an absolute import; level>0 is relative. // @@ -55,13 +109,27 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects // CPython: Python/import.c:L1613 sys_modules_get_dict if raw, present := GetModuleRaw(absName); present { if objects.IsNone(raw) { - return nil, fmt.Errorf("ImportError: import of %q halted; None in sys.modules", absName) + return nil, fmt.Errorf("%w: %q", ErrBlockedNone, absName) } if mod, ok := raw.(*objects.Module); ok { return mod, nil } } + // 1b. Custom sys.meta_path finders. CPython's _find_spec walks + // sys.meta_path in order; the BuiltinImporter, FrozenImporter and + // PathFinder entries are realized by the Go steps below, so here we + // consult only the additional finders a program (or a test) inserts. + // A finder inserted at meta_path[0] therefore wins over the built-in + // and frozen lookups, matching CPython's ordering. + // + // CPython: Lib/importlib/_bootstrap.py:912 _find_spec + if mod, found, err := metaPathFind(exec, absName); err != nil { + return nil, err + } else if found { + return mod, nil + } + // 2. Frozen module. // CPython: Python/import.c:L1632 import_find_and_load if fm, ok := FindFrozen(absName); ok && fm.Code != nil { @@ -84,9 +152,53 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects // CPython: Objects/moduleobject.c:606 PyModule_AddFunctions mod.StampBuiltinModule() AddModule(absName, mod) + // CPython's BuiltinImporter sets __spec__/__loader__ on every + // built-in module; gopy's inittab path mirrors that so tools + // (pyclbr, runpy, inspect) that read module.__spec__ work. + // + // CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter.exec_module + AttachBuiltinSpec(exec, mod, absName) return mod, nil } + // 3b. Go-implemented C extension (the test-extension registry). CPython + // reaches these through PathFinder -> ExtensionFileLoader after a + // lib-dynload `.so` matches; gopy ports the extension as a Go builtin + // registered by name and builds it via the same create_dynamic compat + // gate, then attaches the ExtensionFileLoader spec so module.__spec__ + // reads like a real extension. + // + // CPython: Python/import.c:2001 import_run_extension + if ext := FindExtModule(absName); ext != nil { + obj, found, eerr := CreateExtModule(absName, ExtensionOrigin(absName)) + if eerr != nil { + return nil, eerr + } + if found { + mod, isModule := obj.(*objects.Module) + if !isModule { + // A non-module create result (a multi-phase nonmodule variant) + // is only ever reached through an explicit ExtensionFileLoader, + // never a plain `import name`, so it does not belong here. + return nil, fmt.Errorf("ImportError: create_dynamic for %s did not return a module", absName) + } + AddModule(absName, mod) + AttachExtensionSpec(exec, mod, absName, ExtensionOrigin(absName)) + parent, tail := splitParent(absName) + bindOnParent(parent, tail, mod) + // A plain `import name` of a multi-phase extension runs both the + // create and exec phases; importlib's ExtensionFileLoader splits + // them across create_dynamic / exec_dynamic, but the Go-side + // shortcut here must drive exec itself. + // + // CPython: Objects/moduleobject.c:463 PyModule_ExecDef + if eerr := ExecExtModule(mod); eerr != nil { + return nil, eerr + } + return mod, nil + } + } + // 4. Path-based finder (sys.path). // CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder.find_spec // @@ -107,6 +219,88 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects return nil, fmt.Errorf("%w: No module named %q", ErrModuleNotFound, absName) } +// metaPathFind consults the custom finders on sys.meta_path for absName. +// It skips the BuiltinImporter, FrozenImporter and PathFinder entries +// (identified by their class __name__), which gopy realizes in Go, and +// calls find_spec(name, path, None) on every other finder. The first +// finder that returns a spec drives loadFromSpec; a None return means the +// finder declined and the walk continues. +// +// CPython: Lib/importlib/_bootstrap.py:912 _find_spec +func metaPathFind(exec Executor, absName string) (*objects.Module, bool, error) { + sysMod, ok := GetModule("sys") + if !ok { + return nil, false, nil + } + mpObj, _ := sysMod.Dict().GetItem(objects.NewStr("meta_path")) + mp, _ := mpObj.(*objects.List) + if mp == nil || mp.Len() == 0 { + return nil, false, nil + } + // The parent package's __path__ becomes the `path` argument for a + // submodule import, mirroring _find_and_load's parent.__path__ read. + // + // CPython: Lib/importlib/_bootstrap.py:1227 path = parent_module.__path__ + pathArg := objects.None() + if parent, _ := splitParent(absName); parent != "" { + if pm, ok := GetModule(parent); ok { + if pp, err := pm.Dict().GetItem(objects.NewStr("__path__")); err == nil && pp != nil { + pathArg = pp + } + } + } + nameObj := objects.NewStr(absName) + for i := 0; i < mp.Len(); i++ { + finder := mp.Item(i) + if isBuiltinFinder(finder) { + continue + } + findSpec, err := objects.GetAttr(finder, objects.NewStr("find_spec")) + if err != nil { + // A legacy finder without find_spec does not participate; + // CPython's _find_spec skips it the same way. + continue + } + spec, err := objects.Call(findSpec, objects.NewTuple([]objects.Object{nameObj, pathArg, objects.None()}), nil) + if err != nil { + return nil, false, err + } + if spec == nil || objects.IsNone(spec) { + continue + } + mod, err := loadFromSpec(exec, absName, spec) + if err != nil { + return nil, false, err + } + parent, tail := splitParent(absName) + bindOnParent(parent, tail, mod) + return mod, true, nil + } + return nil, false, nil +} + +// isBuiltinFinder reports whether finder is one of the three importers +// gopy realizes in Go (BuiltinImporter, FrozenImporter, PathFinder). +// Those are class objects exposing __name__; the custom finders programs +// install on meta_path are instances that do not. +// +// CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter / :976 PathFinder +func isBuiltinFinder(finder objects.Object) bool { + nameAttr, err := objects.GetAttr(finder, objects.NewStr("__name__")) + if err != nil { + return false + } + name, ok := nameAttr.(*objects.Unicode) + if !ok { + return false + } + switch name.Value() { + case "BuiltinImporter", "FrozenImporter", "PathFinder", "WindowsRegistryFinder": + return true + } + return false +} + // resolveAbsName converts a relative import (level > 0) to an // absolute module name using pkgname as the anchor. // diff --git a/imp/inittab.go b/imp/inittab.go index fc207f130..b4c57511f 100644 --- a/imp/inittab.go +++ b/imp/inittab.go @@ -64,6 +64,43 @@ func ExtendInittab(entries []InittabEntry) error { return nil } +// shadowedByStdlib lists inittab names that CPython ships as pure-Python +// stdlib modules (.py files on sys.path), so they never appear in +// CPython's PyImport_Inittab. gopy keeps a Go implementation in the +// inittab as an early-bootstrap import shortcut, but the live import +// machinery must treat them as not-built-in: BuiltinImporter declines +// them and PathFinder loads the vendored source, so e.g. +// 'fnmatch' in sys.builtin_module_names stays False as on a normal +// CPython build, and is_builtin agrees with builtin_module_names. +var shadowedByStdlib = map[string]bool{ + "os": true, + "warnings": true, + "dataclasses": true, + "difflib": true, + "fnmatch": true, +} + +// ShadowedByStdlib reports whether name is registered in the inittab only +// as a bootstrap shortcut while CPython ships it as pure-Python stdlib, +// so it must be reported as not-built-in by is_builtin and excluded from +// sys.builtin_module_names. +func ShadowedByStdlib(name string) bool { + return shadowedByStdlib[name] +} + +// IsBuiltinName reports whether name resolves to a statically linked +// built-in module, the membership test behind both _imp.is_builtin and +// sys.builtin_module_names. Names shadowed by a pure-Python stdlib module +// are excluded so they load from source the way they do on CPython. +// +// CPython: Python/import.c:4720 _imp_is_builtin_impl +func IsBuiltinName(name string) bool { + if shadowedByStdlib[name] { + return false + } + return FindInitFunc(name) != nil +} + // FindInitFunc returns the InitFunc registered for name, or nil if the // module is not in the built-in table. // diff --git a/imp/pathfinder.go b/imp/pathfinder.go index bdcc730df..831db2a94 100644 --- a/imp/pathfinder.go +++ b/imp/pathfinder.go @@ -19,12 +19,16 @@ package imp import ( + "errors" "fmt" "os" "path/filepath" + "runtime" "strings" "sync" + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/marshal" "github.com/tamnd/gopy/objects" ) @@ -105,65 +109,417 @@ func (p *PathFinder) FindModule(exec Executor, name string) (*objects.Module, er // CPython: Lib/importlib/_bootstrap.py:1227 _find_and_load pm, err := ImportModuleLevel(exec, parent, "", 0) if err != nil { + // A parent that was located but raised while executing its + // __init__ must surface that exception verbatim (CPython + // propagates it from _find_and_load), so do not relabel it + // as a finder miss. Only a genuine parent-not-found is a + // miss the child lookup can recover from. + // + // CPython: Lib/importlib/_bootstrap.py:1227 _find_and_load + if errors.Is(err, ErrModuleExecFailed) || !errors.Is(err, ErrModuleNotFound) { + return nil, err + } return nil, fmt.Errorf("%w: parent package %q: %w", errFinderMiss, parent, err) } parentMod = pm } + // Importing the parent package may have imported this child as a side + // effect (e.g. the parent's __init__ ran `from .child import ...`), + // caching it in sys.modules and possibly rebinding the parent's + // attribute to something other than the submodule. In that case CPython + // returns the already-cached child and never reloads or re-binds it, so + // the parent's rebinding survives. + // + // CPython: Lib/importlib/_bootstrap.py:1290 _find_and_load_unlocked + if cached, ok := GetModule(name); ok { + return cached, nil + } paths, err := readPackagePath(parentMod) if err != nil { return nil, err } search = paths + + // Track this child on the parent spec for the duration of the load so a + // circular import that does getattr(parent, tail) before tail finishes + // loading gets the "cannot access submodule" diagnostic. + // + // CPython: Lib/importlib/_bootstrap.py:1340 parent_spec._uninitialized_submodules.append(child) + pop := pushUninitializedSubmodule(parentMod, tail) + defer pop() } + // PEP 420: a directory matching the tail with no __init__.py and no + // flat-file match is a namespace portion. CPython's PathFinder + // accumulates portions across every path entry and, only after no + // regular module is found anywhere, builds a namespace package whose + // __path__ is the collected portions. + // + // CPython: Lib/importlib/_bootstrap_external.py:1430 FileFinder.find_spec + // (namespace portion path) / Lib/importlib/_bootstrap.py:1167 PathFinder + var namespacePortions []string for _, entry := range search { - dir := entry - if dir == "" { - dir = "." - } - // Package case: //__init__.py. - // CPython: Lib/importlib/_bootstrap_external.py:1378 cache_module in cache - pkgDir := filepath.Join(dir, tail) - pkgInit := filepath.Join(pkgDir, "__init__.py") - if isFile(pkgInit) { - mod, err := loadAsPackage(exec, p.Compiler, pkgInit, pkgDir, name) - if err != nil { - return nil, err - } - bindOnParent(parent, tail, mod) - return mod, nil + mod, err := p.scanEntry(exec, entry, name, parent, tail, &namespacePortions) + if err != nil { + return nil, err } - // Module case: /.py. - // CPython: Lib/importlib/_bootstrap_external.py:1391 suffix loop - modFile := filepath.Join(dir, tail+".py") - if isFile(modFile) { - mod, err := loadAsModule(exec, p.Compiler, modFile, name, parent) - if err != nil { - return nil, err - } - bindOnParent(parent, tail, mod) + if mod != nil { return mod, nil } } + if len(namespacePortions) > 0 { + mod := loadAsNamespace(exec, name, parent, namespacePortions) + bindOnParent(parent, tail, mod) + return mod, nil + } return nil, fmt.Errorf("%w: %s", errFinderMiss, name) } +// scanEntry searches one sys.path entry for name. It returns (mod, nil) +// when the module was found and loaded, (nil, nil) when this entry did +// not match (FindModule should keep scanning), or (nil, err) on a load +// failure that must propagate. A PEP 420 namespace portion contributed +// by this entry is appended to *namespacePortions, leaving the module +// unresolved so the caller can fall back to a namespace package. +// +// CPython: Lib/importlib/_bootstrap_external.py:1357 FileFinder.find_spec +func (p *PathFinder) scanEntry(exec Executor, entry, name, parent, tail string, namespacePortions *[]string) (*objects.Module, error) { + dir := entry + if dir == "" { + dir = "." + } + // spec_from_file_location runs the resolved location through + // _path_abspath, so every __file__, __path__ and __cached__ a + // path-based import produces is absolute even when the sys.path + // entry is relative ('', '.', or a relative directory). Absolutize + // the directory up front so the file paths joined below, the + // bytecode-cache path, and the spec origin all agree and match + // CPython's absolute strings. + // + // CPython: Lib/importlib/_bootstrap_external.py:782 spec_from_file_location (_path_abspath) + if abs, err := filepath.Abs(dir); err == nil { + dir = abs + } + // A sys.path entry that is not a directory (a zip archive, or a + // path that points inside one) is handled by a custom importer + // registered on sys.path_hooks, exactly as CPython's PathFinder + // routes such entries through zipimport.zipimporter. Only consult + // the hooks for non-directories so the directory scan below stays + // the fast path for the common case. + // + // CPython: Lib/importlib/_bootstrap_external.py:1236 _path_importer_cache + if !isDir(dir) { + spec, handled, herr := pathHookSpec(exec, entry, name) + if herr != nil { + return nil, herr + } + if !handled { + return nil, nil + } + // A namespace spec from the importer (loader None, search + // locations set) is a PEP 420 portion: collect it and keep + // scanning, exactly as CPython's PathFinder extends + // namespace_path instead of returning. A spec with a real + // loader is a concrete module, so load and return it. + // + // CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec + if portions, isNS := namespacePortionsOf(spec); isNS { + *namespacePortions = append(*namespacePortions, portions...) + return nil, nil + } + mod, lerr := loadFromSpec(exec, name, spec) + if lerr != nil { + return nil, lerr + } + bindOnParent(parent, tail, mod) + return mod, nil + } + return p.scanDir(exec, dir, name, parent, tail, namespacePortions) +} + +// scanDir searches a single directory sys.path entry for name, trying +// the source package, sourceless package, source module, and sourceless +// module loaders in CPython's suffix order. It returns the loaded module, +// (nil, nil) for a miss, or (nil, err) on a load failure. A bare package +// directory with no loadable __init__ is recorded as a PEP 420 portion. +// +// CPython: Lib/importlib/_bootstrap_external.py:1391 FileFinder suffix loop +func (p *PathFinder) scanDir(exec Executor, dir, name, parent, tail string, namespacePortions *[]string) (*objects.Module, error) { + pkgDir := filepath.Join(dir, tail) + // (suffix file, loader) tried in CPython's order: source package, + // sourceless package, source module, sourceless module. + loaders := []struct { + file string + base string // case-sensitivity check target + load func() (*objects.Module, error) + }{ + {filepath.Join(pkgDir, "__init__.py"), pkgDir, func() (*objects.Module, error) { + return loadAsPackage(exec, p.Compiler, filepath.Join(pkgDir, "__init__.py"), pkgDir, name) + }}, + {filepath.Join(pkgDir, "__init__.pyc"), pkgDir, func() (*objects.Module, error) { + return loadAsPackageBytecode(exec, filepath.Join(pkgDir, "__init__.pyc"), pkgDir, name) + }}, + {filepath.Join(dir, tail+".py"), filepath.Join(dir, tail+".py"), func() (*objects.Module, error) { + return loadAsModule(exec, p.Compiler, filepath.Join(dir, tail+".py"), name, parent) + }}, + {filepath.Join(dir, tail+".pyc"), filepath.Join(dir, tail+".pyc"), func() (*objects.Module, error) { + return loadAsModuleBytecode(exec, filepath.Join(dir, tail+".pyc"), name, parent) + }}, + } + for _, l := range loaders { + if !isFile(l.file) || !caseOK(l.base) { + continue + } + mod, err := l.load() + if err != nil { + return nil, err + } + bindOnParent(parent, tail, mod) + return mod, nil + } + if isDir(pkgDir) && caseOK(pkgDir) { + *namespacePortions = append(*namespacePortions, pkgDir) + } + return nil, nil +} + +// pathHookSpec consults sys.path_hooks for a custom importer able to load +// modules out of entry (zipimport.zipimporter for a .zip archive) and asks +// that importer for name's spec. +// +// handled is false when no hook claims entry, or when the importer claims +// entry but has no spec for name, so FindModule keeps scanning the +// remaining path entries. herr carries a find_spec failure that must +// propagate. The spec is returned unloaded so FindModule can tell a +// concrete module apart from a PEP 420 namespace portion. +// +// CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec +func pathHookSpec(exec Executor, entry, name string) (spec objects.Object, handled bool, herr error) { + _ = exec + importer, ok := pathHookImporter(entry) + if !ok { + return nil, false, nil + } + findSpec, err := objects.GetAttr(importer, objects.NewStr("find_spec")) + if err != nil { + return nil, false, err + } + s, err := objects.Call(findSpec, objects.NewTuple([]objects.Object{objects.NewStr(name)}), nil) + if err != nil { + return nil, true, err + } + if s == nil || objects.IsNone(s) { + // The archive exists but does not contain name: a miss, not an + // error. CPython's PathFinder moves on to the next path entry. + return nil, false, nil + } + return s, true, nil +} + +// namespacePortionsOf reports whether spec is a PEP 420 namespace spec +// (loader None) and, if so, returns its submodule_search_locations as a +// slice of strings. A spec with a real loader is a concrete module and +// returns isNS false. +// +// CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec +// (spec.submodule_search_locations / namespace_path extension) +func namespacePortionsOf(spec objects.Object) (portions []string, isNS bool) { + loader, err := objects.GetAttr(spec, objects.NewStr("loader")) + if err != nil || !objects.IsNone(loader) { + return nil, false + } + ssl, err := objects.GetAttr(spec, objects.NewStr("submodule_search_locations")) + if err != nil || ssl == nil || objects.IsNone(ssl) { + return nil, false + } + switch v := ssl.(type) { + case *objects.List: + for i := 0; i < v.Len(); i++ { + if s, ok := v.Item(i).(*objects.Unicode); ok { + portions = append(portions, s.Value()) + } + } + case *objects.Tuple: + for i := 0; i < v.Len(); i++ { + if s, ok := v.Item(i).(*objects.Unicode); ok { + portions = append(portions, s.Value()) + } + } + } + return portions, true +} + +// pathHookImporter returns the importer object responsible for entry, +// consulting sys.path_importer_cache first and then sys.path_hooks. A +// hook that raises (ImportError) declines entry, so the next hook is +// tried; when none claim it the result is cached as None and ok is false. +// +// CPython: Lib/importlib/_bootstrap_external.py:1236 PathFinder._path_importer_cache +func pathHookImporter(entry string) (objects.Object, bool) { + sysMod, ok := GetModule("sys") + if !ok { + return nil, false + } + key := objects.NewStr(entry) + cacheObj, _ := sysMod.Dict().GetItem(objects.NewStr("path_importer_cache")) + cache, _ := cacheObj.(*objects.Dict) + if cache != nil { + if v, err := cache.GetItem(key); err == nil && v != nil { + if objects.IsNone(v) { + return nil, false + } + return v, true + } + } + hooksObj, _ := sysMod.Dict().GetItem(objects.NewStr("path_hooks")) + hooks, _ := hooksObj.(*objects.List) + if hooks == nil { + return nil, false + } + for i := 0; i < hooks.Len(); i++ { + importer, err := objects.Call(hooks.Item(i), objects.NewTuple([]objects.Object{key}), nil) + if err != nil { + // ImportError from a hook means "I do not handle this entry". + continue + } + if importer != nil && !objects.IsNone(importer) { + if cache != nil { + _ = cache.SetItem(key, importer) + } + return importer, true + } + } + if cache != nil { + _ = cache.SetItem(key, objects.None()) + } + return nil, false +} + +// loadFromSpec builds a module from spec via importlib.util.module_from_spec, +// registers it in sys.modules, and runs spec.loader.exec_module, mirroring +// the body of _bootstrap._load_unlocked. gopy cannot call _load itself +// because that path enters the import lock machinery, which needs the +// _weakref injection CPython performs in _setup and gopy does not run. +// +// CPython: Lib/importlib/_bootstrap.py:921 _load_unlocked +func loadFromSpec(exec Executor, name string, spec objects.Object) (*objects.Module, error) { + util, ok := GetModule("importlib.util") + if !ok { + util, ok = ensureImportlibUtil(exec) + if !ok { + return nil, fmt.Errorf("imp: loadFromSpec %q: importlib.util unavailable", name) + } + } + mfs, err := util.Dict().GetItem(objects.NewStr("module_from_spec")) + if err != nil { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec missing: %w", name, err) + } + modObj, err := objects.Call(mfs, objects.NewTuple([]objects.Object{spec}), nil) + if err != nil { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec: %w", name, err) + } + module, ok := modObj.(*objects.Module) + if !ok { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec returned %T", name, modObj) + } + AddModule(name, module) + loader, err := objects.GetAttr(spec, objects.NewStr("loader")) + if err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: spec.loader: %w", name, err) + } + // A namespace-package spec carries loader None: module_from_spec has + // already populated __path__ from submodule_search_locations and there + // is no body to run, exactly as _load_unlocked skips exec_module when + // the loader is None. + // + // CPython: Lib/importlib/_bootstrap.py:945 _load_unlocked (loader is None) + if objects.IsNone(loader) { + if final, ok := GetModule(name); ok { + return final, nil + } + return module, nil + } + execMod, err := objects.GetAttr(loader, objects.NewStr("exec_module")) + if err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: loader.exec_module: %w", name, err) + } + if _, err := objects.Call(execMod, objects.NewTuple([]objects.Object{module}), nil); err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: exec_module: %w: %w", name, err, ErrModuleExecFailed) + } + // exec_module may reassign sys.modules[name]; re-read it the way + // CPython's _load_unlocked returns sys.modules[spec.name]. + if final, ok := GetModule(name); ok { + return final, nil + } + return module, nil +} + // bindOnParent installs child as an attribute on the parent package's // module dict. Mirrors the setattr step _find_and_load_unlocked runs // after a successful submodule load so `import a.b` makes `a.b` // resolve as an attribute on `a`. Errors are swallowed to match // CPython, which also catches AttributeError around the setattr. // -// CPython: Lib/importlib/_bootstrap.py:1234 setattr(parent_module, child, module) +// CPython: Lib/importlib/_bootstrap.py:1350 setattr(parent_module, child, module) func bindOnParent(parent, tail string, child *objects.Module) { if parent == "" { return } - pm, ok := GetModule(parent) - if !ok { + // CPython reads parent_module = sys.modules[parent] verbatim, so a test + // (or pathological code) that swaps in a non-module object still receives + // the setattr; GetModuleRaw preserves that object, GetModule would drop it. + pm, ok := GetModuleRaw(parent) + if !ok || objects.IsNone(pm) { return } - _ = pm.Dict().SetItem(objects.NewStr(tail), child) + // CPython binds `module = sys.modules.pop(spec.name)`, i.e. the object the + // body left in sys.modules, not the module shell the loader created. An + // __init__ that reassigns sys.modules[__name__] to a custom object is bound + // in that swapped form, so re-read the entry by full name and fall back to + // the loader's module only when nothing replaced it. + // + // CPython: Lib/importlib/_bootstrap.py:931 module = sys.modules.pop(spec.name) + bound := objects.Object(child) + if raw, present := GetModuleRaw(parent + "." + tail); present && !objects.IsNone(raw) { + bound = raw + } + // setattr(parent_module, child, module) runs the parent's real __setattr__ + // so a custom or unwritable parent participates. An AttributeError is + // caught and reported as an ImportWarning, exactly as + // _find_and_load_unlocked does. + // + // CPython: Lib/importlib/_bootstrap.py:1350 try: setattr(...) except AttributeError + if err := objects.SetAttr(pm, objects.NewStr(tail), bound); err != nil { + if isAttributeError(err) && ImportWarnHook != nil { + // CPython: Lib/importlib/_bootstrap.py:1352 msg = f"Cannot set ..." + msg := fmt.Sprintf("Cannot set an attribute on '%s' for child module '%s'", + parent, tail) + _ = ImportWarnHook(msg) + } + } +} + +// isAttributeError reports whether a Go error raised by SetAttr carries a +// Python AttributeError. SetAttr surfaces the exception wrapped in a +// RaisedError; an entry that is not an AttributeError propagates as a +// non-match so it is not silently turned into a warning. +func isAttributeError(err error) bool { + var re *objects.RaisedError + if errors.As(err, &re) { + if exc, ok := re.Exc.(*pyerrors.Exception); ok { + return pyerrors.Match(exc, pyerrors.PyExc_AttributeError) + } + } + // SetAttr also surfaces a missing-slot failure as a plain Go error whose + // text leads with the exception name, so match that shape too. + msg := err.Error() + if rest, ok := strings.CutPrefix(msg, "vm: "); ok { + msg = rest + } + return strings.HasPrefix(msg, "AttributeError:") } // splitParent splits a dotted module name into (parent, tail). @@ -251,19 +607,36 @@ func loadAsPackage(exec Executor, compiler SourceCompiler, initFile, pkgDir, nam return nil, fmt.Errorf("imp: loadAsPackage %q: __package__: %w", name, err) } AddModule(name, mod) + // CPython attaches __spec__ before exec_module; do the same so an + // __init__.py that imports from its own package during init reads + // spec.has_location / spec.origin. + // + // CPython: Lib/importlib/_bootstrap.py:573 module_from_spec + attachSpecAttrs(exec, mod, name, initFile, []string{pkgDir}) src, err := os.ReadFile(initFile) //nolint:gosec // initFile is filepath.Join of a trusted PathFinder.Paths entry. if err != nil { return nil, fmt.Errorf("imp: loadAsPackage %q: %w", name, err) } - code, err := compiler(src, initFile) - if err != nil { - return nil, fmt.Errorf("imp: loadAsPackage %q: compile: %w", name, err) + code, ok := readBytecodeCache(initFile) + if !ok { + var cerr error + code, cerr = compiler(src, initFile) + if cerr != nil { + return nil, fmt.Errorf("imp: loadAsPackage %q: compile: %w", name, cerr) + } + writeBytecodeCache(initFile, code) } - if _, err := exec.ExecCode(code, mod); err != nil { + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { RemoveModule(name) - return nil, fmt.Errorf("imp: loadAsPackage %q: exec: %w", name, err) + return nil, fmt.Errorf("imp: loadAsPackage %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) } + // Executing this package may have completed importlib's self-bootstrap, + // which unblocks the deferred spec queue (see maybeFlushPendingSpecs). + maybeFlushPendingSpecs(exec) // CPython: Python/import.c:2715 exec_code_in_module re-reads // sys.modules so an `__init__.py` that reassigns its own entry // (rare for packages, but the same shape as decimal/_pydecimal). @@ -273,6 +646,129 @@ func loadAsPackage(exec Executor, compiler SourceCompiler, initFile, pkgDir, nam return mod, nil } +// loadAsPackageBytecode is loadAsPackage for a sourceless package: the +// code object comes from /__init__.pyc instead of compiling +// __init__.py. __path__ is set before the body runs so a package whose +// __init__ does `from .submod import x` can resolve the parent's +// __path__. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader +func loadAsPackageBytecode(exec Executor, initFile, pkgDir, name string) (*objects.Module, error) { + code, err := readPycCode(initFile) + if err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: %w", name, err) + } + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + if err := d.SetItem(objects.NewStr("__file__"), objects.NewStr(initFile)); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __file__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__path__"), + objects.NewList([]objects.Object{objects.NewStr(pkgDir)})); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __path__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__package__"), objects.NewStr(name)); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __package__: %w", name, err) + } + AddModule(name, mod) + attachSpecAttrs(exec, mod, name, initFile, []string{pkgDir}) + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) + } + if final, ok := GetModule(name); ok { + return final, nil + } + return mod, nil +} + +// loadAsModuleBytecode is loadAsModule for a sourceless module: the code +// object comes from /.pyc instead of compiling .py. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader +func loadAsModuleBytecode(exec Executor, file, name, parent string) (*objects.Module, error) { + code, err := readPycCode(file) + if err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: %w", name, err) + } + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + if err := d.SetItem(objects.NewStr("__file__"), objects.NewStr(file)); err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: __file__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__package__"), objects.NewStr(parent)); err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: __package__: %w", name, err) + } + AddModule(name, mod) + attachSpecAttrs(exec, mod, name, file, nil) + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) + } + if final, ok := GetModule(name); ok { + return final, nil + } + return mod, nil +} + +// readPycCode opens a .pyc file and returns its embedded code object, +// validating the magic-number header along the way. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader.get_code +func readPycCode(file string) (*objects.Code, error) { + f, err := os.Open(file) //nolint:gosec // file is filepath.Join of a trusted PathFinder.Paths entry. + if err != nil { + return nil, err + } + defer f.Close() + code, _, err := marshal.ReadPyc(f) + if err != nil { + return nil, err + } + return code, nil +} + +// loadAsNamespace builds a PEP 420 namespace package: a module with no +// __file__, a __path__ spanning every contributing directory, and a +// namespace __spec__ (loader None, origin None). The body is never +// executed because a namespace package has no __init__.py. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec (namespace) / +// Lib/importlib/_bootstrap_external.py:1230 NamespaceLoader +func loadAsNamespace(exec Executor, name, parent string, portions []string) *objects.Module { + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + items := make([]objects.Object, len(portions)) + for i, s := range portions { + items[i] = objects.NewStr(s) + } + _ = d.SetItem(objects.NewStr("__path__"), objects.NewList(items)) + _ = d.SetItem(objects.NewStr("__package__"), objects.NewStr(name)) + _ = d.SetItem(objects.NewStr("__file__"), objects.None()) + if _, err := d.GetItem(objects.NewStr("__doc__")); err != nil { + _ = d.SetItem(objects.NewStr("__doc__"), objects.None()) + } + _ = parent + AddModule(name, mod) + attachNamespaceSpec(exec, mod, name, portions) + return mod +} + // loadAsModule is the flat-file equivalent: load source, set // __file__ and __package__ (which is the parent dotted name, or "" // for top-level), then exec. @@ -291,19 +787,36 @@ func loadAsModule(exec Executor, compiler SourceCompiler, file, name, parent str return nil, fmt.Errorf("imp: loadAsModule %q: __package__: %w", name, err) } AddModule(name, mod) + // CPython sets __spec__ in module_from_spec before exec_module runs the + // body, so a module that imports from itself during initialization can + // read spec.has_location / spec.origin. Attach before exec. + // + // CPython: Lib/importlib/_bootstrap.py:573 module_from_spec + attachSpecAttrs(exec, mod, name, file, nil) src, err := os.ReadFile(file) //nolint:gosec // file is filepath.Join of a trusted PathFinder.Paths entry. if err != nil { return nil, fmt.Errorf("imp: loadAsModule %q: %w", name, err) } - code, err := compiler(src, file) - if err != nil { - return nil, fmt.Errorf("imp: loadAsModule %q: compile: %w", name, err) + code, ok := readBytecodeCache(file) + if !ok { + var cerr error + code, cerr = compiler(src, file) + if cerr != nil { + return nil, fmt.Errorf("imp: loadAsModule %q: compile: %w", name, cerr) + } + writeBytecodeCache(file, code) } - if _, err := exec.ExecCode(code, mod); err != nil { + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { RemoveModule(name) - return nil, fmt.Errorf("imp: loadAsModule %q: exec: %w", name, err) + return nil, fmt.Errorf("imp: loadAsModule %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) } + // A freshly executed importlib submodule may have completed the package + // bootstrap; drain any specs deferred while it was incomplete. + maybeFlushPendingSpecs(exec) // CPython: Python/import.c:2715 exec_code_in_module re-reads // sys.modules so a module body that reassigns its own entry // (`sys.modules[__name__] = other`, e.g. decimal/_pydecimal) wins. @@ -313,6 +826,490 @@ func loadAsModule(exec Executor, compiler SourceCompiler, file, name, parent str return mod, nil } +// attachSpecAttrs populates the module-namespace surface CPython's +// _init_module_attrs fills from a ModuleSpec: __spec__, __loader__, +// __cached__ and a default __doc__. gopy's import runs Go-side, so the +// spec is built by calling importlib.util.spec_from_file_location once +// the body has run (the same shape CPython's FileFinder produces). +// +// importlib.util is itself a .py module, so the modules loaded before +// (and during) its own import cannot have their spec built yet. Those +// are queued in pendingSpecs and flushed the moment util becomes +// available, so importlib and its early dependencies still end up with +// a __spec__. +// +// CPython: Lib/importlib/_bootstrap.py:516 _init_module_attrs +func attachSpecAttrs(exec Executor, mod *objects.Module, name, origin string, searchLocations []string) { + d := mod.Dict() + // __doc__ defaults to None when the body stored no docstring. + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + p := pendingSpec{mod: mod, name: name, origin: origin, search: searchLocations} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// setSpecInitializing flips mod.__spec__._initializing. CPython's +// module_from_spec wraps exec_module in `spec._initializing = True` / +// `finally: spec._initializing = False`, so a module that imports from +// itself during its own body sees a partially-initialized spec. gopy +// mirrors that around ExecCode so the circular-import and shadowing +// hints in _Py_module_getattro_impl / _PyEval_ImportFrom fire correctly. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec +func setSpecInitializing(mod *objects.Module, on bool) { + spec, err := mod.Dict().GetItem(objects.NewStr("__spec__")) + if err != nil || spec == nil || objects.IsNone(spec) { + return + } + v := objects.False() + if on { + v = objects.True() + } + _ = objects.SetAttr(spec, objects.NewStr("_initializing"), v) +} + +// pushUninitializedSubmodule appends child to parentMod.__spec__. +// _uninitialized_submodules and returns a pop function that removes the +// last entry. CPython brackets the child's _load_unlocked with this +// append/pop so a circular import that reaches getattr(parent, child) +// while child is mid-load gets the "cannot access submodule" message. +// +// CPython: Lib/importlib/_bootstrap.py:1340 parent_spec._uninitialized_submodules.append(child) +func pushUninitializedSubmodule(parentMod *objects.Module, child string) func() { + noop := func() {} + if parentMod == nil { + return noop + } + spec, err := parentMod.Dict().GetItem(objects.NewStr("__spec__")) + if err != nil || spec == nil || objects.IsNone(spec) { + return noop + } + listObj, err := objects.GetAttr(spec, objects.NewStr("_uninitialized_submodules")) + if err != nil { + return noop + } + list, ok := listObj.(*objects.List) + if !ok { + return noop + } + list.Append(objects.NewStr(child)) + return func() { + // CPython: Lib/importlib/_bootstrap.py:1345 _uninitialized_submodules.pop() + if n := list.Len(); n > 0 { + list.SetSlice(n-1, n, nil) + } + } +} + +// attachNamespaceSpec binds a PEP 420 namespace ModuleSpec (loader None, +// origin None, submodule_search_locations = the portions) onto mod. Like +// the file path it defers when importlib.util is not importable yet. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec (namespace) +func attachNamespaceSpec(exec Executor, mod *objects.Module, name string, portions []string) { + p := pendingSpec{mod: mod, name: name, search: portions, namespace: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// AttachBuiltinSpec gives a built-in (inittab) module the __spec__ / +// __loader__ surface CPython's BuiltinImporter installs: origin +// "built-in", no source, no file. It is deferred just like the +// file-based path when importlib.util is not importable yet. +// +// CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter.exec_module +func AttachBuiltinSpec(exec Executor, mod *objects.Module, name string) { + d := mod.Dict() + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + p := pendingSpec{mod: mod, name: name, builtin: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// pendingSpec records a module whose spec could not be built yet because +// importlib.util was not importable at the time. +type pendingSpec struct { + mod *objects.Module + name string + origin string + search []string + builtin bool + namespace bool + extension bool +} + +// AttachExtensionSpec gives a Go-implemented extension module the +// __spec__ / __loader__ / __file__ surface CPython's ExtensionFileLoader +// installs: an ExtensionFileLoader instance as the loader and the +// synthesized lib-dynload path as origin / __file__. test_import's +// require_extension asserts module.__spec__.loader is ExtensionFileLoader, +// so the loader type must be exactly that. +// +// CPython: Lib/importlib/_bootstrap_external.py:1032 ExtensionFileLoader +func AttachExtensionSpec(exec Executor, mod *objects.Module, name, origin string) { + d := mod.Dict() + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + _ = d.SetItem(objects.NewStr("__file__"), objects.NewStr(origin)) + p := pendingSpec{mod: mod, name: name, origin: origin, extension: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +var ( + pendingMu sync.Mutex + pendingSpecs []pendingSpec +) + +// maybeFlushPendingSpecs drains the deferred-spec queue if anything is +// queued and importlib.util has become usable. Modules whose specs were +// deferred during importlib's bootstrap (importlib itself, _bootstrap, +// _bootstrap_external) are picked up here the moment the package finishes +// initializing, so a follow-up `import importlib.abc` finds a __spec__ on +// its parent package. +func maybeFlushPendingSpecs(exec Executor) { + pendingMu.Lock() + n := len(pendingSpecs) + pendingMu.Unlock() + if n == 0 { + return + } + if util, ok := ensureImportlibUtil(exec); ok { + flushPendingSpecs(util) + } +} + +// flushPendingSpecs drains the deferred-spec queue, building each +// module's spec now that importlib.util is available. +func flushPendingSpecs(util *objects.Module) { + pendingMu.Lock() + queue := pendingSpecs + pendingSpecs = nil + pendingMu.Unlock() + for _, p := range queue { + applySpec(util, p) + } +} + +// applySpec builds a ModuleSpec for p via importlib.util and binds the +// resulting __spec__/__loader__/__cached__ onto the module dict. +// Built-in modules use spec_from_loader with a "built-in" origin; file +// modules use spec_from_file_location. +// +// CPython: Lib/importlib/_bootstrap.py:516 _init_module_attrs +func applySpec(util *objects.Module, p pendingSpec) { + d := p.mod.Dict() + // importlib._bootstrap._setup already walks sys.modules and gives every + // built-in module a spec whose loader is BuiltinImporter. When that has + // run before this deferred flush, a freshly-built spec here would carry + // loader=None (importlib.machinery may not be importable yet) and clobber + // the correct __loader__. Leave _setup's work in place. + // + // CPython: Lib/importlib/_bootstrap.py:1517 _setup (built-in spec set-up) + if p.builtin { + if existing, err := d.GetItem(objects.NewStr("__spec__")); err == nil && existing != nil && !objects.IsNone(existing) { + if loader, lerr := objects.GetAttr(existing, objects.NewStr("loader")); lerr == nil && loader != nil && !objects.IsNone(loader) { + return + } + } + } + spec := buildSpec(util, p) + if spec == nil { + return + } + _ = d.SetItem(objects.NewStr("__spec__"), spec) + if loader, lerr := objects.GetAttr(spec, objects.NewStr("loader")); lerr == nil { + _ = d.SetItem(objects.NewStr("__loader__"), loader) + } + // __cached__ mirrors spec.cached (None for gopy's bytecode-less load). + if cached, cerr := objects.GetAttr(spec, objects.NewStr("cached")); cerr == nil { + _ = d.SetItem(objects.NewStr("__cached__"), cached) + } else { + _ = d.SetItem(objects.NewStr("__cached__"), objects.None()) + } +} + +// buildSpec calls the appropriate importlib.util constructor for p. +func buildSpec(util *objects.Module, p pendingSpec) objects.Object { + switch { + case p.namespace: + return buildNamespaceSpec(p) + case p.builtin: + return buildBuiltinSpec(util, p) + case p.extension: + return buildExtensionSpec(util, p) + default: + return buildFileSpec(util, p) + } +} + +// buildExtensionSpec builds a spec whose loader is an ExtensionFileLoader +// instance, mirroring the spec PathFinder produces for a compiled +// extension. spec_from_file_location with an explicit loader keeps the +// loader type exactly ExtensionFileLoader and records origin as __file__. +// +// CPython: Lib/importlib/_bootstrap_external.py:1546 ExtensionFileLoader path hook +func buildExtensionSpec(util *objects.Module, p pendingSpec) objects.Object { + machinery, ok := GetModule("importlib.machinery") + if !ok { + return nil + } + loaderCls, err := machinery.Dict().GetItem(objects.NewStr("ExtensionFileLoader")) + if err != nil || loaderCls == nil { + return nil + } + loader, lerr := objects.Call(loaderCls, + objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}), nil) + if lerr != nil { + return nil + } + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("loader"), loader) + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +// buildNamespaceSpec builds a PEP 420 namespace spec: loader None, origin +// None, the portions as submodule_search_locations. machinery.ModuleSpec is +// the faithful constructor; util re-exports it. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec +func buildNamespaceSpec(p pendingSpec) objects.Object { + machinery, ok := GetModule("importlib.machinery") + if !ok { + return nil + } + ctor, err := machinery.Dict().GetItem(objects.NewStr("ModuleSpec")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("is_package"), objects.True()) + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.None()}) + spec, cerr := objects.Call(ctor, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + items := make([]objects.Object, len(p.search)) + for i, s := range p.search { + items[i] = objects.NewStr(s) + } + _ = objects.SetAttr(spec, objects.NewStr("submodule_search_locations"), objects.NewList(items)) + return spec +} + +// buildBuiltinSpec builds the spec for a built-in module. CPython's +// BuiltinImporter.find_spec passes the importer class itself as the loader, +// so every built-in module's __loader__ is BuiltinImporter, not None. Mirror +// that: a None loader would fail test_importlib's test_everyone_has___loader__. +// +// CPython: Lib/importlib/_bootstrap.py:760 BuiltinImporter.find_spec +func buildBuiltinSpec(util *objects.Module, p pendingSpec) objects.Object { + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_loader")) + if err != nil { + return nil + } + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), builtinImporterLoader()}) + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("origin"), objects.NewStr("built-in")) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +// builtinImporterLoader returns the BuiltinImporter class to use as a +// built-in module's __loader__. importlib.machinery re-exports +// _bootstrap.BuiltinImporter, but it may not be imported yet when a built-in +// module loads early, so fall back to importlib._bootstrap, which is always +// live by this point. None is the last resort. +func builtinImporterLoader() objects.Object { + for _, modName := range []string{"importlib.machinery", "importlib._bootstrap"} { + m, ok := GetModule(modName) + if !ok { + continue + } + if bi, lerr := m.Dict().GetItem(objects.NewStr("BuiltinImporter")); lerr == nil && bi != nil { + return bi + } + } + return objects.None() +} + +// BuiltinImporterLoader returns the BuiltinImporter class CPython uses as +// the initial __loader__ for the __main__ module. add_main_module stamps it +// when __main__'s dict has no __loader__ yet, so test_importlib's +// test_everyone_has___loader__ finds the attribute on __main__. +// +// CPython: Python/pylifecycle.c add_main_module (BuiltinImporter loader) +func BuiltinImporterLoader() objects.Object { + return builtinImporterLoader() +} + +// buildFileSpec builds a file-backed spec via spec_from_file_location. +func buildFileSpec(util *objects.Module, p pendingSpec) objects.Object { + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + if p.search != nil { + items := make([]objects.Object, len(p.search)) + for i, s := range p.search { + items[i] = objects.NewStr(s) + } + _ = kwargs.SetItem(objects.NewStr("submodule_search_locations"), + objects.NewList(items)) + } + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +var ( + specBootstrapMu sync.Mutex + specBootstrapped bool +) + +// ensureImportlibUtil returns the importlib.util module, importing it on +// first use. The lazy import is guarded by specBootstrapped so the +// modules pulled in by importlib.util's own load (os, types, +// importlib._bootstrap_external) do not re-enter and recurse while that +// import is still in flight. +func ensureImportlibUtil(exec Executor) (*objects.Module, bool) { + if util, ok := GetModule("importlib.util"); ok { + // util is registered before its body runs, so a mid-import + // lookup sees the module without spec_from_file_location yet. + // Treat that partial state as "not ready" so the caller defers + // rather than flushing the pending queue against a stub. + if _, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")); err != nil { + return nil, false + } + // spec_from_file_location dereferences importlib._bootstrap_external's + // module-global `_bootstrap` (wired by _set_bootstrap_module). A fresh + // importlib re-import (test.support.import_helper.import_fresh_module) + // can leave util importable while that global is still None, so verify + // the builder is wired before reporting util ready. + if !specBuilderReady() { + return nil, false + } + return util, true + } + // Until importlib's package bootstrap finishes, importing importlib.util + // would pull in a fresh importlib._bootstrap_external whose module-global + // `_bootstrap` is still None (it is wired by _set_bootstrap_module at + // importlib/__init__.py:37). spec_from_file_location dereferences that + // global at _bootstrap_external.py:596, so building a spec mid-bootstrap + // crashes. Defer: the module loads without a spec now and the pending + // queue is flushed once importlib is fully initialized. This mirrors + // importlib's own rule ("Until bootstrapping is complete, DO NOT import + // any modules that attempt to import importlib._bootstrap"). + // + // CPython: Lib/importlib/__init__.py:6 (bootstrap-complete guard) + if !importlibBootstrapComplete() { + return nil, false + } + specBootstrapMu.Lock() + if specBootstrapped { + specBootstrapMu.Unlock() + return nil, false + } + specBootstrapped = true + specBootstrapMu.Unlock() + util, err := ImportModule(exec, "importlib.util") + specBootstrapMu.Lock() + specBootstrapped = false + specBootstrapMu.Unlock() + if err != nil { + return nil, false + } + return util, true +} + +// importlibBootstrapComplete reports whether the importlib package has +// finished its self-bootstrap. importlib/__init__.py defines import_module +// only after wiring _bootstrap / _bootstrap_external (lines 16-48), so the +// presence of that attribute is a reliable "bootstrap done" sentinel. When +// importlib is not loaded at all (very early startup), report complete so the +// legacy lazy-import path is preserved. +// +// CPython: Lib/importlib/__init__.py:71 def import_module +func importlibBootstrapComplete() bool { + mod, ok := GetModule("importlib") + if !ok { + return true + } + _, err := mod.Dict().GetItem(objects.NewStr("import_module")) + return err == nil +} + +// specBuilderReady reports whether importlib._bootstrap_external is wired to +// importlib._bootstrap. spec_from_file_location dereferences the module-global +// `_bootstrap` (set by _set_bootstrap_module), so a fresh re-import that has +// not run that wiring yet must not be asked to build a spec. +// +// CPython: Lib/importlib/_bootstrap_external.py:1552 _set_bootstrap_module +func specBuilderReady() bool { + be, ok := GetModule("importlib._bootstrap_external") + if !ok { + // Not yet loaded: util will pull it in wired, so treat as ready. + return true + } + v, err := be.Dict().GetItem(objects.NewStr("_bootstrap")) + if err != nil || v == nil { + return false + } + return !objects.IsNone(v) +} + // isFile reports whether path exists and is a regular file. It is the // gopy stand-in for importlib's _path_isfile helper. // @@ -325,6 +1322,62 @@ func isFile(path string) bool { return info.Mode().IsRegular() } +// isDir reports whether path exists and is a directory. It is the gopy +// stand-in for importlib's _path_isdir helper used by namespace-portion +// detection. +// +// CPython: Lib/importlib/_bootstrap_external.py:153 _path_isdir +func isDir(path string) bool { + info, err := os.Stat(path) + if err != nil { + return false + } + return info.IsDir() +} + +// caseOK reports whether the final component of an existing candidate path +// matches a real on-disk directory entry with exact case. On a +// case-insensitive but case-preserving filesystem (macOS, Windows) os.Stat +// succeeds for any case spelling, so a plain existence probe would let +// `import RAnDoM` resolve random.py. CPython's FileFinder guards against +// this by testing membership in set(os.listdir(dir)), the exact-case path +// cache, unless _relax_case() is true. caseOK reproduces that membership +// test by scanning the directory for an exact-case name match. +// +// CPython: Lib/importlib/_bootstrap_external.py:1378 cache_module in cache +func caseOK(path string) bool { + if relaxCase() { + return true + } + entries, err := os.ReadDir(filepath.Dir(path)) + if err != nil { + return false + } + base := filepath.Base(path) + for _, e := range entries { + if e.Name() == base { + return true + } + } + return false +} + +// relaxCase mirrors importlib's _relax_case: case folding is relaxed only +// on case-insensitive platforms (Windows, macOS) and only when PYTHONCASEOK +// is present in the environment. Case-sensitive platforms are always +// strict, where caseOK's directory scan is a redundant but harmless match. +// +// CPython: Lib/importlib/_bootstrap_external.py:50 _relax_case +func relaxCase() bool { + switch runtime.GOOS { + case "windows", "darwin": + _, ok := os.LookupEnv("PYTHONCASEOK") + return ok + default: + return false + } +} + var ( pathFinderMu sync.RWMutex pathFinder *PathFinder diff --git a/imp/shadowing.go b/imp/shadowing.go new file mode 100644 index 000000000..b17f59c20 --- /dev/null +++ b/imp/shadowing.go @@ -0,0 +1,314 @@ +package imp + +import ( + "fmt" + "os" + "strings" + + "github.com/tamnd/gopy/objects" +) + +// optionalAttr ports PyObject_GetOptionalAttr for the shadowing helpers: +// it returns (val, true, nil) on success and (nil, false, nil) when the +// attribute is missing (AttributeError). Any non-AttributeError failure +// propagates as the third return. +// +// CPython: Objects/object.c:1324 PyObject_GetOptionalAttr +func optionalAttr(o objects.Object, name string) (objects.Object, bool, error) { + v, err := objects.GetAttr(o, objects.NewStr(name)) + if err == nil { + return v, true, nil + } + if strings.Contains(err.Error(), "AttributeError") { + return nil, false, nil + } + return nil, false, err +} + +// SpecFileOrigin ports _PyModuleSpec_GetFileOrigin: returns the spec's +// origin string only when spec.has_location is truthy and spec.origin is +// a str. The bool reports whether a location origin was found. +// +// CPython: Objects/moduleobject.c:892 _PyModuleSpec_GetFileOrigin +func SpecFileOrigin(spec objects.Object) (string, bool, error) { + hasLoc, found, err := optionalAttr(spec, "has_location") + if err != nil || !found { + return "", false, err + } + if !objects.IsTrue(hasLoc) { + return "", false, nil + } + originObj, found, err := optionalAttr(spec, "origin") + if err != nil || !found { + return "", false, err + } + origin, ok := originObj.(*objects.Unicode) + if !ok { + return "", false, nil + } + return origin.Value(), true, nil +} + +// SpecIsInitializing ports _PyModuleSpec_IsInitializing: spec._initializing +// is truthy. +// +// CPython: Objects/moduleobject.c:858 _PyModuleSpec_IsInitializing +func SpecIsInitializing(spec objects.Object) (bool, error) { + v, found, err := optionalAttr(spec, "_initializing") + if err != nil || !found { + return false, err + } + return objects.IsTrue(v), nil +} + +// SpecIsUninitializedSubmodule ports _PyModuleSpec_IsUninitializedSubmodule: +// name is currently mid-import as a submodule, i.e. it appears in +// spec._uninitialized_submodules. A missing list reads as "not a submodule". +// +// CPython: Objects/moduleobject.c:876 _PyModuleSpec_IsUninitializedSubmodule +func SpecIsUninitializedSubmodule(spec objects.Object, name string) (bool, error) { + if spec == nil || objects.IsNone(spec) { + return false, nil + } + v, found, err := optionalAttr(spec, "_uninitialized_submodules") + if err != nil || !found { + return false, err + } + contains, err := objects.Contains(v, objects.NewStr(name)) + if err != nil { + return false, err + } + return contains, nil +} + +// ModuleIsPossiblyShadowing ports _PyModule_IsPossiblyShadowing: the +// module at origin could shadow a same-named module later on the search +// path. The check is: not sys.flags.safe_path and +// dirname(origin minus a trailing /__init__.py) == (sys.path[0] or cwd). +// +// CPython: Objects/moduleobject.c:923 _PyModule_IsPossiblyShadowing +func ModuleIsPossiblyShadowing(originFound bool, origin string) (bool, error) { + if !originFound { + return false, nil + } + if safePathEnabled() { + return false, nil + } + root := origin + sep := strings.LastIndex(root, string(os.PathSeparator)) + if sep < 0 { + return false, nil + } + // A package origin ends in __init__.py; step one directory up. + if root[sep+1:] == "__init__.py" { + root = root[:sep] + sep = strings.LastIndex(root, string(os.PathSeparator)) + if sep < 0 { + return false, nil + } + } + root = root[:sep] + + sysPath0, ok := sysPathZero() + if !ok { + return false, nil + } + if sysPath0 == "" { + cwd, err := os.Getwd() + if err != nil { + return false, err + } + sysPath0 = cwd + } + return sysPath0 == root, nil +} + +// safePathEnabled reports whether sys.flags.safe_path is truthy. +// +// CPython: Objects/moduleobject.c:937 config->safe_path +func safePathEnabled() bool { + sysMod, ok := GetModule("sys") + if !ok { + return false + } + flags, err := objects.GetAttr(sysMod, objects.NewStr("flags")) + if err != nil { + return false + } + sp, err := objects.GetAttr(flags, objects.NewStr("safe_path")) + if err != nil { + return false + } + return objects.IsTrue(sp) +} + +// configSysPath0 holds the startup-captured leading sys.path entry, the +// equivalent of CPython's config->sys_path_0. The shadowing check uses +// this snapshot, NOT live sys.path[0], so a script that mutates sys.path +// after startup does not change shadowing detection. +// +// CPython: Python/initconfig.c config->sys_path_0 +var ( + configSysPath0 string + configSysPath0Set bool +) + +// SetConfigSysPath0 records the startup leading sys.path entry. The bool +// reports whether the interpreter installed one at all (false under +// safe_path, where CPython leaves config->sys_path_0 NULL). +func SetConfigSysPath0(path string, present bool) { + configSysPath0 = path + configSysPath0Set = present +} + +// sysPathZero returns config->sys_path_0. The bool is false when no +// leading entry was captured (e.g. safe_path). +// +// CPython: Objects/moduleobject.c:967 config->sys_path_0 +func sysPathZero() (string, bool) { + if !configSysPath0Set { + return "", false + } + return configSysPath0, true +} + +// StdlibModuleNamesContains reports whether modName is in +// sys.stdlib_module_names. modName is passed as the live object (not a +// Go string) so an unhashable __name__ raises through PySet_Contains +// exactly as CPython does. The lookup is silent when stdlib_module_names +// is missing or is not a set/frozenset (PyAnySet_Check guards the call). +// +// CPython: Objects/moduleobject.c:1059 PySet_Contains(stdlib_modules, mod_name) +func StdlibModuleNamesContains(modName objects.Object) (bool, error) { + sysMod, ok := GetModule("sys") + if !ok { + return false, nil + } + namesObj, found, err := optionalAttr(sysMod, "stdlib_module_names") + if err != nil { + return false, err + } + if !found { + return false, nil + } + if !anySetCheck(namesObj) { + return false, nil + } + contains, err := objects.Contains(namesObj, modName) + if err != nil { + return false, err + } + return contains, nil +} + +// anySetCheck ports PyAnySet_Check: the object is a set or frozenset (or +// a subclass of either). +// +// CPython: Include/cpython/setobject.h PyAnySet_Check +func anySetCheck(o objects.Object) bool { + t := o.Type() + return objects.IsSubtype(t, objects.SetType) || objects.IsSubtype(t, objects.FrozensetType) +} + +// moduleGetattrError ports the error tail of _Py_module_getattro_impl: +// after a generic-attribute miss with no PEP 562 __getattr__, it builds +// the best-effort AttributeError, surfacing the stdlib-shadowing and +// circular-import hints. It returns a Go error whose message the objects +// layer synthesizes into the AttributeError. It is wired into +// objects.ModuleAttrErrorHook so module.go can reach the import system's +// spec helpers without an import cycle. +// +// CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl (error tail) +func moduleGetattrError(m *objects.Module, name string) error { + d := m.Dict() + + // __name__ must be a str (or str subclass); otherwise the generic + // "module has no attribute" message applies. CPython uses PyUnicode_Check + // here, so a str subclass passes. + modNameObj, _ := d.GetItem(objects.NewStr("__name__")) + if modNameObj == nil || !objects.IsSubtype(modNameObj.Type(), objects.StrType()) { + return fmt.Errorf("AttributeError: module has no attribute '%s'", name) + } + modName := unicodeContents(modNameObj) + nameQ := quoteU(name) + modQ := quoteU(modName) + + spec, serr := d.GetItem(objects.NewStr("__spec__")) + if serr != nil || spec == nil || objects.IsNone(spec) { + return fmt.Errorf("AttributeError: module %s has no attribute %s", modQ, nameQ) + } + + origin, originFound, oerr := SpecFileOrigin(spec) + if oerr != nil { + return oerr + } + shadowing, sherr := ModuleIsPossiblyShadowing(originFound, origin) + if sherr != nil { + return sherr + } + shadowingStdlib := false + if shadowing { + c, cerr := StdlibModuleNamesContains(modNameObj) + if cerr != nil { + return cerr + } + shadowingStdlib = c + } + + if shadowingStdlib { + return fmt.Errorf("AttributeError: module %s has no attribute %s (consider renaming %s since it has the same name as the standard library module named %s and prevents importing that standard library module)", + modQ, nameQ, quoteU(origin), modQ) + } + + initializing, ierr := SpecIsInitializing(spec) + if ierr != nil { + return ierr + } + switch { + case initializing && shadowing: + return fmt.Errorf("AttributeError: module %s has no attribute %s (consider renaming %s if it has the same name as a library you intended to import)", + modQ, nameQ, quoteU(origin)) + case initializing && originFound: + return fmt.Errorf("AttributeError: partially initialized module %s from %s has no attribute %s (most likely due to a circular import)", + modQ, quoteU(origin), nameQ) + case initializing: + return fmt.Errorf("AttributeError: partially initialized module %s has no attribute %s (most likely due to a circular import)", + modQ, nameQ) + } + + // Not initializing: the miss is a circular import only if the name is a + // submodule currently mid-load (tracked on spec._uninitialized_submodules). + // + // CPython: Objects/moduleobject.c:1116 _PyModuleSpec_IsUninitializedSubmodule + uninit, uerr := SpecIsUninitializedSubmodule(spec, name) + if uerr != nil { + return uerr + } + if uninit { + return fmt.Errorf("AttributeError: cannot access submodule %s of module %s (most likely due to a circular import)", + nameQ, modQ) + } + return fmt.Errorf("AttributeError: module %s has no attribute %s", modQ, nameQ) +} + +// unicodeContents returns the string contents of a str (or str subclass) +// object, mirroring how CPython's %U formats a PyUnicode payload. +func unicodeContents(o objects.Object) string { + if u, ok := o.(*objects.Unicode); ok { + return u.Value() + } + if s, err := objects.Str(o); err == nil { + return s + } + return "" +} + +// quoteU wraps s in single quotes, matching the literal 'quotes' the +// CPython getattro format strings put around each %U substitution. +func quoteU(s string) string { return "'" + s + "'" } + +// init wires the module-getattro error builder into the objects package +// so module attribute misses surface the import-system shadowing hints. +func init() { + objects.ModuleAttrErrorHook = moduleGetattrError +} diff --git a/imp/sysmodules.go b/imp/sysmodules.go index f82564207..44457f2e3 100644 --- a/imp/sysmodules.go +++ b/imp/sysmodules.go @@ -21,6 +21,36 @@ var ( func init() { objects.SysModulesGetter = func() *objects.Dict { return sysModules } + // Pin sys.modules as a cycle-collector root. CPython keeps it + // reachable through interp->modules; gopy holds it through this Go + // pointer, invisible to the refcount-based collector, so without + // this the module graph collapses and a singleton reachable only + // through its module __dict__ (for example + // _frozen_importlib._blocking_on, a self-cyclic _WeakValueDictionary) + // is reclaimed while still live. + // + // The sys.modules dict itself is never tracked (it is allocated + // during early init, before container tracking is live), so it is + // already an effective root the collector never reclaims. But that + // also means move_unreachable never traverses it, so the candidates + // it references are not re-floated. We walk its entries here and pin + // each direct target; move_unreachable's visit_reachable then pulls + // in the rest of the strongly-reachable closure (module __dict__, + // module globals, and so on). + // + // CPython: Python/gc.c:1430 gc_collect_main (interp->modules roots) + objects.GCStaticRootsHook = func(pin func(objects.Object)) { + sysModulesMu.RLock() + defer sysModulesMu.RUnlock() + tr := sysModules.Type().TpTraverse + if tr == nil { + return + } + _ = tr(sysModules, func(o objects.Object) error { + pin(o) + return nil + }) + } } // SysModules returns the dict backing sys.modules. The same pointer is diff --git a/imp/writepyc.go b/imp/writepyc.go new file mode 100644 index 000000000..ca5e7ff4a --- /dev/null +++ b/imp/writepyc.go @@ -0,0 +1,294 @@ +// Bytecode-cache writing for the source loaders. After a .py file is +// compiled, SourceFileLoader.exec_module writes the resulting code +// object to a PEP 3147 __pycache__/..pyc file so the next +// import skips recompilation. gopy's import runs Go-side, so the write +// path is reimplemented here against the marshal .pyc writer. +// +// CPython: Lib/importlib/_bootstrap_external.py:1129 SourceFileLoader.get_code +// CPython: Lib/importlib/_bootstrap_external.py:1185 SourceFileLoader.set_data +package imp + +import ( + "bytes" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/tamnd/gopy/marshal" + "github.com/tamnd/gopy/objects" +) + +// pycacheDir is the PEP 3147 cache subdirectory name. +// +// CPython: Lib/importlib/_bootstrap_external.py:60 _PYCACHE +const pycacheDir = "__pycache__" + +// isFrozenBootstrapSource reports whether sourcePath is one of the two +// importlib bootstrap modules CPython freezes (importlib._bootstrap and +// importlib._bootstrap_external). Those are never byte-compiled to a .pyc +// in CPython, so gopy excludes them from the bytecode cache to keep their +// "" co_filename intact. A cached +// .pyc would be rewritten to the real disk path by fixCoFilename, leaving +// the import-machinery frames un-trimmable by remove_importlib_frames. +// +// CPython: Python/pylifecycle.c:1041 init_importlib (frozen modules) +func isFrozenBootstrapSource(sourcePath string) bool { + return strings.HasSuffix(sourcePath, "importlib/_bootstrap.py") || + strings.HasSuffix(sourcePath, "importlib/_bootstrap_external.py") +} + +// dontWriteBytecode reports sys.dont_write_bytecode. When True the +// source loaders skip the cache write entirely, exactly like CPython's +// SourceFileLoader.get_code (the `not sys.dont_write_bytecode` guard). +// +// CPython: Lib/importlib/_bootstrap_external.py:1167 source_to_code cache guard +func dontWriteBytecode() bool { + sysMod, ok := GetModule("sys") + if !ok { + return true + } + v, err := objects.GetAttr(sysMod, objects.NewStr("dont_write_bytecode")) + if err != nil { + return true + } + return objects.IsTrue(v) +} + +// cacheTag returns sys.implementation.cache_tag, the per-interpreter +// bytecode-cache discriminator (e.g. "gopy-3140"). The empty string +// signals a missing tag, in which case the caller skips caching the +// same way cache_from_source raises NotImplementedError. +// +// CPython: Lib/importlib/_bootstrap_external.py:480 cache_from_source (tag read) +func cacheTag() string { + sysMod, ok := GetModule("sys") + if !ok { + return "" + } + impl, err := objects.GetAttr(sysMod, objects.NewStr("implementation")) + if err != nil { + return "" + } + tag, err := objects.GetAttr(impl, objects.NewStr("cache_tag")) + if err != nil { + return "" + } + t, ok := tag.(*objects.Unicode) + if !ok { + return "" + } + return t.Value() +} + +// pycachePrefix returns sys.pycache_prefix as (value, set). When set, +// caches live under that root directory mirroring the source's absolute +// path instead of an adjacent __pycache__. +// +// CPython: Lib/importlib/_bootstrap_external.py:490 cache_from_source (prefix branch) +func pycachePrefix() (string, bool) { + sysMod, ok := GetModule("sys") + if !ok { + return "", false + } + v, err := objects.GetAttr(sysMod, objects.NewStr("pycache_prefix")) + if err != nil || objects.IsNone(v) { + return "", false + } + p, ok := v.(*objects.Unicode) + if !ok { + return "", false + } + return p.Value(), true +} + +// cacheFromSource computes the .pyc path for a source file, matching +// importlib.util.cache_from_source so the path gopy writes is the same +// one spec_from_file_location records as __cached__ and the loader reads +// back. Only the optimization=” (sys.flags.optimize == 0) case is +// produced; gopy never runs at -O. +// +// CPython: Lib/importlib/_bootstrap_external.py:466 cache_from_source +func cacheFromSource(sourcePath string) string { + tag := cacheTag() + if tag == "" { + return "" + } + head, tail := filepath.Split(sourcePath) + base := tail + sep := "" + if dot := strings.LastIndex(tail, "."); dot >= 0 { + base, sep = tail[:dot], "." + if base == "" { + // A leading-dot name like ".pyc" keeps the whole tail as base. + base = tail + sep = "" + } + } + almost := base + sep + tag + filename := almost + ".pyc" + if prefix, ok := pycachePrefix(); ok { + // CPython rebuilds the source's absolute directory under the prefix, + // dropping the volume separator so the tree nests cleanly. + absHead, err := filepath.Abs(head) + if err != nil { + absHead = head + } + absHead = strings.TrimPrefix(absHead, string(filepath.Separator)) + return filepath.Join(prefix, absHead, filename) + } + return filepath.Join(filepath.Clean(head), pycacheDir, filename) +} + +// readBytecodeCache returns the cached code object for sourcePath when a +// fresh, valid .pyc exists under __pycache__. "Fresh" means the .pyc +// magic matches and its timestamp-mode header records exactly the +// source's current mtime and size, the same staleness test +// SourceFileLoader.get_code applies before trusting the cache. A hash- +// mode .pyc (PEP 552) is only trusted when its hash bit is unchecked; +// any other condition (missing, stale, unreadable, checked-hash) returns +// ok=false so the caller recompiles from source. +// +// CPython: Lib/importlib/_bootstrap_external.py:1129 SourceFileLoader.get_code +// CPython: Lib/importlib/_bootstrap_external.py:585 _validate_timestamp_pyc +func readBytecodeCache(sourcePath string) (*objects.Code, bool) { + if isFrozenBootstrapSource(sourcePath) { + // CPython freezes importlib._bootstrap[_external] and never loads + // them from a .pyc, so their code objects keep the synthetic + // "" co_filename for the + // life of the process. gopy loads them from source instead; reading + // a cached .pyc would route through fixCoFilename below and rewrite + // that co_filename to the real disk path, leaving the import-machinery + // frames un-trimmable by remove_importlib_frames. Skip the cache so + // the source compiler stamps the frozen name every time. + // + // CPython: Python/import.c:3500 remove_importlib_frames (frozen names) + return nil, false + } + dest := cacheFromSource(sourcePath) + if dest == "" { + return nil, false + } + info, err := os.Stat(sourcePath) + if err != nil { + return nil, false + } + f, err := os.Open(dest) //nolint:gosec // dest is cacheFromSource of a trusted source path. + if err != nil { + return nil, false + } + defer f.Close() + code, hdr, err := marshal.ReadPyc(f) + if err != nil { + return nil, false + } + if hdr.Flags&0x1 != 0 { + // Hash-based .pyc: an unchecked-hash cache is trusted unconditionally, + // a checked-hash cache would need the source hash recomputed, which + // the timestamp fast path does not do, so fall back to recompiling. + // + // CPython: Lib/importlib/_bootstrap_external.py:609 _validate_hash_pyc + if hdr.Flags&0x2 != 0 { + return code, true + } + return nil, false + } + mtime := uint32(info.ModTime().Unix()) + size := uint32(info.Size()) + if hdr.Mtime != mtime || hdr.SourceSize != size { + return nil, false + } + // The cached code object carries whatever co_filename it was compiled + // with (py_compile's dfile can differ from the real source). When the + // source still exists the loader rewrites co_filename to the actual + // path, recursing into nested code consts, exactly like _compile_bytecode + // calling _imp._fix_co_filename. + // + // CPython: Lib/importlib/_bootstrap_external.py:809 _compile_bytecode + // CPython: Python/import.c:1276 _imp__fix_co_filename_impl + fixCoFilename(code, code.Filename, sourcePath) + return code, true +} + +// fixCoFilename rewrites co_filename on code and every nested code const +// whose filename matches oldname, mirroring CPython's recursive +// update_code_filenames. Only matching consts are touched so that a code +// object compiled against a different file is left alone. +// +// CPython: Python/import.c:1243 update_code_filenames +func fixCoFilename(code *objects.Code, oldname, newname string) { + if code.Filename != oldname { + return + } + code.Filename = newname + for _, c := range code.Consts { + if nested, ok := c.(*objects.Code); ok { + fixCoFilename(nested, oldname, newname) + } + } + code.SyncConstObjs() +} + +// writeBytecodeCache writes code to the .pyc cache for sourcePath unless +// sys.dont_write_bytecode is set. The header records the source file's +// mtime and size so a stale cache is detected on the next import. A +// write failure is swallowed: CPython's set_data treats a NotADirectory +// or permission error as non-fatal (the import still succeeds from +// source), and so does gopy. +// +// CPython: Lib/importlib/_bootstrap_external.py:1167 get_code (cache write) +// CPython: Lib/importlib/_bootstrap_external.py:1185 set_data (atomic write) +func writeBytecodeCache(sourcePath string, code *objects.Code) { + if dontWriteBytecode() || isFrozenBootstrapSource(sourcePath) { + return + } + dest := cacheFromSource(sourcePath) + if dest == "" { + return + } + info, err := os.Stat(sourcePath) + if err != nil { + return + } + mtime := uint32(info.ModTime().Unix()) + size := uint32(info.Size()) + + var buf bytes.Buffer + if err := marshal.WritePyc(&buf, code, mtime, size); err != nil { + return + } + // 0o777 is CPython's makedirs mode for __pycache__; the umask narrows it. + // CPython: Lib/importlib/_bootstrap_external.py source_to_cache makedirs. + if err := os.MkdirAll(filepath.Dir(dest), 0o777); err != nil { //nolint:gosec // CPython __pycache__ mode, umask-narrowed + return + } + // The cache inherits the source's permission bits plus write access, so a + // read-only .py still yields a rewritable .pyc. + // + // CPython: Lib/importlib/_bootstrap_external.py:438 _calc_mode + mode := info.Mode().Perm() | 0o200 + + // Write atomically the way _write_atomic does: a uniquely-suffixed temp + // file in the cache directory opened O_EXCL with the computed mode, then + // rename over the target. The temp name is keyed off the pid so concurrent + // writers do not collide. + // + // CPython: Lib/importlib/_bootstrap_external.py:184 _write_atomic + tmp := dest + "." + strconv.Itoa(os.Getpid()) + ".tmp" + f, err := os.OpenFile(tmp, os.O_EXCL|os.O_CREATE|os.O_WRONLY, mode&0o666) //nolint:gosec // tmp derives from a trusted cache path. + if err != nil { + return + } + if _, err := f.Write(buf.Bytes()); err != nil { + _ = f.Close() + _ = os.Remove(tmp) + return + } + if err := f.Close(); err != nil { + _ = os.Remove(tmp) + return + } + if err := os.Rename(tmp, dest); err != nil { + _ = os.Remove(tmp) + } +} diff --git a/initconfig/config_get.go b/initconfig/config_get.go new file mode 100644 index 000000000..be0201f9e --- /dev/null +++ b/initconfig/config_get.go @@ -0,0 +1,163 @@ +package initconfig + +import "sort" + +// ConfigMemberType mirrors the PyConfigMemberType enum: the storage +// class of a PyConfig field, which decides how config_get turns the raw +// member into a Python object. +// +// CPython: Python/initconfig.c:60 PyConfigMemberType +type ConfigMemberType int + +const ( + ConfigMemberInt ConfigMemberType = iota + ConfigMemberUint + ConfigMemberBool + ConfigMemberULong + ConfigMemberWStr + ConfigMemberWStrOpt + ConfigMemberWStrList +) + +// configSpec is one row of PYCONFIG_SPEC: the option name, its member +// type, the sys attribute config_get delegates to when use_sys is set +// (empty for NO_SYS / SYS_FLAG rows), and a reader that pulls the raw +// member out of a PyConfig. gopy only lists the rows whose members the +// v0.x PyConfig subset actually models; the scoped-out fields documented +// on PyConfig (tracemalloc, dump_refs, perf_profiling, ...) are absent +// here exactly as they are absent from the struct, so config_get reports +// them as unknown names until their subsystems land. +// +// CPython: Python/initconfig.c:105 PYCONFIG_SPEC +type configSpec struct { + name string + typ ConfigMemberType + sysAttr string + get func(c *PyConfig) any +} + +// pyconfigSpec is the gopy port of PYCONFIG_SPEC. Rows preserve the +// CPython option names, member types, and SYS_ATTR delegations. +// +// CPython: Python/initconfig.c:105 PYCONFIG_SPEC +var pyconfigSpec = []configSpec{ + // --- Public options --- + {"argv", ConfigMemberWStrList, "argv", func(c *PyConfig) any { return c.Argv }}, + {"base_exec_prefix", ConfigMemberWStrOpt, "base_exec_prefix", func(c *PyConfig) any { return c.BaseExecPrefix }}, + {"base_executable", ConfigMemberWStrOpt, "_base_executable", func(c *PyConfig) any { return c.BaseExecutable }}, + {"base_prefix", ConfigMemberWStrOpt, "base_prefix", func(c *PyConfig) any { return c.BasePrefix }}, + {"bytes_warning", ConfigMemberUint, "", func(c *PyConfig) any { return c.BytesWarning }}, + {"exec_prefix", ConfigMemberWStrOpt, "exec_prefix", func(c *PyConfig) any { return c.ExecPrefix }}, + {"executable", ConfigMemberWStrOpt, "executable", func(c *PyConfig) any { return c.Executable }}, + {"inspect", ConfigMemberBool, "", func(c *PyConfig) any { return c.Inspect }}, + {"int_max_str_digits", ConfigMemberUint, "", func(c *PyConfig) any { return c.IntMaxStrDigits }}, + {"interactive", ConfigMemberBool, "", func(c *PyConfig) any { return c.Interactive }}, + {"module_search_paths", ConfigMemberWStrList, "path", func(c *PyConfig) any { return c.ModuleSearchPaths }}, + {"optimization_level", ConfigMemberUint, "", func(c *PyConfig) any { return c.OptimizationLevel }}, + {"parser_debug", ConfigMemberBool, "", func(c *PyConfig) any { return c.ParserDebug }}, + {"platlibdir", ConfigMemberWStr, "platlibdir", func(c *PyConfig) any { return c.Platlibdir }}, + {"prefix", ConfigMemberWStrOpt, "prefix", func(c *PyConfig) any { return c.Prefix }}, + {"pycache_prefix", ConfigMemberWStrOpt, "pycache_prefix", func(c *PyConfig) any { return c.PycachePrefix }}, + {"quiet", ConfigMemberBool, "", func(c *PyConfig) any { return c.Quiet }}, + {"stdlib_dir", ConfigMemberWStrOpt, "_stdlib_dir", func(c *PyConfig) any { return c.StdlibDir }}, + {"use_environment", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseEnvironment }}, + {"verbose", ConfigMemberUint, "", func(c *PyConfig) any { return c.Verbose }}, + {"warnoptions", ConfigMemberWStrList, "warnoptions", func(c *PyConfig) any { return c.WarnOptions }}, + {"write_bytecode", ConfigMemberBool, "", func(c *PyConfig) any { return c.WriteBytecode }}, + {"xoptions", ConfigMemberWStrList, "_xoptions", func(c *PyConfig) any { return c.XOptions }}, + + // --- Read-only options --- + {"buffered_stdio", ConfigMemberBool, "", func(c *PyConfig) any { return c.BufferedStdio }}, + {"check_hash_pycs_mode", ConfigMemberWStr, "", func(c *PyConfig) any { return c.checkHashPycsMode }}, + {"code_debug_ranges", ConfigMemberBool, "", func(c *PyConfig) any { return c.CodeDebugRanges }}, + {"configure_c_stdio", ConfigMemberBool, "", func(c *PyConfig) any { return c.ConfigureCStdio }}, + {"dev_mode", ConfigMemberBool, "", func(c *PyConfig) any { return c.DevMode }}, + {"filesystem_encoding", ConfigMemberWStr, "", func(c *PyConfig) any { return c.FilesystemEncoding }}, + {"filesystem_errors", ConfigMemberWStr, "", func(c *PyConfig) any { return c.FilesystemErrors }}, + {"hash_seed", ConfigMemberULong, "", func(c *PyConfig) any { return c.HashSeed }}, + {"home", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.Home }}, + {"import_time", ConfigMemberUint, "", func(c *PyConfig) any { return c.ImportTime }}, + {"install_signal_handlers", ConfigMemberBool, "", func(c *PyConfig) any { return c.InstallSignalHandlers }}, + {"isolated", ConfigMemberBool, "", func(c *PyConfig) any { return c.Isolated }}, + {"orig_argv", ConfigMemberWStrList, "orig_argv", func(c *PyConfig) any { return c.OrigArgv }}, + {"parse_argv", ConfigMemberBool, "", func(c *PyConfig) any { return c.ParseArgv }}, + {"pathconfig_warnings", ConfigMemberBool, "", func(c *PyConfig) any { return c.PathconfigWarnings }}, + {"program_name", ConfigMemberWStr, "", func(c *PyConfig) any { return c.ProgramName }}, + {"run_command", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunCommand }}, + {"run_filename", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunFilename }}, + {"run_module", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunModule }}, + {"safe_path", ConfigMemberBool, "", func(c *PyConfig) any { return c.SafePath }}, + {"site_import", ConfigMemberBool, "", func(c *PyConfig) any { return c.SiteImport }}, + {"skip_source_first_line", ConfigMemberBool, "", func(c *PyConfig) any { return c.SkipSourceFirstLine }}, + {"stdio_encoding", ConfigMemberWStr, "", func(c *PyConfig) any { return c.StdioEncoding }}, + {"stdio_errors", ConfigMemberWStr, "", func(c *PyConfig) any { return c.StdioErrors }}, + {"use_frozen_modules", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseFrozenModules }}, + {"use_hash_seed", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseHashSeed }}, + {"user_site_directory", ConfigMemberBool, "", func(c *PyConfig) any { return c.UserSiteDirectory }}, + {"warn_default_encoding", ConfigMemberBool, "", func(c *PyConfig) any { return c.WarnDefaultEncoding }}, + + // --- Init-only options --- + {"_init_main", ConfigMemberBool, "", func(c *PyConfig) any { return c.InitMain }}, + {"_install_importlib", ConfigMemberBool, "", func(c *PyConfig) any { return c.InstallImportlib }}, + {"module_search_paths_set", ConfigMemberBool, "", func(c *PyConfig) any { return c.ModuleSearchPathsSet }}, + {"pythonpath_env", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.PythonpathEnv }}, + {"sys_path_0", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.SysPath0 }}, +} + +// configFindSpec locates the PYCONFIG_SPEC row for name. +// +// CPython: Python/initconfig.c:4360 config_find_spec +func configFindSpec(name string) *configSpec { + for i := range pyconfigSpec { + if pyconfigSpec[i].name == name { + return &pyconfigSpec[i] + } + } + return nil +} + +// ConfigMember is the raw value of a config option plus the metadata +// config_get needs to wrap it: its member type and, when the option is +// exposed through sys, the sys attribute name to read instead. +// +// CPython: Python/initconfig.c:4378 config_get +type ConfigMember struct { + Value any + Type ConfigMemberType + SysAttr string +} + +// ConfigGet looks up name in PYCONFIG_SPEC and returns its raw member +// from c. The bool reports whether the name is a known config option; +// an unknown name maps to the "unknown config option name" ValueError +// the caller raises. +// +// This is the gopy split of config_get: this half resolves the spec and +// reads the raw member (config_find_spec + config_get_spec_member); the +// _testcapi layer wraps the member into a Python object and handles the +// use_sys delegation, exactly as config_get does once it has the member. +// +// CPython: Python/initconfig.c:4458 PyConfig_Get +func (c *PyConfig) ConfigGet(name string) (ConfigMember, bool) { + spec := configFindSpec(name) + if spec == nil { + return ConfigMember{}, false + } + return ConfigMember{ + Value: spec.get(c), + Type: spec.typ, + SysAttr: spec.sysAttr, + }, true +} + +// ConfigNames returns the sorted list of every known config option name. +// +// CPython: Modules/_testcapi/config.c:74 _testcapi_config_names +func ConfigNames() []string { + names := make([]string, len(pyconfigSpec)) + for i := range pyconfigSpec { + names[i] = pyconfigSpec[i].name + } + sort.Strings(names) + return names +} diff --git a/marshal/code.go b/marshal/code.go index 899b2ac2c..c1af528da 100644 --- a/marshal/code.go +++ b/marshal/code.go @@ -13,6 +13,7 @@ import ( "encoding/binary" "fmt" + "github.com/tamnd/gopy/monitor" "github.com/tamnd/gopy/objects" "github.com/tamnd/gopy/specialize" ) @@ -68,16 +69,20 @@ func marshalCode(enc *encoder, c *objects.Code, flag byte) error { } } // Mirror CPython's _PyCode_GetCode pre-write deopt: walk every - // codeunit and rewrite specialized opcodes back to their adaptive - // parent, then zero each trailing cache cell. Without this step a - // .pyc would carry whatever specialization state the in-memory Code - // happened to warm by marshal time, which is non-deterministic - // across runs and breaks byte-equality with the cpython oracle. + // codeunit and recover the base opcode, rewriting specialized + // opcodes back to their adaptive parent AND stripping the + // INSTRUMENTED_ markers (and the INSTRUMENTED_LINE side table) + // that sys.settrace / sys.monitoring leave in the live bytecode, + // then zero each trailing cache cell. Without this a .pyc would + // carry whatever specialization or monitoring state the in-memory + // Code happened to warm by marshal time: non-deterministic across + // runs, and on reload an INSTRUMENTED_LINE with no monitoring data + // behind it would dispatch a NOP in place of the real opcode. // specialize.Enable on unmarshalCode re-runs Quicken so adaptive // counters get reseeded on load. // // CPython: Objects/codeobject.c:2310 _PyCode_GetCode (deopts before write) - if err := enc.writeCachedBytes(specialize.DeoptCode(c.Code), true); err != nil { + if err := enc.writeCachedBytes(monitor.BaseCode(c), true); err != nil { return err } consts := make([]any, len(c.Consts)) @@ -169,7 +174,16 @@ func unmarshalCode(d *decoder) (*objects.Code, error) { if !ok { return nil, fmt.Errorf("marshal: code.code expected bytes, got %T", codeObj) } - c.Code = code + // PyCode_New copies co_code into the per-code co_code_adaptive + // buffer that specialization and instrumentation mutate in place; + // the immutable co_code bytes object is never touched. gopy keeps + // one slice for both roles, so it must own a private copy here. + // Otherwise marshal's reference table, which dedups byte-identical + // co_code across sibling functions, hands two code objects the same + // backing array and an in-place rewrite on one corrupts the other. + // + // CPython: Objects/codeobject.c:117 _PyCode_New (co_code_adaptive copy) + c.Code = append([]byte(nil), code...) // consts tuple constsObj, err := d.read() @@ -391,21 +405,30 @@ func boolCount(b bool) int { } // splitLocalsplusnames reconstructs varnames/cellvars/freevars from -// the wire-format combined array. +// the wire-format combined array. The three buckets are not disjoint: +// an argument that is also closed over by a nested function carries +// both CO_FAST_LOCAL and CO_FAST_CELL, and CPython lists it in both +// co_varnames and co_cellvars. Routing it to cellvars only would drop +// it from co_varnames and shift the argument-name slice the frame uses +// to report keyword-only arguments. Match get_localsplus_names: select +// each bucket by an independent bit test. +// +// CPython: Objects/codeobject.c:424 get_localsplus_names func splitLocalsplusnames(names []any, kinds []byte) (varnames []string, cellvars []string, freevars []string) { for i, n := range names { s, _ := n.(string) if i >= len(kinds) { break } - switch { - case kinds[i]&coFastFree != 0: - freevars = append(freevars, s) - case kinds[i]&coFastCell != 0: - cellvars = append(cellvars, s) - default: + if kinds[i]&coFastLocal != 0 { varnames = append(varnames, s) } + if kinds[i]&coFastCell != 0 { + cellvars = append(cellvars, s) + } + if kinds[i]&coFastFree != 0 { + freevars = append(freevars, s) + } } return varnames, cellvars, freevars } @@ -424,6 +447,16 @@ func splitLocalsplusnames(names []any, kinds []byte) (varnames []string, cellvar // CPython: Objects/codeobject.c:203 intern_constants // CPython: Python/marshal.c:391 w_ref interned check. func wrapConstStrings(v any) any { + // A code object that round-tripped through Python (marshal.load then + // code.replace) carries co_consts as objects.Object values rather than + // the native Go consts a freshly-compiled gopy Code holds. Normalize + // those to the native marshal value set first so the rest of this + // function (and writeBody) sees ints, strings, tuples and code objects. + if obj, ok := v.(objects.Object); ok { + if n, err := fromObject(obj); err == nil { + v = n + } + } switch x := v.(type) { case string: if shouldInternString(x) { diff --git a/marshal/marshal.go b/marshal/marshal.go index 309d49361..9813d4e2e 100644 --- a/marshal/marshal.go +++ b/marshal/marshal.go @@ -19,6 +19,7 @@ import ( "math/big" "unsafe" + "github.com/tamnd/gopy/ast" "github.com/tamnd/gopy/objects" ) @@ -75,6 +76,30 @@ const flagRef = 0x80 // CPython: Python/marshal.c WFERR_UNMARSHALLABLE var ErrUnmarshallable = errors.New("marshal: object cannot be marshaled") +// The three EOF sentinels mirror the EOFError messages CPython's r_object / +// r_byte / r_string raise when the wire data runs out. The marshal module +// surface maps them to EOFError, every other decode error to ValueError. +// +// CPython: Python/marshal.c:833 r_string ("marshal data too short") +// CPython: Python/marshal.c:916 r_byte ("EOF read where not expected") +// CPython: Python/marshal.c:1172 r_object ("EOF read where object expected") +var ( + ErrEOFObjectExpected = errors.New("EOF read where object expected") + ErrEOFNotExpected = errors.New("EOF read where not expected") + ErrDataTooShort = errors.New("marshal data too short") +) + +// IsEOF reports whether err is one of the marshal EOF sentinels (or a raw +// io.EOF / io.ErrUnexpectedEOF that escaped conversion). The module surface +// uses it to choose EOFError over ValueError. +func IsEOF(err error) bool { + return errors.Is(err, ErrEOFObjectExpected) || + errors.Is(err, ErrEOFNotExpected) || + errors.Is(err, ErrDataTooShort) || + errors.Is(err, io.EOF) || + errors.Is(err, io.ErrUnexpectedEOF) +} + // Dump writes v to w in the version-5 wire format. // // CPython: Python/marshal.c PyMarshal_WriteObjectToFile @@ -330,6 +355,14 @@ func (e *encoder) write(v any) error { } return e.writeByte(typeFalse) } + // The Ellipsis singleton (the `...` const) is short-circuited before + // the FLAG_REF memo, exactly like None / True / False. gopy spells the + // const as ast.EllipsisType; the runtime ellipsis object maps here too. + // + // CPython: Python/marshal.c:476 w_object (v == Py_Ellipsis) + if isEllipsisValue(v) { + return e.writeByte(typeEllipsis) + } e.depth++ defer func() { e.depth-- }() @@ -560,7 +593,16 @@ func (b *byteReader) ReadByte() (byte, error) { } func (d *decoder) readByte() (byte, error) { - return d.r.ReadByte() + b, err := d.r.ReadByte() + if err != nil { + // CPython's r_byte raises EOFError "EOF read where not expected". + // CPython: Python/marshal.c:916 r_byte + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return 0, ErrEOFNotExpected + } + return 0, err + } + return b, nil } func (d *decoder) readN(n int) ([]byte, error) { @@ -568,6 +610,12 @@ func (d *decoder) readN(n int) ([]byte, error) { for i := 0; i < n; i++ { b, err := d.r.ReadByte() if err != nil { + // CPython reads byte strings through r_string, which raises + // EOFError "marshal data too short" when the buffer underruns. + // CPython: Python/marshal.c:833 r_string + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return nil, ErrDataTooShort + } return nil, err } out[i] = b @@ -598,6 +646,13 @@ func (d *decoder) readInt64() (int64, error) { func (d *decoder) read() (any, error) { tag, err := d.readByte() if err != nil { + // r_object reads the type code first; an EOF here is reported as + // "EOF read where object expected", distinct from r_byte's own + // "EOF read where not expected" used mid-object. + // CPython: Python/marshal.c:1172 r_object + if errors.Is(err, ErrEOFNotExpected) || errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return nil, ErrEOFObjectExpected + } return nil, err } @@ -632,6 +687,13 @@ func (d *decoder) decodeTag(tag byte) (any, error) { return true, nil case typeFalse: return false, nil + case typeEllipsis: + // Decode to ast.EllipsisType, the same `...` const the compiler + // emits, so a marshaled-then-loaded code object round-trips + // identically to a freshly compiled one. + // + // CPython: Python/marshal.c r_object TYPE_ELLIPSIS + return ast.Ellipsis, nil case typeInt: v, err := d.readInt32() return int64(v), err @@ -835,12 +897,31 @@ func toObject(v any) (objects.Object, error) { return objects.NewFloat(x), nil case string: return objects.NewStr(x), nil + case ast.EllipsisType: + return objects.Ellipsis(), nil case objects.Object: return x, nil } return nil, fmt.Errorf("marshal: cannot convert %T to Object", v) } +// isEllipsisValue reports whether v is the marshalable Ellipsis const, +// in either of the two spellings gopy uses: the compiler emits the +// ast.EllipsisType node for a `...` literal, while a code object built +// at runtime (e.g. via code.replace) may carry the runtime ellipsis +// singleton instead. Both serialize to TYPE_ELLIPSIS. +// +// CPython: Python/marshal.c:476 w_object (v == Py_Ellipsis) +func isEllipsisValue(v any) bool { + if _, ok := v.(ast.EllipsisType); ok { + return true + } + if obj, ok := v.(objects.Object); ok { + return obj == objects.Ellipsis() + } + return false +} + // fromObject converts an objects.Object back to a plain Go marshal // value so that set and frozenset items can pass through write(). func fromObject(obj objects.Object) (any, error) { @@ -859,6 +940,20 @@ func fromObject(obj objects.Object) (any, error) { return x, nil case *objects.Code: return x, nil + case *objects.Complex: + return x.Complex128(), nil + case *objects.Bytes: + return x.Bytes(), nil + case *objects.Tuple: + out := make([]any, x.Len()) + for i := 0; i < x.Len(); i++ { + n, err := fromObject(x.Item(i)) + if err != nil { + return nil, err + } + out[i] = n + } + return out, nil } // None and str use unexported concrete types; dispatch via type slots. if obj.Type() == objects.NoneType() { diff --git a/module/_collections/module.go b/module/_collections/module.go index f2411a2da..9eab9fd8a 100644 --- a/module/_collections/module.go +++ b/module/_collections/module.go @@ -1486,7 +1486,18 @@ func defaultDictGetItem(o, key objects.Object) (objects.Object, error) { if err == nil { return v, nil } - // Key absent: call __missing__. + // Key absent: dict_subscript looks up __missing__ at the type level, so + // a defaultdict subclass that overrides it (and declines to insert) is + // honoured instead of always running defdict_missing. + // + // CPython: Objects/dictobject.c:2229 dict_subscript + missingFn, merr := objects.LookupSpecial(o, "__missing__") + if merr != nil { + return nil, merr + } + if missingFn != nil { + return objects.CallOneArg(missingFn, key) + } res, merr := defaultDictMissing([]objects.Object{o, key}, nil) if merr != nil { return nil, merr diff --git a/module/_elementtree/module.go b/module/_elementtree/module.go index 990f92439..3614269ae 100644 --- a/module/_elementtree/module.go +++ b/module/_elementtree/module.go @@ -35,7 +35,27 @@ func init() { var parseErrorType *objects.Type func init() { - parseErrorType = objects.NewType("ParseError", []*objects.Type{errors.PyExc_SyntaxError}) + // PyErr_NewException builds the class via type(name, (base,), dict), + // so the new type runs through inherit_slots and picks up the base's + // tp_new / tp_init / tp_str. NewExcType wires the standard exception + // slots; copying SyntaxError's TpNew on top mirrors inherit_slots + // adopting the base's tp_new (a bare objects.NewType leaves TpNew nil, + // and type_call then refuses construction with "cannot create + // 'ParseError' instances directly"). + // + // CPython: Modules/_elementtree.c:4505 PyErr_NewException + // CPython: Objects/typeobject.c:7521 inherit_slots (tp_new slot) + parseErrorType = errors.NewExcType("ParseError", []*objects.Type{errors.PyExc_SyntaxError}) + parseErrorType.TpNew = errors.PyExc_SyntaxError.TpNew + parseErrorType.Str = errors.PyExc_SyntaxError.Str + // PyErr_NewException splits the dotted name "xml.etree.ElementTree.ParseError" + // at the last dot: the prefix becomes __module__ and the leaf becomes the + // class name. traceback.format_exception_only qualifies the printed type as + // __module__ + '.' + __qualname__, so the module must be set here for the + // exception to render as "xml.etree.ElementTree.ParseError". + // + // CPython: Python/errors.c:911 PyErr_NewExceptionWithDoc (dotted-name split) + parseErrorType.Module = "xml.etree.ElementTree" } // buildModule constructs the _elementtree module dict. diff --git a/module/_functools/module.go b/module/_functools/module.go index acd0db4f1..be72bc74a 100644 --- a/module/_functools/module.go +++ b/module/_functools/module.go @@ -223,6 +223,11 @@ func newPartialType() *objects.Type { return objects.NewGenericAlias(cls, args[1]), nil }), )) + // Expose __get__ so partial registers as a method descriptor, matching + // add_operators installing the tp_descr_get wrapper for the C type. + // + // CPython: Objects/typeobject.c add_operators (tp_descr_get row) + objects.AddDescriptorSlotWrappers(t) return t } @@ -1376,6 +1381,12 @@ func newLruCacheWrapperType() *objects.Type { return objects.GetAttr(args[0], objects.NewStr("__qualname__")) }, )) + // Expose __get__ so inspect.ismethoddescriptor (and the descriptor + // attribute path) recognises the wrapper, matching add_operators + // installing the tp_descr_get wrapper for the C lru_cache type. + // + // CPython: Objects/typeobject.c add_operators (tp_descr_get row) + objects.AddDescriptorSlotWrappers(t) return t } diff --git a/module/_imp/module.go b/module/_imp/module.go index 841419443..a7b6c393a 100644 --- a/module/_imp/module.go +++ b/module/_imp/module.go @@ -1,22 +1,31 @@ // Package _imp is the gopy port of CPython's Modules/_imp module (the -// builtin half lives in Python/import.c). Only the slice consumed by -// the vendored importlib._bootstrap_external is materialized: +// builtin half lives in Python/import.c). It materializes the surface +// the vendored importlib._bootstrap / _bootstrap_external drive: // // - source_hash(key, source) Python/import.c:4869 // - pyc_magic_number_token (int) Python/import.c:4926 // - check_hash_based_pycs (str) Python/import.c:4920 +// - extension_suffixes() Python/import.c:4807 +// - find_frozen / get_frozen_object Python/import.c:4660 / 4592 +// - is_frozen / is_frozen_package Python/import.c:4720 / 4700 +// - create_builtin / exec_builtin Python/import.c:4488 / 4540 +// - create_dynamic / exec_dynamic Python/import.c:4380 / 4440 +// - _fix_co_filename Python/import.c:4318 // -// The rest of the C module (lock_held, find_frozen, create_builtin, -// ...) is intentionally absent — gopy's own imp package already serves -// those roles and importlib does not need _imp to reach them. +// The frozen / builtin entries bridge to gopy's own imp package (the +// frozen table and the inittab), which is the real store for those +// modules. create_dynamic / exec_dynamic raise ImportError: gopy cannot +// load CPython C extension shared objects. // // CPython: Python/import.c:4943 imp_module package _imp import ( + "bytes" "encoding/binary" "fmt" + pyerrors "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/hash" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/marshal" @@ -70,44 +79,461 @@ func buildModule() (*objects.Module, error) { })); err != nil { return nil, err } - // is_builtin / is_frozen: gopy has no frozen/builtin import path, so - // both report negative. + // is_builtin(name): 1 when name is in the inittab, else 0. (-1 for a + // loaded-builtin-on-the-frozen-path edge case never arises here.) // - // CPython: Python/import.c:4943 imp_module + // CPython: Python/import.c:4720 _imp_is_builtin_impl if err := d.SetItem(objects.NewStr("is_builtin"), - objects.NewBuiltinFunction("is_builtin", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.NewInt(0), nil - })); err != nil { + objects.NewBuiltinFunction("is_builtin", isBuiltin)); err != nil { return nil, err } + // is_frozen(name): True when name is a frozen module with embedded + // bytecode. + // + // CPython: Python/import.c:4740 _imp_is_frozen_impl if err := d.SetItem(objects.NewStr("is_frozen"), - objects.NewBuiltinFunction("is_frozen", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.NewBool(false), nil + objects.NewBuiltinFunction("is_frozen", isFrozen)); err != nil { + return nil, err + } + // extension_suffixes(): gopy cannot dynamically load CPython C + // extension shared objects, so the list of extension suffixes is + // empty. ExtensionFileLoader is therefore never wired to any suffix + // in _bootstrap_external._setup. + // + // CPython: Python/import.c:4807 _imp_extension_suffixes_impl + if err := d.SetItem(objects.NewStr("extension_suffixes"), + objects.NewBuiltinFunction("extension_suffixes", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + suffixes := imp.ExtensionSuffixes() + items := make([]objects.Object, len(suffixes)) + for i, s := range suffixes { + items[i] = objects.NewStr(s) + } + return objects.NewList(items), nil + })); err != nil { + return nil, err + } + // find_frozen / get_frozen_object / is_frozen_package bridge to + // gopy's frozen module table (imp/frozen.go). + // + // CPython: Python/import.c:4660 _imp_find_frozen_impl + // CPython: Python/import.c:4592 _imp_get_frozen_object_impl + // CPython: Python/import.c:4700 _imp_is_frozen_package_impl + if err := d.SetItem(objects.NewStr("find_frozen"), + objects.NewBuiltinFunction("find_frozen", findFrozen)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("get_frozen_object"), + objects.NewBuiltinFunction("get_frozen_object", getFrozenObject)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("is_frozen_package"), + objects.NewBuiltinFunction("is_frozen_package", isFrozenPackage)); err != nil { + return nil, err + } + // create_builtin / exec_builtin bridge to the inittab. gopy's + // initfunc builds a fully-initialized module in one step, so + // create_builtin runs it and exec_builtin is a no-op. + // + // CPython: Python/import.c:4488 _imp_create_builtin + // CPython: Python/import.c:4540 _imp_exec_builtin_impl + if err := d.SetItem(objects.NewStr("create_builtin"), + objects.NewBuiltinFunction("create_builtin", createBuiltin)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("exec_builtin"), + objects.NewBuiltinFunction("exec_builtin", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + return objects.NewInt(0), nil })); err != nil { return nil, err } + // create_dynamic / exec_dynamic: gopy cannot load CPython C + // extension shared objects. Match CPython's failure shape with an + // ImportError rather than silently succeeding. + // + // CPython: Python/import.c:4380 _imp_create_dynamic_impl + // CPython: Python/import.c:4440 _imp_exec_dynamic_impl + if err := d.SetItem(objects.NewStr("create_dynamic"), + objects.NewBuiltinFunction("create_dynamic", createDynamic)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("exec_dynamic"), + objects.NewBuiltinFunction("exec_dynamic", execDynamic)); err != nil { + return nil, err + } + // _fix_co_filename(code, path): rewrite co_filename on a code object + // (and its nested code consts) in place. + // + // CPython: Python/import.c:4318 _imp__fix_co_filename_impl + if err := d.SetItem(objects.NewStr("_fix_co_filename"), + objects.NewBuiltinFunction("_fix_co_filename", fixCoFilename)); err != nil { + return nil, err + } // _override_frozen_modules_for_tests / _override_multi_interp_extensions_check: - // test.support.import_helper toggles these around test runs. gopy - // keeps them as no-ops returning a sentinel int matching CPython's - // previous-value convention. + // test.support.import_helper toggles these around test runs. + // _override_frozen_modules_for_tests records the override that + // use_frozen() consults (>0 on, <0 off, 0 default) and returns the + // previous value, matching the C impl. // // CPython: Python/import.c:5034 _imp__override_frozen_modules_for_tests_impl // CPython: Python/import.c:5052 _imp__override_multi_interp_extensions_check_impl if err := d.SetItem(objects.NewStr("_override_frozen_modules_for_tests"), - objects.NewBuiltinFunction("_override_frozen_modules_for_tests", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.None(), nil + objects.NewBuiltinFunction("_override_frozen_modules_for_tests", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + override, err := signedIntArg(args, "_override_frozen_modules_for_tests") + if err != nil { + return nil, err + } + return objects.NewInt(int64(imp.SetFrozenOverride(override))), nil })); err != nil { return nil, err } if err := d.SetItem(objects.NewStr("_override_multi_interp_extensions_check"), - objects.NewBuiltinFunction("_override_multi_interp_extensions_check", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.NewInt(0), nil + objects.NewBuiltinFunction("_override_multi_interp_extensions_check", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + override, err := signedIntArg(args, "_override_multi_interp_extensions_check") + if err != nil { + return nil, err + } + return objects.NewInt(int64(imp.SetMultiInterpOverride(override))), nil })); err != nil { return nil, err } return m, nil } +// nameArg pulls a single str positional out of args for the frozen / +// builtin query functions, which all take exactly one module name. +func nameArg(fn string, args []objects.Object) (string, error) { + if len(args) < 1 { + return "", fmt.Errorf("TypeError: %s() missing required argument", fn) + } + u, ok := args[0].(*objects.Unicode) + if !ok { + return "", fmt.Errorf("TypeError: %s() argument must be str, not '%T'", fn, args[0]) + } + return u.Value(), nil +} + +// signedIntArg pulls a single int positional out of args for the +// override toggles, which take one C int. A missing argument defaults +// to 0 (the "use default" override state). +func signedIntArg(args []objects.Object, fn string) (int, error) { + if len(args) < 1 { + return 0, nil + } + v, ok := args[0].(*objects.Int) + if !ok { + return 0, fmt.Errorf("TypeError: %s() argument must be int, not '%T'", fn, args[0]) + } + n, _ := v.Int64() + return int(n), nil +} + +// isBuiltin implements _imp.is_builtin(name). +// +// CPython: Python/import.c:4720 _imp_is_builtin_impl +func isBuiltin(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_builtin", args) + if err != nil { + return nil, err + } + if imp.IsBuiltinName(name) { + return objects.NewInt(1), nil + } + return objects.NewInt(0), nil +} + +// isFrozen implements _imp.is_frozen(name): True only when the name has +// embedded bytecode (a placeholder entry with nil Code is not frozen). +// +// CPython: Python/import.c:4740 _imp_is_frozen_impl +func isFrozen(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_frozen", args) + if err != nil { + return nil, err + } + if !imp.UseFrozen() { + return objects.NewBool(false), nil + } + fm, ok := imp.FindFrozen(name) + return objects.NewBool(ok && fm.HasCode()), nil +} + +// isFrozenPackage implements _imp.is_frozen_package(name). +// +// CPython: Python/import.c:4700 _imp_is_frozen_package_impl +func isFrozenPackage(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_frozen_package", args) + if err != nil { + return nil, err + } + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + return objects.NewBool(fm.IsPackage), nil +} + +// findFrozen implements _imp.find_frozen(name, *, withdata=False). It +// returns a 3-tuple (data, is_package, origname) or None. gopy stores +// frozen modules as code objects, not marshalled blobs, so the data +// slot is always None (FrozenImporter.find_spec discards it and fetches +// the code later via get_frozen_object). +// +// CPython: Python/import.c:4660 _imp_find_frozen_impl +func findFrozen(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("find_frozen", args) + if err != nil { + return nil, err + } + if !imp.UseFrozen() { + return objects.None(), nil + } + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return objects.None(), nil + } + origname, isNone := fm.Origin() + var origObj objects.Object = objects.None() + if !isNone { + origObj = objects.NewStr(origname) + } + return objects.NewTuple([]objects.Object{ + objects.None(), + objects.NewBool(fm.IsPackage), + origObj, + }), nil +} + +// getFrozenObject implements _imp.get_frozen_object(name, data=None). It +// returns the frozen module's code object. +// +// CPython: Python/import.c:4592 _imp_get_frozen_object_impl +func getFrozenObject(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("get_frozen_object", args) + if err != nil { + return nil, err + } + + // When an explicit data buffer is supplied, CPython unmarshals it + // directly rather than consulting the frozen table; a buffer that does + // not decode to a code object raises ImportError "... is invalid". + if len(args) >= 2 && !objects.IsNone(args[1]) { + data, err := toBuffer(args[1]) + if err != nil { + return nil, fmt.Errorf("TypeError: get_frozen_object() argument 2 must be bytes, not '%T'", args[1]) + } + return unmarshalFrozenData(args[0], data) + } + + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + code, err := fm.CodeObject() + if err != nil { + return nil, err + } + if code == nil { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + return code, nil +} + +// unmarshalFrozenData ports unmarshal_frozen_code for the explicit-data +// path of get_frozen_object: an empty or non-code or undecodable buffer +// raises ImportError "Frozen object named %R is invalid" (a non-code +// object that decodes cleanly raises TypeError instead). +// +// CPython: Python/import.c unmarshal_frozen_code / set_frozen_error +func unmarshalFrozenData(nameObj objects.Object, data []byte) (objects.Object, error) { + nameRepr, rerr := objects.Repr(nameObj) + if rerr != nil { + return nil, rerr + } + if len(data) == 0 { + return nil, fmt.Errorf("ImportError: Frozen object named %s is invalid", nameRepr) + } + obj, err := marshal.Load(bytes.NewReader(data)) + if err != nil { + return nil, fmt.Errorf("ImportError: Frozen object named %s is invalid", nameRepr) + } + code, ok := obj.(*objects.Code) + if !ok { + return nil, fmt.Errorf("TypeError: frozen object %s is not a code object", nameRepr) + } + return code, nil +} + +// createDynamic implements _imp.create_dynamic(spec, file=None). gopy +// cannot load CPython C extension shared objects, so the load itself +// fails with ImportError. The spec.name / spec.origin validation that +// _Py_ext_module_loader_info_init_from_spec performs still runs first, +// so a name or origin with an embedded null raises ValueError exactly as +// CPython does before the unsupported-load failure. +// +// CPython: Python/import.c:4743 _imp_create_dynamic_impl +// CPython: Python/importdl.c:115 _Py_ext_module_loader_info_init +func createDynamic(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: create_dynamic() missing required argument 'spec'") + } + spec := args[0] + + nameObj, err := objects.GetAttr(spec, objects.NewStr("name")) + if err != nil { + return nil, err + } + nameStr, ok := nameObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: module name must be a string") + } + if err := checkEmbeddedNull(nameStr.Value()); err != nil { + return nil, err + } + + originObj, err := objects.GetAttr(spec, objects.NewStr("origin")) + if err != nil { + return nil, err + } + origin := "" + if !objects.IsNone(originObj) { + originStr, ok := originObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: module filename must be a string") + } + if err := checkEmbeddedNull(originStr.Value()); err != nil { + return nil, err + } + origin = originStr.Value() + } + + // gopy compiles its extension modules into the runtime as Go builtins + // rather than dlopening a shared object. When the spec names a + // registered extension, run its Init (the create+exec phases) behind the + // PEP 489 multiple-interpreters compat check; otherwise fall back to the + // "cannot load a C extension" ImportError. + mod, found, err := imp.CreateExtModule(nameStr.Value(), origin) + if err != nil { + return nil, err + } + if found { + return mod, nil + } + + // No registered extension exposes this name. CPython reaches here after + // dlopen finds the shared object but no PyInit_ symbol, raising + // ImportError with the missing name stamped on the exception so callers + // can read exc.name. + // + // CPython: Python/importdl.c:178 _PyImport_LoadDynamicModuleWithSpec + msg := fmt.Sprintf("dynamic module does not define module export function (PyInit_%s)", nameStr.Value()) + exc := pyerrors.New(pyerrors.PyExc_ImportError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + d := exc.EnsureAttrDict() + _ = d.SetItem(objects.NewStr("name"), objects.NewStr(nameStr.Value())) + _ = d.SetItem(objects.NewStr("msg"), objects.NewStr(msg)) + return nil, objects.NewRaisedError(exc, msg) +} + +// execDynamic implements _imp.exec_dynamic(module). For a multi-phase +// extension it runs the def's Py_mod_exec slots through PyModule_ExecDef; +// for a single-phase extension (whose body already ran during +// create_dynamic) it is a no-op. It returns 0 on success, matching the C +// impl's int return. +// +// CPython: Python/import.c:4801 _imp_exec_dynamic_impl +// CPython: Objects/moduleobject.c:463 PyModule_ExecDef +func execDynamic(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: exec_dynamic() missing required argument 'mod'") + } + if err := imp.ExecExtModule(args[0]); err != nil { + return nil, err + } + return objects.NewInt(0), nil +} + +// checkEmbeddedNull mirrors the ValueError CPython raises when encoding a +// str that contains a NUL, the failure path the name / filename encode +// steps in _Py_ext_module_loader_info_init hit for an embedded null. +// +// CPython: Objects/unicodeobject.c PyUnicode_AsUTF8AndSize (embedded null) +func checkEmbeddedNull(s string) error { + for i := 0; i < len(s); i++ { + if s[i] == 0 { + return fmt.Errorf("ValueError: embedded null character") + } + } + return nil +} + +// createBuiltin implements _imp.create_builtin(spec). It reads spec.name +// and runs the matching inittab initializer, which builds a fully +// initialized module (gopy has no separate exec phase for builtins). +// +// CPython: Python/import.c:4488 _imp_create_builtin +func createBuiltin(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: create_builtin() missing required argument 'spec'") + } + nameObj, err := objects.GetAttr(args[0], objects.NewStr("name")) + if err != nil { + return nil, err + } + u, ok := nameObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: spec.name must be str, not '%T'", nameObj) + } + name := u.Value() + initFn := imp.FindInitFunc(name) + if initFn == nil { + return nil, fmt.Errorf("ImportError: no built-in module named %s", name) + } + mod, err := initFn() + if err != nil { + return nil, err + } + mod.StampBuiltinModule() + return mod, nil +} + +// fixCoFilename implements _imp._fix_co_filename(code, path). It rewrites +// co_filename on the code object in place. +// +// CPython: Python/import.c:4318 _imp__fix_co_filename_impl +func fixCoFilename(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: _fix_co_filename() takes exactly 2 arguments") + } + code, ok := args[0].(*objects.Code) + if !ok { + return nil, fmt.Errorf("TypeError: _fix_co_filename() argument 1 must be code, not '%T'", args[0]) + } + path, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: _fix_co_filename() argument 2 must be str, not '%T'", args[1]) + } + updateCodeFilenames(code, code.Filename, path.Value()) + return objects.None(), nil +} + +// updateCodeFilenames rewrites co_filename to newname on co and on every +// nested code object reachable through co_consts that still carries the +// original oldname. A code compiled with a stale dfile (the .pyc records +// it) gets re-stamped to the real source path on import, including the +// code objects of the functions it defines. +// +// CPython: Python/import.c:4291 update_code_filenames +func updateCodeFilenames(co *objects.Code, oldname, newname string) { + if co.Filename != oldname { + return + } + co.Filename = newname + for _, c := range co.Consts { + if nested, ok := c.(*objects.Code); ok { + updateCodeFilenames(nested, oldname, newname) + } + } +} + // sourceHash mirrors _imp.source_hash(key, source). It hashes the // source buffer with SipHash-1-3 keyed by `key` and returns the result // as 8 little-endian bytes. diff --git a/module/_interpreters/module.go b/module/_interpreters/module.go index 3ec1432d1..942e5bd91 100644 --- a/module/_interpreters/module.go +++ b/module/_interpreters/module.go @@ -14,6 +14,7 @@ package _interpreters import ( "fmt" + "strings" "sync" "github.com/tamnd/gopy/builtins" @@ -67,11 +68,20 @@ type interp struct { whence int refs int64 ns *objects.Dict + // ownGil and checkMulti capture the PyInterpreterConfig the interpreter + // was created with: whether it runs with its own GIL and whether it + // enforces the subinterpreter-incompatible-extension check. The default + // _PyInterpreterConfig_INIT (isolated) sets both, so a bare create() + // produces an interpreter that rejects single-phase extension imports. + // + // CPython: Include/cpython/pylifecycle.h:52 _PyInterpreterConfig_INIT + ownGil bool + checkMulti bool } var ( mu sync.Mutex - registry = map[int64]*interp{} + registry = map[int64]*interp{} nextID int64 = 1 ) @@ -114,17 +124,57 @@ func argInt(args []objects.Object, i int) (int64, error) { // create allocates a new interpreter and returns its id. // // CPython: Modules/_interpretersmodule.c:768 interp_create -func create(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { +func create(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + // The optional config selects the interpreter's isolation. The default + // (_PyInterpreterConfig_INIT) is fully isolated: its own GIL and the + // multi-interpreter extension check enabled. A "legacy" named config or + // an explicit object can relax that. + // + // CPython: Modules/_interpretersmodule.c:404 config_from_object + ownGil, checkMulti := true, true + if len(args) >= 1 && !objects.IsNone(args[0]) { + ownGil, checkMulti = configFromObject(args[0]) + } mu.Lock() defer mu.Unlock() id := nextID nextID++ ns := objects.NewDict() _ = ns.SetItem(objects.NewStr("__name__"), objects.NewStr("__main__")) - registry[id] = &interp{id: id, whence: whenceStdlib, refs: 0, ns: ns} + registry[id] = &interp{id: id, whence: whenceStdlib, refs: 0, ns: ns, ownGil: ownGil, checkMulti: checkMulti} return objects.NewInt(id), nil } +// configFromObject reads the (own_gil, check_multi_interp_extensions) pair +// from a create() config argument: a named-config string or an object whose +// attributes mirror PyInterpreterConfig. +// +// CPython: Python/interpconfig.c:262 _PyInterpreterConfig_InitFromDict +func configFromObject(cfg objects.Object) (ownGil, checkMulti bool) { + if name, ok := cfg.(*objects.Unicode); ok { + switch name.Value() { + case "legacy": + return false, false + case "empty": + return false, false + default: // "default", "isolated", "" + return true, true + } + } + ownGil, checkMulti = true, true + if gilObj, err := objects.GetAttr(cfg, objects.NewStr("gil")); err == nil { + if gilStr, ok := gilObj.(*objects.Unicode); ok { + ownGil = gilStr.Value() == "own" + } + } + if checkObj, err := objects.GetAttr(cfg, objects.NewStr("check_multi_interp_extensions")); err == nil { + if t, terr := objects.IsTruthy(checkObj); terr == nil { + checkMulti = t + } + } + return ownGil, checkMulti +} + // destroy finalizes and removes an interpreter. // // CPython: Modules/_interpretersmodule.c:874 interp_destroy @@ -286,6 +336,13 @@ func execCode(args []objects.Object, _ map[string]objects.Object) (objects.Objec if err != nil { return nil, err } + // Like run_string, exec runs in a fresh non-main interpreter state so the + // PEP 489 extension compat check observes the subinterpreter (own GIL, + // check_multi_interp_extensions) rather than the main interpreter. + // + // CPython: Modules/_interpretersmodule.c:650 _run_in_interpreter + imp.PushSubinterp(it.ownGil, it.checkMulti) + defer imp.PopSubinterp() if _, err := builtins.Exec([]objects.Object{code, it.ns}, nil); err != nil { return excinfoFor(err), nil } @@ -298,8 +355,29 @@ func execCode(args []objects.Object, _ map[string]objects.Object) (objects.Objec // // CPython: Modules/_interpretersmodule.c _PyXI_excinfo func excinfoFor(err error) objects.Object { + // CPython's _run_in_interpreter consumes the script's exception into the + // excinfo snapshot and clears it from the interpreter, so the failure + // does not leak into later operations (a pending exception otherwise + // surfaces during the next generator finalization). Mirror that clear. + // + // CPython: Python/crossinterp.c:1700 _PyXI_excinfo_InitFromException typeName := "Exception" msg := err.Error() + // Prefer the live pending exception object: it carries the real type + // regardless of how the Go error wraps it (RaisedError, the VM's reraise + // sentinel, a bare formatted error). A RaisedError that crossed back from + // the VM as a reraise sentinel would otherwise be read as a plain + // Exception, dropping the ImportError type the compat-check test asserts on. + if objects.SaveCurrentExceptionHook != nil { + if pending := objects.SaveCurrentExceptionHook(); pending != nil { + if exc, ok := pending.(objects.Object); ok && exc != nil { + typeName = exc.Type().Name + } + } + } + if objects.ClearCurrentExceptionHook != nil { + objects.ClearCurrentExceptionHook() + } if re, ok := err.(*objects.RaisedError); ok { if re.Exc != nil { typeName = re.Exc.Type().Name @@ -308,8 +386,20 @@ func excinfoFor(err error) objects.Object { msg = re.Msg } } + // The Go error text is rendered "Type: message"; the excinfo msg field is + // just the message (str(exc)), so strip a leading "typeName: " to avoid + // the formatted line reading "ImportError: ImportError: ...". + msg = strings.TrimPrefix(msg, typeName+": ") ns := objects.NewNamespace() - _ = objects.SetAttr(ns, objects.NewStr("type"), objects.NewStr(typeName)) + // excinfo.type is itself a namespace carrying the exception type's + // __name__/__qualname__/__module__, the shape _PyXI_excinfo_TypeAsObject + // builds so callers can read exc.type.__name__. + // + // CPython: Python/crossinterp.c:1517 _PyXI_excinfo_TypeAsObject + typeNS := objects.NewNamespace() + _ = objects.SetAttr(typeNS, objects.NewStr("__name__"), objects.NewStr(typeName)) + _ = objects.SetAttr(typeNS, objects.NewStr("__qualname__"), objects.NewStr(typeName)) + _ = objects.SetAttr(ns, objects.NewStr("type"), typeNS) _ = objects.SetAttr(ns, objects.NewStr("msg"), objects.NewStr(msg)) formatted := fmt.Sprintf("%s: %s", typeName, msg) _ = objects.SetAttr(ns, objects.NewStr("formatted"), objects.NewStr(formatted)) @@ -317,6 +407,43 @@ func excinfoFor(err error) objects.Object { return ns } +// runString runs a source string in the interpreter's __main__ namespace. +// Like exec it returns None on success or an excinfo namespace on an +// unhandled exception; the high-level caller decides what to do with it. +// +// CPython: Modules/_interpretersmodule.c:1174 interp_run_string +func runString(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + id, err := argInt(args, 0) + if err != nil { + return nil, err + } + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: run_string() missing 'script'") + } + script, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_string() argument 2 must be a string, not %s", args[1].Type().Name) + } + mu.Lock() + it, err := lookup(id) + mu.Unlock() + if err != nil { + return nil, err + } + // A subinterpreter run is a fresh-namespace exec that pushes a non-main + // interpreter state, so the PEP 489 extension compat check and the + // gh-144601 single-phase failure path observe the subinterpreter the + // same way CPython's switched-to-main init does. + // + // CPython: Modules/_interpretersmodule.c:650 _run_in_interpreter + imp.PushSubinterp(it.ownGil, it.checkMulti) + defer imp.PopSubinterp() + if _, err := builtins.Exec([]objects.Object{script, it.ns}, nil); err != nil { + return excinfoFor(err), nil + } + return objects.None(), nil +} + func isShareable(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { // gopy shares object references directly, so everything is shareable. // @@ -361,6 +488,7 @@ func buildModule() (*objects.Module, error) { {"is_running", fn("is_running", isRunning)}, {"set___main___attrs", fn("set___main___attrs", setMainAttrs)}, {"exec", fn("exec", execCode)}, + {"run_string", fn("run_string", runString)}, {"is_shareable", fn("is_shareable", isShareable)}, } for _, e := range entries { diff --git a/module/_posixsubprocess/module.go b/module/_posixsubprocess/module.go index eaddc7ddc..0fc2b140e 100644 --- a/module/_posixsubprocess/module.go +++ b/module/_posixsubprocess/module.go @@ -21,7 +21,6 @@ import ( "io" "os" "os/exec" - "runtime" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" @@ -88,14 +87,17 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec executable = execs[0] } - // args[4]: cwd - string or None + // args[4]: cwd - PyUnicode_FSConverter accepts str, bytes, or any + // os.PathLike (pathlib.Path), so subprocess.run(cwd=Path(...)) works. + // + // CPython: Modules/_posixsubprocess.c subprocess_fork_exec ("O&" cwd_obj) cwd := "" if args[4] != nil && args[4] != objects.None() { - s, ok := args[4].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: cwd must be str or None") + s, err := fsConvert(args[4]) + if err != nil { + return nil, err } - cwd = s.Value() + cwd = s } // args[5]: env_list - list of "KEY=VALUE" strings or None. @@ -137,11 +139,12 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // owned by Python's subprocess machinery (subprocess.py closes them // explicitly after fork_exec returns). If Go's GC fires the default // finalizer before Python calls os.close(), the fd is closed out from - // under the caller and subsequent os.close() raises EBADF. - // Pattern mirrors module/os/stat_darwin.go osFstat runtime.SetFinalizer. + // under the caller and subsequent os.close() raises EBADF. The finalizer + // is armed on the inner *os.file, so objects.ClearOSFileFinalizer reaches + // it rather than the outer handle (a SetFinalizer no-op). if p2cread >= 0 { f := os.NewFile(uintptr(p2cread), "pipe:stdin") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stdin = f } else { cmd.Stdin = io.NopCloser(os.Stdin) @@ -151,7 +154,7 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // CPython: Modules/_posixsubprocess.c:730 dup2(c2pwrite, 1) if c2pwrite >= 0 { f := os.NewFile(uintptr(c2pwrite), "pipe:stdout") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stdout = f } else { cmd.Stdout = os.Stdout @@ -161,7 +164,7 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // CPython: Modules/_posixsubprocess.c:737 dup2(errwrite, 2) if errwrite >= 0 { f := os.NewFile(uintptr(errwrite), "pipe:stderr") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stderr = f } else { cmd.Stderr = os.Stderr @@ -216,16 +219,47 @@ func toStringSlice(obj objects.Object) ([]string, error) { return out, nil } -// objectToString converts a Python str or bytes object to a Go string. +// objectToString converts a Python str, bytes, or os.PathLike object to a +// Go string. CPython runs each argv member through fsconvert_strdup, which +// is PyUnicode_FSConverter, so pathlib.Path arguments are accepted too. +// +// CPython: Modules/_posixsubprocess.c:130 fsconvert_strdup func objectToString(obj objects.Object) (string, error) { switch v := obj.(type) { case *objects.Unicode: return v.Value(), nil case *objects.Bytes: return string(v.Bytes()), nil - default: - return "", fmt.Errorf("expected str, got %s", obj.Type().Name) } + if fspath, err := objects.GetAttr(obj, objects.NewStr("__fspath__")); err == nil { + result, err := objects.CallNoArgs(fspath) + if err != nil { + return "", err + } + return objectToString(result) + } + return "", fmt.Errorf("expected str, got %s", obj.Type().Name) +} + +// fsConvert mirrors PyUnicode_FSConverter: it accepts a str, bytes, or +// any os.PathLike (pathlib.Path) by invoking __fspath__ and recursing. +// +// CPython: Modules/posixmodule.c PyUnicode_FSConverter / PyOS_FSPath +func fsConvert(obj objects.Object) (string, error) { + switch v := obj.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + if fspath, err := objects.GetAttr(obj, objects.NewStr("__fspath__")); err == nil { + result, err := objects.CallNoArgs(fspath) + if err != nil { + return "", err + } + return fsConvert(result) + } + return "", fmt.Errorf("TypeError: cwd must be str or None") } // toIntFd extracts a file descriptor integer from a Python int object. diff --git a/module/_posixsubprocess/module_test.go b/module/_posixsubprocess/module_test.go index d08744f6b..3bfc753a0 100644 --- a/module/_posixsubprocess/module_test.go +++ b/module/_posixsubprocess/module_test.go @@ -59,29 +59,29 @@ func makeArgs(argv []string, executable string, cwd string) []objects.Object { // 23 arguments in CPython clinic order: return []objects.Object{ - argList, // args (process_args) - execList, // executable_list - objects.False(), // close_fds + argList, // args (process_args) + execList, // executable_list + objects.False(), // close_fds objects.NewTuple([]objects.Object{}), // pass_fds - cwdObj, // cwd - objects.None(), // env (inherit) - intObj(-1), // p2cread - intObj(-1), // p2cwrite - intObj(-1), // c2pread - intObj(-1), // c2pwrite - intObj(-1), // errread - intObj(-1), // errwrite - intObj(-1), // errpipe_read - intObj(-1), // errpipe_write - objects.True(), // restore_signals - objects.False(), // call_setsid - intObj(-1), // pgid_to_set - objects.None(), // gid - objects.None(), // extra_groups - objects.None(), // uid - intObj(-1), // child_umask - objects.None(), // preexec_fn - objects.False(), // use_vfork + cwdObj, // cwd + objects.None(), // env (inherit) + intObj(-1), // p2cread + intObj(-1), // p2cwrite + intObj(-1), // c2pread + intObj(-1), // c2pwrite + intObj(-1), // errread + intObj(-1), // errwrite + intObj(-1), // errpipe_read + intObj(-1), // errpipe_write + objects.True(), // restore_signals + objects.False(), // call_setsid + intObj(-1), // pgid_to_set + objects.None(), // gid + objects.None(), // extra_groups + objects.None(), // uid + intObj(-1), // child_umask + objects.None(), // preexec_fn + objects.False(), // use_vfork } } @@ -107,16 +107,11 @@ func TestForkExecTrue(t *testing.T) { if err != nil { t.Fatalf("fork_exec: %v", err) } - tup, ok := result.(*objects.Tuple) + // CPython returns PyLong_FromPid(pid): fork_exec yields the child PID as + // a plain int, not a tuple. subprocess.py assigns self.pid directly. + pidObj, ok := result.(*objects.Int) if !ok { - t.Fatalf("expected tuple, got %T", result) - } - if tup.Len() < 2 { - t.Fatalf("expected at least 2-tuple, got len %d", tup.Len()) - } - pidObj, ok := tup.Item(0).(*objects.Int) - if !ok { - t.Fatalf("pid is not an int: %T", tup.Item(0)) + t.Fatalf("expected int pid, got %T", result) } pid, _ := pidObj.Int64() if pid <= 0 { @@ -137,22 +132,15 @@ func TestForkExecEcho(t *testing.T) { if err != nil { t.Fatalf("fork_exec /bin/echo: %v", err) } - tup, ok := result.(*objects.Tuple) + // CPython: Modules/_posixsubprocess.c:1325 return PyLong_FromPid(pid). + pidObj, ok := result.(*objects.Int) if !ok { - t.Fatalf("expected tuple, got %T", result) - } - pidObj, ok := tup.Item(0).(*objects.Int) - if !ok { - t.Fatalf("pid is not an int: %T", tup.Item(0)) + t.Fatalf("expected int pid, got %T", result) } pid, _ := pidObj.Int64() if pid <= 0 { t.Fatalf("expected positive PID, got %d", pid) } - // Sentinel at index 1 must be None. - if tup.Item(1) != objects.None() { - t.Fatalf("expected None sentinel at index 1, got %v", tup.Item(1)) - } } // TestForkExecMissingArgs verifies that fewer than 23 arguments returns a diff --git a/module/_testcapi/config.go b/module/_testcapi/config.go new file mode 100644 index 000000000..5b737fc96 --- /dev/null +++ b/module/_testcapi/config.go @@ -0,0 +1,173 @@ +package testcapi + +import ( + "fmt" + "math/big" + "sync" + + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/initconfig" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/state" +) + +// defaultConfig is the fallback configuration used when the lifecycle +// has not stamped a PyConfig onto the interpreter. The cmd entry point +// still resolves paths by hand rather than running initconfig end to +// end, so PyConfig_Get reads from the layered Python defaults, which +// already carry the runtime-true knobs the config tests inspect +// (code_debug_ranges on, write_bytecode on, optimization_level zero). +// +// CPython: Python/initconfig.c:1106 PyConfig_InitPythonConfig +var ( + defaultConfigOnce sync.Once + defaultConfig initconfig.PyConfig +) + +func sharedDefaultConfig() *initconfig.PyConfig { + defaultConfigOnce.Do(func() { + defaultConfig.InitPythonConfig() + }) + return &defaultConfig +} + +// activeConfig returns the *initconfig.PyConfig the lifecycle stamped on +// the main interpreter, the live configuration _Py_GetConfig hands to +// PyConfig_Get. It falls back to the layered Python defaults until the +// cmd entry point wires initconfig through to the interpreter. +// +// CPython: Python/initconfig.c:4461 PyConfig_Get (_Py_GetConfig) +func activeConfig() (*initconfig.PyConfig, error) { + interp := state.MainInterpreter() + if interp != nil { + if cfg, ok := interp.Config.(*initconfig.PyConfig); ok && cfg != nil { + return cfg, nil + } + } + return sharedDefaultConfig(), nil +} + +// configGetObject wraps a resolved config member into the Python object +// PyConfig_Get returns: ints/uints become int, bools become bool, the +// optional wide strings become None when empty, and the wide-string +// lists become tuples. SYS_ATTR members are read back from the live sys +// module instead, matching config_get's use_sys delegation. +// +// CPython: Python/initconfig.c:4378 config_get +func configGetObject(name string) (objects.Object, error) { + cfg, err := activeConfig() + if err != nil { + return nil, err + } + member, found := cfg.ConfigGet(name) + if !found { + // CPython: Python/initconfig.c:4451 config_unknown_name_error + return nil, fmt.Errorf("ValueError: unknown config option name: %s", name) + } + + // use_sys is always 1 for PyConfig_Get: a member exposed through sys + // reads the live sys attribute so command-line and runtime overrides + // are visible. + // + // CPython: Python/initconfig.c:4382 config_get (spec->sys.attr) + if member.SysAttr != "" { + return sysRequiredAttr(member.SysAttr) + } + + switch member.Type { + case initconfig.ConfigMemberInt, initconfig.ConfigMemberUint: + return objects.NewInt(int64(member.Value.(int))), nil + case initconfig.ConfigMemberBool: + return objects.NewBool(member.Value.(int) != 0), nil + case initconfig.ConfigMemberULong: + return objects.NewIntFromBig(new(big.Int).SetUint64(member.Value.(uint64))), nil + case initconfig.ConfigMemberWStr: + return objects.NewStr(member.Value.(string)), nil + case initconfig.ConfigMemberWStrOpt: + s := member.Value.(string) + if s == "" { + return objects.None(), nil + } + return objects.NewStr(s), nil + case initconfig.ConfigMemberWStrList: + items := member.Value.([]string) + objs := make([]objects.Object, len(items)) + for i, s := range items { + objs[i] = objects.NewStr(s) + } + return objects.NewTuple(objs), nil + default: + return nil, fmt.Errorf("SystemError: unreachable config member type") + } +} + +// sysRequiredAttr mirrors _PySys_GetRequiredAttrString: read the named +// attribute from the live sys module, raising RuntimeError when sys or +// the attribute is missing. +// +// CPython: Python/sysmodule.c:99 _PySys_GetRequiredAttrString +func sysRequiredAttr(attr string) (objects.Object, error) { + mod, ok := imp.GetModule("sys") + if !ok { + return nil, fmt.Errorf("RuntimeError: lost sys module") + } + v, err := mod.Dict().GetItem(objects.NewStr(attr)) + if err != nil { + return nil, err + } + if v == nil { + return nil, fmt.Errorf("RuntimeError: lost sys.%s", attr) + } + return v, nil +} + +// configGet ports _testcapi.config_get: parse the option name and return +// PyConfig_Get(name). +// +// CPython: Modules/_testcapi/config.c:4 _testcapi_config_get +func configGet(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: config_get expected 1 argument, got %d", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: argument must be str, not %s", args[0].Type().Name) + } + return configGetObject(name.Value()) +} + +// configGetint ports _testcapi.config_getint: PyConfig_GetInt(name), +// which is PyConfig_Get(name) constrained to an int result. +// +// CPython: Modules/_testcapi/config.c:16 _testcapi_config_getint +func configGetint(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: config_getint expected 1 argument, got %d", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: argument must be str, not %s", args[0].Type().Name) + } + obj, err := configGetObject(name.Value()) + if err != nil { + return nil, err + } + // CPython: Python/initconfig.c:4478 PyConfig_GetInt (PyLong_Check) + if _, ok := obj.(*objects.Int); !ok { + return nil, fmt.Errorf("TypeError: config option %s is not an int", name.Value()) + } + return obj, nil +} + +// configNames ports _testcapi.config_names: the frozenset of every known +// config option name. +// +// CPython: Modules/_testcapi/config.c:32 _testcapi_config_names +func configNames(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + names := initconfig.ConfigNames() + items := make([]objects.Object, len(names)) + for i, n := range names { + items[i] = objects.NewStr(n) + } + return objects.NewFrozenset(items) +} diff --git a/module/_testcapi/module.go b/module/_testcapi/module.go index d100c7dee..eb7128111 100644 --- a/module/_testcapi/module.go +++ b/module/_testcapi/module.go @@ -17,6 +17,7 @@ import ( "math" "math/big" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -240,6 +241,10 @@ func buildModule() (*objects.Module, error) { {"bad_get", badGet}, {"set_nomemory", setNomemory}, {"remove_mem_hooks", removeMemHooks}, + {"config_get", configGet}, + {"config_getint", configGetint}, + {"config_names", configNames}, + {"run_in_subinterp", runInSubinterp}, } for _, w := range wrappers { if err := d.SetItem(objects.NewStr(w.name), objects.NewBuiltinFunction(w.name, w.fn)); err != nil { @@ -312,6 +317,34 @@ func buildModule() (*objects.Module, error) { return m, nil } +// runInSubinterp ports _testcapi.run_in_subinterp(code). CPython creates a +// fresh subinterpreter with Py_NewInterpreter, runs code through +// PyRun_SimpleStringFlags, ends the interpreter, and returns the status. +// gopy has no single-phase C extensions to isolate, so the faithful +// behaviour is a fresh-namespace exec returning the PyRun_SimpleString +// status code. +// +// CPython: Modules/_testcapimodule.c:1969 run_in_subinterp +func runInSubinterp(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: run_in_subinterp() takes exactly one argument (%d given)", len(args)) + } + code, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_in_subinterp() argument must be str, not %s", args[0].Type().Name) + } + // Py_NewInterpreter builds a legacy subinterpreter: it shares the main + // GIL and leaves check_multi_interp_extensions off, and it has its own + // sys.modules so any extension re-imports through import_find_extension. + // Push that interpreter state for the duration of the run so the script's + // "assert name not in sys.modules" holds and the re-import copies m_copy. + // + // CPython: Modules/_testcapimodule.c:1969 run_in_subinterp (Py_NewInterpreter) + imp.PushSubinterp(false, false) + defer imp.PopSubinterp() + return objects.NewInt(int64(builtins.RunInFreshNamespace(code.Value()))), nil +} + // setNomemory ports _testcapi.set_nomemory(start[, stop]). It arms the // allocation-fault injector so the allocation request at ordinal start // (counting from the call) begins failing, continuing until ordinal stop; diff --git a/module/_testinternalcapi/module.go b/module/_testinternalcapi/module.go index 993534b75..0b616fa1b 100644 --- a/module/_testinternalcapi/module.go +++ b/module/_testinternalcapi/module.go @@ -11,7 +11,9 @@ package testinternalcapi import ( "fmt" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/module/sys" "github.com/tamnd/gopy/objects" ) @@ -31,6 +33,9 @@ func buildModule() (*objects.Module, error) { {"has_split_table", hasSplitTable}, {"get_static_builtin_types", getStaticBuiltinTypes}, {"identify_type_slot_wrappers", identifyTypeSlotWrappers}, + {"get_recursion_depth", getRecursionDepth}, + {"run_in_subinterp_with_config", runInSubinterpWithConfig}, + {"clear_extension", clearExtension}, } for _, f := range fns { if err := d.SetItem(objects.NewStr(f.name), objects.NewBuiltinFunction(f.name, f.fn)); err != nil { @@ -64,6 +69,103 @@ func buildModule() (*objects.Module, error) { return m, nil } +// runInSubinterpWithConfig ports run_in_subinterp_with_config(code, config, +// xi=False). CPython spins up a fresh PyInterpreterState configured by the +// PyInterpreterConfig the test built, runs the code with +// PyRun_SimpleStringFlags, tears the interpreter down, and returns that +// status. gopy compiles every extension into the runtime as a Go builtin +// (multi-phase by construction), so the config's isolation and +// check_multi_interp_extensions fields never reject an import: a faithful +// run is a fresh-namespace exec whose only observable output is the +// PyRun_SimpleString status code. The config object is accepted and +// ignored. +// +// CPython: Modules/_testinternalcapi.c:1816 run_in_subinterp_with_config +func runInSubinterpWithConfig(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: run_in_subinterp_with_config() missing required argument 'code' (pos 1)") + } + code, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_in_subinterp_with_config() argument 'code' must be str, not %s", args[0].Type().Name) + } + + // gopy cannot spin up a real OS-level subinterpreter, so the run is a + // fresh-namespace exec. The config's gil and check_multi_interp_extensions + // fields are honoured: they are pushed onto the interpreter-state stack + // the PEP 489 extension compat check (imp.CheckExtSubinterpCompat) reads, + // so importing an incompatible extension from the script raises the same + // ImportError CPython's subinterpreter would. own_gil follows + // config.gil == 'own' (the ISOLATED gil=2 case). + // + // CPython: Python/pylifecycle.c:586 init_interp_create_gil (own_gil) + ownGil, checkMulti := false, false + if len(args) >= 2 && !objects.IsNone(args[1]) { + config := args[1] + if gilObj, err := objects.GetAttr(config, objects.NewStr("gil")); err == nil { + if gilStr, ok := gilObj.(*objects.Unicode); ok { + ownGil = gilStr.Value() == "own" + } + } + if checkObj, err := objects.GetAttr(config, objects.NewStr("check_multi_interp_extensions")); err == nil { + if t, terr := objects.IsTruthy(checkObj); terr == nil { + checkMulti = t + } + } + } + + imp.PushSubinterp(ownGil, checkMulti) + defer imp.PopSubinterp() + return objects.NewInt(int64(builtins.RunInFreshNamespace(code.Value()))), nil +} + +// clearExtension ports clear_extension(name, filename): it clears all +// internally cached data for a single-phase extension module so the test +// suite can re-import it fresh. It delegates to _PyImport_ClearExtension. +// +// CPython: Modules/_testinternalcapi.c:893 clear_extension +// +// (Python/import.c:903 _PyImport_ClearExtension) +func clearExtension(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: clear_extension() takes exactly 2 arguments (%d given)", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: clear_extension() argument 1 must be str, not %s", args[0].Type().Name) + } + path := "" + if !objects.IsNone(args[1]) { + filename, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: clear_extension() argument 2 must be str, not %s", args[1].Type().Name) + } + path = filename.Value() + } + if err := imp.ClearExtension(name.Value(), path); err != nil { + return nil, err + } + return objects.None(), nil +} + +// getRecursionDepth returns the Python recursion depth of the caller, +// matching tstate->py_recursion_limit - tstate->py_recursion_remaining. +// gopy tracks depth by the active interpreter-frame chain, so the count +// of frames from the caller back to the root is the same quantity. The +// C probe pushes no Python frame, so the caller's frame is the base. +// +// CPython: Modules/_testinternalcapi.c:110 get_recursion_depth +func getRecursionDepth(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if sys.CurrentInterpreterFrameHook == nil { + return objects.NewInt(0), nil + } + depth := int64(0) + for f := sys.CurrentInterpreterFrameHook(); f != nil; f = f.FrameBack() { + depth++ + } + return objects.NewInt(depth), nil +} + // hasInlineValues reports whether obj currently keeps its attributes in // the type's inline-values array. It mirrors the C probe: the owning type // must carry Py_TPFLAGS_INLINE_VALUES and the instance's value array must @@ -89,7 +191,8 @@ func hasInlineValues(args []objects.Object, _ map[string]objects.Object) (object // inheritance across the static type set. // // CPython: Modules/_testinternalcapi.c:2334 get_static_builtin_types -// (Objects/typeobject.c _PyStaticType_GetBuiltins) +// +// (Objects/typeobject.c _PyStaticType_GetBuiltins) func getStaticBuiltinTypes(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { types := []*objects.Type{ objects.ObjectType(), objects.TypeType(), @@ -112,7 +215,8 @@ func getStaticBuiltinTypes(_ []objects.Object, _ map[string]objects.Object) (obj // resolve to a wrapper_descriptor on that type. // // CPython: Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames -// (Objects/typeobject.c:10952 slotdefs) +// +// (Objects/typeobject.c:10952 slotdefs) var slotWrapperNames = []string{ "__getattribute__", "__getattr__", "__setattr__", "__delattr__", "__repr__", "__hash__", "__call__", "__str__", @@ -137,7 +241,8 @@ var slotWrapperNames = []string{ // slotdefs table. // // CPython: Modules/_testinternalcapi.c:2341 identify_type_slot_wrappers -// (Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames) +// +// (Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames) func identifyTypeSlotWrappers(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { items := make([]objects.Object, len(slotWrapperNames)) for i, n := range slotWrapperNames { diff --git a/module/_testmultiphase/module.go b/module/_testmultiphase/module.go new file mode 100644 index 000000000..96920e976 --- /dev/null +++ b/module/_testmultiphase/module.go @@ -0,0 +1,633 @@ +// Package testmultiphase is the gopy port of CPython's +// Modules/_testmultiphase.c, the C extension that exercises multi-phase +// initialization of extension modules (PEP 489). The standard-library +// test suite reaches for it indirectly: test.test_importlib.util runs +// import_helper.import_module("_testmultiphase") at import time, so any +// test that pulls in that helper (test_pkgutil, test_pyclbr, the +// test_importlib extension suites) raises SkipTest when the module is +// absent. +// +// gopy cannot dlopen the compiled extension, so every PyInit_* entry the C +// extension exposes is reproduced as a Go-native ExtModuleDef carrying the +// PEP 489 create/exec slot table its PyModuleDef declares. _imp.create_dynamic +// runs the create step (PyModule_FromDefAndSpec2) and _imp.exec_dynamic runs +// the exec slots (PyModule_ExecDef). The test loads the many variants through +// an explicit ExtensionFileLoader bound to the one _testmultiphase origin, the +// gopy analog of the single .so exposing many PyInit symbols, so the variants +// are registered as non-discoverable (Variant) defs. +// +// CPython: Modules/_testmultiphase.c:447 PyInit__testmultiphase +// CPython: Modules/_testmultiphase.c:381 execfunc +package testmultiphase + +import ( + "fmt" + "sync" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// gilNotUsed is Py_MOD_GIL_NOT_USED, the value the test extension's defs put +// in their Py_mod_gil slot. gopy has a single GIL-free runtime, so the value +// only needs to round-trip through the slot scan. +// +// CPython: Include/moduleobject.h Py_MOD_GIL_NOT_USED +const gilNotUsed = 0 + +func init() { + // The four "real" modules each have their own PyInit symbol and are + // imported by name (test_import's SubinterpImportTests and the helper that + // loads _testmultiphase), so they are materialized as discoverable stubs. + // + // CPython: Modules/_testmultiphase.c:438 main_slots (PER_INTERPRETER_GIL) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase", + MultiPhase: true, + Doc: "Test module _testmultiphase", + Methods: mainMethods(), + Slots: mainSlots(imp.MultiInterpPerInterpreterGIL, true), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + // CPython: Modules/_testmultiphase.c:940 non_isolated_slots (NOT_SUPPORTED) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_non_isolated", + MultiPhase: true, + Doc: "Test module _test_non_isolated", + Methods: mainMethods(), + Slots: mainSlots(imp.MultiInterpNotSupported, true), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpNotSupported, + }) + // CPython: Modules/_testmultiphase.c:958 shared_gil_only_slots (SUPPORTED, explicit) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_shared_gil_only", + MultiPhase: true, + Doc: "Test module _test_shared_gil_only", + Methods: mainMethods(), + Slots: mainSlots(imp.MultiInterpSupported, true), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpSupported, + }) + // CPython: Modules/_testmultiphase.c:980 no_multiple_interpreter_slot_slots (no slot) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_no_multiple_interpreter_slot", + MultiPhase: true, + Doc: "Test module _test_no_multiple_interpreter_slot", + Methods: mainMethods(), + Slots: mainSlots(0, false), + }) + + registerVariants() +} + +// mainSlots builds the slot table the main-like defs share: an exec slot +// running execfunc, an optional multiple-interpreters slot, and a gil slot. +// +// CPython: Modules/_testmultiphase.c:438 main_slots +func mainSlots(multiInterp int, hasMultiInterp bool) []imp.ExtSlot { + slots := []imp.ExtSlot{{ID: imp.ExtSlotExec, Exec: execMain}} + if hasMultiInterp { + slots = append(slots, imp.ExtSlot{ID: imp.ExtSlotMultipleInterpreters, Value: multiInterp}) + } + slots = append(slots, imp.ExtSlot{ID: imp.ExtSlotGIL, Value: gilNotUsed}) + return slots +} + +// mainMethods is testexport_methods: foo and call_state_registration_func. +// +// CPython: Modules/_testmultiphase.c:374 testexport_methods +func mainMethods() []imp.ExtMethod { + return []imp.ExtMethod{ + {Name: "foo", Fn: testexportFoo}, + {Name: "call_state_registration_func", Fn: callStateRegistrationFunc}, + } +} + +// registerVariants registers every test-only PyInit variant as a +// non-discoverable Variant def, reached only through an explicit +// ExtensionFileLoader against the _testmultiphase origin. +func registerVariants() { + // PyInit_x and pkg._testmultiphase both return main_def: a normal + // multi-phase module whose __name__ is the spec name (so 'x' / + // 'pkg._testmultiphase'), exercising short and dotted names. + // + // CPython: Modules/_testmultiphase.c:577 PyInit_x + for _, name := range []string{"x", "pkg._testmultiphase"} { + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: name, + MultiPhase: true, + Variant: true, + Doc: "Test module main", + Methods: mainMethods(), + Slots: mainSlots(imp.MultiInterpPerInterpreterGIL, true), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + } + + // Non-ASCII-named modules: only a gil slot (no exec, no create), so the + // loader builds a bare module and sets __name__ (spec name) and __doc__. + // + // CPython: Modules/_testmultiphase.c:527 nonascii_slots / def_nonascii_* + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_zkouška_načtení", + MultiPhase: true, + Variant: true, + Doc: "Module named in Czech", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotGIL, Value: gilNotUsed}}, + }) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_インポートテスト", + MultiPhase: true, + Variant: true, + Doc: "Module named in Japanese", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotGIL, Value: gilNotUsed}}, + }) + + // Non-module create results. + // + // CPython: Modules/_testmultiphase.c:482 slots_create_nonmodule + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_nonmodule", + MultiPhase: true, + Variant: true, + Doc: "Test module _testmultiphase_nonmodule", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createNonmodule}}, + }) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_nonmodule_with_methods", + MultiPhase: true, + Variant: true, + Doc: "Test module _testmultiphase_nonmodule_with_methods", + Methods: []imp.ExtMethod{{Name: "bar", Fn: nonmoduleBar}}, + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createNonmodule}}, + }) + + // NULL (empty) slot table. + // + // CPython: Modules/_testmultiphase.c:583 null_slots_def + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_null_slots", + MultiPhase: true, + Variant: true, + Doc: "Test module _testmultiphase_null_slots", + }) + + registerBadVariants() +} + +// registerBadVariants registers the misbehaving defs test_bad_modules drives, +// each of which must raise SystemError (chained for the unreported-exception +// cases). +// +// CPython: Modules/_testmultiphase.c:595 "Problematic modules" +func registerBadVariants() { + // bad_slot_large: a slot ID one past _Py_mod_LAST_SLOT. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_bad_slot_large", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_bad_slot_large", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotLast + 1}}, + }) + // bad_slot_negative: a negative slot ID. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_bad_slot_negative", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_bad_slot_negative", + Slots: []imp.ExtSlot{{ID: -1}}, + }) + // create_int_with_state: a non-module create result with m_size > 0. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_create_int_with_state", MultiPhase: true, Variant: true, + Doc: "Not a PyModuleObject object, but requests per-module state", + MSize: 10, + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createNonmodule}}, + }) + // negative_size: m_size < 0 is illegal for multi-phase init. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_negative_size", MultiPhase: true, Variant: true, + Doc: "PyModuleDef with negative m_size", + MSize: -1, + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createNonmodule}}, + }) + // export_null: PyInit returned NULL without setting an exception. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_export_null", MultiPhase: true, Variant: true, + InitKind: imp.ExtInitReturnedNil, + }) + // export_uninitialized: PyInit returned a def that skipped PyModuleDef_Init. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_export_uninitialized", MultiPhase: true, Variant: true, + InitKind: imp.ExtInitUninitialized, + }) + // export_raise: PyInit set SystemError and returned NULL. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_export_raise", MultiPhase: true, Variant: true, + InitKind: imp.ExtInitReturnedNil, + InitRaised: func() *pyerrors.Exception { return newSystemError("bad export function") }, + }) + // export_unreported_exception: PyInit returned a real def but left an + // exception set; the loader chains a SystemError onto it. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_export_unreported_exception", MultiPhase: true, Variant: true, + InitKind: imp.ExtInitNormal, + InitRaised: func() *pyerrors.Exception { return newSystemError("bad export function") }, + }) + // create_null: create slot returned NULL without setting an exception. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_create_null", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_create_null", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createNull}}, + }) + // create_raise: create slot set SystemError and returned NULL. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_create_raise", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_create_null", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createRaise}}, + }) + // create_unreported_exception: create slot returned a module but left an + // exception set; the loader chains a SystemError onto it. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_create_unreported_exception", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_create_unreported_exception", + Slots: []imp.ExtSlot{{ID: imp.ExtSlotCreate, Create: createUnreported}}, + }) + // nonmodule_with_exec_slots: a non-module create result paired with exec slots. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_nonmodule_with_exec_slots", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_nonmodule_with_exec_slots", + Slots: []imp.ExtSlot{ + {ID: imp.ExtSlotCreate, Create: createNonmodule}, + {ID: imp.ExtSlotExec, Exec: execMain}, + {ID: imp.ExtSlotMultipleInterpreters, Value: imp.MultiInterpPerInterpreterGIL}, + {ID: imp.ExtSlotGIL, Value: gilNotUsed}, + }, + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + // exec_err: exec slot returned -1 without setting an exception. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_exec_err", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_exec_err", + Slots: execVariantSlots(execErr), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + // exec_raise: exec slot set SystemError and returned -1. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_exec_raise", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_exec_raise", + Slots: execVariantSlots(execRaise), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + // exec_unreported_exception: exec slot returned 0 but left an exception set. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_exec_unreported_exception", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_exec_unreported_exception", + Slots: execVariantSlots(execUnreported), + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + }) + // multiple_create_slots: two Py_mod_create slots. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_multiple_create_slots", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_multiple_create_slots", + Slots: []imp.ExtSlot{ + {ID: imp.ExtSlotCreate, Create: createNoop}, + {ID: imp.ExtSlotCreate, Create: createNoop}, + }, + }) + // multiple_multiple_interpreters_slots: two Py_mod_multiple_interpreters slots. + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase_multiple_multiple_interpreters_slots", MultiPhase: true, Variant: true, + Doc: "Test module _testmultiphase_multiple_multiple_interpreters_slots", + Slots: []imp.ExtSlot{ + {ID: imp.ExtSlotMultipleInterpreters, Value: imp.MultiInterpPerInterpreterGIL}, + {ID: imp.ExtSlotMultipleInterpreters, Value: imp.MultiInterpPerInterpreterGIL}, + {ID: imp.ExtSlotGIL, Value: gilNotUsed}, + }, + }) +} + +// execVariantSlots builds the slot table the exec_* variants share: one exec +// slot plus multiple-interpreters and gil slots. +func execVariantSlots(exec func(objects.Object) (int, *pyerrors.Exception)) []imp.ExtSlot { + return []imp.ExtSlot{ + {ID: imp.ExtSlotExec, Exec: exec}, + {ID: imp.ExtSlotMultipleInterpreters, Value: imp.MultiInterpPerInterpreterGIL}, + {ID: imp.ExtSlotGIL, Value: gilNotUsed}, + } +} + +// newSystemError builds (without raising) a SystemError(msg) instance, the +// gopy analog of PyErr_SetString(PyExc_SystemError, msg) leaving an exception +// "set" before a slot returns. +func newSystemError(msg string) *pyerrors.Exception { + return pyerrors.New(pyerrors.PyExc_SystemError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) +} + +// createNonmodule ports createfunc_nonmodule: a SimpleNamespace(three=3). +// +// CPython: Modules/_testmultiphase.c:461 createfunc_nonmodule +func createNonmodule() (objects.Object, *pyerrors.Exception) { + ns := objects.NewNamespace() + _ = ns.Dict().SetItem(objects.NewStr("three"), objects.NewInt(3)) + return ns, nil +} + +// createNoop ports createfunc_noop: PyModule_New("spam"). +// +// CPython: Modules/_testmultiphase.c:683 createfunc_noop +func createNoop() (objects.Object, *pyerrors.Exception) { + return objects.NewModule("spam"), nil +} + +// createNull ports createfunc_null: returns NULL with no exception set. +// +// CPython: Modules/_testmultiphase.c:710 createfunc_null +func createNull() (objects.Object, *pyerrors.Exception) { + return nil, nil +} + +// createRaise ports createfunc_raise: sets SystemError and returns NULL. +// +// CPython: Modules/_testmultiphase.c:726 createfunc_raise +func createRaise() (objects.Object, *pyerrors.Exception) { + return nil, newSystemError("bad create function") +} + +// createUnreported ports createfunc_unreported_exception: sets SystemError but +// returns a module, so the loader chains a SystemError onto the leftover one. +// +// CPython: Modules/_testmultiphase.c:746 createfunc_unreported_exception +func createUnreported() (objects.Object, *pyerrors.Exception) { + return objects.NewModule("foo"), newSystemError("bad create function") +} + +// execErr ports execfunc_err: returns -1 without setting an exception. +// +// CPython: Modules/_testmultiphase.c:786 execfunc_err +func execErr(objects.Object) (int, *pyerrors.Exception) { + return -1, nil +} + +// execRaise ports execfunc_raise: sets SystemError and returns -1. +// +// CPython: Modules/_testmultiphase.c:805 execfunc_raise +func execRaise(objects.Object) (int, *pyerrors.Exception) { + return -1, newSystemError("bad exec function") +} + +// execUnreported ports execfunc_unreported_exception: sets SystemError but +// returns 0, so the loader chains a SystemError onto the leftover one. +// +// CPython: Modules/_testmultiphase.c:826 execfunc_unreported_exception +func execUnreported(objects.Object) (int, *pyerrors.Exception) { + return 0, newSystemError("bad exec function") +} + +// nonmoduleBar ports nonmodule_bar: bar(i, j) returns i - j. +// +// CPython: Modules/_testmultiphase.c:501 nonmodule_bar +func nonmoduleBar(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: bar() takes exactly 2 arguments (%d given)", len(args)) + } + i, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + j, ok := args[1].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[1].Type().Name) + } + iv, _ := i.Int64() + jv, _ := j.Int64() + return objects.NewInt(iv - jv), nil +} + +// exampleObject backs _testimportexec.Example: a GC type whose attribute +// store is an explicit x_attr dict consulted ahead of the generic +// attribute machinery. +// +// CPython: Modules/_testmultiphase.c:25 ExampleObject +type exampleObject struct { + objects.Header + xAttr *objects.Dict +} + +// exampleType / strType / errorType are the singletons installed by +// execfunc. +// +// CPython: Modules/_testmultiphase.c:124 Example_Type_spec +// CPython: Modules/_testmultiphase.c:360 Str_Type_spec +// CPython: Modules/_testmultiphase.c:388 PyErr_NewException("_testimportexec.error") +var ( + exampleType *objects.Type + strType *objects.Type + errorType *objects.Type + buildTypesOnce sync.Once +) + +// exampleDemo ports Example_demo: demo(o=None) returns o when it is a +// str, otherwise None. +// +// CPython: Modules/_testmultiphase.c:57 Example_demo +func exampleDemo(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: demo() missing self argument") + } + rest := args[1:] + if len(rest) > 1 { + return nil, fmt.Errorf("TypeError: demo() takes at most 1 argument (%d given)", len(rest)) + } + if len(rest) == 1 { + if _, ok := rest[0].(*objects.Unicode); ok { + return rest[0], nil + } + } + return objects.None(), nil +} + +// exampleGetattro ports Example_getattro: consult x_attr first, then fall +// back to PyObject_GenericGetAttr. +// +// CPython: Modules/_testmultiphase.c:77 Example_getattro +func exampleGetattro(o objects.Object, name objects.Object) (objects.Object, error) { + self, ok := o.(*exampleObject) + if ok && self.xAttr != nil { + found, err := self.xAttr.Contains(name) + if err != nil { + return nil, err + } + if found { + v, err := self.xAttr.GetItem(name) + if err != nil { + return nil, err + } + objects.Incref(v) + return v, nil + } + } + return objects.GenericGetAttr(o, name) +} + +// exampleSetattro ports Example_setattr: store into the lazily created +// x_attr dict; a delete of a missing key raises AttributeError. +// +// CPython: Modules/_testmultiphase.c:93 Example_setattr +func exampleSetattro(o objects.Object, name objects.Object, value objects.Object) error { + self, ok := o.(*exampleObject) + if !ok { + return fmt.Errorf("TypeError: not an Example") + } + if self.xAttr == nil { + self.xAttr = objects.NewDict() + } + if value == nil { + found, err := self.xAttr.Contains(name) + if err != nil { + return err + } + if !found { + return fmt.Errorf("AttributeError: delete non-existing Example attribute") + } + return self.xAttr.DelItem(name) + } + return self.xAttr.SetItem(name, value) +} + +// exampleTraverse keeps x_attr reachable for the collector. +// +// CPython: Modules/_testmultiphase.c:42 Example_traverse +func exampleTraverse(o objects.Object, visit objects.Visitor) error { + self, ok := o.(*exampleObject) + if !ok || self.xAttr == nil { + return nil + } + return visit(self.xAttr) +} + +// exampleNew constructs a bare Example instance. +// +// CPython: Modules/_testmultiphase.c:124 Example_Type_spec (tp_new via +// PyType_GenericNew default) +func exampleNew(cls *objects.Type, _ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + o := &exampleObject{} + o.Init(cls) + return o, nil +} + +// testexportFoo ports testexport_foo: foo(i, j) returns i + j. +// +// CPython: Modules/_testmultiphase.c:309 testexport_foo +func testexportFoo(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: foo() takes exactly 2 arguments (%d given)", len(args)) + } + i, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + j, ok := args[1].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[1].Type().Name) + } + iv, _ := i.Int64() + jv, _ := j.Int64() + return objects.NewInt(iv + jv), nil +} + +// callStateRegistrationFunc ports call_state_registration_func. gopy has no +// per-module C state registry: PyState_FindModule has nothing to find (case 0 +// returns None) and PyState_AddModule / PyState_RemoveModule fail with a +// SystemError on a multi-phase module, exactly the behaviour the test asserts. +// +// CPython: Modules/_testmultiphase.c:328 call_state_registration_func +func callStateRegistrationFunc(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: call_state_registration_func() takes exactly 1 argument (%d given)", len(args)) + } + iv, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + switch n, _ := iv.Int64(); n { + case 0: + // PyState_FindModule: nothing registered, so None. + return objects.None(), nil + case 1: + // PyState_AddModule on a multi-phase module fails. + return nil, objects.NewRaisedError(newSystemError("PyState_AddModule failed"), "SystemError: PyState_AddModule failed") + case 2: + // PyState_RemoveModule on a multi-phase module fails. + return nil, objects.NewRaisedError(newSystemError("PyState_RemoveModule failed"), "SystemError: PyState_RemoveModule failed") + } + return objects.None(), nil +} + +// buildTypes constructs the Example / Str / error types. It runs lazily at +// module-exec time rather than at Go package init: str's tp_new is wired by +// the builtins package during interpreter startup, and a str subclass built +// before that wiring inherits a nil tp_new (so Str(1) would allocate a plain +// instance instead of a real str). Deferring to exec guarantees str is ready. +func buildTypes() { + // CPython: Modules/_testmultiphase.c:114 Example_Type_slots + exampleType = objects.NewType("Example", []*objects.Type{objects.ObjectType()}) + exampleType.Module = "_testimportexec" + exampleType.TpFlags |= objects.TpFlagHaveGC + exampleType.TpNew = exampleNew + exampleType.Getattro = exampleGetattro + exampleType.Setattro = exampleSetattro + exampleType.TpTraverse = exampleTraverse + objects.SetTypeDescr(exampleType, "demo", objects.NewMethodDescr(exampleType, "demo", exampleDemo)) + + // Str is a str subclass (Py_tp_base = &PyUnicode_Type). Building it through + // NewUserType is the faithful PyType_FromSpec analog: it inherits str's + // tp_new, so Str() and Str(1) construct real str-subclass instances. + // + // CPython: Modules/_testmultiphase.c:360 Str_Type_spec (BASETYPE) + strType = objects.NewUserType("Str", []*objects.Type{objects.StrType()}, objects.NewDict()) + strType.Module = "_testimportexec" + strType.TpFlags |= objects.TpFlagBasetype + + // CPython: Modules/_testmultiphase.c:388 PyErr_NewException + errorType = pyerrors.NewExcType("error", []*objects.Type{pyerrors.PyExc_Exception}) + errorType.Module = "_testimportexec" +} + +// execMain ports execfunc: the exec slot that installs the Example / error / +// Str types and the int_const / str_const constants on the module. +// +// CPython: Modules/_testmultiphase.c:381 execfunc +func execMain(m objects.Object) (int, *pyerrors.Exception) { + mod, ok := m.(*objects.Module) + if !ok { + return -1, newSystemError("execfunc: not a module") + } + buildTypesOnce.Do(buildTypes) + d := mod.Dict() + // CPython: Modules/_testmultiphase.c:393 PyModule_Add "Example" + if err := d.SetItem(objects.NewStr("Example"), exampleType); err != nil { + return -1, newSystemError(err.Error()) + } + // CPython: Modules/_testmultiphase.c:399 PyModule_Add "error" + if err := d.SetItem(objects.NewStr("error"), errorType); err != nil { + return -1, newSystemError(err.Error()) + } + // CPython: Modules/_testmultiphase.c:405 PyModule_Add "Str" + if err := d.SetItem(objects.NewStr("Str"), strType); err != nil { + return -1, newSystemError(err.Error()) + } + // CPython: Modules/_testmultiphase.c:409 PyModule_AddIntConstant int_const 1969 + if err := d.SetItem(objects.NewStr("int_const"), objects.NewInt(1969)); err != nil { + return -1, newSystemError(err.Error()) + } + // CPython: Modules/_testmultiphase.c:413 PyModule_AddStringConstant str_const + if err := d.SetItem(objects.NewStr("str_const"), objects.NewStr("something different")); err != nil { + return -1, newSystemError(err.Error()) + } + return 0, nil +} diff --git a/module/_testsinglephase/module.go b/module/_testsinglephase/module.go new file mode 100644 index 000000000..716efec0a --- /dev/null +++ b/module/_testsinglephase/module.go @@ -0,0 +1,393 @@ +// Package testsinglephase is the gopy port of CPython's +// Modules/_testsinglephase.c, the legacy single-phase-init extension the +// import test suite drives through ExtensionFileLoader. CPython ships one +// compiled .so exposing several PyInit_ entry points; gopy registers each +// one as an extension module keyed by name, carrying the single-phase +// marker and the PyModuleDef m_size the import machinery reads. +// +// The variants mirror the kinds Python/import.c documents: +// - "basic" (_testsinglephase, m_size == -1): no per-module state, a +// process-global initialized_count, cached, reloaded from m_copy. +// - the indirect (_basic_wrapper) and direct (_basic_copy) basic variants. +// - "reinit" (_with_reinit, m_size == 0): re-runs init, no state. +// - "with state" (_with_state, m_size > 0): per-module state, re-runs init. +// - the *_check_cache_first variants: return PyState_FindModule first. +// - _testsinglephase_raise_exception: PyInit raises and returns NULL. +// +// CPython: Modules/_testsinglephase.c:489 init__testsinglephase_basic +package testsinglephase + +import ( + "fmt" + "sync" + "time" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// variant selects how a registered entry stores its module state. +type variant int + +const ( + vBasic variant = iota // m_size == -1, process-global state + vReinit // m_size == 0, no state + vWithState // m_size > 0, per-module state +) + +func init() { + // gopy cannot dlopen the compiled extension, so each PyInit_ entry the C + // module exposes is registered as a single-phase gopy extension module. + // _imp.create_dynamic dispatches here; the single-phase marker drives the + // subinterpreter compat gate and the m_size selects the reload behaviour. + // + // CPython: Modules/_testsinglephase.c:489 _testsinglephase_basic + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase", + SinglePhase: true, + MSize: -1, + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase") }, + }) + // PyInit__testsinglephase_basic_wrapper just calls PyInit__testsinglephase, + // so it shares the def (and modules_by_index slot) and builds a module + // named "_testsinglephase". + // + // CPython: Modules/_testsinglephase.c:537 PyInit__testsinglephase_basic_wrapper + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_basic_wrapper", + SinglePhase: true, + MSize: -1, + DefName: "_testsinglephase", + ShareDefWith: "_testsinglephase", + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase") }, + }) + // PyInit__testsinglephase_basic_copy has its own def but shares the basic + // methods and the process-global state. + // + // CPython: Modules/_testsinglephase.c:544 PyInit__testsinglephase_basic_copy + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_basic_copy", + SinglePhase: true, + MSize: -1, + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase_basic_copy") }, + }) + // CPython: Modules/_testsinglephase.c:582 PyInit__testsinglephase_with_reinit + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_with_reinit", + SinglePhase: true, + MSize: 0, + Init: func() (*objects.Module, error) { return buildStateful("_testsinglephase_with_reinit", vReinit) }, + }) + // CPython: Modules/_testsinglephase.c:659 PyInit__testsinglephase_with_state + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_with_state", + SinglePhase: true, + MSize: 42, + Init: func() (*objects.Module, error) { return buildStateful("_testsinglephase_with_state", vWithState) }, + }) + // The *_check_cache_first variants return PyState_FindModule(def) before + // creating a fresh module and are never recorded in the extensions cache. + // + // CPython: Modules/_testsinglephase.c:704 _check_cache_first modules + for _, cc := range []struct { + name string + mSize int + }{ + {"_testsinglephase_check_cache_first", -1}, + {"_testsinglephase_with_reinit_check_cache_first", 0}, + {"_testsinglephase_with_state_check_cache_first", 42}, + } { + name := cc.name + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: name, + SinglePhase: true, + MSize: cc.mSize, + CheckCacheFirst: true, + Init: func() (*objects.Module, error) { return buildCheckCacheFirst(name) }, + }) + } + // CPython: Modules/_testsinglephase.c:805 PyInit__testsinglephase_raise_exception + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_raise_exception", + SinglePhase: true, + MSize: -1, + Init: raiseException, + }) + // _testsinglephase_circular manages its own static cache (a process-global + // pointer) and imports a helper module from PyInit before adding itself to + // sys.modules, the gh-123950 circular-import fixture. Its def leaves m_size + // unset, so it is the reinit (m_size == 0) kind. + // + // CPython: Modules/_testsinglephase.c:780 PyInit__testsinglephase_circular + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_circular", + SinglePhase: true, + MSize: 0, + Init: buildCircular, + }) +} + +// errorType is the _testsinglephase.error exception the module installs. +// +// CPython: Modules/_testsinglephase.c:303 PyErr_NewException("_testsinglephase.error") +var errorType = pyerrors.NewExcType("_testsinglephase.error", []*objects.Type{pyerrors.PyExc_Exception}) + +// moduleState mirrors the C module_state: the time the state was +// initialized. A zero initialized time means uninitialized. +// +// CPython: Modules/_testsinglephase.c:174 module_state +type moduleState struct { + initialized time.Time +} + +// notInitialized is global_state.initialized_count's sentinel value before +// the basic module is loaded or after _clear_globals. +// +// CPython: Modules/_testsinglephase.c:229 NOT_INITIALIZED +const notInitialized = -1 + +// globalState mirrors the C global_state shared by the basic module and its +// variants across (sub)interpreters: an initialized count and a single +// module_state. +// +// CPython: Modules/_testsinglephase.c:185 global_state +var globalState = struct { + mu sync.Mutex + initializedCount int64 + module moduleState +}{initializedCount: notInitialized} + +func secondsSinceEpoch(t time.Time) float64 { + if t.IsZero() { + return 0 + } + return float64(t.UnixNano()) / 1e9 +} + +// commonSum ports common_sum: sum(i, j) returns i + j. +// +// CPython: Modules/_testsinglephase.c:396 common_sum +func commonSum(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: sum() takes exactly 2 arguments (%d given)", len(args)) + } + i, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + j, ok := args[1].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[1].Type().Name) + } + iv, _ := i.Int64() + jv, _ := j.Int64() + return objects.NewInt(iv + jv), nil +} + +// raiseException is PyInit__testsinglephase_raise_exception: it sets +// RuntimeError("evil") and returns NULL, the gh-144601 fixture for a +// PyInit that fails. +// +// CPython: Modules/_testsinglephase.c:805 PyInit__testsinglephase_raise_exception +func raiseException() (*objects.Module, error) { + exc := pyerrors.New(pyerrors.PyExc_RuntimeError, objects.NewTuple([]objects.Object{objects.NewStr("evil")})) + return nil, objects.NewRaisedError(exc, "") +} + +// installCommon installs the methods and constants every variant shares: +// look_up_self, sum, state_initialized, plus the error type and the +// int_const / str_const / _module_initialized attributes init_module sets. +// +// CPython: Modules/_testsinglephase.c:325 init_module +func installCommon(m *objects.Module, st *moduleState, hasState bool) error { + d := m.Dict() + methods := []struct { + name string + fn func([]objects.Object, map[string]objects.Object) (objects.Object, error) + }{ + {"look_up_self", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + return imp.ModuleSelf(m), nil + }}, + {"sum", commonSum}, + {"state_initialized", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + // common_state_initialized returns None when the module has no + // state (m_size == 0); otherwise the seconds-since-epoch the + // state was initialized (0.0 once cleared). + if !hasState { + return objects.None(), nil + } + return objects.NewFloat(secondsSinceEpoch(st.initialized)), nil + }}, + } + for _, mm := range methods { + if err := d.SetItem(objects.NewStr(mm.name), objects.NewBuiltinFunction(mm.name, mm.fn)); err != nil { + return err + } + } + // CPython: Modules/_testsinglephase.c:303 state->error + if err := d.SetItem(objects.NewStr("error"), errorType); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:308 state->int_const 1969 + if err := d.SetItem(objects.NewStr("int_const"), objects.NewInt(1969)); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:313 state->str_const + if err := d.SetItem(objects.NewStr("str_const"), objects.NewStr("something different")); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:338 _module_initialized + if err := d.SetItem(objects.NewStr("_module_initialized"), objects.NewFloat(secondsSinceEpoch(st.initialized))); err != nil { + return err + } + return nil +} + +// buildBasic ports init__testsinglephase_basic: the basic module shares the +// process-global module_state and bumps the global initialized_count. +// state_initialized reads that shared state, so it returns 0.0 (not None) +// once the globals are cleared. +// +// CPython: Modules/_testsinglephase.c:497 init__testsinglephase_basic +func buildBasic(defName string) (*objects.Module, error) { + globalState.mu.Lock() + if globalState.initializedCount == notInitialized { + globalState.initializedCount = 0 + } + // clear_state then init_state: stamp the global state's initialized time. + globalState.module.initialized = time.Now() + st := globalState.module + globalState.initializedCount++ + globalState.mu.Unlock() + + m := objects.NewModule(defName) + // state_initialized must read the live global state, not a copy, so the + // closure captures &globalState.module. + if err := installCommon(m, &globalState.module, true); err != nil { + return nil, err + } + // Re-stamp _module_initialized from the snapshot taken under the lock so + // it matches state_initialized at load time. + if err := m.Dict().SetItem(objects.NewStr("_module_initialized"), objects.NewFloat(secondsSinceEpoch(st.initialized))); err != nil { + return nil, err + } + d := m.Dict() + if err := d.SetItem(objects.NewStr("initialized_count"), objects.NewBuiltinFunction("initialized_count", basicInitializedCount)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("_clear_globals"), objects.NewBuiltinFunction("_clear_globals", basicClearGlobals)); err != nil { + return nil, err + } + return m, nil +} + +// basicInitializedCount ports basic_initialized_count. +// +// CPython: Modules/_testsinglephase.c:416 basic_initialized_count +func basicInitializedCount(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + globalState.mu.Lock() + defer globalState.mu.Unlock() + return objects.NewInt(globalState.initializedCount), nil +} + +// basicClearGlobals ports basic__clear_globals -> clear_global_state: it +// clears the shared module_state and resets initialized_count to +// NOT_INITIALIZED (-1). +// +// CPython: Modules/_testsinglephase.c:434 basic__clear_globals +// +// (Modules/_testsinglephase.c:197 clear_global_state) +func basicClearGlobals(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + globalState.mu.Lock() + defer globalState.mu.Unlock() + globalState.module.initialized = time.Time{} + globalState.initializedCount = notInitialized + return objects.None(), nil +} + +// buildStateful ports the with_reinit (m_size == 0) and with_state +// (m_size > 0) variants: each load runs init fresh against a state that is +// not the process-global one. A reinit module has no readable state +// (state_initialized returns None); a with_state module reads its own. +// +// CPython: Modules/_testsinglephase.c:582 / 659 +func buildStateful(name string, v variant) (*objects.Module, error) { + st := &moduleState{initialized: time.Now()} + m := objects.NewModule(name) + hasState := v == vWithState + if err := installCommon(m, st, hasState); err != nil { + return nil, err + } + if v == vWithState { + // _clear_module_state clears the per-module state. + // + // CPython: Modules/_testsinglephase.c:452 basic__clear_module_state + if err := m.Dict().SetItem(objects.NewStr("_clear_module_state"), + objects.NewBuiltinFunction("_clear_module_state", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + st.initialized = time.Time{} + return objects.None(), nil + })); err != nil { + return nil, err + } + } + return m, nil +} + +// staticModuleCircular mirrors the C static_module_circular pointer: the +// _testsinglephase_circular module caches itself in a process-global so a +// re-entrant PyInit (the circular import) reuses the same partially built +// object. clear_static_var resets it. +// +// CPython: Modules/_testsinglephase.c:758 static_module_circular +var staticModuleCircular *objects.Module + +// circularHelperName is the module PyInit imports before returning, the +// half of the cycle that imports _testsinglephase_circular back again. +// +// CPython: Modules/_testsinglephase.c:788 helper_mod_name +const circularHelperName = "test.test_import.data.circular_imports.singlephase" + +// buildCircular ports PyInit__testsinglephase_circular: it lazily builds the +// module into a static pointer, imports the helper module (which re-imports +// this module before it is in sys.modules), then records helper_mod_name and +// returns the cached object. +// +// CPython: Modules/_testsinglephase.c:780 PyInit__testsinglephase_circular +func buildCircular() (*objects.Module, error) { + if staticModuleCircular == nil { + m := objects.NewModule("_testsinglephase_circular") + // CPython: Modules/_testsinglephase.c:761 circularmod_clear_static_var + if err := m.Dict().SetItem(objects.NewStr("clear_static_var"), + objects.NewBuiltinFunction("clear_static_var", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + result := staticModuleCircular + staticModuleCircular = nil + if result == nil { + return objects.None(), nil + } + return result, nil + })); err != nil { + return nil, err + } + staticModuleCircular = m + } + if objects.ImportModuleHook == nil { + return nil, fmt.Errorf("ImportError: import machinery unavailable") + } + if _, err := objects.ImportModuleHook(circularHelperName); err != nil { + return nil, err + } + // CPython: Modules/_testsinglephase.c:795 PyModule_AddStringConstant + if err := staticModuleCircular.Dict().SetItem(objects.NewStr("helper_mod_name"), objects.NewStr(circularHelperName)); err != nil { + return nil, err + } + return staticModuleCircular, nil +} + +// buildCheckCacheFirst ports the *_check_cache_first PyInit functions, which +// only ever load fresh: a bare module carrying its own name. +// +// CPython: Modules/_testsinglephase.c:704 _check_cache_first modules +func buildCheckCacheFirst(name string) (*objects.Module, error) { + return objects.NewModule(name), nil +} diff --git a/module/_thread/excepthook.go b/module/_thread/excepthook.go new file mode 100644 index 000000000..7f5aa9247 --- /dev/null +++ b/module/_thread/excepthook.go @@ -0,0 +1,167 @@ +// _thread._excepthook and the _ExceptHookArgs struct-sequence back +// threading.excepthook. When a Thread.run() lets an exception escape, +// threading._bootstrap_inner builds an _ExceptHookArgs(exc_type, +// exc_value, exc_traceback, thread) and hands it to threading.excepthook, +// whose C default is _thread._excepthook. The default prints +// "Exception in thread {name}:" followed by the traceback to the thread's +// stderr. +// +// CPython: Modules/_threadmodule.c:2275 thread_excepthook +package _thread + +import ( + "fmt" + + "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// exceptHookArgsType is the _thread._ExceptHookArgs struct-sequence type. +// +// CPython: Modules/_threadmodule.c:2266 ExceptHookArgs_desc +var exceptHookArgsType = objects.NewStructSeqTypeDesc(objects.StructSeqDesc{ + Name: "_thread._ExceptHookArgs", + Fields: []objects.StructSeqField{ + {Name: "exc_type", Doc: "Exception type"}, + {Name: "exc_value", Doc: "Exception value"}, + {Name: "exc_traceback", Doc: "Exception traceback"}, + {Name: "thread", Doc: "Thread"}, + }, + NInSequence: 4, +}) + +// threadExceptHook is the default threading.excepthook. It expects a +// single _ExceptHookArgs and reports the uncaught thread exception. +// +// CPython: Modules/_threadmodule.c:2275 thread_excepthook +func threadExceptHook(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _thread._excepthook expected 1 argument") + } + hookArgs, ok := args[0].(*objects.StructSeq) + if !ok || hookArgs.Type() != exceptHookArgsType { + return nil, fmt.Errorf( + "TypeError: _thread.excepthook argument type must be ExceptHookArgs") + } + items := hookArgs.Items() + + // Silently ignore SystemExit, matching the C default. + excType := items[0] + if t, ok := excType.(*objects.Type); ok && objects.IsSubtype(t, errors.PyExc_SystemExit) { + return objects.None(), nil + } + + excValue := items[1] + thread := items[3] + + // Resolve the destination stream: sys.stderr, else thread._stderr. + // + // CPython: Modules/_threadmodule.c:2298 _PySys_GetOptionalAttr(stderr) + file := optionalSysStderr() + if file == nil || file == objects.None() { + if thread == objects.None() { + return objects.None(), nil + } + f, err := objects.GetAttr(thread, objects.NewStr("_stderr")) + if err != nil { + return nil, err + } + if f == objects.None() { + return objects.None(), nil + } + file = f + } + + if err := threadExceptHookFile(file, excValue, thread); err != nil { + return nil, err + } + return objects.None(), nil +} + +// threadExceptHookFile writes the thread name header and the traceback to +// file, then flushes it. +// +// CPython: Modules/_threadmodule.c:2197 thread_excepthook_file +func threadExceptHookFile(file, excValue, thread objects.Object) error { + if err := fileWriteString(file, "Exception in thread "); err != nil { + return err + } + + name := objects.Object(nil) + if thread != objects.None() { + n, err := objects.GetAttr(thread, objects.NewStr("name")) + if err == nil { + name = n + } + } + if name != nil { + s, err := objects.Str(name) + if err != nil { + return err + } + if err := fileWriteString(file, s); err != nil { + return err + } + } else { + if err := fileWriteString(file, ""); err != nil { + return err + } + } + + if err := fileWriteString(file, ":\n"); err != nil { + return err + } + + // Display the traceback through the same formatter sys.excepthook uses. + // + // CPython: Modules/_threadmodule.c:2241 _PyErr_Display + text := "" + if exc, ok := excValue.(*errors.Exception); ok { + text = errors.FormatException(exc) + } else { + repr, err := objects.Str(excValue) + if err == nil { + text = excValue.Type().Name + ": " + repr + "\n" + } else { + text = excValue.Type().Name + "\n" + } + } + if err := fileWriteString(file, text); err != nil { + return err + } + + // file.flush(), best effort. + if flush, err := objects.GetAttr(file, objects.NewStr("flush")); err == nil { + _, _ = objects.Call(flush, objects.NewTuple(nil), nil) + } + return nil +} + +// fileWriteString writes s through file.write, mirroring PyFile_WriteString. +// +// CPython: Objects/fileobject.c PyFile_WriteString +func fileWriteString(file objects.Object, s string) error { + write, err := objects.GetAttr(file, objects.NewStr("write")) + if err != nil { + return err + } + _, err = objects.Call(write, objects.NewTuple([]objects.Object{objects.NewStr(s)}), nil) + return err +} + +// optionalSysStderr returns sys.stderr, or nil if sys or its stderr is +// unavailable. +// +// CPython: Python/sysmodule.c _PySys_GetOptionalAttr +func optionalSysStderr() objects.Object { + sysMod, ok := imp.GetModule("sys") + if !ok { + return nil + } + f, err := objects.GetAttr(sysMod, objects.NewStr("stderr")) + if err != nil { + return nil + } + return f +} diff --git a/module/_thread/module.go b/module/_thread/module.go index d676101f9..422efa688 100644 --- a/module/_thread/module.go +++ b/module/_thread/module.go @@ -50,6 +50,7 @@ func buildModule() (*objects.Module, error) { {"_is_main_interpreter", threadIsMainInterpreter}, {"stack_size", threadStackSize}, {"_count", threadCount}, + {"_excepthook", threadExceptHook}, } for _, e := range entries { bf := objects.NewBuiltinFunction(e.name, e.fn) @@ -97,6 +98,13 @@ func buildModule() (*objects.Module, error) { return nil, err } + // _ExceptHookArgs: struct-sequence threading.excepthook receives. + // + // CPython: Modules/_threadmodule.c:2710 PyStructSequence_NewType + if err := d.SetItem(objects.NewStr("_ExceptHookArgs"), exceptHookArgsType); err != nil { + return nil, err + } + // error is the module-level exception class. errCls := objects.NewType("_thread.error", []*objects.Type{objects.ObjectType()}) if err := d.SetItem(objects.NewStr("error"), errCls); err != nil { @@ -491,9 +499,21 @@ func threadStartNewThread(args []objects.Object, kwargs map[string]objects.Objec go func() { defer atomic.AddInt64(&activeThreadCount, -1) if enter != nil { + // The identity is already known synchronously (the spawn hook + // returned it on the parent goroutine), so hand it back before + // enter() takes the GIL. enter() blocks until the GIL is free, + // and the parent is the holder: it only releases the lock once + // it returns from start_new_thread and later blocks (on a join, + // lock, or sleep) through Py_BEGIN_ALLOW_THREADS. Sending the id + // first lets the parent get that far instead of deadlocking + // against a child that cannot publish its id until it owns a GIL + // the parent still holds. + // + // CPython: Modules/_threadmodule.c:1166 thread_PyThread_start_new_thread + // returns the ident before the bootstrap thread runs. + idCh <- ident enter() defer leave() - idCh <- ident } else { idCh <- goid() } diff --git a/module/_thread/rlock.go b/module/_thread/rlock.go index f4a162ef2..ffae1ca09 100644 --- a/module/_thread/rlock.go +++ b/module/_thread/rlock.go @@ -213,7 +213,15 @@ func rlockAcquire(r *rlockObject, args []objects.Object, kwargs map[string]objec } if timeoutSecs < 0 { - r.gate.Lock() + // Block indefinitely on the gate. Drop the GIL while parked so the + // owning thread can run far enough to release the gate; holding the + // GIL here would deadlock the owner (and any other Python thread) + // against this goroutine. lockAcquire does the same for non-reentrant + // locks. + // + // CPython: Modules/_threadmodule.c:1083 rlock_acquire releases the GIL + // (ACQUIRE_LOCK runs under Py_BEGIN_ALLOW_THREADS) + objects.AllowThreads(func() { r.gate.Lock() }) r.mu.Lock() r.owner = me r.count = 1 @@ -222,19 +230,27 @@ func rlockAcquire(r *rlockObject, args []objects.Object, kwargs map[string]objec } deadline := time.Now().Add(time.Duration(timeoutSecs * float64(time.Second))) - for { - if r.gate.TryLock() { - r.mu.Lock() - r.owner = me - r.count = 1 - r.mu.Unlock() - return objects.True(), nil - } - if time.Now().After(deadline) { - return objects.False(), nil + acquired := false + objects.AllowThreads(func() { + for { + if r.gate.TryLock() { + acquired = true + return + } + if time.Now().After(deadline) { + return + } + time.Sleep(100 * time.Microsecond) } - time.Sleep(100 * time.Microsecond) + }) + if acquired { + r.mu.Lock() + r.owner = me + r.count = 1 + r.mu.Unlock() + return objects.True(), nil } + return objects.False(), nil } // rlockRelease decrements the recursion counter; when it hits zero the diff --git a/module/_time/module.go b/module/_time/module.go index 685b82e41..498e61790 100644 --- a/module/_time/module.go +++ b/module/_time/module.go @@ -355,7 +355,11 @@ func sleep(args []objects.Object, _ map[string]objects.Object) (objects.Object, return nil, fmt.Errorf("ValueError: sleep length must be non-negative") } if secs > 0 { - gotime.Sleep(gotime.Duration(secs * float64(gotime.Second))) + // Release the GIL while parked so other Python threads run. + // CPython: Modules/timemodule.c:394 time_sleep (Py_BEGIN_ALLOW_THREADS) + objects.AllowThreads(func() { + gotime.Sleep(gotime.Duration(secs * float64(gotime.Second))) + }) } return objects.None(), nil } diff --git a/module/_warnings/lexer.go b/module/_warnings/lexer.go index 17afdec07..d08f8df81 100644 --- a/module/_warnings/lexer.go +++ b/module/_warnings/lexer.go @@ -5,9 +5,10 @@ import ( "github.com/tamnd/gopy/compile" "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" - "github.com/tamnd/gopy/parser/lexer" parsererrors "github.com/tamnd/gopy/parser/errors" + "github.com/tamnd/gopy/parser/lexer" ) // init wires the package-level hooks in parser/lexer and compile so @@ -30,6 +31,10 @@ func init() { // CPython: Objects/typeobject.c:4667 PyErr_WarnFormat(..., 1, ...) return WarnUnicode(errors.PyExc_RuntimeWarning, msg, 1, nil) } + imp.ImportWarnHook = func(msg string) error { + // CPython: Lib/importlib/_bootstrap.py:1353 _warnings.warn(msg, ImportWarning) + return WarnUnicode(errors.PyExc_ImportWarning, msg, 1, nil) + } } // FlushLexerWarnings posts every SyntaxWarning-class diagnostic the diff --git a/module/contextvars/context.go b/module/contextvars/context.go index 9c537d467..6e13d1525 100644 --- a/module/contextvars/context.go +++ b/module/contextvars/context.go @@ -36,15 +36,32 @@ func NewContext() *Context { } // newContextFromVars wraps vars in a freshly-allocated Context. Used -// by Copy and CopyCurrent. +// by Copy and CopyCurrent. The Context takes its own reference on the +// shared HAMT (Py_NewRef), so two contexts sharing a HAMT version each +// own +1 and the version survives until both release it. // // CPython: Python/context.c:458 context_new_from_vars func newContextFromVars(vars *hamt.Hamt) *Context { c := &Context{vars: vars} c.Init(ContextType) + objects.Incref(vars) return c } +// setVars replaces c.vars with the owned reference newVars, dropping +// the reference the context held on the prior HAMT version. The store +// happens before the decref so a shared version cannot be torn down +// mid-swap. +// +// CPython: Python/context.c:795 Py_SETREF(ctx->ctx_vars, new_vars) +func (c *Context) setVars(newVars *hamt.Hamt) { + old := c.vars + c.vars = newVars + if old != nil { + objects.Decref(old) + } +} + // Copy returns a shallow snapshot of c. The new Context shares the // HAMT with c (the HAMT is immutable, so sharing is safe) but starts // with prev=nil and entered=false. @@ -167,6 +184,32 @@ func contextRepr(c *Context) string { return r } +// contextDealloc releases the reference the context holds on its HAMT. +// prev is a borrowed back-link onto the thread's context stack (gopy's +// ts.SetContext is non-owning, mirroring Go's tracing GC), so it is not +// refcounted here. +// +// CPython: Python/context.c:495 context_tp_dealloc +func contextDealloc(o objects.Object) { + c := o.(*Context) + if c.vars != nil { + objects.Decref(c.vars) + c.vars = nil + } +} + +// contextTraverse visits the HAMT for the cyclic collector. prev is a +// borrowed back-link (see contextDealloc) so it is not visited. +// +// CPython: Python/context.c:481 context_tp_traverse +func contextTraverse(o objects.Object, visit objects.Visitor) error { + c := o.(*Context) + if c.vars != nil { + return visit(c.vars) + } + return nil +} + // Sentinels returned by Enter / Exit alongside the SetException // call. The VM-facing surface uses errors.Occurred to recover the // real Python exception; these Go values just signal "an exception diff --git a/module/contextvars/context_methods.go b/module/contextvars/context_methods.go index 02343494e..99ae5eae0 100644 --- a/module/contextvars/context_methods.go +++ b/module/contextvars/context_methods.go @@ -242,8 +242,15 @@ func contextGetMethod(c *Context, args []objects.Object, kwargs map[string]objec return nil, err } if !found { - return def, nil + val = def } + // Context.get returns a new reference, mirroring PyContextVar_Get: + // the value is borrowed from the HAMT (or the supplied default), so + // hand back an owned ref the VM can pop and decref without driving a + // HAMT-only value below its true refcount. + // + // CPython: Python/context.c:946 _contextvars_ContextVar_get_impl + objects.Incref(val) return val, nil } @@ -281,6 +288,11 @@ func contextSubscript(o, key objects.Object) (objects.Object, error) { } return nil, fmt.Errorf("KeyError") } + // ctx[var] returns a new reference: the value is borrowed from the + // HAMT, so the result the VM pops and decrefs must carry its own +1. + // + // CPython: Python/context.c:585 context_tp_subscript + objects.Incref(val) return val, nil } diff --git a/module/contextvars/contextvar.go b/module/contextvars/contextvar.go index ace78540b..3b5656515 100644 --- a/module/contextvars/contextvar.go +++ b/module/contextvars/contextvar.go @@ -52,10 +52,38 @@ func NewContextVar(name string, defaultVal objects.Object, hasDefault bool) *Con hasDefault: hasDefault, } cv.Init(ContextVarType) + if hasDefault && defaultVal != nil { + objects.Incref(defaultVal) // the ContextVar owns its default (Py_XNewRef) + } cv.hash = generateHash(unsafe.Pointer(cv), name) return cv } +// contextVarDealloc releases the default value reference. The cache +// slot is a borrowed reference (validated by tsid/version, never +// incref'd) so it is not released here. +// +// CPython: Python/context.c:945 contextvar_tp_dealloc +func contextVarDealloc(o objects.Object) { + cv := o.(*ContextVar) + if cv.hasDefault && cv.defaultVal != nil { + objects.Decref(cv.defaultVal) + cv.defaultVal = nil + } +} + +// contextVarTraverse visits the default value for the cyclic +// collector. +// +// CPython: Python/context.c:935 contextvar_tp_traverse +func contextVarTraverse(o objects.Object, visit objects.Visitor) error { + cv := o.(*ContextVar) + if cv.hasDefault && cv.defaultVal != nil { + return visit(cv.defaultVal) + } + return nil +} + // Name returns the variable's name as a Go string. func (cv *ContextVar) Name() string { return cv.name } @@ -198,7 +226,7 @@ func (cv *ContextVar) Set(ts *state.Thread, val objects.Object) (*Token, error) if err != nil { return nil, err } - ctx.vars = newVars + ctx.setVars(newVars) cv.cacheStore(ts, val) return tok, nil @@ -238,14 +266,14 @@ func (cv *ContextVar) Reset(ts *state.Thread, tok *Token) error { errors.SetString(ts, errors.PyExc_LookupError, cv.name) return errLookup } - ctx.vars = newVars + ctx.setVars(newVars) return nil } newVars, err := ctx.vars.Assoc(cv, tok.oldVal) if err != nil { return err } - ctx.vars = newVars + ctx.setVars(newVars) return nil } diff --git a/module/contextvars/methods.go b/module/contextvars/methods.go index fd21ab35f..f776564ea 100644 --- a/module/contextvars/methods.go +++ b/module/contextvars/methods.go @@ -124,10 +124,24 @@ func contextVarGet(cv *ContextVar, args []objects.Object, kwargs map[string]obje if err != nil { return nil, err } + var val objects.Object if len(args) == 1 { - return cv.GetWithDefault(ts, args[0]) + val, err = cv.GetWithDefault(ts, args[0]) + } else { + val, err = cv.Get(ts) } - return cv.Get(ts) + if err != nil { + return nil, err + } + // PyContextVar_Get returns a new reference: the value is borrowed + // from the HAMT (or is the constructor/argument default), and the + // caller frame owns the result the VM later pops and decrefs. Hand + // back an owned ref so a value held only by the context HAMT is not + // driven below its true refcount by repeated lookups. + // + // CPython: Python/context.c:309 PyContextVar_Get (Py_INCREF(*value)) + objects.Incref(val) + return val, nil } // contextVarSet ports ContextVar.set(value). diff --git a/module/contextvars/token.go b/module/contextvars/token.go index b61c90c7a..2ca47a3af 100644 --- a/module/contextvars/token.go +++ b/module/contextvars/token.go @@ -54,15 +54,71 @@ func init() { TokenType.Hash = func(objects.Object) (int64, error) { return 0, fmt.Errorf("TypeError: unhashable type: 'Token'") } + TokenType.Dealloc = tokenDealloc + TokenType.TpTraverse = tokenTraverse TokenType.TpFlags &^= objects.TpFlagBasetype } +// newToken records the binding to undo. The token owns +1 on the +// context, the variable, and the prior value, mirroring CPython's +// Py_NewRef(ctx) / Py_NewRef(var) / Py_XNewRef(val). The reference on +// oldVal is essential: the cv.Set that mints the token replaces oldVal +// in the HAMT immediately, so without this hold the old value could +// drop to refcount 0 before Reset replays it. +// +// CPython: Python/context.c:1135 token_new func newToken(ctx *Context, cv *ContextVar, oldVal objects.Object, hadOld bool) *Token { t := &Token{ctx: ctx, cv: cv, oldVal: oldVal, hadOld: hadOld} t.Init(TokenType) + objects.Incref(ctx) + objects.Incref(cv) + if oldVal != nil { + objects.Incref(oldVal) + } return t } +// tokenDealloc releases the references the token holds. +// +// CPython: Python/context.c:1147 token_tp_dealloc +func tokenDealloc(o objects.Object) { + t := o.(*Token) + if t.ctx != nil { + objects.Decref(t.ctx) + t.ctx = nil + } + if t.cv != nil { + objects.Decref(t.cv) + t.cv = nil + } + if t.oldVal != nil { + objects.Decref(t.oldVal) + t.oldVal = nil + } +} + +// tokenTraverse visits the token's held references for the cyclic +// collector. +// +// CPython: Python/context.c:1153 token_tp_traverse +func tokenTraverse(o objects.Object, visit objects.Visitor) error { + t := o.(*Token) + if t.ctx != nil { + if err := visit(t.ctx); err != nil { + return err + } + } + if t.cv != nil { + if err := visit(t.cv); err != nil { + return err + } + } + if t.oldVal != nil { + return visit(t.oldVal) + } + return nil +} + // Var returns the ContextVar this token was minted by. // // CPython: Python/context.c:1198 token_get_var diff --git a/module/contextvars/types.go b/module/contextvars/types.go index 87390d164..ff65a651e 100644 --- a/module/contextvars/types.go +++ b/module/contextvars/types.go @@ -23,10 +23,19 @@ var ( var tokenMissingType = objects.NewType("Token.MISSING", []*objects.Type{objects.ObjectType()}) func init() { + // Context owns +1 on its HAMT; release it on dealloc and expose it + // to the cyclic collector. + // + // CPython: Python/context.c:495 context_tp_dealloc / context_tp_traverse + ContextType.Dealloc = contextDealloc + ContextType.TpTraverse = contextTraverse + // ContextVar and Token are subscriptable via __class_getitem__. // CPython: Python/context.c contextvar_methods / token_methods objects.BindClassGetitem(ContextVarType) objects.BindClassGetitem(TokenType) + ContextVarType.Dealloc = contextVarDealloc + ContextVarType.TpTraverse = contextVarTraverse ContextVarType.Hash = func(o objects.Object) (int64, error) { return o.(*ContextVar).hash, nil } diff --git a/module/errno/entries_windows.go b/module/errno/entries_windows.go index d498916b5..53cd0e863 100644 --- a/module/errno/entries_windows.go +++ b/module/errno/entries_windows.go @@ -1,56 +1,99 @@ -// Windows errno table. The Windows C runtime defines a smaller subset -// of POSIX E* codes than Linux; this list mirrors the ones exposed by -// Go's syscall package on windows/amd64. +// Windows errno table. CPython's errno module on Windows exposes the +// codes the Universal CRT's defines, with the CRT's small +// POSIX-style values (EEXIST == 17), NOT Go's syscall package, which +// fabricates E* constants as 1<<29+iota on Windows. Hard-code the CRT +// values so errno.EEXIST and friends match CPython exactly and line up +// with the winerror->errno translation the VM applies to OSError. // -// CPython: Modules/errnomodule.c:121 add_errcode block (MS_WINDOWS arms) +// CPython: Modules/errnomodule.c add_errcode block (values from ucrt +// ; the classic 1-42 range plus the POSIX-2008 100-140 range) package errno -import "syscall" - // errnoEntries returns every (name, code) pair the errno module exposes // on Windows. // // CPython: Modules/errnomodule.c:88 errno_exec (Windows slice) func errnoEntries() []errnoEntry { return []errnoEntry{ - {"EPERM", int(syscall.EPERM)}, - {"ENOENT", int(syscall.ENOENT)}, - {"ESRCH", int(syscall.ESRCH)}, - {"EINTR", int(syscall.EINTR)}, - {"EIO", int(syscall.EIO)}, - {"ENXIO", int(syscall.ENXIO)}, - {"E2BIG", int(syscall.E2BIG)}, - {"ENOEXEC", int(syscall.ENOEXEC)}, - {"EBADF", int(syscall.EBADF)}, - {"ECHILD", int(syscall.ECHILD)}, - {"EAGAIN", int(syscall.EAGAIN)}, - {"ENOMEM", int(syscall.ENOMEM)}, - {"EACCES", int(syscall.EACCES)}, - {"EFAULT", int(syscall.EFAULT)}, - {"EBUSY", int(syscall.EBUSY)}, - {"EEXIST", int(syscall.EEXIST)}, - {"EXDEV", int(syscall.EXDEV)}, - {"ENODEV", int(syscall.ENODEV)}, - {"ENOTDIR", int(syscall.ENOTDIR)}, - {"EISDIR", int(syscall.EISDIR)}, - {"EINVAL", int(syscall.EINVAL)}, - {"ENFILE", int(syscall.ENFILE)}, - {"EMFILE", int(syscall.EMFILE)}, - {"ENOTTY", int(syscall.ENOTTY)}, - {"EFBIG", int(syscall.EFBIG)}, - {"ENOSPC", int(syscall.ENOSPC)}, - {"ESPIPE", int(syscall.ESPIPE)}, - {"EROFS", int(syscall.EROFS)}, - {"EMLINK", int(syscall.EMLINK)}, - {"EPIPE", int(syscall.EPIPE)}, - {"EDOM", int(syscall.EDOM)}, - {"ERANGE", int(syscall.ERANGE)}, - {"EDEADLK", int(syscall.EDEADLK)}, - {"ENAMETOOLONG", int(syscall.ENAMETOOLONG)}, - {"ENOLCK", int(syscall.ENOLCK)}, - {"ENOSYS", int(syscall.ENOSYS)}, - {"ENOTEMPTY", int(syscall.ENOTEMPTY)}, - {"EILSEQ", int(syscall.EILSEQ)}, + {"EPERM", 1}, + {"ENOENT", 2}, + {"ESRCH", 3}, + {"EINTR", 4}, + {"EIO", 5}, + {"ENXIO", 6}, + {"E2BIG", 7}, + {"ENOEXEC", 8}, + {"EBADF", 9}, + {"ECHILD", 10}, + {"EAGAIN", 11}, + {"ENOMEM", 12}, + {"EACCES", 13}, + {"EFAULT", 14}, + {"EBUSY", 16}, + {"EEXIST", 17}, + {"EXDEV", 18}, + {"ENODEV", 19}, + {"ENOTDIR", 20}, + {"EISDIR", 21}, + {"EINVAL", 22}, + {"ENFILE", 23}, + {"EMFILE", 24}, + {"ENOTTY", 25}, + {"EFBIG", 27}, + {"ENOSPC", 28}, + {"ESPIPE", 29}, + {"EROFS", 30}, + {"EMLINK", 31}, + {"EPIPE", 32}, + {"EDOM", 33}, + {"ERANGE", 34}, + {"EDEADLK", 36}, + {"EDEADLOCK", 36}, + {"ENAMETOOLONG", 38}, + {"ENOLCK", 39}, + {"ENOSYS", 40}, + {"ENOTEMPTY", 41}, + {"EILSEQ", 42}, + {"EADDRINUSE", 100}, + {"EADDRNOTAVAIL", 101}, + {"EAFNOSUPPORT", 102}, + {"EALREADY", 103}, + {"EBADMSG", 104}, + {"ECANCELED", 105}, + {"ECONNABORTED", 106}, + {"ECONNREFUSED", 107}, + {"ECONNRESET", 108}, + {"EDESTADDRREQ", 109}, + {"EHOSTUNREACH", 110}, + {"EIDRM", 111}, + {"EINPROGRESS", 112}, + {"EISCONN", 113}, + {"ELOOP", 114}, + {"EMSGSIZE", 115}, + {"ENETDOWN", 116}, + {"ENETRESET", 117}, + {"ENETUNREACH", 118}, + {"ENOBUFS", 119}, + {"ENODATA", 120}, + {"ENOLINK", 121}, + {"ENOMSG", 122}, + {"ENOPROTOOPT", 123}, + {"ENOSR", 124}, + {"ENOSTR", 125}, + {"ENOTCONN", 126}, + {"ENOTRECOVERABLE", 127}, + {"ENOTSOCK", 128}, + {"ENOTSUP", 129}, + {"EOPNOTSUPP", 130}, + {"EOVERFLOW", 132}, + {"EOWNERDEAD", 133}, + {"EPROTO", 134}, + {"EPROTONOSUPPORT", 135}, + {"EPROTOTYPE", 136}, + {"ETIME", 137}, + {"ETIMEDOUT", 138}, + {"ETXTBSY", 139}, + {"EWOULDBLOCK", 140}, } } diff --git a/module/gc/collector.go b/module/gc/collector.go index 720a389ec..6efaa5519 100644 --- a/module/gc/collector.go +++ b/module/gc/collector.go @@ -231,6 +231,16 @@ func collectMain(gen int) (int, []pendingCallback) { if state.debug&DebugSaveAll != 0 && state.garbage != nil { appendGarbage(state.garbage, stillUnreachable) } + // delete_garbage runs tp_clear on the proven-unreachable set. tp_clear + // decrefs members, and a member dropping to zero re-enters Untrack + // (which locks state.mu), so drop the collector mutex across the clear + // pass exactly as the finalize pass above does. state.collecting stays + // set, so maybeAutoCollect will not start a nested collection. + // + // CPython: Python/gc.c:1198 delete_garbage (tp_clear call) + state.mu.Unlock() + clearGarbage(stillUnreachable) + state.mu.Lock() reclaimUnreachable(stillUnreachable, state.tracked, state.finalized) clearUnreachableMask(young) clearAllFreeLists() diff --git a/module/gc/finalize.go b/module/gc/finalize.go index 861df09ab..0650421ad 100644 --- a/module/gc/finalize.go +++ b/module/gc/finalize.go @@ -30,7 +30,22 @@ import ( // // CPython: Python/gc.c:1067 finalize_garbage func finalizeGarbage(unreachable *gcHead, finalizers map[objects.Object]Finalizer, finalized map[objects.Object]struct{}) { + // Snapshot the node order before running any finalizer. A finalizer + // (and, with container tp_clear semantics, the Decref cascade a + // tp_dealloc/tp_clear triggers) can free another object whose type + // carries a Finalize slot, which routes through Decref -> + // GCUntrackHook -> listRemove and unlinks a node mid-list. Walking + // the live linked list across that mutation derefs a stale next + // pointer. Capturing the nodes up front keeps the iteration stable; + // the per-node gcFinalized flag still guards against re-finalizing a + // node the cascade already reclaimed. + // + // CPython: Python/gc.c:1067 finalize_garbage (gc_list_init snapshot) + var nodes []*gcHead for g := unreachable.next; g != unreachable; g = g.next { + nodes = append(nodes, g) + } + for _, g := range nodes { if g.flags&gcFinalized != 0 { continue } @@ -38,6 +53,16 @@ func finalizeGarbage(unreachable *gcHead, finalizers map[objects.Object]Finalize if finalized != nil { finalized[g.obj] = struct{}{} } + // Stamp the header's finalized bit too, not just the gc-layer + // flag. PyObject_CallFinalizerFromDealloc sets _PyGC_FINALIZED so + // no later teardown re-runs tp_finalize; gopy's eager Decref path + // checks exactly this header bit. Syncing it here means a member + // decref during the upcoming tp_clear pass (delete_garbage) that + // drops this object to zero will not re-fire __del__. + // + // CPython: Objects/object.c:471 PyObject_CallFinalizerFromDealloc + // (_PyGC_SET_FINALIZED after tp_finalize) + g.obj.Hdr().SetFinalized() if fn, ok := finalizers[g.obj]; ok { delete(finalizers, g.obj) fn(g.obj) @@ -93,6 +118,36 @@ func handleResurrected(unreachable, stillUnreachable *gcHead, tracked map[object return moveUnreachable(unreachable, stillUnreachable, tracked) } +// clearGarbage runs tp_clear on every object the collector has proven +// unreachable, the delete_garbage step that drops each object's held +// references so cycles break and the held values become collectible. +// gopy leaves memory to the Go GC, but the reference drop still matters: +// an instance's __dict__ can pin a value (a module lock, say) whose +// weakref callback must fire once nothing reaches it. Running tp_clear +// only here, after the collector has proven the object unreachable, is +// the sound place for that drop; the eager refcount-zero dealloc path +// deliberately does not clear, so an object the VM under-counts to zero +// while still live is left untouched. +// +// The caller drops state.mu before calling this, exactly as it does for +// finalizeGarbage: tp_clear decrefs members, and a member hitting zero +// routes through Decref -> GCUntrackHook -> Untrack, which locks +// state.mu. The node order is snapshotted up front because that same +// cascade can unlink nodes from the list mid-walk. +// +// CPython: Python/gc.c:1198 delete_garbage (tp_clear call) +func clearGarbage(unreachable *gcHead) { + var nodes []*gcHead + for g := unreachable.next; g != unreachable; g = g.next { + nodes = append(nodes, g) + } + for _, g := range nodes { + if t := g.obj.Type(); t != nil && t.TpClear != nil { + t.TpClear(g.obj) + } + } +} + // reclaimUnreachable drops every entry on unreachable from the // tracked map and unlinks it from the list. Stand-in for CPython's // delete_garbage: in CPython that loop calls tp_clear and decrements diff --git a/module/gc/refs.go b/module/gc/refs.go index 67aaeefa6..5bd9e111a 100644 --- a/module/gc/refs.go +++ b/module/gc/refs.go @@ -115,4 +115,100 @@ func pinRoots(containers *gcHead, tracked map[objects.Object]*gcHead) { } }) } + // Re-float the interpreter-lifetime singletons (sys.modules) that + // gopy holds through a Go pointer. CPython roots these via + // interp->modules; without this the whole module graph collapses to + // gc_refs == 0 and a module global reachable only through its module + // __dict__ is reclaimed while still live. + // + // We do not stop at the direct sys.modules entries. move_unreachable's + // visit_reachable would, in principle, pull in the rest of the + // strongly-reachable closure from the pinned modules. But gopy's + // containers do not Incref what they store (Frame f_funcobj, closure + // cells, instance __dict__, and so on), so subtract_refs over-decrements + // intermediate nodes deep in that closure. The single-level pin then + // leaves a live object reachable only through several under-counted + // hops (for example _frozen_importlib._blocking_on's instance __dict__, + // reached via module -> module __dict__ -> _WeakValueDictionary + // instance -> instance __dict__) exposed to a false reclaim whenever + // the partition order does not happen to resurrect every hop. CPython + // never sees this because every one of those edges carries a counted + // reference. We compensate by marking the entire closure reachable from + // the static roots here, so survival no longer depends on refcount + // accuracy along the chain. + // + // CPython: Python/gc.c:1430 gc_collect_main (interp->modules roots) + if h := objects.GCStaticRootsHook; h != nil { + markReachableClosure(h, tracked) + } +} + +// markReachableClosure pins every candidate strongly reachable from the +// interpreter-lifetime static roots (sys.modules) and floats it to +// refs >= 1 so move_unreachable keeps the whole live module graph. +// +// The walk recurses only through candidates (objects carrying the +// COLLECTING bit, i.e. members of the generations being collected). That +// keeps a young-generation collection cheap: the static roots are mostly +// long-lived modules promoted to the oldest generation, so traversing one +// of them lands immediately on its non-candidate __dict__ and stops. The +// false reclaim this guards against only arises in a full collection, +// where the deep chain (module -> module __dict__ -> _WeakValueDictionary +// instance -> instance __dict__) is entirely on the candidate list and +// gopy's no-Incref-on-store edges (instance __dict__) over-decrement an +// interior node out from under it; there the walk visits every candidate +// once, no worse than update_refs/subtract_refs already do for the same +// collection. +// +// CPython: Python/gc.c:1430 gc_collect_main (interp->modules roots) +func markReachableClosure(h func(pin func(objects.Object)), tracked map[objects.Object]*gcHead) { + var stack []*gcHead + seen := make(map[*gcHead]struct{}) + enqueue := func(o objects.Object) { + if o == nil { + return + } + g, ok := tracked[o] + if !ok || g.flags&gcCollecting == 0 { + // Not a candidate. Its reference to any candidate child is + // external (subtract_refs never scans it), so that child keeps + // its live refcount without our help; no need to recurse. + return + } + if _, dup := seen[g]; dup { + return + } + seen[g] = struct{}{} + if g.refs == 0 { + g.refs = 1 + } + stack = append(stack, g) + } + // Seed from the direct root entries. The root objects themselves may be + // non-candidates (immortal/old modules), so traverse each one level to + // reach its candidate children even when the root is not enqueued. + h(func(root objects.Object) { + if root == nil { + return + } + enqueue(root) + if tr := root.Type().TpTraverse; tr != nil { + _ = tr(root, func(child objects.Object) error { + enqueue(child) + return nil + }) + } + }) + for len(stack) > 0 { + g := stack[len(stack)-1] + stack = stack[:len(stack)-1] + tr := g.obj.Type().TpTraverse + if tr == nil { + continue + } + _ = tr(g.obj, func(child objects.Object) error { + enqueue(child) + return nil + }) + } } diff --git a/module/io/bufferedio.go b/module/io/bufferedio.go index 3f1817899..b00e8839b 100644 --- a/module/io/bufferedio.go +++ b/module/io/bufferedio.go @@ -346,6 +346,9 @@ func bufferedIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(self, nameObj); ok || err != nil { + return v, err + } switch name.Value() { case "detach": return objects.NewBuiltinFunction("detach", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { @@ -372,7 +375,8 @@ func bufferedIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects return nil, fmt.Errorf("UnsupportedOperation: write") }), nil } - return nil, fmt.Errorf("AttributeError: '_io._BufferedIOBase' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } // bufferedIOBaseReadintoGeneric implements the shared concrete fallback for @@ -880,13 +884,8 @@ func (b *Buffered) bufferedWrite(args []objects.Object) (objects.Object, error) if len(args) < 1 { return nil, fmt.Errorf("TypeError: write() requires a data argument") } - var data []byte - switch v := args[0].(type) { - case *objects.Bytes: - data = v.Bytes() - case *objects.ByteArray: - data = v.Bytes() - default: + data, ok := objects.AsBytesLike(args[0]) + if !ok { return nil, fmt.Errorf("TypeError: a bytes-like object is required, not %s", args[0].Type().Name) } written := 0 @@ -1446,7 +1445,8 @@ func bufferedGetattr(self objects.Object, nameObj objects.Object) (objects.Objec return objects.NewStr(s), nil }), nil } - return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '%s'", typeName, name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } // --- constructors ------------------------------------------------------------ @@ -1775,7 +1775,8 @@ func rwPairGetattr(self objects.Object, nameObj objects.Object) (objects.Object, // CPython: Modules/_io/bufferedio.c:2441 bufferedrwpair_closed_get return objects.NewBool(p.writer.closed), nil } - return nil, fmt.Errorf("AttributeError: '_io.BufferedRWPair' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } func init() { diff --git a/module/io/bytesio.go b/module/io/bytesio.go index c3e829119..e495803c8 100644 --- a/module/io/bytesio.go +++ b/module/io/bytesio.go @@ -45,6 +45,10 @@ func init() { BytesIOType.Iter = bytesIOIter BytesIOType.IterNext = bytesIOIterNext BytesIOType.Getattro = bytesIOGetattr + // BytesIO defines no __eq__, so it keeps object's identity-based hash. + // + // CPython: Modules/_io/bytesio.c:1062 PyBytesIO_Type (tp_hash inherited) + BytesIOType.Hash = objects.IdentityHash // LOAD_SPECIAL walks the type MRO for __enter__ / __exit__. // // CPython: Modules/_io/iobase.c:391 iobase_enter / :409 iobase_exit @@ -446,7 +450,10 @@ func bytesIOGetattr(o objects.Object, name objects.Object) (objects.Object, erro if fn := bytesIOMethod(b, n.Value()); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io.BytesIO' object has no attribute '%s'", n.Value()) + // Anything the custom dispatch above does not serve (dunders such as + // __class__, __dict__, __reduce_ex__) resolves through the normal MRO + // walk against BytesIOType's bases (object), matching stringIOGetattr. + return objects.GenericGetAttr(o, name) } // bytesIOMethod maps method names to BuiltinFunctions. diff --git a/module/io/codecs.go b/module/io/codecs.go index a99450e8e..3a9bbb48e 100644 --- a/module/io/codecs.go +++ b/module/io/codecs.go @@ -116,22 +116,6 @@ func encodeUTF32(s, variant string) []byte { // --- 8-bit code pages ------------------------------------------------------- -// charmapDecode decodes data using a 256-entry lookup table. -1 in the -// table means the byte is unmapped and should raise UnicodeDecodeError. -// -// CPython: Modules/_codecs/charmap.c PyUnicode_DecodeCharmap -func charmapDecode(data []byte, table *[256]rune, name string) (string, error) { - runes := make([]rune, len(data)) - for i, b := range data { - r := table[b] - if r < 0 { - return "", fmt.Errorf("UnicodeDecodeError: %s can't decode byte 0x%02x", name, b) - } - runes[i] = r - } - return string(runes), nil -} - // charmapEncode encodes s using the inverse of the 256-entry table. // The map is built once at table-construction time. // diff --git a/module/io/fileio.go b/module/io/fileio.go index ca9f51010..5af2be42e 100644 --- a/module/io/fileio.go +++ b/module/io/fileio.go @@ -14,10 +14,31 @@ import ( "fmt" "io" stdos "os" + "syscall" "github.com/tamnd/gopy/objects" ) +// clearGoFinalizer drops the Go runtime finalizer that os.NewFile arms on a +// borrowed descriptor. gopy owns the lifecycle of these fds through +// FileIO.Close and the closefd flag, so the descriptor is released +// deterministically when Python closes the file. Leaving Go's finalizer in +// place lets a later GC close a descriptor whose integer was already freed +// and reused by another open file, surfacing as a spurious EBADF +// ("bad file descriptor") on the unrelated file's next write. +// +// CPython: Modules/_io/fileio.c:159 _io_FileIO_close_impl owns the close; +// there is no background reclaim of the fd. +// +// os.NewFile arms the finalizer on the unexported inner *os.file, not on the +// returned *os.File, so runtime.SetFinalizer(f, nil) on the outer handle is a +// no-op and leaves the close finalizer live. os.File is struct{ file *file } +// with the inner pointer at offset 0, so read that pointer and clear the +// finalizer on the object it actually points at. +func clearGoFinalizer(f *stdos.File) { + objects.ClearOSFileFinalizer(f) +} + // SMALLCHUNK / DEFAULT_BUFFER_SIZE / LARGE_BUFFER_CUTOFF_SIZE mirror the // growth-policy constants used by readall() in CPython. // @@ -245,6 +266,7 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor") } + clearGoFinalizer(f) fi := &FileIO{ f: f, nameIsInt: true, @@ -295,8 +317,12 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor from opener") } + clearGoFinalizer(f) } else { f, err = stdos.OpenFile(name, flag, 0o666) + if err == nil { + clearGoFinalizer(f) + } if err != nil { // Preserve the os.PathError chain (errno + filename) with %w // so the unwind path can build a FileNotFoundError / @@ -308,6 +334,20 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec return nil, fmt.Errorf("OSError: %w", err) } } + // open() succeeds on a directory on Unix, but a FileIO must never wrap + // one: fstat the descriptor and raise IsADirectoryError (EISDIR) when it + // names a directory, the way CPython rejects it at construction time + // rather than deferring the failure to the first read. + // + // CPython: Modules/_io/fileio.c:478 _io_FileIO___init___impl (S_ISDIR check) + if info, statErr := f.Stat(); statErr == nil && info.IsDir() { + _ = f.Close() + return nil, fmt.Errorf("OSError: %w", &stdos.PathError{ + Op: "open", + Path: name, + Err: syscall.EISDIR, + }) + } fi := &FileIO{ f: f, name: name, @@ -759,7 +799,8 @@ func fileIOGetattr(o objects.Object, name objects.Object) (objects.Object, error if fn := fileIOMethod(fi, n.Value()); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io.FileIO' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // fileIOSetattr handles attribute assignment on FileIO. Only .name is diff --git a/module/io/iobase.go b/module/io/iobase.go index 4d823dc0b..1c55e2e1e 100644 --- a/module/io/iobase.go +++ b/module/io/iobase.go @@ -325,6 +325,29 @@ func IOBaseCannotPickle(self objects.Object) (objects.Object, error) { return nil, fmt.Errorf("TypeError: cannot pickle '%s' instances", name) } +// ioUserInstanceAttr resolves attribute lookups for Python subclasses of the +// io base types. Those instances are *Instance objects, so a method the +// subclass (or a mix-in) defines must win over the synthesized native method, +// exactly as PyObject_GenericGetAttr walks the MRO. It returns (value, true, +// nil) when the generic path resolves the name, (nil, false, nil) when the +// caller should fall back to the native method synthesis, and an error to +// propagate verbatim. +// +// CPython: Objects/object.c:1389 _PyObject_GenericGetAttrWithDict +func ioUserInstanceAttr(o objects.Object, name objects.Object) (objects.Object, bool, error) { + if _, ok := o.(*objects.Instance); !ok { + return nil, false, nil + } + v, err := objects.GenericGetAttr(o, name) + if err == nil { + return v, true, nil + } + if objects.IsAttributeError(err) { + return nil, false, nil + } + return nil, false, err +} + // iobaseGetattro dispatches attribute lookup for _IOBase objects. // // CPython: Modules/_io/iobase.c:860 iobase_getset + iobase_methods @@ -333,6 +356,9 @@ func iobaseGetattro(o objects.Object, name objects.Object) (objects.Object, erro if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(o, name); ok || err != nil { + return v, err + } return iobaseAttr(o, n.Value()) } @@ -341,6 +367,9 @@ func rawiobaseGetattro(o objects.Object, name objects.Object) (objects.Object, e if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(o, name); ok || err != nil { + return v, err + } // Instance dict shadows the type methods so subclasses (and tests) can // override read / readall / readinto / write via setattr, mirroring // CPython's PyObject_GenericGetAttr where the instance __dict__ wins @@ -372,7 +401,10 @@ func iobaseAttr(o objects.Object, name string) (objects.Object, error) { if fn := iobaseMethod(o, name); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io._IOBase' object has no attribute %q", name) + // User subclasses (*Instance) keep their attributes in a managed dict and + // resolve dunders (__class__, __dict__) through the MRO; defer to the + // generic path rather than raising the bare _IOBase message. + return objects.GenericGetAttr(o, objects.NewStr(name)) } // iobaseSetattro stores an attribute into the instance dict. @@ -383,7 +415,10 @@ func iobaseSetattro(o objects.Object, name objects.Object, value objects.Object) } d := iobaseGetDict(o) if d == nil { - return fmt.Errorf("AttributeError: cannot set attribute on _IOBase without dict") + // User subclasses of the io base types are *Instance objects carrying + // their own managed dict; route their attribute stores through the + // normal generic path instead of the native _IOBase dict. + return objects.GenericSetAttr(o, name, value) } if value == nil { return d.DelItem(objects.NewStr(n.Value())) diff --git a/module/io/module.go b/module/io/module.go index 37e9d7947..6ac77ebe5 100644 --- a/module/io/module.go +++ b/module/io/module.go @@ -15,6 +15,7 @@ import ( "fmt" "os" "strings" + "syscall" "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/imp" @@ -439,9 +440,12 @@ func ioOpen(a *ioOpenArgs) (objects.Object, error) { if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor from opener") } + clearGoFinalizer(f) raw = NewFileIO(f, a.file, rawMode, readable, writable) } else { - f, err := os.OpenFile(a.file, flag, 0o600) + // 0o666 is CPython's default create mode for open(); the process + // umask narrows it. CPython: Modules/_io/fileio.c _io_FileIO___init___impl. + f, err := os.OpenFile(a.file, flag, 0o666) //nolint:gosec // CPython open() default mode, umask-narrowed if err != nil { // Preserve the os.PathError chain (errno + filename) with %w // so the unwind path builds a FileNotFoundError / @@ -452,6 +456,20 @@ func ioOpen(a *ioOpenArgs) (objects.Object, error) { // CPython: Modules/_io/fileio.c:451 _io_FileIO___init___impl return nil, fmt.Errorf("OSError: %w", err) } + // open() succeeds on a directory on Unix; reject it at construction + // with IsADirectoryError (EISDIR) so the failure surfaces here rather + // than on the first read. + // + // CPython: Modules/_io/fileio.c:478 _io_FileIO___init___impl (S_ISDIR check) + if info, statErr := f.Stat(); statErr == nil && info.IsDir() { + _ = f.Close() + return nil, fmt.Errorf("OSError: %w", &os.PathError{ + Op: "open", + Path: a.file, + Err: syscall.EISDIR, + }) + } + clearGoFinalizer(f) raw = NewFileIO(f, a.file, rawMode, readable, writable) } diff --git a/module/io/stringio.go b/module/io/stringio.go index a5db7218c..fca896e96 100644 --- a/module/io/stringio.go +++ b/module/io/stringio.go @@ -59,6 +59,10 @@ func init() { StringIOType.Iter = stringIOIter StringIOType.IterNext = stringIOIterNext StringIOType.Getattro = stringIOGetattr + // StringIO defines no __eq__, so it keeps object's identity-based hash. + // + // CPython: Modules/_io/stringio.c:1056 PyStringIO_Type (tp_hash inherited) + StringIOType.Hash = objects.IdentityHash registerStringIODescrs() objects.AddIterSlotWrappers(StringIOType) } diff --git a/module/io/textio_codec.go b/module/io/textio_codec.go index f4f70e9c0..1e19e3865 100644 --- a/module/io/textio_codec.go +++ b/module/io/textio_codec.go @@ -14,8 +14,6 @@ package io import ( "encoding/binary" "fmt" - "unicode/utf16" - "unicode/utf8" "github.com/tamnd/gopy/codecs" ) @@ -48,47 +46,51 @@ type IncrementalEncoder interface { Reset() } -// getIncrementalDecoder returns a fresh decoder for encoding. errors -// is the error-handling strategy ("strict", "replace", "ignore"); only -// "strict" is implemented today, matching the current one-shot -// `decodeBytes` behavior. Unknown encodings return a LookupError-shaped -// Go error so the caller can surface it to Python. +// getIncrementalDecoder returns a fresh decoder for encoding. errors is +// the error-handling strategy ("strict", "replace", "ignore", +// "backslashreplace", ...); the decoders carry it and hand the complete +// portion of each chunk to the shared codecs package, which applies the +// named handler exactly as bytes.decode does. Unknown encodings return a +// LookupError-shaped Go error so the caller can surface it to Python. // // CPython: Modules/_io/textio.c:912 _textiowrapper_set_decoder // (calls _PyCodecInfo_GetIncrementalDecoder). -func getIncrementalDecoder(encoding, _ string) (IncrementalDecoder, error) { +func getIncrementalDecoder(encoding, errors string) (IncrementalDecoder, error) { + if errors == "" { + errors = "strict" + } switch normalizeCodec(encoding) { case "utf-8": - return &utf8Decoder{}, nil + return &utf8Decoder{errors: errors}, nil case "ascii": - return &asciiDecoder{}, nil + return &asciiDecoder{errors: errors}, nil case "latin-1": - return &latin1Decoder{}, nil + return &latin1Decoder{errors: errors}, nil case "utf-16": - return &utf16Decoder{variant: ""}, nil + return &utf16Decoder{variant: "", errors: errors}, nil case "utf-16-le": - return &utf16Decoder{variant: "le"}, nil + return &utf16Decoder{variant: "le", errors: errors}, nil case "utf-16-be": - return &utf16Decoder{variant: "be"}, nil + return &utf16Decoder{variant: "be", errors: errors}, nil case "utf-32": - return &utf32Decoder{variant: ""}, nil + return &utf32Decoder{variant: "", errors: errors}, nil case "utf-32-le": - return &utf32Decoder{variant: "le"}, nil + return &utf32Decoder{variant: "le", errors: errors}, nil case "utf-32-be": - return &utf32Decoder{variant: "be"}, nil + return &utf32Decoder{variant: "be", errors: errors}, nil case "cp1252": - return &charmapDecoder{table: &cp1252Table.decode, name: "cp1252"}, nil + return &charmapDecoder{table: &cp1252Table.decode, name: "cp1252", errors: errors}, nil case "cp1250": - return &charmapDecoder{table: &cp1250Table.decode, name: "cp1250"}, nil + return &charmapDecoder{table: &cp1250Table.decode, name: "cp1250", errors: errors}, nil case "cp1251": - return &charmapDecoder{table: &cp1251Table.decode, name: "cp1251"}, nil + return &charmapDecoder{table: &cp1251Table.decode, name: "cp1251", errors: errors}, nil case "cp437": - return &charmapDecoder{table: &cp437Table.decode, name: "cp437"}, nil + return &charmapDecoder{table: &cp437Table.decode, name: "cp437", errors: errors}, nil case "mac-roman": - return &charmapDecoder{table: &macRomanTable.decode, name: "mac-roman"}, nil + return &charmapDecoder{table: &macRomanTable.decode, name: "mac-roman", errors: errors}, nil } if ci, err := codecs.Lookup(encoding); err == nil { - return ®istryDecoder{ci: ci}, nil + return ®istryDecoder{ci: ci, errors: errors}, nil } return nil, fmt.Errorf("LookupError: unknown encoding: %s", encoding) } @@ -143,7 +145,8 @@ func getIncrementalEncoder(encoding, _ string) (IncrementalEncoder, error) { // incomplete multi-byte sequence. CPython's utf-8 incremental decoder // keeps the same window because a code point spans at most four bytes. type utf8Decoder struct { - buf []byte + buf []byte + errors string } func (d *utf8Decoder) Decode(input []byte, final bool) (string, error) { @@ -154,28 +157,20 @@ func (d *utf8Decoder) Decode(input []byte, final bool) (string, error) { src = append(append([]byte{}, d.buf...), input...) d.buf = d.buf[:0] } - // Walk back from the end to find the longest tail that is either - // a complete utf-8 sequence or an incomplete (but valid so-far) - // prefix. RuneStart marks the first byte of a sequence. - keep := 0 - if !final && len(src) > 0 { - for i := len(src) - 1; i >= 0 && i >= len(src)-4; i-- { - if utf8.RuneStart(src[i]) { - if !utf8.FullRune(src[i:]) { - keep = len(src) - i - } - break - } - } - } - complete := src[:len(src)-keep] - if !utf8.Valid(complete) { - return "", fmt.Errorf("UnicodeDecodeError: invalid utf-8 sequence") + // DecodeUTF8Incremental holds back an incomplete trailing sequence + // when final is false and applies the configured error handler to + // the complete portion, so an invalid byte under "ignore" / + // "replace" / "backslashreplace" is repaired instead of raising. + // + // CPython: Objects/unicodeobject.c:4756 PyUnicode_DecodeUTF8Stateful + out, remaining, err := codecs.DecodeUTF8Incremental(src, d.errors, final) + if err != nil { + return "", err } - if keep > 0 { - d.buf = append(d.buf[:0], src[len(src)-keep:]...) + if len(remaining) > 0 { + d.buf = append(d.buf[:0], remaining...) } - return string(complete), nil + return out, nil } func (d *utf8Decoder) GetState() ([]byte, int64) { return append([]byte{}, d.buf...), 0 } @@ -187,15 +182,11 @@ func (d *utf8Decoder) Reset() { d.buf = d.buf[:0] } // --- ascii / latin-1 ------------------------------------------------------- -type asciiDecoder struct{} +type asciiDecoder struct{ errors string } -func (asciiDecoder) Decode(input []byte, _ bool) (string, error) { - for _, b := range input { - if b > 127 { - return "", fmt.Errorf("UnicodeDecodeError: ordinal not in range(128)") - } - } - return string(input), nil +func (d asciiDecoder) Decode(input []byte, _ bool) (string, error) { + out, _, err := codecs.Decode(input, "ascii", d.errors) + return out, err } func (asciiDecoder) GetState() ([]byte, int64) { return nil, 0 } func (asciiDecoder) SetState([]byte, int64) error { return nil } @@ -210,14 +201,13 @@ func encodeASCII(s string) ([]byte, error) { return []byte(s), nil } -type latin1Decoder struct{} +type latin1Decoder struct{ errors string } -func (latin1Decoder) Decode(input []byte, _ bool) (string, error) { - runes := make([]rune, len(input)) - for i, b := range input { - runes[i] = rune(b) - } - return string(runes), nil +func (d latin1Decoder) Decode(input []byte, _ bool) (string, error) { + // latin-1 maps every byte to a code point, so the error handler is + // never invoked, but route through codecs for uniformity. + out, _, err := codecs.Decode(input, "latin-1", d.errors) + return out, err } func (latin1Decoder) GetState() ([]byte, int64) { return nil, 0 } func (latin1Decoder) SetState([]byte, int64) error { return nil } @@ -242,6 +232,7 @@ func encodeLatin1(s string) ([]byte, error) { type utf16Decoder struct { variant string // "", "le", or "be" buf []byte + errors string // flags encodes endianness for tell/seek snapshots. // 0 = undecided (auto-variant before BOM sniff) // 1 = little-endian @@ -294,18 +285,21 @@ func (d *utf16Decoder) Decode(input []byte, final bool) (string, error) { } } keep := len(src) % 2 - if final && keep != 0 { - return "", fmt.Errorf("UnicodeDecodeError: utf-16 truncated (odd byte count)") + if final { + // A trailing half code unit on the final chunk is a truncation + // the error handler must see; hand the whole tail to codecs. + keep = 0 } body := src[:len(src)-keep] if keep > 0 { d.buf = append(d.buf, src[len(src)-keep:]...) } - units := make([]uint16, len(body)/2) - for i := range units { - units[i] = bo.Uint16(body[2*i:]) + name := "utf-16-le" + if bo == binary.BigEndian { + name = "utf-16-be" } - return string(utf16.Decode(units)), nil + out, _, err := codecs.Decode(body, name, d.errors) + return out, err } func (d *utf16Decoder) GetState() ([]byte, int64) { @@ -328,6 +322,7 @@ func (d *utf16Decoder) Reset() { type utf32Decoder struct { variant string buf []byte + errors string flags int64 } @@ -372,22 +367,21 @@ func (d *utf32Decoder) Decode(input []byte, final bool) (string, error) { } } keep := len(src) % 4 - if final && keep != 0 { - return "", fmt.Errorf("UnicodeDecodeError: utf-32 truncated (length %% 4 != 0)") + if final { + // A trailing partial code unit on the final chunk is a + // truncation the error handler must see. + keep = 0 } body := src[:len(src)-keep] if keep > 0 { d.buf = append(d.buf, src[len(src)-keep:]...) } - runes := make([]rune, 0, len(body)/4) - for i := 0; i < len(body); i += 4 { - cp := bo.Uint32(body[i:]) - if cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF) { - return "", fmt.Errorf("UnicodeDecodeError: invalid utf-32 codepoint U+%X", cp) - } - runes = append(runes, rune(cp)) + name := "utf-32-le" + if bo == binary.BigEndian { + name = "utf-32-be" } - return string(runes), nil + out, _, err := codecs.Decode(body, name, d.errors) + return out, err } func (d *utf32Decoder) GetState() ([]byte, int64) { @@ -408,12 +402,14 @@ func (d *utf32Decoder) Reset() { // --- charmap (single-byte) ------------------------------------------------- type charmapDecoder struct { - table *[256]rune - name string + table *[256]rune + name string + errors string } func (d *charmapDecoder) Decode(input []byte, _ bool) (string, error) { - return charmapDecode(input, d.table, d.name) + out, _, err := codecs.Decode(input, d.name, d.errors) + return out, err } func (d *charmapDecoder) GetState() ([]byte, int64) { return nil, 0 } func (d *charmapDecoder) SetState([]byte, int64) error { return nil } @@ -466,14 +462,15 @@ func (e *bomEncoder) Reset() { e.state = 0 } // // CPython: Python/codecs.c:570 _PyCodecInfo_GetIncrementalDecoder type registryDecoder struct { - ci *codecs.CodecInfo - buf []byte - out string + ci *codecs.CodecInfo + buf []byte + out string + errors string } func (d *registryDecoder) Decode(input []byte, final bool) (string, error) { d.buf = append(d.buf, input...) - s, _, err := d.ci.Decode(d.buf, "strict") + s, _, err := d.ci.Decode(d.buf, d.errors) if err != nil { // Allow buffering when not final: a trailing incomplete sequence // may complete on the next chunk. @@ -497,7 +494,7 @@ func (d *registryDecoder) SetState(buffer []byte, _ int64) error { if len(buffer) == 0 { return nil } - s, _, err := d.ci.Decode(d.buf, "strict") + s, _, err := d.ci.Decode(d.buf, d.errors) if err == nil { d.out = s } diff --git a/module/io/textiowrapper.go b/module/io/textiowrapper.go index b6da128f9..a8715bc81 100644 --- a/module/io/textiowrapper.go +++ b/module/io/textiowrapper.go @@ -969,7 +969,8 @@ func textIOWrapperGetattr(o objects.Object, name objects.Object) (objects.Object return v, nil } } - return nil, fmt.Errorf("AttributeError: '_io.TextIOWrapper' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__reduce_ex__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // textIOWrapperReadonlyAttrs are the C-level data descriptors that block @@ -1435,7 +1436,8 @@ func incrementalNLDecoderGetattr(o objects.Object, name objects.Object) (objects return objects.None(), nil }), nil } - return nil, fmt.Errorf("AttributeError: '_io.IncrementalNewlineDecoder' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // translateNewlines applies universal newline tracking and (optionally) @@ -1538,11 +1540,14 @@ var TextIOBaseType = objects.NewType("_io._TextIOBase", []*objects.Type{IOBaseTy // textIOBaseGetattr dispatches attribute lookups on _TextIOBase instances. // // CPython: Modules/_io/textio.c:187 textiobase_methods + textiobase_getset -func textIOBaseGetattr(_ objects.Object, nameObj objects.Object) (objects.Object, error) { +func textIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects.Object, error) { name, ok := nameObj.(*objects.Unicode) if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(self, nameObj); ok || err != nil { + return v, err + } switch name.Value() { case "detach": // CPython: Modules/_io/textio.c:66 _io__TextIOBase_detach_impl @@ -1574,7 +1579,8 @@ func textIOBaseGetattr(_ objects.Object, nameObj objects.Object) (objects.Object // CPython: Modules/_io/textio.c:180 _io__TextIOBase_errors_get_impl return objects.None(), nil } - return nil, fmt.Errorf("AttributeError: '_io._TextIOBase' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } func init() { diff --git a/module/marshal/module.go b/module/marshal/module.go index fd4c46dc0..16af5ce48 100644 --- a/module/marshal/module.go +++ b/module/marshal/module.go @@ -95,6 +95,12 @@ func loads(args []objects.Object, kwargs map[string]objects.Object) (objects.Obj } val, err := marshal.Load(bytes.NewReader(src)) if err != nil { + // A truncated or empty buffer surfaces as EOFError, mirroring + // r_object/r_byte/r_string; any other decode failure is a ValueError. + // CPython: Python/marshal.c:1922 marshal_loads_impl + if marshal.IsEOF(err) { + return nil, fmt.Errorf("EOFError: %w", err) + } return nil, fmt.Errorf("ValueError: %w", err) } return wrap(val), nil @@ -158,8 +164,12 @@ func bufferOf(o objects.Object) ([]byte, error) { return v.Bytes(), nil case *objects.ByteArray: return v.Bytes(), nil + case *objects.MemoryView: + // Tobytes() serializes the exposed view (honoring offset/length), + // matching how marshal.loads consumes any bytes-like buffer. + return v.Tobytes().Bytes(), nil } - return nil, fmt.Errorf("TypeError: a bytes-like object is required, not '%T'", o) + return nil, fmt.Errorf("TypeError: a bytes-like object is required, not '%s'", o.Type().Name) } // unwrap converts a Python objects.Object into the native Go form the diff --git a/module/os/module.go b/module/os/module.go index dcbbe1973..0d3b638a6 100644 --- a/module/os/module.go +++ b/module/os/module.go @@ -102,14 +102,26 @@ func goFileModeToStMode(m goos.FileMode) int64 { return mode } -// newStatResult assembles an os.stat_result from the second-resolution -// components gathered by the platform stat helpers. The visible integer -// time slots truncate the float seconds; the hidden float and nanosecond -// timestamps and block fields follow the CPython layout. +// newStatResult assembles an os.stat_result from the nanosecond-resolution +// components gathered by the platform stat helpers. CPython derives three +// views from the same struct timespec: the visible integer slot is the +// floor-second, the hidden float slot is sec + 1e-9*nsec, and the hidden +// *_ns slot is the full nanosecond count. The block fields trail the +// timestamps in the CPython layout. atimeNs/mtimeNs/ctimeNs are full +// nanoseconds since the epoch. // // CPython: Modules/posixmodule.c:2456 _pystat_fromstructstat -func newStatResult(mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime, blksize, blocks, rdev int64) *objects.StructSeq { - ns := func(sec int64) objects.Object { return objects.NewInt(sec * 1_000_000_000) } +func newStatResult(mode, ino, dev, nlink, uid, gid, size, atimeNs, mtimeNs, ctimeNs, blksize, blocks, rdev int64) *objects.StructSeq { + // fillTime mirrors fill_time: the integer field floors toward negative + // infinity, the float field carries the fractional second. + // CPython: Modules/posixmodule.c:2417 fill_time + floorSec := func(ns int64) int64 { + sec := ns / 1_000_000_000 + if ns%1_000_000_000 != 0 && ns < 0 { + sec-- + } + return sec + } return objects.NewStructSeq(statResultType, []objects.Object{ objects.NewInt(mode), objects.NewInt(ino), @@ -118,15 +130,15 @@ func newStatResult(mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime, b objects.NewInt(uid), objects.NewInt(gid), objects.NewInt(size), - objects.NewInt(atime), // unnamed: integer st_atime - objects.NewInt(mtime), // unnamed: integer st_mtime - objects.NewInt(ctime), // unnamed: integer st_ctime - objects.NewFloat(float64(atime)), - objects.NewFloat(float64(mtime)), - objects.NewFloat(float64(ctime)), - ns(atime), - ns(mtime), - ns(ctime), + objects.NewInt(floorSec(atimeNs)), // unnamed: integer st_atime + objects.NewInt(floorSec(mtimeNs)), // unnamed: integer st_mtime + objects.NewInt(floorSec(ctimeNs)), // unnamed: integer st_ctime + objects.NewFloat(float64(atimeNs) / 1e9), + objects.NewFloat(float64(mtimeNs) / 1e9), + objects.NewFloat(float64(ctimeNs) / 1e9), + objects.NewInt(atimeNs), + objects.NewInt(mtimeNs), + objects.NewInt(ctimeNs), objects.NewInt(blksize), objects.NewInt(blocks), objects.NewInt(rdev), @@ -206,13 +218,18 @@ func osTimes(_ []objects.Object, _ map[string]objects.Object) (objects.Object, e } func init() { - _ = imp.AppendInittab("os", buildOS) - _ = imp.AppendInittab("posix", buildPosixModule) - // On Windows, Lib/os.py does `from nt import *`; register the same - // syscall surface under the "nt" name so `import nt` resolves. - // CPython: Modules/posixmodule.c posixmodule_init (registers as "nt" on Windows) + // CPython compiles posixmodule.c under a single name per platform: "nt" + // on Windows, "posix" everywhere else (Modules/posixmodule.c builds with + // MODNAME = "nt" when MS_WINDOWS). Lib/os.py then selects ntpath vs + // posixpath by testing which name is in sys.builtin_module_names, so + // registering both on Windows makes os.py pick posixpath and mangle + // drive-absolute paths. Mirror CPython: one name, gated on the platform. + // + // CPython: Modules/posixmodule.c posixmodule_init (MODNAME "nt" on Windows) if runtime.GOOS == "windows" { _ = imp.AppendInittab("nt", buildPosixModule) + } else { + _ = imp.AppendInittab("posix", buildPosixModule) } _ = imp.AppendInittab("os.path", buildOSPath) // posixpath and ntpath now load from stdlib/ via PathFinder. @@ -229,7 +246,13 @@ func buildPosixModule() (*objects.Module, error) { if err != nil { return nil, err } - posix := objects.NewModule("posix") + // The compiled module is named "nt" on Windows, "posix" elsewhere, + // matching the single MODNAME CPython's posixmodule.c builds with. + modName := "posix" + if runtime.GOOS == "windows" { + modName = "nt" + } + posix := objects.NewModule(modName) pd := posix.Dict() md := m.Dict() for _, k := range md.Keys() { @@ -280,13 +303,25 @@ func buildPath() (*objects.Module, error) { func buildOS() (*objects.Module, error) { // environ: populate from the real process environment. // CPython: Modules/posixmodule.c:1768 convertenviron + // posix.environ holds bytes keys/values on POSIX (Lib/os.py decodes + // them through fsdecode); the nt build keeps str. CPython: + // Modules/posixmodule.c convertenviron. + environBytes := runtime.GOOS != "windows" environDict := objects.NewDict() for _, kv := range goos.Environ() { k, v, ok := strings.Cut(kv, "=") if !ok { continue } - if err := environDict.SetItem(objects.NewStr(k), objects.NewStr(v)); err != nil { + var kObj, vObj objects.Object + if environBytes { + kObj = objects.NewBytes([]byte(k)) + vObj = objects.NewBytes([]byte(v)) + } else { + kObj = objects.NewStr(k) + vObj = objects.NewStr(v) + } + if err := environDict.SetItem(kObj, vObj); err != nil { return nil, err } } @@ -299,10 +334,14 @@ func buildOS() (*objects.Module, error) { linesep := "\n" pathsep := ":" osName := "posix" + // altsep is the alternate path separator: None on POSIX, '/' on Windows. + // CPython: Modules/posixmodule.c / Lib/ntpath.py:altsep + altsep := objects.None() if runtime.GOOS == "windows" { linesep = "\r\n" pathsep = ";" osName = "nt" + altsep = objects.NewStr("/") } entries := []struct { @@ -310,6 +349,7 @@ func buildOS() (*objects.Module, error) { val objects.Object }{ {"sep", objects.NewStr(sep)}, + {"altsep", altsep}, {"extsep", objects.NewStr(".")}, {"pardir", objects.NewStr("..")}, {"curdir", objects.NewStr(".")}, @@ -324,6 +364,8 @@ func buildOS() (*objects.Module, error) { {"listdir", objects.NewBuiltinFunction("listdir", listdir)}, {"stat", objects.NewBuiltinFunction("stat", stat)}, {"getenv", objects.NewBuiltinFunction("getenv", getenv)}, + {"putenv", objects.NewBuiltinFunction("putenv", putenv)}, + {"unsetenv", objects.NewBuiltinFunction("unsetenv", unsetenv)}, {"getpid", objects.NewBuiltinFunction("getpid", getpid)}, {"getuid", objects.NewBuiltinFunction("getuid", getuid)}, {"makedirs", objects.NewBuiltinFunction("makedirs", makedirs)}, @@ -431,7 +473,7 @@ func buildOS() (*objects.Module, error) { for _, group := range [][]struct { name string val objects.Object - }{entries, posixIdentityEntries()} { + }{entries, posixIdentityEntries(), winPathEntries()} { for _, e := range group { if err := d.SetItem(objects.NewStr(e.name), e.val); err != nil { return nil, err @@ -484,7 +526,12 @@ func osModuleGetattr(m *objects.Module) func([]objects.Object, map[string]object return cls, nil } if name != "path" { - return nil, fmt.Errorf("AttributeError: module 'os' has no attribute %q", name) + // Match the standard module-getattro miss message (single + // quotes). CPython's os.py has no __getattr__, so a missing + // attribute raises "module 'os' has no attribute 'X'". + // + // CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl + return nil, fmt.Errorf("AttributeError: module 'os' has no attribute '%s'", name) } pathMod := osPathModule() if err := m.Dict().SetItem(objects.NewStr("path"), pathMod); err != nil { @@ -867,12 +914,67 @@ func stat(args []objects.Object, _ map[string]objects.Object) (objects.Object, e ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) blksize, blocks, rdev := statBlockFields(info) return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), - int64(uid), int64(gid), info.Size(), atime, info.ModTime().Unix(), ctime, + int64(uid), int64(gid), info.Size(), atime, info.ModTime().UnixNano(), ctime, blksize, blocks, rdev), nil } // getenv mirrors Lib/os.py:818 getenv: returns environ[key] or default. // CPython: Lib/os.py:818 getenv +// fsArg decodes a putenv/unsetenv argument that may arrive as str or +// bytes (Lib/os.py's posix _Environ fsencodes keys and values to bytes +// before calling putenv / unsetenv). +func fsArg(o objects.Object) (string, error) { + switch v := o.(type) { + case *objects.Bytes: + return string(v.Bytes()), nil + case *objects.ByteArray: + return string(v.Bytes()), nil + default: + return objects.Str(o) + } +} + +// putenv implements posix.putenv(key, value): set a process environment +// variable. Lib/os.py's _Environ.__setitem__ calls it before updating +// its backing dict. +// +// CPython: Modules/posixmodule.c os_putenv_impl +func putenv(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: putenv() takes exactly 2 arguments (%d given)", len(args)) + } + key, err := fsArg(args[0]) + if err != nil { + return nil, err + } + value, err := fsArg(args[1]) + if err != nil { + return nil, err + } + if err := goos.Setenv(key, value); err != nil { + return nil, fmt.Errorf("OSError: %s", err.Error()) + } + return objects.None(), nil +} + +// unsetenv implements posix.unsetenv(key): remove a process environment +// variable. Lib/os.py's _Environ.__delitem__ calls it. +// +// CPython: Modules/posixmodule.c os_unsetenv_impl +func unsetenv(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: unsetenv() takes exactly 1 argument (%d given)", len(args)) + } + key, err := fsArg(args[0]) + if err != nil { + return nil, err + } + if err := goos.Unsetenv(key); err != nil { + return nil, fmt.Errorf("OSError: %s", err.Error()) + } + return objects.None(), nil +} + func getenv(args []objects.Object, kwargs map[string]objects.Object) (objects.Object, error) { if len(args) == 0 { return nil, fmt.Errorf("TypeError: getenv() missing required argument: 'key'") @@ -1295,14 +1397,17 @@ func osLstat(args []objects.Object, _ map[string]objects.Object) (objects.Object return nil, fmt.Errorf("OSError: %w", serr) } ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() blksize, blocks, rdev := statBlockFields(info) return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil } -// osFstat returns the stat of an open file descriptor. -// The underlying fd is not closed; runtime.SetFinalizer is cleared on -// the temporary os.File wrapper so the GC never closes it. +// osFstat returns the stat of an open file descriptor. The work is +// delegated to the platform fstatResult helper, which calls fstat(2) +// directly through syscall rather than borrowing the fd in a temporary +// os.File. An os.File wrapper arms a finalizer on its inner file handle +// that runtime.SetFinalizer on the outer struct cannot clear, so a GC of +// the wrapper would close the live descriptor out from under its owner. // // CPython: Modules/posixmodule.c:3399 os_fstat_impl func osFstat(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { @@ -1314,16 +1419,7 @@ func osFstat(args []objects.Object, _ map[string]objects.Object) (objects.Object return nil, fmt.Errorf("TypeError: an integer is required") } fdVal, _ := fdObj.Int64() - f := goos.NewFile(uintptr(fdVal), "") - runtime.SetFinalizer(f, nil) - info, err := f.Stat() - if err != nil { - return nil, fmt.Errorf("OSError: %w", err) - } - ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) - mtime := info.ModTime().Unix() - blksize, blocks, rdev := statBlockFields(info) - return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil + return fstatResult(fdVal) } // osReplace atomically renames src to dst, replacing dst if it exists. diff --git a/module/os/module_test.go b/module/os/module_test.go index a4bccb24d..4c99b6667 100644 --- a/module/os/module_test.go +++ b/module/os/module_test.go @@ -119,13 +119,21 @@ func TestEnviron(t *testing.T) { if !ok { t.Fatalf("environ is %T, want *Dict", env) } + // On POSIX, posix.environ holds bytes keys/values (Lib/os.py decodes + // them); the nt build keeps str. Look the key up the same way. + keyObj := func(s string) objects.Object { + if runtime.GOOS == "windows" { + return objects.NewStr(s) + } + return objects.NewBytes([]byte(s)) + } pathKey := "PATH" if runtime.GOOS == "windows" { - if _, err2 := envDict.GetItem(objects.NewStr("Path")); err2 == nil { + if _, err2 := envDict.GetItem(keyObj("Path")); err2 == nil { pathKey = "Path" } } - v, err := envDict.GetItem(objects.NewStr(pathKey)) + v, err := envDict.GetItem(keyObj(pathKey)) if err != nil { t.Fatalf("environ[%q]: %v", pathKey, err) } diff --git a/module/os/posix_extra.go b/module/os/posix_extra.go index aa313c1cc..cef9e1a1e 100644 --- a/module/os/posix_extra.go +++ b/module/os/posix_extra.go @@ -25,21 +25,51 @@ func osChmod(args []objects.Object, _ map[string]objects.Object) (objects.Object if len(args) < 2 { return nil, fmt.Errorf("TypeError: chmod() missing required arguments") } - path, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: chmod() path must be str") + p, err := pathStringArg(args[0], "chmod") + if err != nil { + return nil, err } mode, ok := args[1].(*objects.Int) if !ok { return nil, fmt.Errorf("TypeError: chmod() mode must be int") } m, _ := mode.Int64() - if err := goos.Chmod(path.Value(), goos.FileMode(m)); err != nil { + if err := goos.Chmod(p, goos.FileMode(m)); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil } +// pathStringArg coerces a path argument the way CPython's path_converter +// does: a str is taken verbatim, bytes are decoded, and any other object is +// run through os.fspath (__fspath__) so pathlib.Path and other PathLike +// objects are accepted. +// +// CPython: Modules/posixmodule.c:1093 path_converter +func pathStringArg(o objects.Object, fname string) (string, error) { + switch v := o.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + m, err := objects.GetAttr(o, objects.NewStr("__fspath__")) + if err != nil { + return "", fmt.Errorf("TypeError: %s: path should be string, bytes or os.PathLike, not %s", fname, o.Type().Name) + } + r, err := objects.Call(m, objects.NewTuple(nil), nil) + if err != nil { + return "", err + } + switch v := r.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + return "", fmt.Errorf("TypeError: expected __fspath__ to return str or bytes, not %s", r.Type().Name) +} + // osSymlink creates a symbolic link at link_name pointing at src. // // CPython: Modules/posixmodule.c os_symlink_impl @@ -47,15 +77,15 @@ func osSymlink(args []objects.Object, _ map[string]objects.Object) (objects.Obje if len(args) < 2 { return nil, fmt.Errorf("TypeError: symlink() requires src and dst") } - src, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: symlink() src must be str") + src, err := pathStringArg(args[0], "symlink") + if err != nil { + return nil, err } - dst, ok := args[1].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: symlink() dst must be str") + dst, err := pathStringArg(args[1], "symlink") + if err != nil { + return nil, err } - if err := goos.Symlink(src.Value(), dst.Value()); err != nil { + if err := goos.Symlink(src, dst); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil @@ -68,11 +98,11 @@ func osReadlink(args []objects.Object, _ map[string]objects.Object) (objects.Obj if len(args) < 1 { return nil, fmt.Errorf("TypeError: readlink() missing path") } - path, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: readlink() path must be str") + path, err := pathStringArg(args[0], "readlink") + if err != nil { + return nil, err } - target, err := goos.Readlink(path.Value()) + target, err := goos.Readlink(path) if err != nil { return nil, fmt.Errorf("OSError: %w", err) } @@ -86,15 +116,15 @@ func osLink(args []objects.Object, _ map[string]objects.Object) (objects.Object, if len(args) < 2 { return nil, fmt.Errorf("TypeError: link() requires src and dst") } - src, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: link() src must be str") + src, err := pathStringArg(args[0], "link") + if err != nil { + return nil, err } - dst, ok := args[1].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: link() dst must be str") + dst, err := pathStringArg(args[1], "link") + if err != nil { + return nil, err } - if err := goos.Link(src.Value(), dst.Value()); err != nil { + if err := goos.Link(src, dst); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil @@ -152,12 +182,17 @@ func osCPUCount(args []objects.Object, _ map[string]objects.Object) (objects.Obj return objects.NewInt(int64(n)), nil } -// osIsatty returns True if fd is a tty. The implementation Stats the -// fd through the goos package and tests the char-device bit, which -// matches what `isatty(3)` reports for the common cases _colorize -// cares about. +// osIsatty returns True if fd is a tty. It fstats the descriptor and +// tests the char-device type bit, which matches what `isatty(3)` reports +// for the common cases _colorize cares about. The stat goes through the +// platform fstatResult helper, which calls fstat(2) directly rather than +// borrowing the fd in a temporary os.File. A borrowed os.File arms a +// finalizer on its inner handle that runtime.SetFinalizer on the outer +// struct cannot clear, so a GC of the wrapper would close a descriptor we +// do not own and unrelated writes would later fail with EBADF. // -// CPython: Modules/posixmodule.c:11947 os_isatty_impl +// CPython: Modules/posixmodule.c:11947 os_isatty_impl borrows the fd +// and never closes it. func osIsatty(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { if len(args) < 1 { return nil, fmt.Errorf("TypeError: isatty() missing required argument: 'fd'") @@ -167,18 +202,18 @@ func osIsatty(args []objects.Object, _ map[string]objects.Object) (objects.Objec return nil, fmt.Errorf("TypeError: an integer is required") } fdVal, _ := fdObj.Int64() - f := goos.NewFile(uintptr(fdVal), "") - if f == nil { - return objects.NewBool(false), nil - } - info, err := f.Stat() + st, err := fstatResult(fdVal) if err != nil { // CPython os.isatty returns False on any error rather than - // raising; a Stat failure here means the fd is not a real + // raising; a stat failure here means the fd is not a real // device, which is exactly what callers want to know. return objects.NewBool(false), nil //nolint:nilerr // CPython os.isatty parity } - return objects.NewBool((info.Mode() & goos.ModeCharDevice) != 0), nil + // st_mode is the first stat_result slot. S_IFMT masks the file-type + // nibble; S_IFCHR marks a character device. + const sIFMT, sIFCHR = 0o170000, 0o020000 + mode, _ := st.Items()[0].(*objects.Int).Int64() + return objects.NewBool(mode&sIFMT == sIFCHR), nil } // osFsdecode decodes filename from the filesystem encoding (utf-8 on diff --git a/module/os/posix_unix.go b/module/os/posix_unix.go index f427a0713..d8e9a8685 100644 --- a/module/os/posix_unix.go +++ b/module/os/posix_unix.go @@ -280,3 +280,15 @@ func posixIdentityEntries() []struct { {"getgroups", objects.NewBuiltinFunction("getgroups", osGetgroups)}, } } + +// winPathEntries is empty on POSIX: posixmodule.c registers _path_splitroot +// and the listdrives family only inside its #ifdef MS_WINDOWS block, so on +// POSIX os._path_splitroot raises AttributeError just like CPython. +// +// CPython: Modules/posixmodule.c:4707 #ifdef MS_WINDOWS +func winPathEntries() []struct { + name string + val objects.Object +} { + return nil +} diff --git a/module/os/posix_windows.go b/module/os/posix_windows.go index 189fe8b5b..89185022d 100644 --- a/module/os/posix_windows.go +++ b/module/os/posix_windows.go @@ -289,3 +289,150 @@ func osUmask(args []objects.Object, _ map[string]objects.Object) (objects.Object } return objects.NewInt(0), nil } + +// winPathEntries returns the Windows-only path helpers posixmodule.c registers +// inside its #ifdef MS_WINDOWS block. Only _path_splitroot is needed by the +// stdlib bootstrap; the rest of the listdrives/_path_* family is unported. +// +// CPython: Modules/posixmodule.c:4707 #ifdef MS_WINDOWS +func winPathEntries() []struct { + name string + val objects.Object +} { + return []struct { + name string + val objects.Object + }{ + {"_path_splitroot", objects.NewBuiltinFunction("_path_splitroot", osPathSplitroot)}, + } +} + +// osPathSplitroot splits a Windows path into (root, rest), where root is +// everything up to and including the leading separator after a drive or UNC +// share. importlib._bootstrap_external uses it to reimplement os.path.join and +// os.path.isabs without importing ntpath at bootstrap time. +// +// The C accelerator runs PathCchSkipRoot over a copy with forward slashes +// folded to backslashes, then slices the original (unfolded) path at the root +// length. That is exactly the drive+root prefix ntpath.splitroot computes, so +// this port follows the ntpath.splitroot algorithm and joins its (drive, root) +// halves into the single root element the 2-tuple form returns. +// +// CPython: Modules/posixmodule.c:5230 os__path_splitroot_impl +// CPython: Lib/ntpath.py:172 splitroot +func osPathSplitroot(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _path_splitroot() takes exactly one argument (%d given)", len(args)) + } + s, err := objects.Str(args[0]) + if err != nil { + return nil, err + } + root, rest := splitrootWindows(s) + return objects.NewTuple([]objects.Object{objects.NewStr(root), objects.NewStr(rest)}), nil +} + +const ( + srSep = '\\' + srAlt = '/' + srColon = ':' +) + +// srAt reads the slash-folded rune at index i (out-of-range yields 0), so the +// structural tests run over the normp = p.replace('/', '\\') view. +func srAt(r []rune, i int) rune { + if i < 0 || i >= len(r) { + return 0 + } + if r[i] == srAlt { + return srSep + } + return r[i] +} + +// srSlice returns string(r[a:b]) clamped to bounds, the Python p[a:b] slice. +func srSlice(r []rune, a, b int) string { + if a < 0 { + a = 0 + } + if b > len(r) { + b = len(r) + } + if a >= b { + return "" + } + return string(r[a:b]) +} + +// srFindSep is normp.find('\\', start) over the slash-folded view. +func srFindSep(r []rune, start int) int { + for i := start; i < len(r); i++ { + if srAt(r, i) == srSep { + return i + } + } + return -1 +} + +// srHasUNCPrefix reports normp[:8].upper() == '\\\\?\\UNC\\'. +func srHasUNCPrefix(r []rune) bool { + want := [8]rune{srSep, srSep, '?', srSep, 'U', 'N', 'C', srSep} + if len(r) < 8 { + return false + } + for i := 0; i < 8; i++ { + c := srAt(r, i) + if c >= 'a' && c <= 'z' { + c -= 'a' - 'A' + } + if c != want[i] { + return false + } + } + return true +} + +// srSplitUNC handles \\server\share or \\?\UNC\server\share roots. +func srSplitUNC(p string, r []rune) (string, string) { + start := 2 + if srHasUNCPrefix(r) { + start = 8 + } + index := srFindSep(r, start) + if index == -1 { + return p, "" + } + index2 := srFindSep(r, index+1) + if index2 == -1 { + return p, "" + } + // drive=p[:index2], root=p[index2:index2+1], tail=p[index2+1:]. + return srSlice(r, 0, index2+1), srSlice(r, index2+1, len(r)) +} + +// splitrootWindows is the ntpath.splitroot algorithm folded to the 2-tuple +// (drive+root, tail) shape os._path_splitroot returns. It indexes by rune to +// preserve Python str (code-point) slicing semantics. +// +// CPython: Lib/ntpath.py:172 splitroot +func splitrootWindows(p string) (root, tail string) { + r := []rune(p) + switch { + case srAt(r, 0) == srSep: + if srAt(r, 1) == srSep { + return srSplitUNC(p, r) + } + // Relative path with root, e.g. \Windows: drive="", root=p[:1]. + return srSlice(r, 0, 1), srSlice(r, 1, len(r)) + case srAt(r, 1) == srColon: + if srAt(r, 2) == srSep { + // Absolute drive-letter path, e.g. X:\Windows. + return srSlice(r, 0, 3), srSlice(r, 3, len(r)) + } + // Relative path with drive, e.g. X:Windows: drive=p[:2], root="". + return srSlice(r, 0, 2), srSlice(r, 2, len(r)) + default: + // Relative path, e.g. Windows. + return "", p + } +} diff --git a/module/os/scandir.go b/module/os/scandir.go index ca4823a94..eb5a86881 100644 --- a/module/os/scandir.go +++ b/module/os/scandir.go @@ -187,6 +187,8 @@ func direntryGetattr(o objects.Object, name objects.Object) (objects.Object, err return objects.NewBuiltinFunction("is_file", direntryIsFile(de)), nil case "is_symlink": return objects.NewBuiltinFunction("is_symlink", direntryIsSymlink(de)), nil + case "is_junction": + return objects.NewBuiltinFunction("is_junction", direntryIsJunction(de)), nil case "stat": return objects.NewBuiltinFunction("stat", direntryStat(de)), nil case "inode": @@ -252,6 +254,17 @@ func direntryIsSymlink(de *DirEntry) func(args []objects.Object, kwargs map[stri } } +// direntryIsJunction builds the bound is_junction() method. Junctions +// are a Windows-only concept; on every platform gopy targets here the +// answer is always False. +// +// CPython: Modules/posixmodule.c DirEntry_is_junction +func direntryIsJunction(_ *DirEntry) func(args []objects.Object, kwargs map[string]objects.Object) (objects.Object, error) { + return func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + return objects.False(), nil + } +} + // direntryStat builds the bound stat(*, follow_symlinks=True) method. // // CPython: Modules/posixmodule.c:13278 DirEntry_stat diff --git a/module/os/stat_darwin.go b/module/os/stat_darwin.go index 007ddccf7..9bdcf3f27 100644 --- a/module/os/stat_darwin.go +++ b/module/os/stat_darwin.go @@ -13,10 +13,13 @@ import ( ) // statSysFields extracts platform fields from a FileInfo's syscall.Stat_t. -// Darwin/FreeBSD carry atime/ctime in Atimespec/Ctimespec. +// Darwin/FreeBSD carry atime/ctime in Atimespec/Ctimespec. The returned +// atime/ctime are full nanoseconds since the epoch so stat_result keeps +// the sub-second precision CPython's FileFinder relies on for cache +// invalidation. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime sys, ok := info.Sys().(*syscall.Stat_t) @@ -28,8 +31,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, nlink = uint64(sys.Nlink) uid = sys.Uid gid = sys.Gid - atime = sys.Atimespec.Sec - ctime = sys.Ctimespec.Sec + atime = sys.Atimespec.Sec*1_000_000_000 + sys.Atimespec.Nsec + ctime = sys.Ctimespec.Sec*1_000_000_000 + sys.Ctimespec.Nsec return } @@ -60,6 +63,25 @@ func statBlockFields(info goos.FileInfo) (blksize, blocks, rdev int64) { return } +// fstatResult stats an open descriptor via fstat(2) and assembles the +// stat_result directly from the syscall.Stat_t. It never wraps the fd in +// an os.File, so no finalizer is armed that could close the live +// descriptor when the wrapper is garbage-collected. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + var st syscall.Stat_t + if err := syscall.Fstat(int(fdVal), &st); err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + atime := st.Atimespec.Sec*1_000_000_000 + st.Atimespec.Nsec + mtime := st.Mtimespec.Sec*1_000_000_000 + st.Mtimespec.Nsec + ctime := st.Ctimespec.Sec*1_000_000_000 + st.Ctimespec.Nsec + return newStatResult(int64(st.Mode), int64(st.Ino), int64(st.Dev), int64(st.Nlink), + int64(st.Uid), int64(st.Gid), st.Size, atime, mtime, ctime, + int64(st.Blksize), st.Blocks, int64(st.Rdev)), nil +} + // getuid returns the real user ID of the calling process. // CPython: Modules/posixmodule.c:9635 os_getuid_impl func getuid(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { diff --git a/module/os/stat_linux.go b/module/os/stat_linux.go index 95c11adbd..7c1d6c9d2 100644 --- a/module/os/stat_linux.go +++ b/module/os/stat_linux.go @@ -16,7 +16,7 @@ import ( // Linux carries atime/ctime in Atim/Ctim. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime sys, ok := info.Sys().(*syscall.Stat_t) @@ -28,8 +28,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, nlink = uint64(sys.Nlink) //nolint:unconvert // Nlink is uint32 on linux/arm64 uid = sys.Uid gid = sys.Gid - atime = sys.Atim.Sec - ctime = sys.Ctim.Sec + atime = sys.Atim.Sec*1_000_000_000 + int64(sys.Atim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + ctime = sys.Ctim.Sec*1_000_000_000 + int64(sys.Ctim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux return } @@ -60,6 +60,25 @@ func statBlockFields(info goos.FileInfo) (blksize, blocks, rdev int64) { return } +// fstatResult stats an open descriptor via fstat(2) and assembles the +// stat_result directly from the syscall.Stat_t. It never wraps the fd in +// an os.File, so no finalizer is armed that could close the live +// descriptor when the wrapper is garbage-collected. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + var st syscall.Stat_t + if err := syscall.Fstat(int(fdVal), &st); err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + atime := st.Atim.Sec*1_000_000_000 + int64(st.Atim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + mtime := st.Mtim.Sec*1_000_000_000 + int64(st.Mtim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + ctime := st.Ctim.Sec*1_000_000_000 + int64(st.Ctim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + return newStatResult(int64(st.Mode), int64(st.Ino), int64(st.Dev), int64(st.Nlink), + int64(st.Uid), int64(st.Gid), st.Size, atime, mtime, ctime, + int64(st.Blksize), int64(st.Blocks), int64(st.Rdev)), nil //nolint:unconvert // Blksize/Blocks are int32 on 32-bit linux +} + // getuid returns the real user ID of the calling process. // CPython: Modules/posixmodule.c:9635 os_getuid_impl func getuid(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { diff --git a/module/os/stat_other.go b/module/os/stat_other.go index 2d45ff1d4..66934d412 100644 --- a/module/os/stat_other.go +++ b/module/os/stat_other.go @@ -7,14 +7,34 @@ package os import ( "fmt" goos "os" + "runtime" "github.com/tamnd/gopy/objects" ) +// fstatResult stats an open descriptor through a temporary os.File on +// platforms without a syscall.Stat_t. SetFinalizer is cleared on a +// best-effort basis; these fallback targets do not run the kqueue +// netpoller that makes the borrowed-fd close fatal on Darwin. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + f := goos.NewFile(uintptr(fdVal), "") + runtime.SetFinalizer(f, nil) + info, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) + mtime := info.ModTime().UnixNano() + blksize, blocks, rdev := statBlockFields(info) + return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil +} + // statSysFields returns minimal values on unsupported platforms. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() return 0, 0, 1, 0, 0, mtime, mtime } diff --git a/module/os/stat_windows.go b/module/os/stat_windows.go index c751cd9fb..18877ff46 100644 --- a/module/os/stat_windows.go +++ b/module/os/stat_windows.go @@ -5,12 +5,33 @@ package os import ( + "fmt" goos "os" + "runtime" "syscall" "github.com/tamnd/gopy/objects" ) +// fstatResult stats an open descriptor. Windows resolves the fd through a +// temporary os.File whose Stat goes via GetFileInformationByHandle; the +// netpoll-vs-finalizer hazard that motivates the POSIX raw-syscall path +// does not apply to Windows handles, so the wrapper is reused here. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + f := goos.NewFile(uintptr(fdVal), "") + runtime.SetFinalizer(f, nil) + info, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) + mtime := info.ModTime().UnixNano() + blksize, blocks, rdev := statBlockFields(info) + return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil +} + // statSysFields extracts platform fields from a Windows FileInfo's // Win32FileAttributeData. Windows reports CreationTime / LastAccessTime // / LastWriteTime as FILETIME (100-ns intervals since 1601-01-01); we @@ -21,7 +42,7 @@ import ( // // CPython: Modules/posixmodule.c:1924 win32_stat func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime nlink = 1 @@ -29,8 +50,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, if !ok || sys == nil { return } - atime = sys.LastAccessTime.Nanoseconds() / 1e9 - ctime = sys.CreationTime.Nanoseconds() / 1e9 + atime = sys.LastAccessTime.Nanoseconds() + ctime = sys.CreationTime.Nanoseconds() return } diff --git a/module/sys/config.go b/module/sys/config.go index f9552e1eb..bf5aefadc 100644 --- a/module/sys/config.go +++ b/module/sys/config.go @@ -61,6 +61,12 @@ func UpdateConfig(d *objects.Dict, cfg *initconfig.PyConfig) error { } } + // CPython: Python/sysmodule.c sets sys.dont_write_bytecode from the + // config alongside the flags structseq mirror. + if err := setItem(d, "dont_write_bytecode", objects.NewBool(cfg.WriteBytecode == 0)); err != nil { + return err + } + if cfg.PycachePrefix != "" { if err := setStr(d, "pycache_prefix", cfg.PycachePrefix); err != nil { return err diff --git a/module/sys/excepthook.go b/module/sys/excepthook.go index ea8d8df2b..88fa5873b 100644 --- a/module/sys/excepthook.go +++ b/module/sys/excepthook.go @@ -1,19 +1,19 @@ // sys.excepthook is invoked by the interpreter when an exception goes -// uncaught at the top level, and by threading.py to display exceptions -// raised in worker threads. CPython routes through PyErr_Display which -// formats a traceback. The gopy port writes a minimal "type: value" -// line to sys.stderr; full traceback formatting can plug in once the -// stack-walk hook is exposed. +// uncaught at the top level, by threading.py to display exceptions +// raised in worker threads, and by code.InteractiveConsole to render a +// traceback. CPython routes through PyErr_Display, which formats the +// full traceback (chained causes included) and writes it through the +// live sys.stderr object so a redirected or mocked stream captures it. // // CPython: Python/sysmodule.c sys_excepthook_impl // CPython: Python/pythonrun.c PyErr_Display - package sys import ( "fmt" "os" + "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/objects" ) @@ -21,15 +21,44 @@ func excepthookShim(args []objects.Object, _ map[string]objects.Object) (objects if len(args) < 3 { return objects.None(), nil } - exc := args[1] - repr, err := objects.Str(exc) - if err != nil { - // Best-effort hook: a failing repr is swallowed because the - // excepthook itself runs while an exception is already being - // reported and must not re-raise. - return objects.None(), nil //nolint:nilerr // intentional swallow + // PyErr_Display formats the value argument (args[1]); the type and + // traceback are derived from it. code.InteractiveConsole already + // stitched the traceback onto the value via with_traceback before + // calling the hook, so FormatException(value) renders the same frames. + text := excepthookText(args[1]) + + // Write through the live sys.stderr the way _PyErr_Display does, so a + // caller that swapped sys.stderr (tests mocking the stream, the REPL's + // captured stderr) sees the output instead of the process fd. + // + // CPython: Python/pythonrun.c _PyErr_Display (PySys_GetObject "stderr") + d := liveSysDict() + if d != nil { + if errf, _ := d.GetItem(objects.NewStr("stderr")); errf != nil && errf != objects.None() { + if write, err := objects.GetAttr(errf, objects.NewStr("write")); err == nil { + if _, err := objects.Call(write, objects.NewTuple([]objects.Object{objects.NewStr(text)}), nil); err == nil { + return objects.None(), nil + } + } + } } - tp := exc.Type().Name - fmt.Fprintf(os.Stderr, "%s: %s\n", tp, repr) + // Fall back to the process stderr only when sys.stderr is missing or + // unusable, mirroring CPython's last-resort write to the C-level + // stderr in _PyErr_Display. + fmt.Fprint(os.Stderr, text) return objects.None(), nil } + +// excepthookText renders the traceback string for the exception value, +// falling back to a "Type: repr" line when the object is not a gopy +// Exception (the hook must never raise while reporting an error). +func excepthookText(value objects.Object) string { + if exc, ok := value.(*errors.Exception); ok { + return errors.FormatException(exc) + } + repr, err := objects.Str(value) + if err != nil { + return value.Type().Name + "\n" + } + return value.Type().Name + ": " + repr + "\n" +} diff --git a/module/sys/module.go b/module/sys/module.go index 9906cea31..b2dcd4832 100644 --- a/module/sys/module.go +++ b/module/sys/module.go @@ -88,6 +88,50 @@ func SetPath(path []string) { } } +// pendingStdlibDir records the stdlib root the next sys-module build +// should expose as sys._stdlib_dir. FrozenImporter._resolve_filename +// reads it to locate the on-disk copy of a frozen module. SetStdlibDir +// also refreshes the live attribute when sys is already imported. +// +// CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) +var pendingStdlibDir string + +// SetStdlibDir records the stdlib root and exposes it as +// sys._stdlib_dir, refreshing the live attribute when sys is already +// imported. +// +// CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) +func SetStdlibDir(dir string) { + pendingStdlibDir = dir + if md := liveSysDict(); md != nil { + _ = md.SetItem(objects.NewStr("_stdlib_dir"), objects.NewStr(dir)) + } +} + +// pendingSafePath records the safe_path flag supplied on the command +// line (-P / -I / PYTHONSAFEPATH) before sys is built. buildModule +// reads it when stamping sys.flags; SetSafePath also refreshes the live +// flags struct-sequence when sys is already imported. +// +// CPython: Python/initconfig.c:1828 config_init_safe_path +var pendingSafePath bool + +// SetSafePath records safe_path and, when sys is already live, rebuilds +// sys.flags so sys.flags.safe_path reads True. +// +// CPython: Python/sysmodule.c:3478 set_flags_from_config (safe_path) +func SetSafePath(on bool) { + pendingSafePath = on + if md := liveSysDict(); md != nil { + cfg := &initconfig.PyConfig{} + cfg.InitPythonConfig() + if on { + cfg.SafePath = 1 + } + _ = md.SetItem(objects.NewStr("flags"), makeFlags(cfg)) + } +} + // LivePath returns the current sys.path entries as a Go slice, or nil // when sys has not been imported yet (PathFinder then falls back to // its static Paths snapshot, which is what unit tests that drive @@ -302,6 +346,9 @@ func buildModule() (*objects.Module, error) { // CPython: Python/sysmodule.c:3478 set_flags_from_config defaultCfg := &initconfig.PyConfig{} defaultCfg.InitPythonConfig() + if pendingSafePath { + defaultCfg.SafePath = 1 + } if err := setItem(md, "flags", makeFlags(defaultCfg)); err != nil { return nil, err } @@ -338,6 +385,15 @@ func buildModule() (*objects.Module, error) { return nil, err } } + // sys._stdlib_dir lets FrozenImporter._resolve_filename find the + // on-disk copy of a frozen module. + // + // CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) + if pendingStdlibDir != "" { + if err := setStr(md, "_stdlib_dir", pendingStdlibDir); err != nil { + return nil, err + } + } // sys.exc_info reads the per-thread handled-exception slot the vm // maintains across PUSH_EXC_INFO / POP_EXCEPT. unittest's // _Outcome.testPartExecutor and traceback.format_exc both call it diff --git a/module/sys/stdlib_module_names.go b/module/sys/stdlib_module_names.go new file mode 100644 index 000000000..b07c7c5f0 --- /dev/null +++ b/module/sys/stdlib_module_names.go @@ -0,0 +1,305 @@ +// Code generated from CPython Python/stdlib_module_names.h. DO NOT EDIT. +// +// CPython: Python/stdlib_module_names.h _Py_stdlib_module_names +package sys + +// stdlibModuleNames is the verbatim list backing sys.stdlib_module_names. +var stdlibModuleNames = []string{ + "__future__", + "_abc", + "_aix_support", + "_android_support", + "_apple_support", + "_ast", + "_ast_unparse", + "_asyncio", + "_bisect", + "_blake2", + "_bz2", + "_codecs", + "_codecs_cn", + "_codecs_hk", + "_codecs_iso2022", + "_codecs_jp", + "_codecs_kr", + "_codecs_tw", + "_collections", + "_collections_abc", + "_colorize", + "_compat_pickle", + "_contextvars", + "_csv", + "_ctypes", + "_curses", + "_curses_panel", + "_datetime", + "_dbm", + "_decimal", + "_elementtree", + "_frozen_importlib", + "_frozen_importlib_external", + "_functools", + "_gdbm", + "_hashlib", + "_heapq", + "_hmac", + "_imp", + "_interpchannels", + "_interpqueues", + "_interpreters", + "_io", + "_ios_support", + "_json", + "_locale", + "_lsprof", + "_lzma", + "_markupbase", + "_md5", + "_multibytecodec", + "_multiprocessing", + "_opcode", + "_opcode_metadata", + "_operator", + "_osx_support", + "_overlapped", + "_pickle", + "_posixshmem", + "_posixsubprocess", + "_py_abc", + "_py_warnings", + "_pydatetime", + "_pydecimal", + "_pyio", + "_pylong", + "_pyrepl", + "_queue", + "_random", + "_remote_debugging", + "_scproxy", + "_sha1", + "_sha2", + "_sha3", + "_signal", + "_sitebuiltins", + "_socket", + "_sqlite3", + "_sre", + "_ssl", + "_stat", + "_statistics", + "_string", + "_strptime", + "_struct", + "_suggestions", + "_symtable", + "_sysconfig", + "_thread", + "_threading_local", + "_tkinter", + "_tokenize", + "_tracemalloc", + "_types", + "_typing", + "_uuid", + "_warnings", + "_weakref", + "_weakrefset", + "_winapi", + "_wmi", + "_zoneinfo", + "_zstd", + "abc", + "annotationlib", + "antigravity", + "argparse", + "array", + "ast", + "asyncio", + "atexit", + "base64", + "bdb", + "binascii", + "bisect", + "builtins", + "bz2", + "cProfile", + "calendar", + "cmath", + "cmd", + "code", + "codecs", + "codeop", + "collections", + "colorsys", + "compileall", + "compression", + "concurrent", + "configparser", + "contextlib", + "contextvars", + "copy", + "copyreg", + "csv", + "ctypes", + "curses", + "dataclasses", + "datetime", + "dbm", + "decimal", + "difflib", + "dis", + "doctest", + "email", + "encodings", + "ensurepip", + "enum", + "errno", + "faulthandler", + "fcntl", + "filecmp", + "fileinput", + "fnmatch", + "fractions", + "ftplib", + "functools", + "gc", + "genericpath", + "getopt", + "getpass", + "gettext", + "glob", + "graphlib", + "grp", + "gzip", + "hashlib", + "heapq", + "hmac", + "html", + "http", + "idlelib", + "imaplib", + "importlib", + "inspect", + "io", + "ipaddress", + "itertools", + "json", + "keyword", + "linecache", + "locale", + "logging", + "lzma", + "mailbox", + "marshal", + "math", + "mimetypes", + "mmap", + "modulefinder", + "msvcrt", + "multiprocessing", + "netrc", + "nt", + "ntpath", + "nturl2path", + "numbers", + "opcode", + "operator", + "optparse", + "os", + "pathlib", + "pdb", + "pickle", + "pickletools", + "pkgutil", + "platform", + "plistlib", + "poplib", + "posix", + "posixpath", + "pprint", + "profile", + "pstats", + "pty", + "pwd", + "py_compile", + "pyclbr", + "pydoc", + "pydoc_data", + "pyexpat", + "queue", + "quopri", + "random", + "re", + "readline", + "reprlib", + "resource", + "rlcompleter", + "runpy", + "sched", + "secrets", + "select", + "selectors", + "shelve", + "shlex", + "shutil", + "signal", + "site", + "smtplib", + "socket", + "socketserver", + "sqlite3", + "sre_compile", + "sre_constants", + "sre_parse", + "ssl", + "stat", + "statistics", + "string", + "stringprep", + "struct", + "subprocess", + "symtable", + "sys", + "sysconfig", + "syslog", + "tabnanny", + "tarfile", + "tempfile", + "termios", + "textwrap", + "this", + "threading", + "time", + "timeit", + "tkinter", + "token", + "tokenize", + "tomllib", + "trace", + "traceback", + "tracemalloc", + "tty", + "turtle", + "turtledemo", + "types", + "typing", + "unicodedata", + "unittest", + "urllib", + "uuid", + "venv", + "warnings", + "wave", + "weakref", + "webbrowser", + "winreg", + "winsound", + "wsgiref", + "xml", + "xmlrpc", + "zipapp", + "zipfile", + "zipimport", + "zlib", + "zoneinfo", +} diff --git a/module/sys/sys.go b/module/sys/sys.go index bc68d2681..196c0e621 100644 --- a/module/sys/sys.go +++ b/module/sys/sys.go @@ -16,9 +16,13 @@ package sys import ( + "fmt" + "runtime" + "sort" "strconv" "github.com/tamnd/gopy/build" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -51,6 +55,18 @@ func Init() (*objects.Dict, error) { if err := setStr(d, "float_repr_style", "short"); err != nil { return nil, err } + // sys.winver is the Windows-only DLL version string (MS_DLL_ID, the + // major.minor "3.14"). site._get_path reads it to build the per-user + // site-packages path under os.name == 'nt', so the bootstrap needs it + // before site runs. CPython sets it only on Windows. + // + // CPython: Python/sysmodule.c:3869 SET_SYS_FROM_STRING("winver", PyWin_DLLVersionString) + if runtime.GOOS == "windows" { + winver := strconv.Itoa(build.PythonMajorVersion) + "." + strconv.Itoa(build.PythonMinorVersion) + if err := setStr(d, "winver", winver); err != nil { + return nil, err + } + } if err := setInt(d, "hexversion", hexVersion()); err != nil { return nil, err @@ -74,7 +90,15 @@ func Init() (*objects.Dict, error) { if err := setItem(d, "builtin_module_names", builtinModuleNames()); err != nil { return nil, err } - if err := setItem(d, "stdlib_module_names", objects.NewTuple(nil)); err != nil { + stdlibNames := make([]objects.Object, len(stdlibModuleNames)) + for i, n := range stdlibModuleNames { + stdlibNames[i] = objects.NewStr(n) + } + stdlibSet, err := objects.NewFrozenset(stdlibNames) + if err != nil { + return nil, err + } + if err := setItem(d, "stdlib_module_names", stdlibSet); err != nil { return nil, err } if err := setItem(d, "hash_info", hashInfo()); err != nil { @@ -120,6 +144,69 @@ func Init() (*objects.Dict, error) { return nil, err } + // Private helper that strips the __dict__ and __weakref__ descriptors + // from a mutable type's dict and refreshes its caches. dataclasses + // calls it in _add_slots before rebuilding the class with __slots__, + // so the original (descriptor-bearing) class can be garbage collected + // (gh-135228). Immutable types are rejected. + // + // CPython: Python/sysmodule.c:2658 sys__clear_type_descriptors_impl + if err := setItem(d, "_clear_type_descriptors", objects.NewBuiltinFunction("_clear_type_descriptors", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _clear_type_descriptors() takes exactly one argument (%d given)", len(args)) + } + t, ok := args[0].(*objects.Type) + if !ok { + return nil, fmt.Errorf("TypeError: _clear_type_descriptors() argument 1 must be type, not %s", args[0].Type().Name) + } + if t.TpFlags&objects.TpFlagImmutable != 0 { + return nil, fmt.Errorf("TypeError: argument is immutable") + } + objects.DelTypeDescr(t, "__dict__") + objects.DelTypeDescr(t, "__weakref__") + // Fire PyType_Modified unconditionally, matching CPython which + // calls it after the pops even when neither descriptor was present. + t.InvalidateVersionTag() + return objects.None(), nil + })); err != nil { + return nil, err + } + + // Import-system state the runtime exposes at the top level. CPython + // stamps these in PySys_Create / the import bootstrap; runpy and + // pkgutil read them directly. gopy's import is Go-side so the hooks + // list and the importer cache stay empty, but the source loaders do + // write __pycache__/..pyc files, so the default matches + // CPython: bytecode writing is on unless -B / PYTHONDONTWRITEBYTECODE. + // + // CPython: Python/sysmodule.c _PySys_AddObject path_hooks/path_importer_cache + if err := setItem(d, "dont_write_bytecode", objects.NewBool(false)); err != nil { + return nil, err + } + // pycache_prefix controls where the import machinery writes .pyc + // caches; None means alongside the source. cache_from_source reads it. + // + // CPython: Python/sysmodule.c sets sys.pycache_prefix from PyConfig + if err := setItem(d, "pycache_prefix", objects.None()); err != nil { + return nil, err + } + if err := setItem(d, "path_hooks", objects.NewList(nil)); err != nil { + return nil, err + } + // meta_path is the meta-path finder list. CPython seeds it with + // BuiltinImporter, FrozenImporter and PathFinder; gopy resolves those + // three Go-side, so the list starts empty. It still has to exist as a + // real list: import_helper saves and restores it around every test, + // and user code is free to append custom finders. + // + // CPython: Python/pylifecycle.c init_importlib (sys.meta_path) + if err := setItem(d, "meta_path", objects.NewList(nil)); err != nil { + return nil, err + } + if err := setItem(d, "path_importer_cache", objects.NewDict()); err != nil { + return nil, err + } + return d, nil } @@ -193,13 +280,28 @@ func maxsize() int64 { return 1<<31 - 1 } -// versionInfo returns sys.version_info as a five-tuple -// (major, minor, micro, releaselevel, serial). The struct-sequence -// named-tuple lands with 1651-sys-C; v0.7 uses a plain tuple. +// versionInfoType is the struct-sequence type behind sys.version_info: +// a five-field named tuple (major, minor, micro, releaselevel, serial) +// whose type repr reads sys.version_info(major=3, minor=14, ...). It +// subclasses tuple so isinstance(sys.version_info, tuple) holds and the +// values stay index-addressable, while sys.version_info.minor and the +// other named members resolve through the struct-sequence members. +// +// CPython: Python/sysmodule.c:850 version_info_type / make_version_info +var versionInfoType = objects.NewStructSeqType("sys.version_info", []objects.StructSeqField{ + {Name: "major", Doc: "Major release number"}, + {Name: "minor", Doc: "Minor release number"}, + {Name: "micro", Doc: "Patch release number"}, + {Name: "releaselevel", Doc: "'alpha', 'beta', 'candidate', or 'final'"}, + {Name: "serial", Doc: "Serial release number"}, +}) + +// versionInfo returns sys.version_info as the named struct-sequence +// (major, minor, micro, releaselevel, serial). // // CPython: Python/sysmodule.c:3884 make_version_info -func versionInfo() *objects.Tuple { - return objects.NewTuple([]objects.Object{ +func versionInfo() *objects.StructSeq { + return objects.NewStructSeq(versionInfoType, []objects.Object{ objects.NewInt(int64(build.PythonMajorVersion)), objects.NewInt(int64(build.PythonMinorVersion)), objects.NewInt(0), @@ -242,17 +344,29 @@ func implementation() *objects.Namespace { return n } -// builtinModuleNames returns the tuple of module names that are -// compiled into the interpreter. Until 1623 lands the import system -// the list contains just the modules gopy initializes statically -// (builtins, sys). The slice grows as 1651 lands more modules. +// builtinModuleNames returns the sorted tuple of module names compiled +// into the interpreter. CPython builds this directly from +// PyImport_Inittab; gopy statically links every extension module into +// the binary, so the table is the inittab snapshot minus the handful of +// pure-Python modules gopy registers there only as an import shortcut +// (imp.ShadowedByStdlib), keeping this list in lockstep with +// _imp.is_builtin. // // CPython: Python/sysmodule.c:3859 list_builtin_module_names func builtinModuleNames() *objects.Tuple { - return objects.NewTuple([]objects.Object{ - objects.NewStr("builtins"), - objects.NewStr("sys"), - }) + names := make([]string, 0, 64) + for _, e := range imp.InittabSnapshot() { + if imp.ShadowedByStdlib(e.Name) { + continue + } + names = append(names, e.Name) + } + sort.Strings(names) + items := make([]objects.Object, len(names)) + for i, n := range names { + items[i] = objects.NewStr(n) + } + return objects.NewTuple(items) } // hashInfo is sys.hash_info as a SimpleNamespace. The field order diff --git a/module/sys/sys_test.go b/module/sys/sys_test.go index f44fbe1d8..63071436d 100644 --- a/module/sys/sys_test.go +++ b/module/sys/sys_test.go @@ -4,6 +4,7 @@ import ( "strings" "testing" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -42,10 +43,11 @@ func TestInitVersionInfoShape(t *testing.T) { if err != nil { t.Fatalf("GetItem(version_info): %v", err) } - tup, ok := v.(*objects.Tuple) + ss, ok := v.(*objects.StructSeq) if !ok { - t.Fatalf("version_info is %T, want *Tuple", v) + t.Fatalf("version_info is %T, want *StructSeq", v) } + tup := ss.AsTuple() if tup.Len() != 5 { t.Fatalf("version_info has %d items, want 5", tup.Len()) } @@ -136,6 +138,15 @@ func TestInitMaxsizePositive(t *testing.T) { // builtins and sys are static-init, so they are advertised here. // Once 1623 lands the import system, this list grows. func TestInitBuiltinModuleNamesIncludesSys(t *testing.T) { + // builtin_module_names mirrors the live inittab. The full binary + // links the builtins module; register a stub here so the snapshot + // advertises it without importing module/builtins (which would form + // an import cycle through builtins -> module/sys). + if !imp.IsBuiltinName("builtins") { + _ = imp.AppendInittab("builtins", func() (*objects.Module, error) { + return objects.NewModule("builtins"), nil + }) + } d, err := Init() if err != nil { t.Fatalf("Init: %v", err) diff --git a/module/winreg/module.go b/module/winreg/module.go new file mode 100644 index 000000000..048829ff7 --- /dev/null +++ b/module/winreg/module.go @@ -0,0 +1,146 @@ +// Package winreg is the gopy port of CPython's PC/winreg.c. +// CPython only registers winreg on Windows. gopy registers it on every +// platform so importlib._bootstrap_external (which does `import winreg` +// at module top level under `if sys.platform == 'win32'`) imports +// regardless; stdlib consumers gate their use behind a win32 check so the +// module is only actually loaded on Windows. The exposed surface is the +// HKEY_*/KEY_*/REG_* integer constants and the `error` alias that +// _bootstrap_external's WindowsRegistryFinder reads at find_spec time. +// The function surface (OpenKey, QueryValue, EnumKey, CreateKey, the PyHKEY +// type, etc.) is not yet ported: it requires the Windows registry syscalls +// (advapi32), which gopy has no host binding for, and the default meta_path +// never installs WindowsRegistryFinder, so those names are never reached. +// Attribute lookups for the unported names raise AttributeError; that +// surfaces at call time on Windows for the deprecated registry finder only. +// +// CPython: PC/winreg.c:1 winreg module +// CPython: PC/winreg.c:2121 exec_module (constant registration) +package winreg + +import ( + "math/big" + "runtime" + + "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +func init() { + // CPython only builds winreg on Windows (it lives in PC/winreg.c and is + // registered through PC/config.c's _PyImport_Inittab only for the + // Windows build). _bootstrap_external imports it solely under + // `if sys.platform == 'win32'`, so on other platforms the name must stay + // absent: test.support.import_helper.import_module('winreg') then raises + // SkipTest, exactly as it does on CPython/macOS. + if runtime.GOOS != "windows" { + return + } + _ = imp.AppendInittab("winreg", buildModule) +} + +// buildModule constructs the winreg module dict with the constants +// CPython's exec_module registers. The HKEY_* predefined handles are the +// sign-extended 64-bit pointer values PyLong_FromVoidPtr yields on a 64-bit +// build (e.g. HKEY_CLASSES_ROOT == (HKEY)0x80000000 widened to +// 0xFFFFFFFF80000000). The KEY_*/REG_* access and value-type constants are +// the documented Windows SDK (winnt.h, winreg.h) literals inskey/ADD_INT +// register. +// +// CPython: PC/winreg.c:2121 exec_module +func buildModule() (*objects.Module, error) { + m := objects.NewModule("winreg") + d := m.Dict() + + // Predefined HKEY handles. CPython: PC/winreg.c:2125-2132 inskey. + // On a 64-bit build PyLong_FromVoidPtr sign-extends the (HKEY)0x8000000N + // pointer to a 64-bit value that overflows int64, so they are built from + // big.Int. + hkeys := map[string]uint64{ + "HKEY_CLASSES_ROOT": 0xFFFFFFFF80000000, + "HKEY_CURRENT_USER": 0xFFFFFFFF80000001, + "HKEY_LOCAL_MACHINE": 0xFFFFFFFF80000002, + "HKEY_USERS": 0xFFFFFFFF80000003, + "HKEY_PERFORMANCE_DATA": 0xFFFFFFFF80000004, + "HKEY_CURRENT_CONFIG": 0xFFFFFFFF80000005, + "HKEY_DYN_DATA": 0xFFFFFFFF80000006, + } + for name, value := range hkeys { + b := new(big.Int).SetUint64(value) + if err := d.SetItem(objects.NewStr(name), objects.NewIntFromBig(b)); err != nil { + return nil, err + } + } + + // Access-right and value-type constants. CPython: PC/winreg.c:2135-2210 + // ADD_INT. Values are the winnt.h / winreg.h public literals. + consts := map[string]int64{ + // Registry access rights (winnt.h). + "KEY_QUERY_VALUE": 0x0001, + "KEY_SET_VALUE": 0x0002, + "KEY_CREATE_SUB_KEY": 0x0004, + "KEY_ENUMERATE_SUB_KEYS": 0x0008, + "KEY_NOTIFY": 0x0010, + "KEY_CREATE_LINK": 0x0020, + "KEY_WOW64_64KEY": 0x0100, + "KEY_WOW64_32KEY": 0x0200, + "KEY_READ": 0x20019, + "KEY_WRITE": 0x20006, + "KEY_EXECUTE": 0x20019, + "KEY_ALL_ACCESS": 0xF003F, + + // RegCreateKeyEx / RegOpenKeyEx options (winnt.h). + "REG_OPTION_RESERVED": 0x0000, + "REG_OPTION_NON_VOLATILE": 0x0000, + "REG_OPTION_VOLATILE": 0x0001, + "REG_OPTION_CREATE_LINK": 0x0002, + "REG_OPTION_BACKUP_RESTORE": 0x0004, + "REG_OPTION_OPEN_LINK": 0x0008, + "REG_LEGAL_OPTION": 0x000F, + + // RegCreateKeyEx disposition (winnt.h). + "REG_CREATED_NEW_KEY": 0x00000001, + "REG_OPENED_EXISTING_KEY": 0x00000002, + + // RegRestoreKey / RegReplaceKey flags (winnt.h). + "REG_WHOLE_HIVE_VOLATILE": 0x00000001, + "REG_REFRESH_HIVE": 0x00000002, + "REG_NO_LAZY_FLUSH": 0x00000004, + + // RegNotifyChangeKeyValue filter (winnt.h). + "REG_NOTIFY_CHANGE_NAME": 0x00000001, + "REG_NOTIFY_CHANGE_ATTRIBUTES": 0x00000002, + "REG_NOTIFY_CHANGE_LAST_SET": 0x00000004, + "REG_NOTIFY_CHANGE_SECURITY": 0x00000008, + "REG_LEGAL_CHANGE_FILTER": 0x0000000F, + + // Registry value types (winnt.h). + "REG_NONE": 0, + "REG_SZ": 1, + "REG_EXPAND_SZ": 2, + "REG_BINARY": 3, + "REG_DWORD": 4, + "REG_DWORD_LITTLE_ENDIAN": 4, + "REG_DWORD_BIG_ENDIAN": 5, + "REG_LINK": 6, + "REG_MULTI_SZ": 7, + "REG_RESOURCE_LIST": 8, + "REG_FULL_RESOURCE_DESCRIPTOR": 9, + "REG_RESOURCE_REQUIREMENTS_LIST": 10, + "REG_QWORD": 11, + "REG_QWORD_LITTLE_ENDIAN": 11, + } + for name, value := range consts { + if err := d.SetItem(objects.NewStr(name), objects.NewInt(value)); err != nil { + return nil, err + } + } + + // winreg.error is an alias of OSError. CPython: PC/winreg.c:2218 + // (st->PyHKEY_Type aside, the module sets error = PyExc_OSError). + if err := d.SetItem(objects.NewStr("error"), errors.PyExc_OSError); err != nil { + return nil, err + } + + return m, nil +} diff --git a/module/zlib/module.go b/module/zlib/module.go index 201955f3b..91c708067 100644 --- a/module/zlib/module.go +++ b/module/zlib/module.go @@ -151,7 +151,14 @@ func compressObjFlush(args []objects.Object, _ map[string]objects.Object) (objec return nil, fmt.Errorf("ValueError: compressor has already been flushed") } - mode := zSyncFlush + // flush()'s mode argument defaults to Z_FINISH, not Z_SYNC_FLUSH: + // the common `compressobj().compress(x) + flush()` idiom must emit a + // complete stream (final deflate block) so a one-shot decompressor can + // read it back. zipfile relies on this when it stores compressed + // members, and zipimport then decompresses them with raw inflate. + // + // CPython: Modules/zlibmodule.c:478 zlib_Compress_flush_impl (mode=Z_FINISH) + mode := zFinish if len(args) >= 2 { m, err := intFromObj(args[1]) if err != nil { @@ -512,8 +519,11 @@ func zlibCRC32(args []objects.Object, kwargs map[string]objects.Object) (objects } result := crc32.Update(prev, crc32.IEEETable, data) - // CPython returns a signed 32-bit integer widened to Python int. - return objects.NewInt(int64(int32(result))), nil + // CPython returns the checksum as an unsigned 32-bit value widened to + // a Python int (PyLong_FromUnsignedLong(value & 0xffffffffU)). + // + // CPython: Modules/zlibmodule.c:1901 zlib_crc32_impl + return objects.NewInt(int64(uint64(result))), nil } // zlibAdler32 computes the Adler-32 checksum, optionally updating a previous value. @@ -545,7 +555,11 @@ func zlibAdler32(args []objects.Object, kwargs map[string]objects.Object) (objec } result := adler32Update(prev, data) - return objects.NewInt(int64(int32(result))), nil + // CPython returns the checksum as an unsigned 32-bit value widened to + // a Python int (PyLong_FromUnsignedLong(value & 0xffffffffU)). + // + // CPython: Modules/zlibmodule.c:1901 zlib_adler32_impl + return objects.NewInt(int64(uint64(result))), nil } // zlibCompressobj returns a streaming Compress object. @@ -752,6 +766,8 @@ func toBytes(o objects.Object) ([]byte, error) { switch v := o.(type) { case *objects.Bytes: return v.Bytes(), nil + case *objects.ByteArray: + return v.Bytes(), nil case *objects.Unicode: s, err := objects.Str(o) if err != nil { diff --git a/monitor/basecode.go b/monitor/basecode.go new file mode 100644 index 000000000..d8c669e17 --- /dev/null +++ b/monitor/basecode.go @@ -0,0 +1,70 @@ +// Recovering the underlying bytecode from a live code object. The +// interpreter rewrites bytecode in place for both specialization +// (quickening) and instrumentation (the INSTRUMENTED_ markers plus +// the INSTRUMENTED_LINE side table). marshal needs the original, +// un-instrumented, un-specialized bytes so a .pyc never captures the +// transient monitoring state of the process that wrote it. +// +// CPython: Python/instrumentation.c:637 _Py_GetBaseCodeUnit +// CPython: Objects/codeobject.c:2293 deopt_code + +package monitor + +import ( + "github.com/tamnd/gopy/compile" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/specialize" +) + +// GetBaseCodeUnit returns the underlying opcode at codeunit i, stripping +// both specialization and instrumentation. The oparg byte is returned +// unchanged for the common case; instrumentation markers carry no oparg +// rewrite (ENTER_EXECUTOR, the only opcode that does, is JIT-only and +// never appears in gopy bytecode). +// +// CPython: Python/instrumentation.c:637 _Py_GetBaseCodeUnit +func GetBaseCodeUnit(code *objects.Code, i int) compile.Opcode { + opcode := compile.Opcode(code.Code[2*i]) + + // Below the instrumented range it is purely a specialized opcode: + // deopt to the adaptive parent and we are done. + if !IsInstrumented(opcode) { + return specialize.Deopt(opcode) + } + + data := CoMonitoring(code) + if opcode == compile.INSTRUMENTED_LINE && data != nil && data.Lines != nil { + opcode = compile.Opcode(getOriginalOpcode(data.Lines, i)) + } + if opcode == compile.INSTRUMENTED_INSTRUCTION && data != nil && i < len(data.PerInstructionOpcodes) { + opcode = compile.Opcode(data.PerInstructionOpcodes[i]) + } + if base := deinstrument[opcode]; base != 0 { + return base + } + return specialize.Deopt(opcode) +} + +// BaseCode returns a fresh slice holding code's bytecode with all +// specialization and instrumentation removed and every inline cache +// cell zeroed. This is the byte sequence marshal must write so .pyc +// output is independent of the warming/monitoring state of the +// process. On load specialize.Enable re-quickens the adaptive form. +// +// CPython: Objects/codeobject.c:2293 deopt_code (via _PyCode_GetCode) +func BaseCode(code *objects.Code) []byte { + out := make([]byte, len(code.Code)) + copy(out, code.Code) + size := len(out) / 2 + for i := 0; i < size; i++ { + base := GetBaseCodeUnit(code, i) + out[2*i] = byte(base) + caches := specialize.CacheCount(base) + for j := 1; j <= caches && i+j < size; j++ { + out[2*(i+j)] = 0 + out[2*(i+j)+1] = 0 + } + i += caches + } + return out +} diff --git a/monitor/install.go b/monitor/install.go index 74787f60d..ef89636c6 100644 --- a/monitor/install.go +++ b/monitor/install.go @@ -60,15 +60,33 @@ func forceInstrument(code *objects.Code, interp *InterpState) error { initializeLineTools(code, data, &active) } } + if active.Tools[EventInstruction] != 0 { + ensurePerInstruction(code, data) + if multipleTools(active.Tools[EventInstruction]) && data.PerInstructionTools == nil { + data.PerInstructionTools = make([]uint8, instructionCount(code)) + } + } for instr := 0; instr < len(data.Tools); { - op := compile.Opcode(byteAt(code.Code, instr)) - base := DeInstrument(specialize.Deopt(op)) + base := GetBaseCodeUnit(code, instr) if !OpcodeHasEvent(base) { instr += 1 + cacheCount(base, code) continue } ev := EventForOpcode(base) + // RESUME has no static event: it fires PY_START at function + // entry (arg 0) and PY_RESUME on a generator / coroutine + // re-entry (arg > 0), so the event depends on the oparg. + // + // CPython: Python/instrumentation.c:1824 force_instrument_lock_held + // (base_opcode == RESUME ? instr.op.arg > 0 ...) + if base == compile.RESUME { + if code.Code[2*instr+1] > 0 { + ev = EventPyResume + } else { + ev = EventPyStart + } + } if int(ev) < LocalEvents && ev != EventLine { if removed := removedEvents.Tools[ev]; removed != 0 { removeTools(code, data, instr, removed) @@ -80,6 +98,9 @@ func forceInstrument(code *objects.Code, interp *InterpState) error { instr += 1 + cacheCount(base, code) } + // GH-103845: line and instruction instrumentation must both be + // removed before either is added, otherwise the add pass clobbers + // freshly-installed markers. if removed := removedEvents.Tools[EventLine]; removed != 0 && data.Lines != nil { for instr := 0; instr < instructionCount(code); instr++ { if getOriginalOpcode(data.Lines, instr) != 0 { @@ -87,6 +108,17 @@ func forceInstrument(code *objects.Code, interp *InterpState) error { } } } + if removed := removedEvents.Tools[EventInstruction]; removed != 0 && data.PerInstructionOpcodes != nil { + for instr := firstTraceable(code); instr < instructionCount(code); { + base := GetBaseCodeUnit(code, instr) + if base == compile.RESUME || base == compile.END_FOR { + instr += 1 + cacheCount(base, code) + continue + } + removePerInstructionTools(code, data, instr, removed) + instr += 1 + cacheCount(base, code) + } + } if added := newEvents.Tools[EventLine]; added != 0 && data.Lines != nil { for instr := 0; instr < instructionCount(code); instr++ { if getOriginalOpcode(data.Lines, instr) != 0 { @@ -94,6 +126,17 @@ func forceInstrument(code *objects.Code, interp *InterpState) error { } } } + if added := newEvents.Tools[EventInstruction]; added != 0 && data.PerInstructionOpcodes != nil { + for instr := firstTraceable(code); instr < instructionCount(code); { + base := GetBaseCodeUnit(code, instr) + if base == compile.RESUME || base == compile.END_FOR { + instr += 1 + cacheCount(base, code) + continue + } + addPerInstructionTools(code, data, instr, added) + instr += 1 + cacheCount(base, code) + } + } code.MonitoringVersion = interp.GlobalVersion() return nil @@ -164,19 +207,7 @@ func addTools(code *objects.Code, data *CoMonitoringData, offset int, tools uint data.Tools = make([]uint8, instructionCount(code)) } data.Tools[offset] |= tools - op := compile.Opcode(byteAt(code.Code, offset)) - if IsInstrumented(op) { - return - } - base := specialize.Deopt(op) - instrumented := InstrumentedFor(base) - if instrumented == 0 { - return - } - code.Code[2*offset] = byte(instrumented) - if specialize.CacheCount(base) > 0 && code.Quickened { - specialize.StoreCounter(code.Code, offset, specialize.AdaptiveCounterWarmup()) - } + instrument(code, data, offset) } // removeTools clears the bits in tools at (offset, event). When the @@ -192,10 +223,118 @@ func removeTools(code *objects.Code, data *CoMonitoringData, offset int, tools u if data.Tools[offset] != 0 { return } - op := compile.Opcode(byteAt(code.Code, offset)) - if !IsInstrumented(op) { + deInstrument(code, data, offset) +} + +// opcodeRefKind selects which backing store currently holds the real +// opcode for a codeunit. CPython threads this as a bare uint8* opcode_ptr +// that is redirected into the line table or the per-instruction table as +// each instrumentation layer is unwrapped; Go cannot alias the three +// stores behind one pointer, so the backing store is carried explicitly +// and read/written through get/set. +// +// CPython: Python/instrumentation.c:757 instrument (opcode_ptr) +type opcodeRefKind int + +const ( + refLive opcodeRefKind = iota // code.Code[2*i] + refLine // original opcode in the line table + refPerInstr // per-instruction opcode side table +) + +// opcodeRef is the resolved location of the runnable opcode for codeunit +// i, mirroring CPython's opcode_ptr after it has been walked through the +// INSTRUMENTED_LINE and INSTRUMENTED_INSTRUCTION side tables. +type opcodeRef struct { + code *objects.Code + data *CoMonitoringData + i int + kind opcodeRefKind +} + +func (r opcodeRef) get() compile.Opcode { + switch r.kind { + case refLine: + return compile.Opcode(getOriginalOpcode(r.data.Lines, r.i)) + case refPerInstr: + return compile.Opcode(r.data.PerInstructionOpcodes[r.i]) + default: + return compile.Opcode(byteAt(r.code.Code, r.i)) + } +} + +func (r opcodeRef) set(op compile.Opcode) { + switch r.kind { + case refLine: + setOriginalOpcode(r.data.Lines, r.i, byte(op)) + case refPerInstr: + r.data.PerInstructionOpcodes[r.i] = byte(op) + default: + r.code.Code[2*r.i] = byte(op) + } +} + +// resolveOpcodeRef walks the live byte through the INSTRUMENTED_LINE and +// INSTRUMENTED_INSTRUCTION side tables, returning the location that holds +// the opcode an instrument / de_instrument should rewrite. This is the +// shared opcode_ptr-walking prologue of CPython's instrument(), +// de_instrument(), and de_instrument_per_instruction(). +// +// CPython: Python/instrumentation.c:757 instrument +func resolveOpcodeRef(code *objects.Code, data *CoMonitoringData, i int) (opcodeRef, compile.Opcode) { + ref := opcodeRef{code: code, data: data, i: i, kind: refLive} + op := ref.get() + if op == compile.INSTRUMENTED_LINE { + ref.kind = refLine + op = ref.get() + } + if op == compile.INSTRUMENTED_INSTRUCTION { + ref.kind = refPerInstr + op = ref.get() + } + return ref, op +} + +// instrument stamps the matching INSTRUMENTED_ opcode for codeunit i, +// writing through the resolved opcode_ptr so a site already hidden behind +// INSTRUMENTED_LINE or INSTRUMENTED_INSTRUCTION updates the hidden opcode +// in its side table rather than clobbering the visible marker. A slot +// that is already an instrumented variant is left untouched. +// +// CPython: Python/instrumentation.c:757 instrument +func instrument(code *objects.Code, data *CoMonitoringData, i int) { + ref, op := resolveOpcodeRef(code, data, i) + if IsInstrumented(op) { + return + } + deopt := specialize.Deopt(op) + instrumented := InstrumentedFor(deopt) + if instrumented == 0 { return } - base := DeInstrument(op) - code.Code[2*offset] = byte(base) + ref.set(instrumented) + if specialize.CacheCount(deopt) > 0 && code.Quickened { + specialize.StoreCounter(code.Code, i, specialize.AdaptiveCounterWarmup()) + } +} + +// deInstrument restores the original opcode for codeunit i, writing +// through the resolved opcode_ptr so a site hidden behind a line or +// per-instruction marker has its side-table opcode de-instrumented in +// place. A non-instrumented opcode is left alone. +// +// CPython: Python/instrumentation.c:676 de_instrument +func deInstrument(code *objects.Code, data *CoMonitoringData, i int) { + ref, op := resolveOpcodeRef(code, data, i) + if int(op) >= len(deinstrument) { + return + } + deinstrumented := deinstrument[op] + if deinstrumented == 0 { + return + } + ref.set(deinstrumented) + if specialize.CacheCount(deinstrumented) > 0 && code.Quickened { + specialize.StoreCounter(code.Code, i, specialize.AdaptiveCounterWarmup()) + } } diff --git a/monitor/instruction.go b/monitor/instruction.go new file mode 100644 index 000000000..f800f8b10 --- /dev/null +++ b/monitor/instruction.go @@ -0,0 +1,171 @@ +// Per-instruction (opcode) event instrumentation. The INSTRUCTION +// event fires once per executed instruction; bdb / pdb drives it +// through frame.f_trace_opcodes so a debugger can single-step at the +// bytecode level. Like LINE, it gets its own side table: the original +// opcode hidden behind INSTRUMENTED_INSTRUCTION lives in +// PerInstructionOpcodes so the dispatcher can recover and run it after +// firing the event. +// +// CPython: Python/instrumentation.c:799 instrument_per_instruction +// CPython: Python/instrumentation.c:1401 _Py_call_instrumentation_instruction + +package monitor + +import ( + "github.com/tamnd/gopy/compile" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/specialize" +) + +// firstTraceable returns the codeunit index of the first RESUME, the +// boundary CPython stores as code->_co_firsttraceable. The prologue +// before it (MAKE_CELL / COPY_FREE_VARS / RETURN_GENERATOR) runs during +// frame setup and is never traced, so the per-instruction passes skip +// it. +// +// CPython: Objects/codeobject.c:581 entry_point loop +func firstTraceable(code *objects.Code) int { + n := instructionCount(code) + for i := 0; i < n; i++ { + // The RESUME byte may already be hidden behind an instrumentation + // marker (INSTRUMENTED_LINE / INSTRUMENTED_INSTRUCTION / a baked + // INSTRUMENTED_RESUME) from an earlier add pass. Resolve through + // GetBaseCodeUnit so the boundary is stable across add/remove + // cycles; a raw-byte scan would skip the masked RESUME and shift + // the boundary, orphaning prologue markers on the way back out. + // + // CPython stores this once as code->_co_firsttraceable, computed at + // code creation, so it never drifts under instrumentation. + if GetBaseCodeUnit(code, i) == compile.RESUME { + return i + } + } + return 0 +} + +// ensurePerInstruction lazily allocates the per-instruction opcode side +// table. Every codeunit is seeded with its de-instrumented opcode so a +// concurrent local-event change still knows the original to restore. +// +// CPython: Python/instrumentation.c:1747 update_instrumentation_data +func ensurePerInstruction(code *objects.Code, data *CoMonitoringData) { + if data.PerInstructionOpcodes != nil { + return + } + codeLen := instructionCount(code) + data.PerInstructionOpcodes = make([]uint8, codeLen) + for i := 0; i < codeLen; i++ { + op := compile.Opcode(byteAt(code.Code, i)) + data.PerInstructionOpcodes[i] = byte(specialize.Deopt(op)) + } +} + +// instrumentPerInstruction rewrites the opcode at codeunit i to +// INSTRUMENTED_INSTRUCTION, stashing the underlying opcode (resolving +// through INSTRUMENTED_LINE first) in PerInstructionOpcodes. +// +// CPython: Python/instrumentation.c:799 instrument_per_instruction +func instrumentPerInstruction(code *objects.Code, data *CoMonitoringData, i int) { + op := compile.Opcode(byteAt(code.Code, i)) + if op == compile.INSTRUMENTED_LINE { + op = compile.Opcode(getOriginalOpcode(data.Lines, i)) + } + if op == compile.INSTRUMENTED_INSTRUCTION { + return + } + if IsInstrumented(op) { + data.PerInstructionOpcodes[i] = byte(op) + } else { + data.PerInstructionOpcodes[i] = byte(specialize.Deopt(op)) + } + // When the line marker hides this codeunit the real opcode lives in + // the line table; otherwise it lives in the bytecode. Either way the + // visible byte that dispatch fetches becomes INSTRUMENTED_INSTRUCTION. + if compile.Opcode(byteAt(code.Code, i)) == compile.INSTRUMENTED_LINE { + setOriginalOpcode(data.Lines, i, byte(compile.INSTRUMENTED_INSTRUCTION)) + } else { + code.Code[2*i] = byte(compile.INSTRUMENTED_INSTRUCTION) + } +} + +// deInstrumentPerInstruction restores the opcode hidden behind +// INSTRUMENTED_INSTRUCTION at codeunit i. +// +// CPython: Python/instrumentation.c:731 de_instrument_per_instruction +func deInstrumentPerInstruction(code *objects.Code, data *CoMonitoringData, i int) { + hidesLine := false + op := compile.Opcode(byteAt(code.Code, i)) + if op == compile.INSTRUMENTED_LINE { + hidesLine = true + op = compile.Opcode(getOriginalOpcode(data.Lines, i)) + } + if op != compile.INSTRUMENTED_INSTRUCTION { + return + } + original := data.PerInstructionOpcodes[i] + if hidesLine { + setOriginalOpcode(data.Lines, i, original) + } else { + code.Code[2*i] = original + } +} + +// addPerInstructionTools sets the per-instruction tool bits at offset +// and stamps the bytecode with INSTRUMENTED_INSTRUCTION. +// +// CPython: Python/instrumentation.c:928 add_per_instruction_tools +func addPerInstructionTools(code *objects.Code, data *CoMonitoringData, offset int, tools uint8) { + if data.PerInstructionTools != nil { + data.PerInstructionTools[offset] |= tools + } + instrumentPerInstruction(code, data, offset) +} + +// removePerInstructionTools clears the per-instruction tool bits at +// offset and restores the original opcode when none remain. +// +// CPython: Python/instrumentation.c:946 remove_per_instruction_tools +func removePerInstructionTools(code *objects.Code, data *CoMonitoringData, offset int, tools uint8) { + shouldDeinstrument := false + if data.PerInstructionTools != nil { + data.PerInstructionTools[offset] &^= tools + shouldDeinstrument = data.PerInstructionTools[offset] == 0 + } else { + single := data.ActiveMonitors.Tools[EventInstruction] + shouldDeinstrument = (single & tools) == single + } + if shouldDeinstrument { + deInstrumentPerInstruction(code, data, offset) + } +} + +// CallInstrumentationInstruction fires INSTRUCTION for (code, instr) +// and returns the opcode the dispatcher should run in place of the +// INSTRUMENTED_INSTRUCTION marker. The re-entrance guard lives in the +// registered sys_trace_instruction handler, matching how the LINE path +// relies on its callback's tstate->tracing check. +// +// CPython: Python/instrumentation.c:1401 _Py_call_instrumentation_instruction +func CallInstrumentationInstruction(interp *InterpState, code *objects.Code, instr int) (compile.Opcode, error) { + data := CoMonitoring(code) + if data == nil || instr >= len(data.PerInstructionOpcodes) { + return compile.NOP, nil + } + next := compile.Opcode(data.PerInstructionOpcodes[instr]) + tools := uint8(0) + if data.PerInstructionTools != nil { + tools = data.PerInstructionTools[instr] + } else { + tools = data.ActiveMonitors.Tools[EventInstruction] + } + if tools != 0 && interp != nil { + state := MonState{Active: tools} + if err := FireInstruction(interp, &state, code, int32(instr)); err != nil { + return next, err + } + if data.PerInstructionTools != nil { + data.PerInstructionTools[instr] = state.Active + } + } + return next, nil +} diff --git a/monitor/interp.go b/monitor/interp.go index 6eee849d5..01e96b657 100644 --- a/monitor/interp.go +++ b/monitor/interp.go @@ -126,6 +126,20 @@ func CheckToolID(tool Tool) error { return nil } +// checkTool reports an error when tool is a freely-assignable slot +// (0..SYS_PROFILE-1) that no use_tool_id call has claimed. The two +// reserved slots SYS_PROFILE (6) and SYS_TRACE (7) bypass the check: +// sys.setprofile / sys.settrace own them implicitly, so the event +// setters never demand an explicit use_tool_id for them. +// +// CPython: Python/instrumentation.c:1987 check_tool +func (s *InterpState) checkTool(tool Tool) error { + if tool < ToolSysProfile && s.ToolNames[tool] == nil { + return fmt.Errorf("tool %d is not in use", tool) + } + return nil +} + // UseToolID claims tool for name. Errors if the slot is already in // use. Returns nil and stores the name on success. // diff --git a/monitor/line.go b/monitor/line.go index 57994462d..df85b88df 100644 --- a/monitor/line.go +++ b/monitor/line.go @@ -136,8 +136,7 @@ func initializeLines(code *objects.Code, line *LineInstrumentationData) { codeLen := instructionCount(code) currentLine := -1 for i := 0; i < codeLen; { - op := compile.Opcode(byteAt(code.Code, i)) - base := DeInstrument(specialize.Deopt(op)) + base := GetBaseCodeUnit(code, i) line2 := LineForOffset(code, i) setLineDelta(line, i, computeLineDelta(code, line2)) length := 1 + cacheCount(base, code) diff --git a/monitor/local.go b/monitor/local.go index 1f1108e41..68bb1e066 100644 --- a/monitor/local.go +++ b/monitor/local.go @@ -50,8 +50,8 @@ func (s *InterpState) SetLocalEvents(code *objects.Code, tool Tool, events Event if events>>LocalEvents != 0 { return fmt.Errorf("event set %#x has bits outside the %d local events", events, LocalEvents) } - if !s.IsToolInUse(tool) { - return fmt.Errorf("tool %d is not in use", tool) + if err := s.checkTool(tool); err != nil { + return err } data := EnsureCoMonitoringData(code) data.ToolVersions[tool] = s.ToolVersions[tool] @@ -72,8 +72,8 @@ func (s *InterpState) GetLocalEvents(code *objects.Code, tool Tool) (EventSet, e if err := CheckToolID(tool); err != nil { return 0, err } - if !s.IsToolInUse(tool) { - return 0, fmt.Errorf("tool %d is not in use", tool) + if err := s.checkTool(tool); err != nil { + return 0, err } data := CoMonitoring(code) if data == nil { diff --git a/objects/classmethod_descr.go b/objects/classmethod_descr.go index 8e007c788..af4b9eb37 100644 --- a/objects/classmethod_descr.go +++ b/objects/classmethod_descr.go @@ -105,11 +105,12 @@ func classMethodDescrGet2(descr Object, obj Object, ownerType *Type) (Object, er } Incref(t) bf := &BuiltinFunction{ - Name: d.def.Name, - Conv: MethVarargs | MethKeywords, - Self: t, - ownsSelf: true, - Doc: d.def.Doc, + Name: d.def.Name, + Conv: MethVarargs | MethKeywords, + Self: t, + ownsSelf: true, + methOrigin: d, + Doc: d.def.Doc, Fn: func(args []Object, kwargs map[string]Object) (Object, error) { return cfunctionCall(cf, args, kwargs) }, diff --git a/objects/copyreg_hook.go b/objects/copyreg_hook.go index 3cd45eb0d..8f138b0ab 100644 --- a/objects/copyreg_hook.go +++ b/objects/copyreg_hook.go @@ -38,3 +38,12 @@ var CurrentBuiltinsHook func() Object // // CPython: Python/import.c:1450 PyImport_ImportModule var ImportModuleHook func(name string) (Object, error) + +// ModuleReprHook formats a module's repr by delegating to the vendored +// importlib._bootstrap._module_repr, exactly as CPython's C +// module_repr forwards to _PyImport_ImportlibModuleRepr. Wired by +// vm.init(); nil during early bootstrap, where moduleRepr falls back to +// a minimal Go rendering. +// +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr +var ModuleReprHook func(m Object) (string, error) diff --git a/objects/descr.go b/objects/descr.go index f06f8e3f9..4812d0255 100644 --- a/objects/descr.go +++ b/objects/descr.go @@ -261,6 +261,50 @@ var typeDescrTable = map[*Type]map[string]Object{} // order (e.g. enum members, dataclass fields). var typeDescrOrder = map[*Type][]string{} +// typeTraverse is tp_traverse for the metatype. A class holds its methods +// and class attributes in typeDescrTable (plus the live ClassAttrDict / +// nonStrDict mirrors) and its lineage in the __bases__ tuple; CPython's +// type_traverse visits tp_dict / tp_bases / tp_mro so the cycle collector +// can subtract a type's outgoing references. Only heap types are +// gc-tracked (static types are immortal and never appear as candidates), +// so this runs for user classes and lets subtract_refs see, for example, +// a class -> method __globals__ -> module cycle and collapse it. The set +// visited here mirrors exactly the references a type owns; reachable ones +// get re-floated by move_unreachable's visit_reachable through the rooted +// module that defines the class. +// +// CPython: Objects/typeobject.c:1227 type_traverse +func typeTraverse(o Object, visit Visitor) error { + t, ok := o.(*Type) + if !ok { + return nil + } + for _, v := range typeDescrTable[t] { + if v == nil { + continue + } + if err := visit(v); err != nil { + return err + } + } + if t.ClassAttrDict != nil { + if err := visit(t.ClassAttrDict); err != nil { + return err + } + } + if t.nonStrDict != nil { + if err := visit(t.nonStrDict); err != nil { + return err + } + } + if t.BasesObj != nil { + if err := visit(t.BasesObj); err != nil { + return err + } + } + return nil +} + // TypeDescrNames returns the names registered on t through // SetTypeDescr, walking the MRO and de-duplicating. Used by builtins // dir() to introspect a class. diff --git a/objects/dict.go b/objects/dict.go index e32bb6256..fa1f8c585 100644 --- a/objects/dict.go +++ b/objects/dict.go @@ -1068,6 +1068,23 @@ func ReleaseDeadDictContents(d *Dict) { } } +// ClearOwnedContents unconditionally drops every value reference the +// dict owns and resets it to empty. Unlike ReleaseDeadDictContents it +// does not gate on the dict's own refcount: the caller asserts it is +// the sole owner of d (an instance __dict__ that was never handed to +// Python carries no second reference, so its loose refcount is not a +// reliable sole-owner signal). instanceDealloc uses it to release the +// references an instance's attribute values hold when the instance is +// reclaimed, mirroring the Py_CLEAR(*dictptr) -> dict_dealloc -> +// PyDict_Clear chain that subtype_dealloc runs. +// +// CPython: Objects/typeobject.c:2782 subtype_dealloc (clear_dict branch) +func ClearOwnedContents(d *Dict) { + d.lock() + d.clearContents() + d.unlock() +} + // dictPopMethod backs dict.pop(key[, default]). // // CPython: Objects/dictobject.c:3821 dict_pop_impl diff --git a/objects/dict_mutate.go b/objects/dict_mutate.go index 31d8d9cbf..2ae7bf5c7 100644 --- a/objects/dict_mutate.go +++ b/objects/dict_mutate.go @@ -303,6 +303,18 @@ func dictResize(d *Dict, minNew int) error { return err } } + // A resize rebuilds the table, so a key's slot index changes even + // though no key was added or removed. CPython hands the resized dict + // a freshly allocated PyDictKeysObject whose dk_version starts at 0, + // which invalidates every inline cache stamped against the old keys. + // gopy reuses the same *Dict, so reset the version here; otherwise a + // value-replacement that triggers a resize (the load check runs + // before the replace-vs-insert branch in dictInsert) would leave the + // stale dk_version in place and LOAD_ATTR_INSTANCE_VALUE would read + // the wrong slot. + // + // CPython: Objects/dictobject.c:2065 dictresize (new_keys->dk_version = 0) + d.invalidateKeysVersion() return nil } diff --git a/objects/file.go b/objects/file.go index 0ddf1c93d..b2a35cea9 100644 --- a/objects/file.go +++ b/objects/file.go @@ -22,8 +22,29 @@ import ( "fmt" "io" "os" + "runtime" + "unsafe" ) +// ClearOSFileFinalizer disarms the close finalizer that os.NewFile / +// os.OpenFile arm on a borrowed descriptor. The finalizer is set on the +// unexported inner *os.file, not on the returned *os.File, so +// runtime.SetFinalizer(f, nil) on the outer handle is a no-op and leaves the +// close live: a later GC then closes a descriptor whose integer was already +// freed and reused by another open file, surfacing as a spurious EBADF +// ("bad file descriptor") on the unrelated file's next write. os.File is +// struct{ file *file } with the inner pointer at offset 0, so read that +// pointer and clear the finalizer on the object it actually points at. +func ClearOSFileFinalizer(f *os.File) { + if f == nil { + return + } + inner := *(*unsafe.Pointer)(unsafe.Pointer(f)) + if inner != nil { + runtime.SetFinalizer((*byte)(inner), nil) + } +} + // File mirrors the union of FileIO + the buffer + TextIOWrapper. The // read/write side is decided at open time and does not change; mixing // '+' modes wires both rd and wr at construction. wr is io.Writer @@ -51,6 +72,16 @@ type File struct { f *os.File rd *bufio.Reader wr io.Writer + + // noCloseFd marks a borrowed descriptor (the standard streams wrap + // os.Stdout/os.Stderr or an inherited pipe). fileno() still reports + // f's fd, but Close() must not close it: tearing down the sys.stdout + // wrapper, or letting a transient wrapper be collected, must leave the + // process's real fd 1/2 open. Mirrors CPython opening the std streams + // with closefd=False. + // + // CPython: Modules/_io/fileio.c:399 _io_FileIO___init___impl (closefd) + noCloseFd bool } // FileType is the type singleton for File. CPython exposes three or @@ -115,6 +146,21 @@ func NewWriterFile(w io.Writer, name, mode string) *File { errors: "strict", wr: w, } + // A caller-supplied writer that is really an *os.File (the normal CLI + // case, where sys.stdout/stderr wrap os.Stdout/os.Stderr, and the + // subprocess case, where they wrap an inherited pipe fd) keeps a live + // descriptor. Record it so fileno() returns the real fd, matching + // CPython's fd-backed standard streams. Writes still pass straight + // through w (no bufio layer) so output ordering is unchanged; only a + // non-fd writer such as a test bytes.Buffer leaves f nil and makes + // fileno() raise io.UnsupportedOperation, as CPython does for a + // stream with no underlying descriptor. + // + // CPython: Python/sysmodule.c:3795 sys_init_streams (fd-backed FileIO) + if osf, ok := w.(*os.File); ok { + fi.f = osf + fi.noCloseFd = true + } fi.init(FileType) return fi } @@ -277,7 +323,7 @@ func (fi *File) Close() error { firstErr = ioErr(err) } } - if fi.f != nil { + if fi.f != nil && !fi.noCloseFd { if err := fi.f.Close(); err != nil && firstErr == nil { firstErr = ioErr(err) } @@ -397,7 +443,14 @@ func fileGetattr(o Object, name Object) (Object, error) { if fn := fileMethod(fi, n.v); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '%s'", FileType.Name, n.v) + // Anything the custom table does not handle (dunders such as + // __class__, __hash__, __eq__, the rich-compare set) resolves through + // the type the way PyObject_GenericGetAttr walks the MRO. Without this + // fallback, isinstance()/abc.__instancecheck__ probes against a file + // object raise spuriously because __class__ is missing. + // + // CPython: Objects/object.c:1430 _PyObject_GenericGetAttrWithDict + return GenericGetAttr(o, name) } // fileSetattr supports a single mutable attribute today: the mode diff --git a/objects/filter.go b/objects/filter.go index c263d7be8..dba61d8ab 100644 --- a/objects/filter.go +++ b/objects/filter.go @@ -28,6 +28,8 @@ var FilterType = NewType("filter", []*Type{objectType}) func init() { FilterType.Iter = SelfIter FilterType.IterNext = filterNext + FilterType.Dealloc = filterDealloc + FilterType.TpTraverse = filterTraverse AddIterSlotWrappers(FilterType) SetTypeDescr(FilterType, "__reduce__", NewMethodDescrConv(FilterType, "__reduce__", MethNoArgs, filterReduce)) } @@ -42,11 +44,56 @@ func NewFilter(fn, iterable Object) (*Filter, error) { if err != nil { return nil, err } + // The filter owns a counted reference to its predicate. it already + // carries the owned reference Iter() returned. Without owning fn, a + // bound-method predicate handed straight into filter() drops to + // refcount zero when the call returns, its dealloc decrefs imSelf, and + // a mid-iteration collection reaps an instance the loop still walks. + // + // CPython: Python/bltinmodule.c:501 filter_new (Py_INCREF(func)) + Incref(fn) f := &Filter{Func: fn, It: it} f.init(FilterType) return f, nil } +// filterDealloc releases the owned predicate and source-iterator +// references. Mirrors filter_dealloc's Py_XDECREF pair. +// +// CPython: Python/bltinmodule.c:540 filter_dealloc +func filterDealloc(o Object) { + f, ok := o.(*Filter) + if !ok { + return + } + if f.Func != nil { + Decref(f.Func) + } + if f.It != nil { + Decref(f.It) + } +} + +// filterTraverse lets the cyclic collector trace through the predicate +// and the source iterator. Mirrors filter_traverse. +// +// CPython: Python/bltinmodule.c:552 filter_traverse +func filterTraverse(o Object, visit Visitor) error { + f, ok := o.(*Filter) + if !ok { + return nil + } + if f.Func != nil { + if err := visit(f.Func); err != nil { + return err + } + } + if f.It != nil { + return visit(f.It) + } + return nil +} + // filterNext advances the iterator until the predicate accepts a // value or the source iterator is exhausted. // diff --git a/objects/frame.go b/objects/frame.go index f3471a90f..3d5a0229c 100644 --- a/objects/frame.go +++ b/objects/frame.go @@ -212,6 +212,12 @@ var frameType = NewType("frame", []*Type{objectType}) func init() { frameType.Repr = frameRepr + // Frame objects define no tp_richcompare, so they keep object's + // identity-based hash and can be used as dict keys (pdb keys its + // display dict on the current frame). + // + // CPython: Objects/frameobject.c:1238 PyFrame_Type (tp_hash = 0 inherits object's) + frameType.Hash = identityHash frameType.TpTraverse = frameTraverse frameType.Getattro = frameGetAttr frameType.Setattro = frameSetAttr @@ -455,7 +461,16 @@ func frameSetAttr(o Object, name Object, v Object) error { f.SetTraceLines(v == True()) return nil case "f_trace_opcodes": - f.SetTraceOpcodes(v == True()) + // CPython: Objects/frameobject.c:1140 frame_trace_opcodes_set_impl. + // The wrapper bool records the request; the hook mirrors it onto + // the activation record and, when a trace function is installed + // (enable) or unconditionally (disable), arms / tears down the + // opcode instrumentation on the live frame so events fire on the + // current instruction rather than only after the next RESUME. + f.traceOpcodes = v == True() + if h := SetOpcodeTraceHook; h != nil { + return h(f, f.traceOpcodes) + } return nil } return fmt.Errorf("AttributeError: 'frame' object attribute %q is read-only", n.v) diff --git a/objects/function.go b/objects/function.go index 1ee2e71c3..f00c3a3ca 100644 --- a/objects/function.go +++ b/objects/function.go @@ -738,6 +738,16 @@ func funcGetAttr(o Object, name Object) (Object, error) { if fn.Dict != nil { v, err := fn.Dict.GetItem(name) if err == nil && v != nil { + // func_getattro reads the attribute out of the function's + // __dict__ through PyDict_GetItemWithError and then Py_XINCREFs + // it before returning, so the caller owns the reference. Without + // the Incref the caller's arg-drop decrefs a value still held by + // __dict__ toward zero; a stored list then gets emptied by + // list_dealloc, so a second read of the same attribute sees an + // empty list (this is what drained mock's patched.patchings). + // + // CPython: Objects/funcobject.c:705 func_getattro (Py_XINCREF) + Incref(v) return v, nil } } @@ -885,6 +895,20 @@ func newFunction(name string, code *Code, globals Object, qualname string) (*Fun f.Builtins = CurrentBuiltinsHook() } f.init(FunctionType) + // A function holds its globals (and closure, defaults, annotations), + // so it can sit on a reference cycle and, more importantly, it is the + // only path from a class method back to the module that defined it + // (func.__globals__). CPython gives PyFunction_Type Py_TPFLAGS_HAVE_GC + // and func_traverse for exactly this; without tracking, a module + // reachable only through a live class's method globals (a source + // re-import of importlib._bootstrap held by a test class, say) + // collapses under the cycle collector and its singletons are reclaimed + // while still live. + // + // CPython: Objects/funcobject.c:582 func_traverse (PyFunction_Type tp_traverse) + if h := GCTrackHook; h != nil { + h(f) + } return f, nil } diff --git a/objects/function_builtin.go b/objects/function_builtin.go index 0b0ce57f2..2e411e7bc 100644 --- a/objects/function_builtin.go +++ b/objects/function_builtin.go @@ -73,6 +73,16 @@ type BuiltinFunction struct { // CPython: Objects/descrobject.c:230 method_get (PyCMethod_New) boundDescr *MethodDescr + // methOrigin is the stable descriptor a builtin method was minted from + // when no *MethodDescr drives its call path: classmethod_get binds a + // classmethod_descriptor (PyCMethod_New) into a builtin_function_or_method + // whose m_ml is the descriptor's PyMethodDef, shared across every binding. + // methFuncIdentical / builtinFunctionHash use it as the m_ml proxy so + // int.from_bytes == int.from_bytes even though the bindings are distinct. + // + // CPython: Objects/descrobject.c:95 classmethod_get (a->m_ml == b->m_ml) + methOrigin Object + // kwParams, when non-nil, names every keyword the Argument Clinic // signature accepts. builtinFunctionVectorcall runs the AC // extraneous-keyword scan over the original kwnames objects before @@ -338,6 +348,9 @@ func methFuncIdentical(a, b *BuiltinFunction) bool { if a.boundDescr != nil || b.boundDescr != nil { return a.boundDescr == b.boundDescr } + if a.methOrigin != nil || b.methOrigin != nil { + return a.methOrigin == b.methOrigin + } return a == b } @@ -382,9 +395,12 @@ func builtinFunctionHash(o Object) (int64, error) { } var y int64 var err error - if bf.boundDescr != nil { + switch { + case bf.boundDescr != nil: y, err = identityHash(bf.boundDescr) - } else { + case bf.methOrigin != nil: + y, err = identityHash(bf.methOrigin) + default: y, err = identityHash(bf) } if err != nil { diff --git a/objects/instance.go b/objects/instance.go index 23866c26b..0433d078a 100644 --- a/objects/instance.go +++ b/objects/instance.go @@ -86,6 +86,16 @@ var WriteUnraisableHook func(obj Object, errMsg string, err error) // CPython: Python/_warnings.c:1573 _PyErr_WarnUnawaitedCoroutine var WarnUnawaitedCoroutineHook func(coro Object) +// SetOpcodeTraceHook toggles per-instruction (opcode) tracing on a +// frame's code object so the legacy sys.settrace bridge fires +// PyTrace_OPCODE events for that frame. bdb / pdb sets +// frame.f_trace_opcodes mid-execution, so the hook must also +// re-instrument the live call chain immediately. objects/ stays +// independent of vm via this indirection. +// +// CPython: Python/ceval.c:_PyEval_SetOpcodeTrace +var SetOpcodeTraceHook func(f *Frame, enable bool) error + // WarnUnawaitedAgenMethodHook routes a never-awaited async-generator // asend/athrow/aclose awaitable through warnings so the consumer sees a // RuntimeWarning of the form "coroutine method 'asend' of '' @@ -306,6 +316,26 @@ func instanceDealloc(o Object) { } } +// instanceClear is the tp_clear slot for pure user-class instances. The +// cycle collector calls it from delete_garbage once the instance is +// proven unreachable, releasing the references its __dict__ holds so a +// cycle that runs through instance attributes is broken and the held +// values become collectible. A dict handed to Python (dictExposed) may +// be aliased by a live mapping object, so it is left to its own owner; +// only the sole-owner case is cleared, matching the dealloc path's old +// guard but firing from the collector instead of from refcount zero. +// +// CPython: Objects/typeobject.c:1411 subtype_clear +func instanceClear(o Object) { + inst, ok := o.(*Instance) + if !ok { + return + } + if inst.dict != nil && !inst.dictExposed { + ClearOwnedContents(inst.dict) + } +} + // instanceTraverse visits every Object reachable from a user-class // instance: each non-nil slot value plus the per-instance __dict__. // The cycle collector calls this through Type.TpTraverse to detect @@ -335,6 +365,21 @@ func instanceTraverse(o Object, visit Visitor) error { return err } } + // For a heap type, the instance holds a counted reference to its + // type (NewInstance increfs it, instanceDealloc releases it), so + // subtype_traverse visits the type. Without this edge the collector + // cannot see the instance->type link, and a class that is only kept + // alive by its own instances (a cycle through a method's __globals__, + // say) never collapses to unreachable. Static types are immortal and + // not gc-tracked, so visiting them is a harmless no-op; restrict to + // heap types to mirror CPython exactly. + // + // CPython: Objects/typeobject.c:1356 subtype_traverse (Py_VISIT(type)) + if t := i.Type(); t != nil && t.IsUser { + if err := visit(t); err != nil { + return err + } + } return nil } diff --git a/objects/list.go b/objects/list.go index cc937f4cf..7d8a82888 100644 --- a/objects/list.go +++ b/objects/list.go @@ -310,7 +310,8 @@ func listGetItem(o Object, i int) (Object, error) { i += len(l.items) } if i < 0 || i >= len(l.items) { - return nil, errIndexOutOfRange + // CPython: Objects/listobject.c:469 list_item ("list index out of range") + return nil, errors.New("IndexError: list index out of range") } return l.items[i], nil } @@ -498,7 +499,8 @@ func listSetItem(o Object, i int, v Object) error { i += len(l.items) } if i < 0 || i >= len(l.items) { - return errIndexOutOfRange + // CPython: Objects/listobject.c:3041 list_ass_item ("list assignment index out of range") + return errors.New("IndexError: list assignment index out of range") } // Route through SetItem so the displaced item is decreffed and the new // value increfed (list_ass_item's Py_SETREF), keeping the per-item @@ -564,7 +566,8 @@ func listDelIndex(l *List, i int) error { i += len(l.items) } if i < 0 || i >= len(l.items) { - return errIndexOutOfRange + // CPython: Objects/listobject.c:3041 list_ass_item ("list assignment index out of range") + return errors.New("IndexError: list assignment index out of range") } // Release the list's reference on the removed item. // diff --git a/objects/map.go b/objects/map.go index c034951a8..9152da8e4 100644 --- a/objects/map.go +++ b/objects/map.go @@ -33,6 +33,8 @@ var MapType = NewType("map", []*Type{objectType}) func init() { MapType.Iter = SelfIter MapType.IterNext = mapNext + MapType.Dealloc = mapDealloc + MapType.TpTraverse = mapTraverse AddIterSlotWrappers(MapType) SetTypeDescr(MapType, "__reduce__", NewMethodDescrConv(MapType, "__reduce__", MethNoArgs, mapReduce)) SetTypeDescr(MapType, "__setstate__", NewMethodDescrConv(MapType, "__setstate__", MethO, mapSetstate)) @@ -54,11 +56,69 @@ func NewMap(fn Object, iterables []Object, strict bool) (*Map, error) { } iters[i] = it } + // The map owns a counted reference to its function so a bound method + // or lambda handed straight into map() survives until the map is + // exhausted, even after the caller's stack slot is released. Each + // entry of iters already carries the owned reference Iter() returned, + // so only the function needs an explicit incref here. Without this the + // function (often a bound method whose imSelf is the only live handle + // on an instance) drops to refcount zero when map() returns, its + // dealloc decrefs that instance, and a mid-iteration collection then + // reaps an object the loop is still walking. + // + // CPython: Python/bltinmodule.c:1361 map_new (Py_INCREF(func); + // the iters PyTuple owns its entries) + Incref(fn) m := &Map{Func: fn, Iters: iters, Strict: strict} m.init(MapType) return m, nil } +// mapDealloc releases the owned references on the function and every +// source iterator. Mirrors map_dealloc's Py_XDECREF(lz->func) plus the +// iters-tuple release. +// +// CPython: Python/bltinmodule.c:1404 map_dealloc +func mapDealloc(o Object) { + m, ok := o.(*Map) + if !ok { + return + } + if m.Func != nil { + Decref(m.Func) + } + for _, it := range m.Iters { + if it != nil { + Decref(it) + } + } +} + +// mapTraverse lets the cyclic collector trace through the function and +// the source iterators. Mirrors map_traverse. +// +// CPython: Python/bltinmodule.c:1416 map_traverse +func mapTraverse(o Object, visit Visitor) error { + m, ok := o.(*Map) + if !ok { + return nil + } + if m.Func != nil { + if err := visit(m.Func); err != nil { + return err + } + } + for _, it := range m.Iters { + if it == nil { + continue + } + if err := visit(it); err != nil { + return err + } + } + return nil +} + // mapNext pulls one value from each source iterator and forwards // them to the function as a flat positional call. In strict mode an // uneven set of source lengths is reported as ValueError naming the diff --git a/objects/method.go b/objects/method.go index 5ba66ec48..8f97e70a3 100644 --- a/objects/method.go +++ b/objects/method.go @@ -73,11 +73,19 @@ func init() { // member descriptors with CPython's docstrings. // // CPython: Objects/classobject.c:114 method_memberlist + // method_memberlist exposes __func__/__self__ as T_OBJECT members, and + // member_get does Py_XINCREF before handing the value back: the result + // is a new reference the caller owns. Returning the bare field would + // leak a borrow the VM later decrefs, driving the bound instance below + // its true refcount (so a method held only via a container, e.g. an + // ExitStack callback, gets torn down while still reachable). + // + // CPython: Objects/descrobject.c member_get (Py_XINCREF) SetTypeDescr(BoundMethodType, "__func__", NewGetSetDescr("__func__", - func(o Object) (Object, error) { return o.(*BoundMethod).imFunc, nil }, + func(o Object) (Object, error) { v := o.(*BoundMethod).imFunc; Incref(v); return v, nil }, nil)) SetTypeDescr(BoundMethodType, "__self__", NewGetSetDescr("__self__", - func(o Object) (Object, error) { return o.(*BoundMethod).imSelf, nil }, + func(o Object) (Object, error) { v := o.(*BoundMethod).imSelf; Incref(v); return v, nil }, nil)) // method_getset: __doc__ proxies to the wrapped function so a // bound method shows its target's docstring. diff --git a/objects/module.go b/objects/module.go index b2323ae70..ac02ba570 100644 --- a/objects/module.go +++ b/objects/module.go @@ -15,6 +15,10 @@ type Module struct { Header dict *Dict state any // per-module state for Go-implemented modules + // dictReleased records that tp_clear or tp_dealloc has already dropped + // the module's reference to md_dict, so the second of the two (the + // collector clears then the last Decref deallocs) does not double-drop. + dictReleased bool // Initializing is true while the module body is executing. When set, // moduleGetAnnotations does not cache its result so that circular // imports see the annotations that existed at the point of access @@ -41,6 +45,16 @@ func init() { // // CPython: Objects/moduleobject.c:1416 PyModule_Type (tp_dictoffset set) ModuleType.HasDict = true + // PyModule_Type ships a __dict__ getset in module_getset. A bare module + // answers __dict__ through moduleGetattr, but a ModuleType subclass that + // reaches __dict__ via the generic path (importlib.util._LazyModule does + // `object.__getattribute__(self, '__dict__')`) needs the descriptor in + // the MRO. type_new_descriptors skips installing one on the subclass + // because the dict slot is inherited, so the descriptor must live on + // ModuleType itself. + // + // CPython: Objects/moduleobject.c:728 module_getset (__dict__ getset) + installInstanceDictDescr(ModuleType) // A module also carries md_weaklist (a non-zero tp_weaklistoffset), so // a subclass inherits the weakref slot rather than adding its own. This // keeps a ModuleType subclass layout-compatible with module, which @@ -50,6 +64,27 @@ func init() { ModuleType.HasWeakref = true ModuleType.Repr = moduleRepr ModuleType.Str = moduleRepr + // A module owns its md_dict, so the cycle collector must follow that + // edge: a module whose __dict__ holds functions whose __globals__ is + // that same dict forms a reference cycle (the common case for any + // executed module). Without tp_traverse the collector treats md_dict + // as externally rooted and never reclaims the cycle, so __del__ of a + // cyclic object defined in the module body would never run. + // + // CPython: Objects/moduleobject.c:739 module_traverse + ModuleType.TpTraverse = moduleTraverse + // module_clear releases md_dict so a module caught in a reference cycle + // (its __dict__ holds a function whose __globals__ is that same dict) + // can be torn down by the collector. module_dealloc drops the same + // reference on the last refcount, which is the path `del m` takes: the + // module is not itself part of the cycle, so it reaches refcount zero + // directly, and without dealloc its +1 on md_dict would stay live and + // pin the whole {dict, classes, functions} cycle as externally rooted. + // + // CPython: Objects/moduleobject.c:737 module_clear + // CPython: Objects/moduleobject.c:752 module_dealloc + ModuleType.TpClear = moduleClear + ModuleType.Dealloc = moduleDealloc // Modules are hashable by identity in CPython (tp_hash = PyObject_GenericHash). // CPython: Objects/moduleobject.c:766 PyModule_Type (tp_hash not overridden → id-based) ModuleType.Hash = IdentityHash @@ -62,6 +97,9 @@ func init() { ModuleType.TpNew = func(cls *Type, args []Object, kwargs map[string]Object) (Object, error) { m := &Module{dict: NewDict()} m.init(cls) + if h := GCTrackHook; h != nil { + h(m) + } return m, nil } @@ -162,6 +200,9 @@ func NewModule(name string) *Module { m := &Module{dict: NewDict()} m.init(ModuleType) _ = m.dict.SetItem(NewStr("__name__"), NewStr(name)) + if h := GCTrackHook; h != nil { + h(m) + } return m } @@ -180,9 +221,71 @@ func NewModuleWithDict(name string, d *Dict) *Module { if has, _ := d.Contains(NewStr("__name__")); !has { _ = d.SetItem(NewStr("__name__"), NewStr(name)) } + if h := GCTrackHook; h != nil { + h(m) + } return m } +// moduleTraverse visits the module's __dict__ (md_dict) and per-module +// state so the cycle collector can account for the references a module +// holds. CPython's module_traverse also visits md_dict. +// +// CPython: Objects/moduleobject.c:739 module_traverse +func moduleTraverse(o Object, visit Visitor) error { + m := o.(*Module) + if m.dict != nil { + if err := visit(m.dict); err != nil { + return err + } + } + return nil +} + +// moduleClear is the tp_clear slot. It releases md_dict so a module +// trapped in a reference cycle becomes collectible once the collector +// proves it unreachable. +// +// CPython: Objects/moduleobject.c:737 module_clear +func moduleClear(o Object) { + m, ok := o.(*Module) + if !ok || m.dictReleased { + return + } + // Drop the reference but keep the pointer: gopy leaves refcount-zero + // objects on the Go heap, so a stray borrowed reference that still + // reaches this module after teardown reads a valid (if logically dead) + // namespace rather than dereferencing nil. Py_CLEAR nulls md_dict + // because CPython frees the storage immediately; gopy cannot. + // + // CPython: Objects/moduleobject.c:737 module_clear (Py_CLEAR md_dict) + if m.dict != nil { + m.dictReleased = true + Decref(m.dict) + } +} + +// moduleDealloc is the tp_dealloc slot. It untracks the module from the +// collector and drops its reference to md_dict. `del m` on a module that +// is not itself part of a cycle takes this path; releasing md_dict here +// removes the external +1 that would otherwise keep a module-body cycle +// (functions whose __globals__ is md_dict) from ever collapsing. +// +// CPython: Objects/moduleobject.c:752 module_dealloc +func moduleDealloc(o Object) { + m, ok := o.(*Module) + if !ok { + return + } + if h := GCUntrackHook; h != nil { + h(m) + } + if m.dict != nil && !m.dictReleased { + m.dictReleased = true + Decref(m.dict) + } +} + // Dict returns the module's attribute dict (__dict__). // // CPython: Objects/moduleobject.c:459 PyModule_GetDict @@ -253,6 +356,16 @@ func (m *Module) State() any { return m.state } // CPython: Objects/moduleobject.c:486 PyModule_SetState (gopy analog) func (m *Module) SetState(s any) { m.state = s } +// ModuleAttrErrorHook, when set, builds the AttributeError raised for a +// module attribute miss. The import system (package imp) installs it so +// the message can surface the stdlib-shadowing and circular-import hints +// from _Py_module_getattro_impl, which depend on sys state the objects +// package cannot reach directly. Nil in unit tests that exercise objects +// in isolation; module.go then falls back to the plain message. +// +// CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl +var ModuleAttrErrorHook func(m *Module, name string) error + // moduleGetattr implements __getattr__ for module objects. It checks // __dict__ first, then falls back to the PEP 562 __getattr__ callable // stored in __dict__ under "__getattr__". @@ -335,9 +448,16 @@ func moduleGetattr(o Object, name Object) (Object, error) { if gaErr == nil { return callOneArg(gaObj, name) } - // Best-effort error message mirroring module_getattro's tail. + // Best-effort error message mirroring module_getattro's tail. The + // import system registers ModuleAttrErrorHook to surface the + // stdlib-shadowing and circular-import hints (_Py_module_getattro_impl), + // which need sys.path / sys.flags / sys.stdlib_module_names access the + // objects package cannot reach without an import cycle. // - // CPython: Objects/moduleobject.c:1042 PyErr_Format module has no attribute + // CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl (error tail) + if ModuleAttrErrorHook != nil { + return nil, ModuleAttrErrorHook(m, key) + } if modName := moduleStrAttr(m, "__name__"); modName != "" { return nil, fmt.Errorf("AttributeError: module '%s' has no attribute '%s'", modName, key) } @@ -418,31 +538,42 @@ func moduleSetattr(o Object, name, value Object) error { return m.dict.SetItem(name, value) } -// moduleRepr returns the canonical module repr. -// Four forms mirror CPython: -// - when __file__ is set -// - when __spec__.origin == 'built-in' -// - when __spec__.origin == 'frozen' -// - otherwise +// moduleRepr returns a module's repr by forwarding to the vendored +// importlib._bootstrap._module_repr, exactly as CPython's C module_repr +// delegates through _PyImport_ImportlibModuleRepr. The Python +// implementation handles the __spec__, __loader__ and __file__ variants +// (including namespace packages and the '?' name fallback) so the +// rendering matches CPython byte-for-byte. +// +// During early bootstrap (before vm.init wires the hook) the importlib +// machinery is not yet usable, so fall back to a minimal Go rendering +// that mirrors the catch-all branch of _module_repr. // -// CPython: Objects/moduleobject.c:228 module_repr +// CPython: Objects/moduleobject.c:848 module_repr +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr func moduleRepr(o Object) (string, error) { + if ModuleReprHook != nil { + return ModuleReprHook(o) + } + return ModuleReprFallback(o) +} + +// ModuleReprFallback renders the catch-all branch of +// importlib._bootstrap._module_repr without importing anything, for use +// before the import machinery is available. +// +// CPython: Lib/importlib/_bootstrap.py:544 _module_repr +func ModuleReprFallback(o Object) (string, error) { m := o.(*Module) - name := moduleStrAttr(m, "__name__") + name := "?" + if n, err := m.dict.GetItem(NewStr("__name__")); err == nil && n != nil { + if s, ok := n.(*Unicode); ok { + name = s.v + } + } if file := moduleStrAttr(m, "__file__"); file != "" { return fmt.Sprintf("", name, file), nil } - if spec, err := m.dict.GetItem(NewStr("__spec__")); err == nil && spec != nil { - if sm, ok := spec.(*Module); ok { - origin := moduleStrAttr(sm, "origin") - if origin == "built-in" { - return fmt.Sprintf("", name), nil - } - if origin == "frozen" { - return fmt.Sprintf("", name), nil - } - } - } return fmt.Sprintf("", name), nil } diff --git a/objects/object.go b/objects/object.go index 7e75b664f..2fd2dc69b 100644 --- a/objects/object.go +++ b/objects/object.go @@ -38,6 +38,17 @@ func init() { objectType.Repr = objectRepr objectType.Str = objectStr objectType.Hash = identityHash + // tp_getattro / tp_setattro. PyBaseObject_Type wires both to the + // generic implementations, so every type that does not override them + // inherits GenericGetAttr / GenericSetAttr. A plain object() therefore + // raises AttributeError ("'object' object has no attribute %r and no + // __dict__ for setting new attributes") on attribute assignment, not a + // bare TypeError. + // + // CPython: Objects/typeobject.c:7970 PyBaseObject_Type (tp_getattro = + // PyObject_GenericGetAttr, tp_setattro = PyObject_GenericSetAttr) + objectType.Getattro = GenericGetAttr + objectType.Setattro = GenericSetAttr // object_methods table. // @@ -986,6 +997,28 @@ func objectGetWeakref(o Object) (Object, error) { func objectGetDict(o Object) (Object, error) { switch v := o.(type) { + case *Module: + // A module always carries md_dict, even a user subclass of + // ModuleType that never sets tp_dictoffset (HasDict false). The + // generic object.__getattribute__ path reaches here for + // `object.__getattribute__(mod, '__dict__')` (importlib's + // _LazyModule does exactly this), so return md_dict directly + // rather than gating on HasDict like the AttrDictHolder arm below. + // + // CPython: Objects/moduleobject.c module_dict getset (md_dict) + // + // PyObject_GenericGetDict hands back a new reference; the eval + // loop treats a getset getter's result as owned and decrefs it, + // so a borrowed md_dict here would drive a live module's + // namespace toward refcount zero and defeat the cycle collector + // (the module __dict__ <-> class-method __globals__ cycle never + // loses the under-counted reference, so __del__ never fires). + // + // CPython: Objects/object.c:1226 _PyObject_GenericGetDict + // (Py_XINCREF(dict) before return) + d := v.Dict() + Incref(d) + return d, nil case *Instance: if v.dict == nil { if !v.Type().HasDict { @@ -1005,6 +1038,19 @@ func objectGetDict(o Object) (Object, error) { // managed dict over the inline values, leaving them to be detached // in _PyObject_FreeInstanceAttributes at dealloc. v.dictExposed = true + // Handing the dict to Python code drops the inline-values fast + // path: code can now store straight into the mapping (e.g. + // vars(self).update(...)) without routing through instanceSetAttr, + // so gopy can no longer keep the type's cached keys in sync. CPython + // materializes a combined dict here and clears values->valid, which + // deopts the LOAD_ATTR_*_WITH_VALUES arms; mirror that by flipping + // inlineValid so a class attribute can no longer be served from the + // cache while a direct instance store shadows it. + // + // CPython: Objects/dictobject.c:6857 make_dict_from_instance_attributes + // (PyDictValues stops being valid once the dict is built) + v.inlineValid = false + Incref(v.dict) return v.dict, nil case *Int: // The builtin int type has no tp_dictoffset, so (42).__dict__ @@ -1016,6 +1062,7 @@ func objectGetDict(o Object) (Object, error) { if v.attrs == nil { v.attrs = NewDict() } + Incref(v.attrs) return v.attrs, nil case *Unicode: if !v.Type().HasDict { @@ -1024,6 +1071,7 @@ func objectGetDict(o Object) (Object, error) { if v.attrs == nil { v.attrs = NewDict() } + Incref(v.attrs) return v.attrs, nil case AttrDictHolder: // Subclasses of C-port types (list, bytearray, ...) carry their @@ -1035,7 +1083,9 @@ func objectGetDict(o Object) (Object, error) { if !o.Type().HasDict { return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '__dict__'", o.Type().Name) } - return v.EnsureAttrDict(), nil + d := v.EnsureAttrDict() + Incref(d) + return d, nil } return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '__dict__'", o.Type().Name) } diff --git a/objects/seqiter.go b/objects/seqiter.go index 466e2f806..dacf5d1d3 100644 --- a/objects/seqiter.go +++ b/objects/seqiter.go @@ -70,6 +70,24 @@ var IsStopIterationHook func(error) bool // CPython: Python/errors.c:488 _PyErr_Clear var ClearCurrentExceptionHook func() +// SaveRaisedExceptionHook detaches and returns the thread-state's +// current raised exception (nil when none). RestoreRaisedExceptionHook +// reinstalls it. slotTpFinalize brackets a __del__ call with this pair +// so a finalizer that runs while the interpreter is mid-unwind (gopy +// fires __del__ synchronously from a Decref, which can land inside +// handle_exception while it is still building the traceback) cannot +// clear or overwrite the exception being propagated. Installed from vm. +// +// CPython: Objects/typeobject.c:9883 slot_tp_finalize +// (PyErr_GetRaisedException / PyErr_SetRaisedException bracket) +var SaveRaisedExceptionHook func() Object + +// RestoreRaisedExceptionHook reinstalls an exception previously +// detached by SaveRaisedExceptionHook. Passing nil clears the slot. +// +// CPython: Objects/typeobject.c:9883 slot_tp_finalize +var RestoreRaisedExceptionHook func(Object) + // ClearCurrentExceptionIfIterStopHook drops the thread-state's current // exception only when it is an IndexError or StopIteration, the two // exception types iteration legitimately swallows: iter_iternext clears diff --git a/objects/slot_wrap_descr.go b/objects/slot_wrap_descr.go index 76c088ad7..8158f4b4f 100644 --- a/objects/slot_wrap_descr.go +++ b/objects/slot_wrap_descr.go @@ -22,6 +22,17 @@ import "fmt" // already-defined-wins behavior. // // CPython: Objects/typeobject.c add_operators +// AddDescriptorSlotWrappers is the exported entry point for types +// defined in other packages (e.g. module/_functools) so their +// tp_descr_get / tp_descr_set surface a __get__ / __set__ / __delete__ +// wrapper, matching add_operators. Without it inspect.ismethoddescriptor +// (which probes hasattr(type, '__get__')) misclassifies the type. +// +// CPython: Objects/typeobject.c add_operators +func AddDescriptorSlotWrappers(t *Type) { + addDescriptorSlotWrappers(t) +} + func addDescriptorSlotWrappers(t *Type) { if t == nil { return diff --git a/objects/type.go b/objects/type.go index b7d216d46..00c4fbcb4 100644 --- a/objects/type.go +++ b/objects/type.go @@ -193,6 +193,18 @@ type Type struct { // CPython: Include/cpython/object.h tp_traverse TpTraverse func(o Object, visit Visitor) error + // TpClear mirrors tp_clear. The cycle collector's delete_garbage step + // calls it on each object it has proven unreachable, dropping the + // references the object holds so reference cycles break and the held + // values become collectible. It runs ONLY from the collector (which + // has confirmed unreachability), never from the eager refcount-zero + // dealloc path, so an object the VM has merely under-counted to zero + // while still live is never cleared out from under its users. + // + // CPython: Include/cpython/object.h tp_clear + // CPython: Python/gc.c:1198 delete_garbage (tp_clear call) + TpClear func(o Object) + Number *NumberMethods Sequence *SequenceMethods Mapping *MappingMethods @@ -420,6 +432,24 @@ type GCRoot interface { // CPython: Python/gc.c:1430 gc_collect_main (tstate->current_frame roots) var GCExecutingRootsHook func(pin func(Object)) +// GCStaticRootsHook reports the interpreter-lifetime singletons the +// cycle collector must treat as roots. CPython keeps sys.modules alive +// through interp->modules, a C reference held in the interpreter struct +// that gc_collect_main never collects; everything strongly reachable +// from a module is therefore reachable from a root and survives until +// interpreter shutdown. gopy holds the sys.modules dict through a plain +// Go pointer that the refcount-based collector cannot see, so the whole +// module graph collapses to gc_refs == 0 under subtract_refs. A module +// global whose only strong reference is its module __dict__ (for +// example _frozen_importlib._blocking_on, a self-cyclic +// _WeakValueDictionary) would then be reclaimed out from under live +// import machinery. This hook reports the sys.modules dict so the +// collector re-floats it; move_unreachable's visit_reachable then pulls +// in the strongly-reachable closure. nil until imp initializes. +// +// CPython: Python/gc.c:1430 gc_collect_main (interp->modules stays reachable) +var GCStaticRootsHook func(pin func(Object)) + // TpFlag values used by MATCH_MAPPING and MATCH_SEQUENCE. // // CPython: Include/object.h:L284 Py_TPFLAGS_MAPPING / Py_TPFLAGS_SEQUENCE @@ -639,6 +669,14 @@ func init() { // // CPython: Objects/typeobject.c:352 _PyStaticType_InitBuiltin typeType.MakeImmortal() + // type is a gc container: a heap type holds its methods, class + // attributes and lineage, so the cycle collector must be able to + // subtract those references. CPython gives PyType_Type a tp_traverse + // (type_traverse); gopy installs the metatype traverse here so a + // gc-tracked user class participates in cycle detection. + // + // CPython: Objects/typeobject.c:1227 type_traverse + typeType.TpTraverse = typeTraverse // type inherits from object. CPython: Objects/typeobject.c:6361 // PyType_Type sets tp_base = &PyBaseObject_Type, which puts object // in type's MRO so metatype lookup of __class__ / __dict__ finds diff --git a/objects/type_specialize.go b/objects/type_specialize.go index 65f76d73e..789a96a88 100644 --- a/objects/type_specialize.go +++ b/objects/type_specialize.go @@ -6,11 +6,24 @@ package objects // 32-bit version on first call. Returns 0 when the global counter // has wrapped (the specializer treats this as "give up"). // -// CPython: Python/typeobject.c:L312 _PyType_AssignVersionTag +// To respect the invariant that a type carries a valid version tag +// only when every one of its bases does, the tag is first assigned to +// all super classes. If any base cannot be assigned one (counter +// wrapped), this type gives up too. The invariant is what lets +// InvalidateVersionTag early-return on a zero tag: a base with tag 0 +// can have no subclass holding a live tag, so there is nothing cached +// to clear. +// +// CPython: Objects/typeobject.c:1344 assign_version_tag func (t *Type) VersionTag() uint32 { if t.versionTag != 0 { return t.versionTag } + for _, b := range t.Bases { + if b != nil && b.VersionTag() == 0 { + return 0 + } + } v := allocTypeVersionTag() if v == 0 { return 0 diff --git a/objects/usertype.go b/objects/usertype.go index 8638a9504..ad63955fe 100644 --- a/objects/usertype.go +++ b/objects/usertype.go @@ -276,6 +276,18 @@ func NewUserTypeMetaE(name string, bases []*Type, ns *Dict, kwargs map[string]Ob // CPython: Objects/typeobject.c:4153 type_new (heap types start at // refcount 1 from PyObject_GC_NewVar) atomic.StoreInt64(&t.Hdr().refcnt, 1) + // A heap type is a gc container: CPython gives it Py_TPFLAGS_HAVE_GC and + // type_traverse so the cycle collector can subtract a class's references + // to its methods, class attributes and bases. gopy tracks the type here + // (typeType.TpTraverse = typeTraverse does the visiting) so a class kept + // alive only by a cycle through one of its methods (a method whose + // __globals__ is the defining module, say) collapses to unreachable and + // the module dict it pins can be collected. + // + // CPython: Objects/typeobject.c:4153 type_new (heap types are PyObject_GC) + if h := GCTrackHook; h != nil { + h(t) + } stampMetaclass(t, meta) installSubclassAttrSlots(t) noSlotsDeclared := hasNoSlotsDeclared(ns) @@ -357,6 +369,17 @@ func NewUserTypeMetaE(name string, bases []*Type, ns *Dict, kwargs map[string]Ob if t.Dealloc == nil { t.Dealloc = instanceDealloc } + // subtype_clear drops the instance dict's references so the cycle + // collector's delete_garbage step can break reference cycles that run + // through instance attributes. Wire it for the same pure user classes + // that get instanceDealloc; built-in subclasses keep their inherited + // (or absent) tp_clear. Unlike the dealloc-time clear, this runs only + // after the collector has proven the instance unreachable. + // + // CPython: Objects/typeobject.c:1411 subtype_clear + if t.TpClear == nil { + t.TpClear = instanceClear + } // type_new warns once if the finished class dict carries a non-string // key (type('MyClass', (), {1: 2}) or a metaclass that injects ns[1]=2). // The namespace is the source of those keys; the special cells were @@ -673,6 +696,35 @@ func basesAllowInlineValues(bases []*Type, noSlotsDeclared bool) bool { return true } +// dropTransientDict releases the namespace copy type_new builds. When +// the release drops it to refcount zero it stands in for CPython's +// dict_dealloc: clear every key/value the copy owns (which releases the +// class methods it captured, whose __globals__ would otherwise pin the +// defining module dict) and untrack it from the cycle collector. +// +// gopy's Decref deliberately leaves a refcount-zero container tracked, +// without clearing it, so the next cycle pass can fire its weakref +// callbacks. That is wrong for this private transient: it carries no +// weakrefs, and leaving its captured method references live keeps the +// {namespace, class, instance} cycle rooted, so a Python __del__ never +// runs (test_module test_clear_dict_in_ref_cycle). Worse, when the copy +// later does get walked it counts as a reclaimed cycle member, which +// CPython never sees because its copy is freed the instant type_new +// finishes. Releasing it synchronously here matches CPython on both +// counts. +// +// CPython: Objects/dictobject.c dict_dealloc (PyObject_GC_UnTrack + key/value Py_DECREF) +func dropTransientDict(d *Dict) { + Decref(d) + if atomic.LoadInt64(&d.Hdr().refcnt) != 0 { + return + } + if h := GCUntrackHook; h != nil { + h(d) + } + d.clearContents() +} + // processClassNamespace patches __classcell__, installs __slots__ // descriptors, and copies the rest of ns onto t. func processClassNamespace(t *Type, ns *Dict) error { @@ -687,6 +739,26 @@ func processClassNamespace(t *Type, ns *Dict) error { // // CPython: Objects/typeobject.c:4612 type_new (dict = PyDict_Copy) ns = copyClassNamespace(ns) + // CPython keeps this copy as tp_dict. gopy instead installs each + // entry into the type's ClassAttrDict / typeDescrTable (SetTypeDescr + // re-increfs every stored value), so the copy is purely transient and + // must be released once its entries are installed. Leaving it at + // refcnt 1 keeps a live, untracked reference to the class methods, + // whose __globals__ is the defining module dict, which roots the whole + // {namespace, class, instance} cycle and stops gc.collect() from ever + // finalizing the instances (test_module test_clear_dict_in_ref_cycle). + // + // type_new's PyDict_Copy is freed synchronously the instant its + // refcount reaches zero, which removes it from the GC list before any + // collection can see it. gopy keeps refcount-zero containers tracked so + // the cycle collector can still fire their weakref callbacks, but this + // copy is a private transient that never escapes and carries no + // weakrefs, so a later gc.collect() would otherwise count it as a + // reclaimed cycle member. Untrack it on disposal to match CPython, where + // the copy is gone before the collector runs. + // + // CPython: Objects/dictobject.c dict_dealloc (PyObject_GC_UnTrack) + defer dropTransientDict(ns) // __classcell__ is the cell __build_class__ left in the namespace so // we can patch it with the new class. It is not a real attribute, // so install it before walking the rest of the namespace and skip @@ -2157,6 +2229,28 @@ func slotTpIterNext(o Object) (Object, error) { // CPython: Objects/typeobject.c:10585 slot_tp_finalize // CPython: Python/errors.c:1380 _PyErr_WriteUnraisable func slotTpFinalize(o Object) { + // Bracket the __del__ call with a save/restore of the thread's + // raised exception, exactly as CPython's slot_tp_finalize wraps the + // call in PyErr_GetRaisedException / PyErr_SetRaisedException. gopy + // fires __del__ synchronously from Decref, so a finalizer can run + // while the interpreter is mid-unwind (for instance handle_exception + // allocating a traceback entry triggers the cycle collector, which + // reclaims a now-dead __del__ object). Without this, a __del__ that + // raises and swallows its own exception (try: next(it) except + // StopIteration: pass) would clear the exception the outer frame is + // still propagating, leaving Occurred() nil and a typed-nil pushed + // onto the value stack. + // + // CPython: Objects/typeobject.c:9883 slot_tp_finalize + var saved Object + if h := SaveRaisedExceptionHook; h != nil { + saved = h() + } + defer func() { + if h := RestoreRaisedExceptionHook; h != nil { + h(saved) + } + }() fn, unbound, err := lookupMaybeMethod(o, "__del__") if err != nil { return diff --git a/stdlib/__hello__.py b/stdlib/__hello__.py new file mode 100644 index 000000000..c09d6a4f5 --- /dev/null +++ b/stdlib/__hello__.py @@ -0,0 +1,16 @@ +initialized = True + +class TestFrozenUtf8_1: + """\u00b6""" + +class TestFrozenUtf8_2: + """\u03c0""" + +class TestFrozenUtf8_4: + """\U0001f600""" + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/__phello__/__init__.py b/stdlib/__phello__/__init__.py new file mode 100644 index 000000000..d37bd2766 --- /dev/null +++ b/stdlib/__phello__/__init__.py @@ -0,0 +1,7 @@ +initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/__phello__/ham/__init__.py b/stdlib/__phello__/ham/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/__phello__/ham/eggs.py b/stdlib/__phello__/ham/eggs.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/__phello__/spam.py b/stdlib/__phello__/spam.py new file mode 100644 index 000000000..d37bd2766 --- /dev/null +++ b/stdlib/__phello__/spam.py @@ -0,0 +1,7 @@ +initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/_pyio.py b/stdlib/_pyio.py new file mode 100644 index 000000000..116ce4f37 --- /dev/null +++ b/stdlib/_pyio.py @@ -0,0 +1,2754 @@ +""" +Python implementation of the io module. +""" + +import os +import abc +import codecs +import errno +import stat +import sys +# Import _thread instead of threading to reduce startup cost +from _thread import allocate_lock as Lock +if sys.platform in {'win32', 'cygwin'}: + from msvcrt import setmode as _setmode +else: + _setmode = None + +import io +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401 + +valid_seek_flags = {0, 1, 2} # Hardwired values +if hasattr(os, 'SEEK_HOLE') : + valid_seek_flags.add(os.SEEK_HOLE) + valid_seek_flags.add(os.SEEK_DATA) + +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) +# when the device block size is available. +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes + +# NOTE: Base classes defined here are registered with the "official" ABCs +# defined in io.py. We don't use real inheritance though, because we don't want +# to inherit the C implementations. + +# Rebind for compatibility +BlockingIOError = BlockingIOError + +# Does open() check its 'errors' argument? +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) + + +def text_encoding(encoding, stacklevel=2): + """ + A helper function to choose the text encoding. + + When encoding is not None, this function returns it. + Otherwise, this function returns the default text encoding + (i.e. "locale" or "utf-8" depends on UTF-8 mode). + + This function emits an EncodingWarning if *encoding* is None and + sys.flags.warn_default_encoding is true. + + This can be used in APIs with an encoding=None parameter + that pass it to TextIOWrapper or open. + However, please consider using encoding="utf-8" for new APIs. + """ + if encoding is None: + if sys.flags.utf8_mode: + encoding = "utf-8" + else: + encoding = "locale" + if sys.flags.warn_default_encoding: + import warnings + warnings.warn("'encoding' argument not specified.", + EncodingWarning, stacklevel + 1) + return encoding + + +# Wrapper for builtins.open +# +# Trick so that open() won't become a bound method when stored +# as a class variable (as dbm.dumb does). +# +# See init_set_builtins_open() in Python/pylifecycle.c. +@staticmethod +def open(file, mode="r", buffering=-1, encoding=None, errors=None, + newline=None, closefd=True, opener=None): + + r"""Open file and return a stream. Raise OSError upon failure. + + file is either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened or an integer file descriptor of the file to be + wrapped. (If a file descriptor is given, it is closed when the + returned I/O object is closed, unless closefd is set to False.) + + mode is an optional string that specifies the mode in which the file is + opened. It defaults to 'r' which means open for reading in text mode. Other + common values are 'w' for writing (truncating the file if it already + exists), 'x' for exclusive creation of a new file, and 'a' for appending + (which on some Unix systems, means that all writes append to the end of the + file regardless of the current seek position). In text mode, if encoding is + not specified the encoding used is platform dependent. (For reading and + writing raw bytes use binary mode and leave encoding unspecified.) The + available modes are: + + ========= =============================================================== + Character Meaning + --------- --------------------------------------------------------------- + 'r' open for reading (default) + 'w' open for writing, truncating the file first + 'x' create a new file and open it for writing + 'a' open for writing, appending to the end of the file if it exists + 'b' binary mode + 't' text mode (default) + '+' open a disk file for updating (reading and writing) + ========= =============================================================== + + The default mode is 'rt' (open for reading text). For binary random + access, the mode 'w+b' opens and truncates the file to 0 bytes, while + 'r+b' opens the file without truncation. The 'x' mode implies 'w' and + raises an `FileExistsError` if the file already exists. + + Python distinguishes between files opened in binary and text modes, + even when the underlying operating system doesn't. Files opened in + binary mode (appending 'b' to the mode argument) return contents as + bytes objects without any decoding. In text mode (the default, or when + 't' is appended to the mode argument), the contents of the file are + returned as strings, the bytes having been first decoded using a + platform-dependent encoding or using the specified encoding if given. + + buffering is an optional integer used to set the buffering policy. + Pass 0 to switch buffering off (only allowed in binary mode), 1 to select + line buffering (only usable in text mode), and an integer > 1 to indicate + the size of a fixed-size chunk buffer. When no buffering argument is + given, the default buffering policy works as follows: + + * Binary files are buffered in fixed-size chunks; the size of the buffer + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. + + * "Interactive" text files (files for which isatty() returns True) + use line buffering. Other text files use the policy described above + for binary files. + + encoding is the str name of the encoding used to decode or encode the + file. This should only be used in text mode. The default encoding is + platform dependent, but any encoding supported by Python can be + passed. See the codecs module for the list of supported encodings. + + errors is an optional string that specifies how encoding errors are to + be handled---this argument should not be used in binary mode. Pass + 'strict' to raise a ValueError exception if there is an encoding error + (the default of None has the same effect), or pass 'ignore' to ignore + errors. (Note that ignoring encoding errors can lead to data loss.) + See the documentation for codecs.register for a list of the permitted + encoding error strings. + + newline is a string controlling how universal newlines works (it only + applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works + as follows: + + * On input, if newline is None, universal newlines mode is + enabled. Lines in the input can end in '\n', '\r', or '\r\n', and + these are translated into '\n' before being returned to the + caller. If it is '', universal newline mode is enabled, but line + endings are returned to the caller untranslated. If it has any of + the other legal values, input lines are only terminated by the given + string, and the line ending is returned to the caller untranslated. + + * On output, if newline is None, any '\n' characters written are + translated to the system default line separator, os.linesep. If + newline is '', no translation takes place. If newline is any of the + other legal values, any '\n' characters written are translated to + the given string. + + closedfd is a bool. If closefd is False, the underlying file descriptor will + be kept open when the file is closed. This does not work when a file name is + given and must be True in that case. + + The newly created file is non-inheritable. + + A custom opener can be used by passing a callable as *opener*. The + underlying file descriptor for the file object is then obtained by calling + *opener* with (*file*, *flags*). *opener* must return an open file + descriptor (passing os.open as *opener* results in functionality similar to + passing None). + + open() returns a file object whose type depends on the mode, and + through which the standard file operations such as reading and writing + are performed. When open() is used to open a file in a text mode ('w', + 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open + a file in a binary mode, the returned class varies: in read binary + mode, it returns a BufferedReader; in write binary and append binary + modes, it returns a BufferedWriter, and in read/write mode, it returns + a BufferedRandom. + + It is also possible to use a string or bytearray as a file for both + reading and writing. For strings StringIO can be used like a file + opened in a text mode, and for bytes a BytesIO can be used like a file + opened in a binary mode. + """ + if not isinstance(file, int): + file = os.fspath(file) + if not isinstance(file, (str, bytes, int)): + raise TypeError("invalid file: %r" % file) + if not isinstance(mode, str): + raise TypeError("invalid mode: %r" % mode) + if not isinstance(buffering, int): + raise TypeError("invalid buffering: %r" % buffering) + if encoding is not None and not isinstance(encoding, str): + raise TypeError("invalid encoding: %r" % encoding) + if errors is not None and not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) + modes = set(mode) + if modes - set("axrwb+t") or len(mode) > len(modes): + raise ValueError("invalid mode: %r" % mode) + creating = "x" in modes + reading = "r" in modes + writing = "w" in modes + appending = "a" in modes + updating = "+" in modes + text = "t" in modes + binary = "b" in modes + if text and binary: + raise ValueError("can't have text and binary mode at once") + if creating + reading + writing + appending > 1: + raise ValueError("can't have read/write/append mode at once") + if not (creating or reading or writing or appending): + raise ValueError("must have exactly one of read/write/append mode") + if binary and encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + if binary and errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + if binary and newline is not None: + raise ValueError("binary mode doesn't take a newline argument") + if binary and buffering == 1: + import warnings + warnings.warn("line buffering (buffering=1) isn't supported in binary " + "mode, the default buffer size will be used", + RuntimeWarning, 2) + raw = FileIO(file, + (creating and "x" or "") + + (reading and "r" or "") + + (writing and "w" or "") + + (appending and "a" or "") + + (updating and "+" or ""), + closefd, opener=opener) + result = raw + try: + line_buffering = False + if buffering == 1 or buffering < 0 and raw._isatty_open_only(): + buffering = -1 + line_buffering = True + if buffering < 0: + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) + if buffering < 0: + raise ValueError("invalid buffering size") + if buffering == 0: + if binary: + return result + raise ValueError("can't have unbuffered text I/O") + if updating: + buffer = BufferedRandom(raw, buffering) + elif creating or writing or appending: + buffer = BufferedWriter(raw, buffering) + elif reading: + buffer = BufferedReader(raw, buffering) + else: + raise ValueError("unknown mode: %r" % mode) + result = buffer + if binary: + return result + encoding = text_encoding(encoding) + text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) + result = text + text.mode = mode + return result + except: + result.close() + raise + +# Define a default pure-Python implementation for open_code() +# that does not allow hooks. Warn on first use. Defined for tests. +def _open_code_with_warning(path): + """Opens the provided file with mode ``'rb'``. This function + should be used when the intent is to treat the contents as + executable code. + + ``path`` should be an absolute path. + + When supported by the runtime, this function can be hooked + in order to allow embedders more control over code files. + This functionality is not supported on the current runtime. + """ + import warnings + warnings.warn("_pyio.open_code() may not be using hooks", + RuntimeWarning, 2) + return open(path, "rb") + +try: + open_code = io.open_code +except AttributeError: + open_code = _open_code_with_warning + + +# In normal operation, both `UnsupportedOperation`s should be bound to the +# same object. +try: + UnsupportedOperation = io.UnsupportedOperation +except AttributeError: + class UnsupportedOperation(OSError, ValueError): + pass + + +class IOBase(metaclass=abc.ABCMeta): + + """The abstract base class for all I/O classes. + + This class provides dummy implementations for many methods that + derived classes can override selectively; the default implementations + represent a file that cannot be read, written or seeked. + + Even though IOBase does not declare read or write because + their signatures will vary, implementations and clients should + consider those methods part of the interface. Also, implementations + may raise UnsupportedOperation when operations they do not support are + called. + + The basic type used for binary data read from or written to a file is + bytes. Other bytes-like objects are accepted as method arguments too. + Text I/O classes work with str data. + + Note that calling any method (even inquiries) on a closed stream is + undefined. Implementations may raise OSError in this case. + + IOBase (and its subclasses) support the iterator protocol, meaning + that an IOBase object can be iterated over yielding the lines in a + stream. + + IOBase also supports the :keyword:`with` statement. In this example, + fp is closed after the suite of the with statement is complete: + + with open('spam.txt', 'r') as fp: + fp.write('Spam and eggs!') + """ + + ### Internal ### + + def _unsupported(self, name): + """Internal: raise an OSError exception for unsupported operations.""" + raise UnsupportedOperation("%s.%s() not supported" % + (self.__class__.__name__, name)) + + ### Positioning ### + + def seek(self, pos, whence=0): + """Change stream position. + + Change the stream position to byte offset pos. Argument pos is + interpreted relative to the position indicated by whence. Values + for whence are ints: + + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + Some operating systems / file systems could provide additional values. + + Return an int indicating the new absolute position. + """ + self._unsupported("seek") + + def tell(self): + """Return an int indicating the current stream position.""" + return self.seek(0, 1) + + def truncate(self, pos=None): + """Truncate file to size bytes. + + Size defaults to the current IO position as reported by tell(). Return + the new size. + """ + self._unsupported("truncate") + + ### Flush and close ### + + def flush(self): + """Flush write buffers, if applicable. + + This is not implemented for read-only and non-blocking streams. + """ + self._checkClosed() + # XXX Should this return the number of bytes written??? + + __closed = False + + def close(self): + """Flush and close the IO object. + + This method has no effect if the file is already closed. + """ + if not self.__closed: + try: + self.flush() + finally: + self.__closed = True + + def __del__(self): + """Destructor. Calls close().""" + try: + closed = self.closed + except AttributeError: + # If getting closed fails, then the object is probably + # in an unusable state, so ignore. + return + + if closed: + return + + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() + + ### Inquiries ### + + def seekable(self): + """Return a bool indicating whether object supports random access. + + If False, seek(), tell() and truncate() will raise OSError. + This method may need to do a test seek(). + """ + return False + + def _checkSeekable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not seekable + """ + if not self.seekable(): + raise UnsupportedOperation("File or stream is not seekable." + if msg is None else msg) + + def readable(self): + """Return a bool indicating whether object was opened for reading. + + If False, read() will raise OSError. + """ + return False + + def _checkReadable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not readable + """ + if not self.readable(): + raise UnsupportedOperation("File or stream is not readable." + if msg is None else msg) + + def writable(self): + """Return a bool indicating whether object was opened for writing. + + If False, write() and truncate() will raise OSError. + """ + return False + + def _checkWritable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not writable + """ + if not self.writable(): + raise UnsupportedOperation("File or stream is not writable." + if msg is None else msg) + + @property + def closed(self): + """closed: bool. True iff the file has been closed. + + For backwards compatibility, this is a property, not a predicate. + """ + return self.__closed + + def _checkClosed(self, msg=None): + """Internal: raise a ValueError if file is closed + """ + if self.closed: + raise ValueError("I/O operation on closed file." + if msg is None else msg) + + ### Context manager ### + + def __enter__(self): # That's a forward reference + """Context management protocol. Returns self (an instance of IOBase).""" + self._checkClosed() + return self + + def __exit__(self, *args): + """Context management protocol. Calls close()""" + self.close() + + ### Lower-level APIs ### + + # XXX Should these be present even if unimplemented? + + def fileno(self): + """Returns underlying file descriptor (an int) if one exists. + + An OSError is raised if the IO object does not use a file descriptor. + """ + self._unsupported("fileno") + + def isatty(self): + """Return a bool indicating whether this is an 'interactive' stream. + + Return False if it can't be determined. + """ + self._checkClosed() + return False + + ### Readline[s] and writelines ### + + def readline(self, size=-1): + r"""Read and return a line of bytes from the stream. + + If size is specified, at most size bytes will be read. + Size should be an int. + + The line terminator is always b'\n' for binary files; for text + files, the newlines argument to open can be used to select the line + terminator(s) recognized. + """ + # For backwards compatibility, a (slowish) readline(). + if hasattr(self, "peek"): + def nreadahead(): + readahead = self.peek(1) + if not readahead: + return 1 + n = (readahead.find(b"\n") + 1) or len(readahead) + if size >= 0: + n = min(n, size) + return n + else: + def nreadahead(): + return 1 + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + res = bytearray() + while size < 0 or len(res) < size: + b = self.read(nreadahead()) + if not b: + break + res += b + if res.endswith(b"\n"): + break + return bytes(res) + + def __iter__(self): + self._checkClosed() + return self + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def readlines(self, hint=None): + """Return a list of lines from the stream. + + hint can be specified to control the number of lines read: no more + lines will be read if the total size (in bytes/characters) of all + lines so far exceeds hint. + """ + if hint is None or hint <= 0: + return list(self) + n = 0 + lines = [] + for line in self: + lines.append(line) + n += len(line) + if n >= hint: + break + return lines + + def writelines(self, lines): + """Write a list of lines to the stream. + + Line separators are not added, so it is usual for each of the lines + provided to have a line separator at the end. + """ + self._checkClosed() + for line in lines: + self.write(line) + +io.IOBase.register(IOBase) + + +class RawIOBase(IOBase): + + """Base class for raw binary I/O.""" + + # The read() method is implemented by calling readinto(); derived + # classes that want to support read() only need to implement + # readinto() as a primitive operation. In general, readinto() can be + # more efficient than read(). + + # (It would be tempting to also provide an implementation of + # readinto() in terms of read(), in case the latter is a more suitable + # primitive operation, but that would lead to nasty recursion in case + # a subclass doesn't implement either.) + + def read(self, size=-1): + """Read and return up to size bytes, where size is an int. + + Returns an empty bytes object on EOF, or None if the object is + set not to block and has no data to read. + """ + if size is None: + size = -1 + if size < 0: + return self.readall() + b = bytearray(size.__index__()) + n = self.readinto(b) + if n is None: + return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") + del b[n:] + return bytes(b) + + def readall(self): + """Read until EOF, using multiple read() call.""" + res = bytearray() + while data := self.read(DEFAULT_BUFFER_SIZE): + res += data + if res: + return bytes(res) + else: + # b'' or None + return data + + def readinto(self, b): + """Read bytes into a pre-allocated bytes-like object b. + + Returns an int representing the number of bytes read (0 for EOF), or + None if the object is set not to block and has no data to read. + """ + self._unsupported("readinto") + + def write(self, b): + """Write the given buffer to the IO stream. + + Returns the number of bytes written, which may be less than the + length of b in bytes. + """ + self._unsupported("write") + +io.RawIOBase.register(RawIOBase) + + +class BufferedIOBase(IOBase): + + """Base class for buffered IO objects. + + The main difference with RawIOBase is that the read() method + supports omitting the size argument, and does not have a default + implementation that defers to readinto(). + + In addition, read(), readinto() and write() may raise + BlockingIOError if the underlying raw stream is in non-blocking + mode and not ready; unlike their raw counterparts, they will never + return None. + + A typical implementation should not inherit from a RawIOBase + implementation, but wrap one. + """ + + def read(self, size=-1): + """Read and return up to size bytes, where size is an int. + + If the argument is omitted, None, or negative, reads and + returns all data until EOF. + + If the argument is positive, and the underlying raw stream is + not 'interactive', multiple raw reads may be issued to satisfy + the byte count (unless EOF is reached first). But for + interactive raw streams (XXX and for pipes?), at most one raw + read will be issued, and a short result does not imply that + EOF is imminent. + + Returns an empty bytes array on EOF. + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + self._unsupported("read") + + def read1(self, size=-1): + """Read up to size bytes with at most one read() system call, + where size is an int. + """ + self._unsupported("read1") + + def readinto(self, b): + """Read bytes into a pre-allocated bytes-like object b. + + Like read(), this may issue multiple reads to the underlying raw + stream, unless the latter is 'interactive'. + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=False) + + def readinto1(self, b): + """Read bytes into buffer *b*, using at most one system call + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=True) + + def _readinto(self, b, read1): + if not isinstance(b, memoryview): + b = memoryview(b) + b = b.cast('B') + + if read1: + data = self.read1(len(b)) + else: + data = self.read(len(b)) + n = len(data) + + b[:n] = data + + return n + + def write(self, b): + """Write the given bytes buffer to the IO stream. + + Return the number of bytes written, which is always the length of b + in bytes. + + Raises BlockingIOError if the buffer is full and the + underlying raw stream cannot accept more data at the moment. + """ + self._unsupported("write") + + def detach(self): + """ + Separate the underlying raw stream from the buffer and return it. + + After the raw stream has been detached, the buffer is in an unusable + state. + """ + self._unsupported("detach") + +io.BufferedIOBase.register(BufferedIOBase) + + +class _BufferedIOMixin(BufferedIOBase): + + """A mixin implementation of BufferedIOBase with an underlying raw stream. + + This passes most requests on to the underlying raw stream. It + does *not* provide implementations of read(), readinto() or + write(). + """ + + def __init__(self, raw): + self._raw = raw + + ### Positioning ### + + def seek(self, pos, whence=0): + new_position = self.raw.seek(pos, whence) + if new_position < 0: + raise OSError("seek() returned an invalid position") + return new_position + + def tell(self): + pos = self.raw.tell() + if pos < 0: + raise OSError("tell() returned an invalid position") + return pos + + def truncate(self, pos=None): + self._checkClosed() + self._checkWritable() + + # Flush the stream. We're mixing buffered I/O with lower-level I/O, + # and a flush may be necessary to synch both views of the current + # file state. + self.flush() + + if pos is None: + pos = self.tell() + # XXX: Should seek() be used, instead of passing the position + # XXX directly to truncate? + return self.raw.truncate(pos) + + ### Flush and close ### + + def flush(self): + if self.closed: + raise ValueError("flush on closed file") + self.raw.flush() + + def close(self): + if self.raw is not None and not self.closed: + try: + # may raise BlockingIOError or BrokenPipeError etc + self.flush() + finally: + self.raw.close() + + def detach(self): + if self.raw is None: + raise ValueError("raw stream already detached") + self.flush() + raw = self._raw + self._raw = None + return raw + + ### Inquiries ### + + def seekable(self): + return self.raw.seekable() + + @property + def raw(self): + return self._raw + + @property + def closed(self): + return self.raw.closed + + @property + def name(self): + return self.raw.name + + @property + def mode(self): + return self.raw.mode + + def __getstate__(self): + raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") + + def __repr__(self): + modname = self.__class__.__module__ + clsname = self.__class__.__qualname__ + try: + name = self.name + except AttributeError: + return "<{}.{}>".format(modname, clsname) + else: + return "<{}.{} name={!r}>".format(modname, clsname, name) + + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + + ### Lower-level APIs ### + + def fileno(self): + return self.raw.fileno() + + def isatty(self): + return self.raw.isatty() + + +class BytesIO(BufferedIOBase): + + """Buffered I/O implementation using an in-memory bytes buffer.""" + + # Initialize _buffer as soon as possible since it's used by __del__() + # which calls close() + _buffer = None + + def __init__(self, initial_bytes=None): + buf = bytearray() + if initial_bytes is not None: + buf += initial_bytes + self._buffer = buf + self._pos = 0 + + def __getstate__(self): + if self.closed: + raise ValueError("__getstate__ on closed file") + return self.__dict__.copy() + + def getvalue(self): + """Return the bytes value (contents) of the buffer + """ + if self.closed: + raise ValueError("getvalue on closed file") + return bytes(self._buffer) + + def getbuffer(self): + """Return a readable and writable view of the buffer. + """ + if self.closed: + raise ValueError("getbuffer on closed file") + return memoryview(self._buffer) + + def close(self): + if self._buffer is not None: + self._buffer.clear() + super().close() + + def read(self, size=-1): + if self.closed: + raise ValueError("read from closed file") + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) + + def read1(self, size=-1): + """This is the same as read. + """ + return self.read(size) + + def write(self, b): + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + with memoryview(b) as view: + if self.closed: + raise ValueError("write to closed file") + + n = view.nbytes # Size of any bytes-like object + if n == 0: + return 0 + + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = view + self._pos += n + return n + + def seek(self, pos, whence=0): + if self.closed: + raise ValueError("seek on closed file") + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if whence == 0: + if pos < 0: + raise ValueError("negative seek position %r" % (pos,)) + self._pos = pos + elif whence == 1: + self._pos = max(0, self._pos + pos) + elif whence == 2: + self._pos = max(0, len(self._buffer) + pos) + else: + raise ValueError("unsupported whence value") + return self._pos + + def tell(self): + if self.closed: + raise ValueError("tell on closed file") + return self._pos + + def truncate(self, pos=None): + if self.closed: + raise ValueError("truncate on closed file") + if pos is None: + pos = self._pos + else: + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] + return pos + + def readable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + def writable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + def seekable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + +class BufferedReader(_BufferedIOMixin): + + """BufferedReader(raw[, buffer_size]) + + A buffer for a readable, sequential BaseRawIO object. + + The constructor creates a BufferedReader for the given readable raw + stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE + is used. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + """Create a new buffered reader using the given readable raw IO object. + """ + if not raw.readable(): + raise OSError('"raw" argument must be readable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + self.buffer_size = buffer_size + self._reset_read_buf() + self._read_lock = Lock() + + def readable(self): + return self.raw.readable() + + def _reset_read_buf(self): + self._read_buf = b"" + self._read_pos = 0 + + def read(self, size=None): + """Read size bytes. + + Returns exactly size bytes of data unless the underlying raw IO + stream reaches EOF or if the call would block in non-blocking + mode. If size is negative, read until EOF or until read() would + block. + """ + if size is not None and size < -1: + raise ValueError("invalid number of bytes to read") + with self._read_lock: + return self._read_unlocked(size) + + def _read_unlocked(self, n=None): + nodata_val = b"" + empty_values = (b"", None) + buf = self._read_buf + pos = self._read_pos + + # Special case for when the number of bytes to read is unspecified. + if n is None or n == -1: + self._reset_read_buf() + if hasattr(self.raw, 'readall'): + chunk = self.raw.readall() + if chunk is None: + return buf[pos:] or None + else: + return buf[pos:] + chunk + chunks = [buf[pos:]] # Strip the consumed bytes. + current_size = 0 + while True: + # Read until EOF or until read() would block. + chunk = self.raw.read() + if chunk in empty_values: + nodata_val = chunk + break + current_size += len(chunk) + chunks.append(chunk) + return b"".join(chunks) or nodata_val + + # The number of bytes to read is specified, return at most n bytes. + avail = len(buf) - pos # Length of the available buffered data. + if n <= avail: + # Fast path: the data to read is fully buffered. + self._read_pos += n + return buf[pos:pos+n] + # Slow path: read from the stream until enough bytes are read, + # or until an EOF occurs or until read() would block. + chunks = [buf[pos:]] + wanted = max(self.buffer_size, n) + while avail < n: + chunk = self.raw.read(wanted) + if chunk in empty_values: + nodata_val = chunk + break + avail += len(chunk) + chunks.append(chunk) + # n is more than avail only when an EOF occurred or when + # read() would have blocked. + n = min(n, avail) + out = b"".join(chunks) + self._read_buf = out[n:] # Save the extra data in the buffer. + self._read_pos = 0 + return out[:n] if out else nodata_val + + def peek(self, size=0): + """Returns buffered bytes without advancing the position. + + The argument indicates a desired minimal number of bytes; we + do at most one raw read to satisfy it. We never return more + than self.buffer_size. + """ + self._checkClosed("peek of closed file") + with self._read_lock: + return self._peek_unlocked(size) + + def _peek_unlocked(self, n=0): + want = min(n, self.buffer_size) + have = len(self._read_buf) - self._read_pos + if have < want or have <= 0: + to_read = self.buffer_size - have + current = self.raw.read(to_read) + if current: + self._read_buf = self._read_buf[self._read_pos:] + current + self._read_pos = 0 + return self._read_buf[self._read_pos:] + + def read1(self, size=-1): + """Reads up to size bytes, with at most one read() system call.""" + # Returns up to size bytes. If at least one byte is buffered, we + # only return buffered bytes. Otherwise, we do one raw read. + self._checkClosed("read of closed file") + if size < 0: + size = self.buffer_size + if size == 0: + return b"" + with self._read_lock: + self._peek_unlocked(1) + return self._read_unlocked( + min(size, len(self._read_buf) - self._read_pos)) + + # Implementing readinto() and readinto1() is not strictly necessary (we + # could rely on the base class that provides an implementation in terms of + # read() and read1()). We do it anyway to keep the _pyio implementation + # similar to the io implementation (which implements the methods for + # performance reasons). + def _readinto(self, buf, read1): + """Read data into *buf* with at most one system call.""" + + self._checkClosed("readinto of closed file") + + # Need to create a memoryview object of type 'b', otherwise + # we may not be able to assign bytes to it, and slicing it + # would create a new object. + if not isinstance(buf, memoryview): + buf = memoryview(buf) + if buf.nbytes == 0: + return 0 + buf = buf.cast('B') + + written = 0 + with self._read_lock: + while written < len(buf): + + # First try to read from internal buffer + avail = min(len(self._read_buf) - self._read_pos, len(buf)) + if avail: + buf[written:written+avail] = \ + self._read_buf[self._read_pos:self._read_pos+avail] + self._read_pos += avail + written += avail + if written == len(buf): + break + + # If remaining space in callers buffer is larger than + # internal buffer, read directly into callers buffer + if len(buf) - written > self.buffer_size: + n = self.raw.readinto(buf[written:]) + if not n: + break # eof + written += n + + # Otherwise refill internal buffer - unless we're + # in read1 mode and already got some data + elif not (read1 and written): + if not self._peek_unlocked(1): + break # eof + + # In readinto1 mode, return as soon as we have some data + if read1 and written: + break + + return written + + def tell(self): + # GH-95782: Keep return value non-negative + return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + self._checkClosed("seek of closed file") + with self._read_lock: + if whence == 1: + pos -= len(self._read_buf) - self._read_pos + pos = _BufferedIOMixin.seek(self, pos, whence) + self._reset_read_buf() + return pos + +class BufferedWriter(_BufferedIOMixin): + + """A buffer for a writeable sequential RawIO object. + + The constructor creates a BufferedWriter for the given writeable raw + stream. If the buffer_size is not given, it defaults to + DEFAULT_BUFFER_SIZE. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + if not raw.writable(): + raise OSError('"raw" argument must be writable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + self.buffer_size = buffer_size + self._write_buf = bytearray() + self._write_lock = Lock() + + def writable(self): + return self.raw.writable() + + def write(self, b): + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + with self._write_lock: + if self.closed: + raise ValueError("write to closed file") + # XXX we can implement some more tricks to try and avoid + # partial writes + if len(self._write_buf) > self.buffer_size: + # We're full, so let's pre-flush the buffer. (This may + # raise BlockingIOError with characters_written == 0.) + self._flush_unlocked() + before = len(self._write_buf) + self._write_buf.extend(b) + written = len(self._write_buf) - before + if len(self._write_buf) > self.buffer_size: + try: + self._flush_unlocked() + except BlockingIOError as e: + if len(self._write_buf) > self.buffer_size: + # We've hit the buffer_size. We have to accept a partial + # write and cut back our buffer. + overage = len(self._write_buf) - self.buffer_size + written -= overage + self._write_buf = self._write_buf[:self.buffer_size] + raise BlockingIOError(e.errno, e.strerror, written) + return written + + def truncate(self, pos=None): + with self._write_lock: + self._flush_unlocked() + if pos is None: + pos = self.raw.tell() + return self.raw.truncate(pos) + + def flush(self): + with self._write_lock: + self._flush_unlocked() + + def _flush_unlocked(self): + if self.closed: + raise ValueError("flush on closed file") + while self._write_buf: + try: + n = self.raw.write(self._write_buf) + except BlockingIOError: + raise RuntimeError("self.raw should implement RawIOBase: it " + "should not raise BlockingIOError") + if n is None: + raise BlockingIOError( + errno.EAGAIN, + "write could not complete without blocking", 0) + if n > len(self._write_buf) or n < 0: + raise OSError("write() returned incorrect number of bytes") + del self._write_buf[:n] + + def tell(self): + return _BufferedIOMixin.tell(self) + len(self._write_buf) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + with self._write_lock: + self._flush_unlocked() + return _BufferedIOMixin.seek(self, pos, whence) + + def close(self): + with self._write_lock: + if self.raw is None or self.closed: + return + # We have to release the lock and call self.flush() (which will + # probably just re-take the lock) in case flush has been overridden in + # a subclass or the user set self.flush to something. This is the same + # behavior as the C implementation. + try: + # may raise BlockingIOError or BrokenPipeError etc + self.flush() + finally: + with self._write_lock: + self.raw.close() + + +class BufferedRWPair(BufferedIOBase): + + """A buffered reader and writer object together. + + A buffered reader object and buffered writer object put together to + form a sequential IO object that can read and write. This is typically + used with a socket or two-way pipe. + + reader and writer are RawIOBase objects that are readable and + writeable respectively. If the buffer_size is omitted it defaults to + DEFAULT_BUFFER_SIZE. + """ + + # XXX The usefulness of this (compared to having two separate IO + # objects) is questionable. + + def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): + """Constructor. + + The arguments are two RawIO instances. + """ + if not reader.readable(): + raise OSError('"reader" argument must be readable.') + + if not writer.writable(): + raise OSError('"writer" argument must be writable.') + + self.reader = BufferedReader(reader, buffer_size) + self.writer = BufferedWriter(writer, buffer_size) + + def read(self, size=-1): + if size is None: + size = -1 + return self.reader.read(size) + + def readinto(self, b): + return self.reader.readinto(b) + + def write(self, b): + return self.writer.write(b) + + def peek(self, size=0): + return self.reader.peek(size) + + def read1(self, size=-1): + return self.reader.read1(size) + + def readinto1(self, b): + return self.reader.readinto1(b) + + def readable(self): + return self.reader.readable() + + def writable(self): + return self.writer.writable() + + def flush(self): + return self.writer.flush() + + def close(self): + try: + self.writer.close() + finally: + self.reader.close() + + def isatty(self): + return self.reader.isatty() or self.writer.isatty() + + @property + def closed(self): + return self.writer.closed + + +class BufferedRandom(BufferedWriter, BufferedReader): + + """A buffered interface to random access streams. + + The constructor creates a reader and writer for a seekable stream, + raw, given in the first argument. If the buffer_size is omitted it + defaults to DEFAULT_BUFFER_SIZE. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + raw._checkSeekable() + BufferedReader.__init__(self, raw, buffer_size) + BufferedWriter.__init__(self, raw, buffer_size) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + self.flush() + if self._read_buf: + # Undo read ahead. + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + # First do the raw seek, then empty the read buffer, so that + # if the raw seek fails, we don't lose buffered data forever. + pos = self.raw.seek(pos, whence) + with self._read_lock: + self._reset_read_buf() + if pos < 0: + raise OSError("seek() returned invalid position") + return pos + + def tell(self): + if self._write_buf: + return BufferedWriter.tell(self) + else: + return BufferedReader.tell(self) + + def truncate(self, pos=None): + if pos is None: + pos = self.tell() + # Use seek to flush the read buffer. + return BufferedWriter.truncate(self, pos) + + def read(self, size=None): + if size is None: + size = -1 + self.flush() + return BufferedReader.read(self, size) + + def readinto(self, b): + self.flush() + return BufferedReader.readinto(self, b) + + def peek(self, size=0): + self.flush() + return BufferedReader.peek(self, size) + + def read1(self, size=-1): + self.flush() + return BufferedReader.read1(self, size) + + def readinto1(self, b): + self.flush() + return BufferedReader.readinto1(self, b) + + def write(self, b): + if self._read_buf: + # Undo readahead + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + self._reset_read_buf() + return BufferedWriter.write(self, b) + + +def _new_buffersize(bytes_read): + # Parallels _io/fileio.c new_buffersize + if bytes_read > 65536: + addend = bytes_read >> 3 + else: + addend = 256 + bytes_read + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + return bytes_read + addend + + +class FileIO(RawIOBase): + _fd = -1 + _created = False + _readable = False + _writable = False + _appending = False + _seekable = None + _closefd = True + + def __init__(self, file, mode='r', closefd=True, opener=None): + """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, + writing, exclusive creation or appending. The file will be created if it + doesn't exist when opened for writing or appending; it will be truncated + when opened for writing. A FileExistsError will be raised if it already + exists when opened for creating. Opening a file for creating implies + writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode + to allow simultaneous reading and writing. A custom opener can be used by + passing a callable as *opener*. The underlying file descriptor for the file + object is then obtained by calling opener with (*name*, *flags*). + *opener* must return an open file descriptor (passing os.open as *opener* + results in functionality similar to passing None). + """ + if self._fd >= 0: + # Have to close the existing file first. + self._stat_atopen = None + try: + if self._closefd: + os.close(self._fd) + finally: + self._fd = -1 + + if isinstance(file, float): + raise TypeError('integer argument expected, got float') + if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) + fd = file + if fd < 0: + raise ValueError('negative file descriptor') + else: + fd = -1 + + if not isinstance(mode, str): + raise TypeError('invalid mode: %s' % (mode,)) + if not set(mode) <= set('xrwab+'): + raise ValueError('invalid mode: %s' % (mode,)) + if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: + raise ValueError('Must have exactly one of create/read/write/append ' + 'mode and at most one plus') + + if 'x' in mode: + self._created = True + self._writable = True + flags = os.O_EXCL | os.O_CREAT + elif 'r' in mode: + self._readable = True + flags = 0 + elif 'w' in mode: + self._writable = True + flags = os.O_CREAT | os.O_TRUNC + elif 'a' in mode: + self._writable = True + self._appending = True + flags = os.O_APPEND | os.O_CREAT + + if '+' in mode: + self._readable = True + self._writable = True + + if self._readable and self._writable: + flags |= os.O_RDWR + elif self._readable: + flags |= os.O_RDONLY + else: + flags |= os.O_WRONLY + + flags |= getattr(os, 'O_BINARY', 0) + + noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or + getattr(os, 'O_CLOEXEC', 0)) + flags |= noinherit_flag + + owned_fd = None + try: + if fd < 0: + if not closefd: + raise ValueError('Cannot use closefd=False with file name') + if opener is None: + fd = os.open(file, flags, 0o666) + else: + fd = opener(file, flags) + if not isinstance(fd, int): + raise TypeError('expected integer from opener') + if fd < 0: + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') + owned_fd = fd + if not noinherit_flag: + os.set_inheritable(fd, False) + + self._closefd = closefd + self._stat_atopen = os.fstat(fd) + try: + if stat.S_ISDIR(self._stat_atopen.st_mode): + raise IsADirectoryError(errno.EISDIR, + os.strerror(errno.EISDIR), file) + except AttributeError: + # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR + # don't exist. + pass + + if _setmode: + # don't translate newlines (\r\n <=> \n) + _setmode(fd, os.O_BINARY) + + self.name = file + if self._appending: + # For consistent behaviour, we explicitly seek to the + # end of file (otherwise, it might be done only on the + # first write()). + try: + os.lseek(fd, 0, SEEK_END) + except OSError as e: + if e.errno != errno.ESPIPE: + raise + except: + self._stat_atopen = None + if owned_fd is not None: + os.close(owned_fd) + raise + self._fd = fd + + def _dealloc_warn(self, source): + if self._fd >= 0 and self._closefd and not self.closed: + import warnings + warnings.warn(f'unclosed file {source!r}', ResourceWarning, + stacklevel=2, source=self) + + def __getstate__(self): + raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") + + def __repr__(self): + class_name = '%s.%s' % (self.__class__.__module__, + self.__class__.__qualname__) + if self.closed: + return '<%s [closed]>' % class_name + try: + name = self.name + except AttributeError: + return ('<%s fd=%d mode=%r closefd=%r>' % + (class_name, self._fd, self.mode, self._closefd)) + else: + return ('<%s name=%r mode=%r closefd=%r>' % + (class_name, name, self.mode, self._closefd)) + + @property + def _blksize(self): + if self._stat_atopen is None: + return DEFAULT_BUFFER_SIZE + + blksize = getattr(self._stat_atopen, "st_blksize", 0) + # WASI sets blsize to 0 + if not blksize: + return DEFAULT_BUFFER_SIZE + return blksize + + def _checkReadable(self): + if not self._readable: + raise UnsupportedOperation('File not open for reading') + + def _checkWritable(self, msg=None): + if not self._writable: + raise UnsupportedOperation('File not open for writing') + + def read(self, size=None): + """Read at most size bytes, returned as bytes. + + If size is less than 0, read all bytes in the file making + multiple read calls. See ``FileIO.readall``. + + Attempts to make only one system call, retrying only per + PEP 475 (EINTR). This means less data may be returned than + requested. + + In non-blocking mode, returns None if no data is available. + Return an empty bytes object at EOF. + """ + self._checkClosed() + self._checkReadable() + if size is None or size < 0: + return self.readall() + try: + return os.read(self._fd, size) + except BlockingIOError: + return None + + def readall(self): + """Read all data from the file, returned as bytes. + + Reads until either there is an error or read() returns size 0 + (indicates EOF). If the file is already at EOF, returns an + empty bytes object. + + In non-blocking mode, returns as much data as could be read + before EAGAIN. If no data is available (EAGAIN is returned + before bytes are read) returns None. + """ + self._checkClosed() + self._checkReadable() + if self._stat_atopen is None or self._stat_atopen.st_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + bufsize = self._stat_atopen.st_size + 1 + + if self._stat_atopen.st_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 + except OSError: + pass + + result = bytearray(bufsize) + bytes_read = 0 + try: + while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): + bytes_read += n + if bytes_read >= len(result): + result.resize(_new_buffersize(bytes_read)) + except BlockingIOError: + if not bytes_read: + return None + + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + result.resize(bytes_read) + return bytes(result) + + def readinto(self, buffer): + """Same as RawIOBase.readinto().""" + self._checkClosed() + self._checkReadable() + try: + return os.readinto(self._fd, buffer) + except BlockingIOError: + return None + + def write(self, b): + """Write bytes b to file, return number written. + + Only makes one system call, so not all of the data may be written. + The number of bytes actually written is returned. In non-blocking mode, + returns None if the write would block. + """ + self._checkClosed() + self._checkWritable() + try: + return os.write(self._fd, b) + except BlockingIOError: + return None + + def seek(self, pos, whence=SEEK_SET): + """Move to new file position. + + Argument offset is a byte count. Optional argument whence defaults to + SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values + are SEEK_CUR or 1 (move relative to current position, positive or negative), + and SEEK_END or 2 (move relative to end of file, usually negative, although + many platforms allow seeking beyond the end of a file). + + Note that not all file objects are seekable. + """ + if isinstance(pos, float): + raise TypeError('an integer is required') + self._checkClosed() + return os.lseek(self._fd, pos, whence) + + def tell(self): + """tell() -> int. Current file position. + + Can raise OSError for non seekable files.""" + self._checkClosed() + return os.lseek(self._fd, 0, SEEK_CUR) + + def truncate(self, size=None): + """Truncate the file to at most size bytes. + + Size defaults to the current file position, as returned by tell(). + The current file position is changed to the value of size. + """ + self._checkClosed() + self._checkWritable() + if size is None: + size = self.tell() + os.ftruncate(self._fd, size) + self._stat_atopen = None + return size + + def close(self): + """Close the file. + + A closed file cannot be used for further I/O operations. close() may be + called more than once without error. + """ + if not self.closed: + self._stat_atopen = None + try: + if self._closefd and self._fd >= 0: + os.close(self._fd) + finally: + super().close() + + def seekable(self): + """True if file supports random-access.""" + self._checkClosed() + if self._seekable is None: + try: + self.tell() + except OSError: + self._seekable = False + else: + self._seekable = True + return self._seekable + + def readable(self): + """True if file was opened in a read mode.""" + self._checkClosed() + return self._readable + + def writable(self): + """True if file was opened in a write mode.""" + self._checkClosed() + return self._writable + + def fileno(self): + """Return the underlying file descriptor (an integer).""" + self._checkClosed() + return self._fd + + def isatty(self): + """True if the file is connected to a TTY device.""" + self._checkClosed() + return os.isatty(self._fd) + + def _isatty_open_only(self): + """Checks whether the file is a TTY using an open-only optimization. + + TTYs are always character devices. If the interpreter knows a file is + not a character device when it would call ``isatty``, can skip that + call. Inside ``open()`` there is a fresh stat result that contains that + information. Use the stat result to skip a system call. Outside of that + context TOCTOU issues (the fd could be arbitrarily modified by + surrounding code). + """ + if (self._stat_atopen is not None + and not stat.S_ISCHR(self._stat_atopen.st_mode)): + return False + return os.isatty(self._fd) + + @property + def closefd(self): + """True if the file descriptor will be closed by close().""" + return self._closefd + + @property + def mode(self): + """String giving the file mode""" + if self._created: + if self._readable: + return 'xb+' + else: + return 'xb' + elif self._appending: + if self._readable: + return 'ab+' + else: + return 'ab' + elif self._readable: + if self._writable: + return 'rb+' + else: + return 'rb' + else: + return 'wb' + + +class TextIOBase(IOBase): + + """Base class for text I/O. + + This class provides a character and line based interface to stream + I/O. + """ + + def read(self, size=-1): + """Read at most size characters from stream, where size is an int. + + Read from underlying buffer until we have size characters or we hit EOF. + If size is negative or omitted, read until EOF. + + Returns a string. + """ + self._unsupported("read") + + def write(self, s): + """Write string s to stream and returning an int.""" + self._unsupported("write") + + def truncate(self, pos=None): + """Truncate size to pos, where pos is an int.""" + self._unsupported("truncate") + + def readline(self): + """Read until newline or EOF. + + Returns an empty string if EOF is hit immediately. + """ + self._unsupported("readline") + + def detach(self): + """ + Separate the underlying buffer from the TextIOBase and return it. + + After the underlying buffer has been detached, the TextIO is in an + unusable state. + """ + self._unsupported("detach") + + @property + def encoding(self): + """Subclasses should override.""" + return None + + @property + def newlines(self): + """Line endings translated so far. + + Only line endings translated during reading are considered. + + Subclasses should override. + """ + return None + + @property + def errors(self): + """Error setting of the decoder or encoder. + + Subclasses should override.""" + return None + +io.TextIOBase.register(TextIOBase) + + +class IncrementalNewlineDecoder(codecs.IncrementalDecoder): + r"""Codec used when reading a file in universal newlines mode. It wraps + another incremental decoder, translating \r\n and \r into \n. It also + records the types of newlines encountered. When used with + translate=False, it ensures that the newline sequence is returned in + one piece. + """ + def __init__(self, decoder, translate, errors='strict'): + codecs.IncrementalDecoder.__init__(self, errors=errors) + self.translate = translate + self.decoder = decoder + self.seennl = 0 + self.pendingcr = False + + def decode(self, input, final=False): + # decode input (with the eventual \r from a previous pass) + if self.decoder is None: + output = input + else: + output = self.decoder.decode(input, final=final) + if self.pendingcr and (output or final): + output = "\r" + output + self.pendingcr = False + + # retain last \r even when not translating data: + # then readline() is sure to get \r\n in one pass + if output.endswith("\r") and not final: + output = output[:-1] + self.pendingcr = True + + # Record which newlines are read + crlf = output.count('\r\n') + cr = output.count('\r') - crlf + lf = output.count('\n') - crlf + self.seennl |= (lf and self._LF) | (cr and self._CR) \ + | (crlf and self._CRLF) + + if self.translate: + if crlf: + output = output.replace("\r\n", "\n") + if cr: + output = output.replace("\r", "\n") + + return output + + def getstate(self): + if self.decoder is None: + buf = b"" + flag = 0 + else: + buf, flag = self.decoder.getstate() + flag <<= 1 + if self.pendingcr: + flag |= 1 + return buf, flag + + def setstate(self, state): + buf, flag = state + self.pendingcr = bool(flag & 1) + if self.decoder is not None: + self.decoder.setstate((buf, flag >> 1)) + + def reset(self): + self.seennl = 0 + self.pendingcr = False + if self.decoder is not None: + self.decoder.reset() + + _LF = 1 + _CR = 2 + _CRLF = 4 + + @property + def newlines(self): + return (None, + "\n", + "\r", + ("\r", "\n"), + "\r\n", + ("\n", "\r\n"), + ("\r", "\r\n"), + ("\r", "\n", "\r\n") + )[self.seennl] + + +class TextIOWrapper(TextIOBase): + + r"""Character and line based layer over a BufferedIOBase object, buffer. + + encoding gives the name of the encoding that the stream will be + decoded or encoded with. It defaults to locale.getencoding(). + + errors determines the strictness of encoding and decoding (see the + codecs.register) and defaults to "strict". + + newline can be None, '', '\n', '\r', or '\r\n'. It controls the + handling of line endings. If it is None, universal newlines is + enabled. With this enabled, on input, the lines endings '\n', '\r', + or '\r\n' are translated to '\n' before being returned to the + caller. Conversely, on output, '\n' is translated to the system + default line separator, os.linesep. If newline is any other of its + legal values, that newline becomes the newline when the file is read + and it is returned untranslated. On output, '\n' is converted to the + newline. + + If line_buffering is True, a call to flush is implied when a call to + write contains a newline character. + """ + + _CHUNK_SIZE = 2048 + + # Initialize _buffer as soon as possible since it's used by __del__() + # which calls close() + _buffer = None + + # The write_through argument has no effect here since this + # implementation always writes through. The argument is present only + # so that the signature can match the signature of the C version. + def __init__(self, buffer, encoding=None, errors=None, newline=None, + line_buffering=False, write_through=False): + self._check_newline(newline) + encoding = text_encoding(encoding) + + if encoding == "locale": + encoding = self._get_locale_encoding() + + if not isinstance(encoding, str): + raise ValueError("invalid encoding: %r" % encoding) + + if not codecs.lookup(encoding)._is_text_encoding: + msg = "%r is not a text encoding" + raise LookupError(msg % encoding) + + if errors is None: + errors = "strict" + else: + if not isinstance(errors, str): + raise ValueError("invalid errors: %r" % errors) + if _CHECK_ERRORS: + codecs.lookup_error(errors) + + self._buffer = buffer + self._decoded_chars = '' # buffer for text returned from decoder + self._decoded_chars_used = 0 # offset into _decoded_chars for read() + self._snapshot = None # info for reconstructing decoder state + self._seekable = self._telling = self.buffer.seekable() + self._has_read1 = hasattr(self.buffer, 'read1') + self._configure(encoding, errors, newline, + line_buffering, write_through) + + def _check_newline(self, newline): + if newline is not None and not isinstance(newline, str): + raise TypeError("illegal newline type: %r" % (type(newline),)) + if newline not in (None, "", "\n", "\r", "\r\n"): + raise ValueError("illegal newline value: %r" % (newline,)) + + def _configure(self, encoding=None, errors=None, newline=None, + line_buffering=False, write_through=False): + self._encoding = encoding + self._errors = errors + self._encoder = None + self._decoder = None + self._b2cratio = 0.0 + + self._readuniversal = not newline + self._readtranslate = newline is None + self._readnl = newline + self._writetranslate = newline != '' + self._writenl = newline or os.linesep + + self._line_buffering = line_buffering + self._write_through = write_through + + # don't write a BOM in the middle of a file + if self._seekable and self.writable(): + position = self.buffer.tell() + if position != 0: + try: + self._get_encoder().setstate(0) + except LookupError: + # Sometimes the encoder doesn't exist + pass + + # self._snapshot is either None, or a tuple (dec_flags, next_input) + # where dec_flags is the second (integer) item of the decoder state + # and next_input is the chunk of input bytes that comes next after the + # snapshot point. We use this to reconstruct decoder states in tell(). + + # Naming convention: + # - "bytes_..." for integer variables that count input bytes + # - "chars_..." for integer variables that count decoded characters + + def __repr__(self): + result = "<{}.{}".format(self.__class__.__module__, + self.__class__.__qualname__) + try: + name = self.name + except AttributeError: + pass + else: + result += " name={0!r}".format(name) + try: + mode = self.mode + except AttributeError: + pass + else: + result += " mode={0!r}".format(mode) + return result + " encoding={0!r}>".format(self.encoding) + + @property + def encoding(self): + return self._encoding + + @property + def errors(self): + return self._errors + + @property + def line_buffering(self): + return self._line_buffering + + @property + def write_through(self): + return self._write_through + + @property + def buffer(self): + return self._buffer + + def reconfigure(self, *, + encoding=None, errors=None, newline=Ellipsis, + line_buffering=None, write_through=None): + """Reconfigure the text stream with new parameters. + + This also flushes the stream. + """ + if (self._decoder is not None + and (encoding is not None or errors is not None + or newline is not Ellipsis)): + raise UnsupportedOperation( + "It is not possible to set the encoding or newline of stream " + "after the first read") + + if errors is None: + if encoding is None: + errors = self._errors + else: + errors = 'strict' + elif not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) + + if encoding is None: + encoding = self._encoding + else: + if not isinstance(encoding, str): + raise TypeError("invalid encoding: %r" % encoding) + if encoding == "locale": + encoding = self._get_locale_encoding() + + if newline is Ellipsis: + newline = self._readnl + self._check_newline(newline) + + if line_buffering is None: + line_buffering = self.line_buffering + if write_through is None: + write_through = self.write_through + + self.flush() + self._configure(encoding, errors, newline, + line_buffering, write_through) + + def seekable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return self._seekable + + def readable(self): + return self.buffer.readable() + + def writable(self): + return self.buffer.writable() + + def flush(self): + self.buffer.flush() + self._telling = self._seekable + + def close(self): + if self.buffer is not None and not self.closed: + try: + self.flush() + finally: + self.buffer.close() + + @property + def closed(self): + return self.buffer.closed + + @property + def name(self): + return self.buffer.name + + def fileno(self): + return self.buffer.fileno() + + def isatty(self): + return self.buffer.isatty() + + def write(self, s): + 'Write data, where s is a str' + if self.closed: + raise ValueError("write to closed file") + if not isinstance(s, str): + raise TypeError("can't write %s to text stream" % + s.__class__.__name__) + length = len(s) + haslf = (self._writetranslate or self._line_buffering) and "\n" in s + if haslf and self._writetranslate and self._writenl != "\n": + s = s.replace("\n", self._writenl) + encoder = self._encoder or self._get_encoder() + # XXX What if we were just reading? + b = encoder.encode(s) + self.buffer.write(b) + if self._line_buffering and (haslf or "\r" in s): + self.flush() + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + return length + + def _get_encoder(self): + make_encoder = codecs.getincrementalencoder(self._encoding) + self._encoder = make_encoder(self._errors) + return self._encoder + + def _get_decoder(self): + make_decoder = codecs.getincrementaldecoder(self._encoding) + decoder = make_decoder(self._errors) + if self._readuniversal: + decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) + self._decoder = decoder + return decoder + + # The following three methods implement an ADT for _decoded_chars. + # Text returned from the decoder is buffered here until the client + # requests it by calling our read() or readline() method. + def _set_decoded_chars(self, chars): + """Set the _decoded_chars buffer.""" + self._decoded_chars = chars + self._decoded_chars_used = 0 + + def _get_decoded_chars(self, n=None): + """Advance into the _decoded_chars buffer.""" + offset = self._decoded_chars_used + if n is None: + chars = self._decoded_chars[offset:] + else: + chars = self._decoded_chars[offset:offset + n] + self._decoded_chars_used += len(chars) + return chars + + def _get_locale_encoding(self): + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + return "utf-8" + else: + return locale.getencoding() + + def _rewind_decoded_chars(self, n): + """Rewind the _decoded_chars buffer.""" + if self._decoded_chars_used < n: + raise AssertionError("rewind decoded_chars out of bounds") + self._decoded_chars_used -= n + + def _read_chunk(self): + """ + Read and decode the next chunk of data from the BufferedReader. + """ + + # The return value is True unless EOF was reached. The decoded + # string is placed in self._decoded_chars (replacing its previous + # value). The entire input chunk is sent to the decoder, though + # some of it may remain buffered in the decoder, yet to be + # converted. + + if self._decoder is None: + raise ValueError("no decoder") + + if self._telling: + # To prepare for tell(), we need to snapshot a point in the + # file where the decoder's input buffer is empty. + + dec_buffer, dec_flags = self._decoder.getstate() + # Given this, we know there was a valid snapshot point + # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + + # Read a chunk, decode it, and put the result in self._decoded_chars. + if self._has_read1: + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + else: + input_chunk = self.buffer.read(self._CHUNK_SIZE) + eof = not input_chunk + decoded_chars = self._decoder.decode(input_chunk, eof) + self._set_decoded_chars(decoded_chars) + if decoded_chars: + self._b2cratio = len(input_chunk) / len(self._decoded_chars) + else: + self._b2cratio = 0.0 + + if self._telling: + # At the snapshot point, len(dec_buffer) bytes before the read, + # the next input to be decoded is dec_buffer + input_chunk. + self._snapshot = (dec_flags, dec_buffer + input_chunk) + + return not eof + + def _pack_cookie(self, position, dec_flags=0, + bytes_to_feed=0, need_eof=False, chars_to_skip=0): + # The meaning of a tell() cookie is: seek to position, set the + # decoder flags to dec_flags, read bytes_to_feed bytes, feed them + # into the decoder with need_eof as the EOF flag, then skip + # chars_to_skip characters of the decoded result. For most simple + # decoders, tell() will often just give a byte offset in the file. + return (position | (dec_flags<<64) | (bytes_to_feed<<128) | + (chars_to_skip<<192) | bool(need_eof)<<256) + + def _unpack_cookie(self, bigint): + rest, position = divmod(bigint, 1<<64) + rest, dec_flags = divmod(rest, 1<<64) + rest, bytes_to_feed = divmod(rest, 1<<64) + need_eof, chars_to_skip = divmod(rest, 1<<64) + return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip + + def tell(self): + if not self._seekable: + raise UnsupportedOperation("underlying stream is not seekable") + if not self._telling: + raise OSError("telling position disabled by next() call") + self.flush() + position = self.buffer.tell() + decoder = self._decoder + if decoder is None or self._snapshot is None: + if self._decoded_chars: + # This should never happen. + raise AssertionError("pending decoded text") + return position + + # Skip backward to the snapshot point (see _read_chunk). + dec_flags, next_input = self._snapshot + position -= len(next_input) + + # How many decoded characters have been used up since the snapshot? + chars_to_skip = self._decoded_chars_used + if chars_to_skip == 0: + # We haven't moved from the snapshot point. + return self._pack_cookie(position, dec_flags) + + # Starting from the snapshot position, we will walk the decoder + # forward until it gives us enough decoded characters. + saved_state = decoder.getstate() + try: + # Fast search for an acceptable start point, close to our + # current pos. + # Rationale: calling decoder.decode() has a large overhead + # regardless of chunk size; we want the number of such calls to + # be O(1) in most situations (common decoders, sensible input). + # Actually, it will be exactly 1 for fixed-size codecs (all + # 8-bit codecs, also UTF-16 and UTF-32). + skip_bytes = int(self._b2cratio * chars_to_skip) + skip_back = 1 + assert skip_bytes <= len(next_input) + while skip_bytes > 0: + decoder.setstate((b'', dec_flags)) + # Decode up to temptative start point + n = len(decoder.decode(next_input[:skip_bytes])) + if n <= chars_to_skip: + b, d = decoder.getstate() + if not b: + # Before pos and no bytes buffered in decoder => OK + dec_flags = d + chars_to_skip -= n + break + # Skip back by buffered amount and reset heuristic + skip_bytes -= len(b) + skip_back = 1 + else: + # We're too far ahead, skip back a bit + skip_bytes -= skip_back + skip_back = skip_back * 2 + else: + skip_bytes = 0 + decoder.setstate((b'', dec_flags)) + + # Note our initial start point. + start_pos = position + skip_bytes + start_flags = dec_flags + if chars_to_skip == 0: + # We haven't moved from the start point. + return self._pack_cookie(start_pos, start_flags) + + # Feed the decoder one byte at a time. As we go, note the + # nearest "safe start point" before the current location + # (a point where the decoder has nothing buffered, so seek() + # can safely start from there and advance to this location). + bytes_fed = 0 + need_eof = False + # Chars decoded since `start_pos` + chars_decoded = 0 + for i in range(skip_bytes, len(next_input)): + bytes_fed += 1 + chars_decoded += len(decoder.decode(next_input[i:i+1])) + dec_buffer, dec_flags = decoder.getstate() + if not dec_buffer and chars_decoded <= chars_to_skip: + # Decoder buffer is empty, so this is a safe start point. + start_pos += bytes_fed + chars_to_skip -= chars_decoded + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + if chars_decoded >= chars_to_skip: + break + else: + # We didn't get enough decoded data; signal EOF to get more. + chars_decoded += len(decoder.decode(b'', final=True)) + need_eof = True + if chars_decoded < chars_to_skip: + raise OSError("can't reconstruct logical file position") + + # The returned cookie corresponds to the last safe start point. + return self._pack_cookie( + start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) + finally: + decoder.setstate(saved_state) + + def truncate(self, pos=None): + self.flush() + if pos is None: + pos = self.tell() + return self.buffer.truncate(pos) + + def detach(self): + if self.buffer is None: + raise ValueError("buffer is already detached") + self.flush() + buffer = self._buffer + self._buffer = None + return buffer + + def seek(self, cookie, whence=0): + def _reset_encoder(position): + """Reset the encoder (merely useful for proper BOM handling)""" + try: + encoder = self._encoder or self._get_encoder() + except LookupError: + # Sometimes the encoder doesn't exist + pass + else: + if position != 0: + encoder.setstate(0) + else: + encoder.reset() + + if self.closed: + raise ValueError("tell on closed file") + if not self._seekable: + raise UnsupportedOperation("underlying stream is not seekable") + if whence == SEEK_CUR: + if cookie != 0: + raise UnsupportedOperation("can't do nonzero cur-relative seeks") + # Seeking to the current position should attempt to + # sync the underlying buffer with the current position. + whence = 0 + cookie = self.tell() + elif whence == SEEK_END: + if cookie != 0: + raise UnsupportedOperation("can't do nonzero end-relative seeks") + self.flush() + position = self.buffer.seek(0, whence) + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + _reset_encoder(position) + return position + if whence != 0: + raise ValueError("unsupported whence (%r)" % (whence,)) + if cookie < 0: + raise ValueError("negative seek position %r" % (cookie,)) + self.flush() + + # The strategy of seek() is to go back to the safe start point + # and replay the effect of read(chars_to_skip) from there. + start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ + self._unpack_cookie(cookie) + + # Seek back to the safe start point. + self.buffer.seek(start_pos) + self._set_decoded_chars('') + self._snapshot = None + + # Restore the decoder to its state from the safe start point. + if cookie == 0 and self._decoder: + self._decoder.reset() + elif self._decoder or dec_flags or chars_to_skip: + self._decoder = self._decoder or self._get_decoder() + self._decoder.setstate((b'', dec_flags)) + self._snapshot = (dec_flags, b'') + + if chars_to_skip: + # Just like _read_chunk, feed the decoder and save a snapshot. + input_chunk = self.buffer.read(bytes_to_feed) + self._set_decoded_chars( + self._decoder.decode(input_chunk, need_eof)) + self._snapshot = (dec_flags, input_chunk) + + # Skip chars_to_skip of the decoded characters. + if len(self._decoded_chars) < chars_to_skip: + raise OSError("can't restore logical file position") + self._decoded_chars_used = chars_to_skip + + _reset_encoder(cookie) + return cookie + + def read(self, size=None): + self._checkReadable() + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + decoder = self._decoder or self._get_decoder() + if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") + # Read everything. + result = (self._get_decoded_chars() + + decoder.decode(chunk, final=True)) + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None + return result + else: + # Keep reading chunks until we have size characters to return. + eof = False + result = self._get_decoded_chars(size) + while len(result) < size and not eof: + eof = not self._read_chunk() + result += self._get_decoded_chars(size - len(result)) + return result + + def __next__(self): + self._telling = False + line = self.readline() + if not line: + self._snapshot = None + self._telling = self._seekable + raise StopIteration + return line + + def readline(self, size=None): + if self.closed: + raise ValueError("read from closed file") + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + + # Grab all the decoded text (we will rewind any extra bits later). + line = self._get_decoded_chars() + + start = 0 + # Make the decoder if it doesn't already exist. + if not self._decoder: + self._get_decoder() + + pos = endpos = None + while True: + if self._readtranslate: + # Newlines are already translated, only search for \n + pos = line.find('\n', start) + if pos >= 0: + endpos = pos + 1 + break + else: + start = len(line) + + elif self._readuniversal: + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + + # In C we'd look for these in parallel of course. + nlpos = line.find("\n", start) + crpos = line.find("\r", start) + if crpos == -1: + if nlpos == -1: + # Nothing found + start = len(line) + else: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == -1: + # Found lone \r + endpos = crpos + 1 + break + elif nlpos < crpos: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == crpos + 1: + # Found \r\n + endpos = crpos + 2 + break + else: + # Found \r + endpos = crpos + 1 + break + else: + # non-universal + pos = line.find(self._readnl) + if pos >= 0: + endpos = pos + len(self._readnl) + break + + if size >= 0 and len(line) >= size: + endpos = size # reached length size + break + + # No line ending seen yet - get more data' + while self._read_chunk(): + if self._decoded_chars: + break + if self._decoded_chars: + line += self._get_decoded_chars() + else: + # end of file + self._set_decoded_chars('') + self._snapshot = None + return line + + if size >= 0 and endpos > size: + endpos = size # don't exceed size + + # Rewind _decoded_chars to just after the line ending we found. + self._rewind_decoded_chars(len(line) - endpos) + return line[:endpos] + + @property + def newlines(self): + return self._decoder.newlines if self._decoder else None + + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + + +class StringIO(TextIOWrapper): + """Text I/O implementation using an in-memory buffer. + + The initial_value argument sets the value of object. The newline + argument is like the one of TextIOWrapper's constructor. + """ + + def __init__(self, initial_value="", newline="\n"): + super(StringIO, self).__init__(BytesIO(), + encoding="utf-8", + errors="surrogatepass", + newline=newline) + # Issue #5645: make universal newlines semantics the same as in the + # C version, even under Windows. + if newline is None: + self._writetranslate = False + if initial_value is not None: + if not isinstance(initial_value, str): + raise TypeError("initial_value must be str or None, not {0}" + .format(type(initial_value).__name__)) + self.write(initial_value) + self.seek(0) + + def getvalue(self): + self.flush() + decoder = self._decoder or self._get_decoder() + old_state = decoder.getstate() + decoder.reset() + try: + return decoder.decode(self.buffer.getvalue(), final=True) + finally: + decoder.setstate(old_state) + + def __repr__(self): + # TextIOWrapper tells the encoding in its repr. In StringIO, + # that's an implementation detail. + return object.__repr__(self) + + @property + def errors(self): + return None + + @property + def encoding(self): + return None + + def detach(self): + # This doesn't make sense on StringIO. + self._unsupported("detach") diff --git a/stdlib/_sitebuiltins.py b/stdlib/_sitebuiltins.py new file mode 100644 index 000000000..81b36efc6 --- /dev/null +++ b/stdlib/_sitebuiltins.py @@ -0,0 +1,91 @@ +""" +The objects used by the site module to add custom builtins. +""" + +# Those objects are almost immortal and they keep a reference to their module +# globals. Defining them in the site module would keep too many references +# alive. +# Note this means this module should also avoid keep things alive in its +# globals. + +import sys + +class Quitter(object): + def __init__(self, name, eof): + self.name = name + self.eof = eof + def __repr__(self): + return 'Use %s() or %s to exit' % (self.name, self.eof) + def __call__(self, code=None): + # Shells like IDLE catch the SystemExit, but listen when their + # stdin wrapper is closed. + try: + sys.stdin.close() + except: + pass + raise SystemExit(code) + + +class _Printer(object): + """interactive prompt objects for printing the license text, a list of + contributors and the copyright notice.""" + + MAXLINES = 23 + + def __init__(self, name, data, files=(), dirs=()): + import os + self.__name = name + self.__data = data + self.__lines = [] + self.__filenames = [os.path.join(dir, filename) + for dir in dirs + for filename in files] + + def __setup(self): + if self.__lines: + return + data = None + for filename in self.__filenames: + try: + with open(filename, encoding='utf-8') as fp: + data = fp.read() + break + except OSError: + pass + if not data: + data = self.__data + self.__lines = data.split('\n') + self.__linecnt = len(self.__lines) + + def __repr__(self): + self.__setup() + if len(self.__lines) <= self.MAXLINES: + return "\n".join(self.__lines) + else: + return "Type %s() to see the full %s text" % ((self.__name,)*2) + + def __call__(self): + from _pyrepl.pager import get_pager + self.__setup() + + pager = get_pager() + text = "\n".join(self.__lines) + pager(text, title=self.__name) + + +class _Helper(object): + """Define the builtin 'help'. + + This is a wrapper around pydoc.help that provides a helpful message + when 'help' is typed at the Python interactive prompt. + + Calling help() at the Python prompt starts an interactive help session. + Calling help(thing) prints help for the python object 'thing'. + """ + + def __repr__(self): + return "Type help() for interactive help, " \ + "or help(object) for help about object." + def __call__(self, *args, **kwds): + import pydoc + return pydoc.help(*args, **kwds) diff --git a/stdlib/importlib/__init__.py b/stdlib/importlib/__init__.py index 6b3e6d195..a7d57561e 100644 --- a/stdlib/importlib/__init__.py +++ b/stdlib/importlib/__init__.py @@ -1,49 +1,136 @@ -"""importlib: gopy-side stub. +"""A pure Python implementation of import.""" +__all__ = ['__import__', 'import_module', 'invalidate_caches', 'reload'] -CPython ships a multi-module package whose top-level __init__.py -re-exports a handful of names from ._bootstrap and ._bootstrap_external. -gopy's import system is the Go side of the runtime so the bootstrap -plumbing has no analogue here. Until the full importlib port lands, -this module exists so `import importlib` and `import importlib.machinery` -resolve for downstream consumers (notably inspect.py, which only reads -the SUFFIXES constants and all_suffixes()). +# Bootstrap help ##################################################### -CPython: Lib/importlib/__init__.py -""" +# Until bootstrapping is complete, DO NOT import any modules that attempt +# to import importlib._bootstrap (directly or indirectly). Since this +# partially initialised package would be present in sys.modules, those +# modules would get an uninitialised copy of the source version, instead +# of a fully initialised version (either the frozen one or the one +# initialised below if the frozen one is not available). +import _imp # Just the builtin component, NOT the full Python module +import sys -from . import machinery # bind importlib.machinery attribute eagerly +try: + import _frozen_importlib as _bootstrap +except ImportError: + from . import _bootstrap + _bootstrap._setup(sys, _imp) +else: + # importlib._bootstrap is the built-in import, ensure we don't create + # a second copy of the module. + _bootstrap.__name__ = 'importlib._bootstrap' + _bootstrap.__package__ = 'importlib' + try: + _bootstrap.__file__ = __file__.replace('__init__.py', '_bootstrap.py') + except NameError: + # __file__ is not guaranteed to be defined, e.g. if this code gets + # frozen by a tool like cx_Freeze. + pass + sys.modules['importlib._bootstrap'] = _bootstrap -__all__ = ['import_module', 'invalidate_caches', 'machinery', 'reload'] +try: + import _frozen_importlib_external as _bootstrap_external +except ImportError: + from . import _bootstrap_external + _bootstrap_external._set_bootstrap_module(_bootstrap) + _bootstrap._bootstrap_external = _bootstrap_external +else: + _bootstrap_external.__name__ = 'importlib._bootstrap_external' + _bootstrap_external.__package__ = 'importlib' + try: + _bootstrap_external.__file__ = __file__.replace('__init__.py', '_bootstrap_external.py') + except NameError: + # __file__ is not guaranteed to be defined, e.g. if this code gets + # frozen by a tool like cx_Freeze. + pass + sys.modules['importlib._bootstrap_external'] = _bootstrap_external + +# To simplify imports in test code +_pack_uint32 = _bootstrap_external._pack_uint32 +_unpack_uint32 = _bootstrap_external._unpack_uint32 + +# Fully bootstrapped at this point, import whatever you like, circular +# dependencies and startup overhead minimisation permitting :) + + +# Public API ######################################################### + +from ._bootstrap import __import__ + + +def invalidate_caches(): + """Call the invalidate_caches() method on all meta path finders stored in + sys.meta_path (where implemented).""" + for finder in sys.meta_path: + if hasattr(finder, 'invalidate_caches'): + finder.invalidate_caches() def import_module(name, package=None): - """Import a module by name. Mirrors importlib.import_module.""" - if package is not None and name.startswith('.'): - name = _resolve_name(name, package) - __import__(name) - import sys - return sys.modules[name] + """Import a module. + The 'package' argument is required when performing a relative import. It + specifies the package to use as the anchor point from which to resolve the + relative import to an absolute import. -def _resolve_name(name, package): + """ level = 0 - while level < len(name) and name[level] == '.': - level += 1 - if level == 0: - return name - bits = package.rsplit('.', level - 1) - if len(bits) < level: - raise ImportError("attempted relative import beyond top-level package") - base = bits[0] - return '{}.{}'.format(base, name[level:]) if name[level:] else base + if name.startswith('.'): + if not package: + raise TypeError("the 'package' argument is required to perform a " + f"relative import for {name!r}") + for character in name: + if character != '.': + break + level += 1 + return _bootstrap._gcd_import(name[level:], package, level) -def invalidate_caches(): - """No-op: gopy's import system has no path-importer cache to flush.""" - return None +_RELOADING = {} def reload(module): - """Reload a module. The actual machinery is wired up Go-side.""" - import _imp - return _imp.reload(module) + """Reload the module and return it. + + The module must have been successfully imported before. + + """ + try: + name = module.__spec__.name + except AttributeError: + try: + name = module.__name__ + except AttributeError: + raise TypeError("reload() argument must be a module") from None + + if sys.modules.get(name) is not module: + raise ImportError(f"module {name} not in sys.modules", name=name) + if name in _RELOADING: + return _RELOADING[name] + _RELOADING[name] = module + try: + parent_name = name.rpartition('.')[0] + if parent_name: + try: + parent = sys.modules[parent_name] + except KeyError: + raise ImportError(f"parent {parent_name!r} not in sys.modules", + name=parent_name) from None + else: + pkgpath = parent.__path__ + else: + pkgpath = None + target = module + spec = module.__spec__ = _bootstrap._find_spec(name, pkgpath, target) + if spec is None: + raise ModuleNotFoundError(f"spec not found for the module {name!r}", name=name) + _bootstrap._exec(spec, module) + # The module may have replaced itself in sys.modules! + return sys.modules[name] + finally: + try: + del _RELOADING[name] + except KeyError: + pass diff --git a/stdlib/importlib/_abc.py b/stdlib/importlib/_abc.py new file mode 100644 index 000000000..693b46611 --- /dev/null +++ b/stdlib/importlib/_abc.py @@ -0,0 +1,39 @@ +"""Subset of importlib.abc used to reduce importlib.util imports.""" +from . import _bootstrap +import abc + + +class Loader(metaclass=abc.ABCMeta): + + """Abstract base class for import loaders.""" + + def create_module(self, spec): + """Return a module to initialize and into which to load. + + This method should raise ImportError if anything prevents it + from creating a new module. It may return None to indicate + that the spec should create the new module. + """ + # By default, defer to default semantics for the new module. + return None + + # We don't define exec_module() here since that would break + # hasattr checks we do to support backward compatibility. + + def load_module(self, fullname): + """Return the loaded module. + + The module must be added to sys.modules and have import-related + attributes set properly. The fullname is a str. + + ImportError is raised on failure. + + This method is deprecated in favor of loader.exec_module(). If + exec_module() exists then it is used to provide a backwards-compatible + functionality for this method. + + """ + if not hasattr(self, 'exec_module'): + raise ImportError + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) diff --git a/stdlib/importlib/_bootstrap_external.py b/stdlib/importlib/_bootstrap_external.py index 6b6686eac..6a828ae75 100644 --- a/stdlib/importlib/_bootstrap_external.py +++ b/stdlib/importlib/_bootstrap_external.py @@ -1,21 +1,43 @@ -"""Pyc-writer slice of CPython's Lib/importlib/_bootstrap_external.py. +"""Core implementation of path-based import. -The vendored file ships only the parts py_compile needs: MAGIC_NUMBER, -_pack_uint32 / _unpack_uint*, _calc_mode, _write_atomic, _classify_pyc -plus the two pyc-data builders _code_to_timestamp_pyc / -_code_to_hash_pyc. The path / loader / spec scaffolding lives in the -companion util.py stub until spec 1711 wires the full module. +This module is NOT meant to be directly imported! It has been designed such +that it can be bootstrapped into Python as the implementation of import. As +such it requires the injection of specific modules and attributes in order to +work. One should use importlib as the public-facing version of this module. -CPython: Lib/importlib/_bootstrap_external.py """ +# IMPORTANT: Whenever making changes to this module, be sure to run a top-level +# `make regen-importlib` followed by `make` in order to get the frozen version +# of the module updated. Not doing so will result in the Makefile to fail for +# all others who don't have a ./python around to freeze the module in the early +# stages of compilation. +# + +# See importlib._setup() for what is injected into the global namespace. + +# When editing this code be aware that code executed at import time CANNOT +# reference any injected objects! This includes not only global code but also +# anything specified at the class level. + +# Module injected manually by _set_bootstrap_module() +_bootstrap = None +# Import builtin modules import _imp -import marshal -import os as _os +import _io import sys +import _warnings +import marshal _MS_WINDOWS = (sys.platform == 'win32') +if _MS_WINDOWS: + import nt as _os + import winreg +else: + import posix as _os + + if _MS_WINDOWS: path_separators = ['\\', '/'] else: @@ -25,45 +47,168 @@ path_sep = path_separators[0] path_sep_tuple = tuple(path_separators) path_separators = ''.join(path_separators) +_pathseps_with_colon = {f':{s}' for s in path_separators} + + +# Bootstrap-related code ###################################################### +_CASE_INSENSITIVE_PLATFORMS_STR_KEY = 'win', +_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY = 'cygwin', 'darwin', 'ios', 'tvos', 'watchos' +_CASE_INSENSITIVE_PLATFORMS = (_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY + + _CASE_INSENSITIVE_PLATFORMS_STR_KEY) + + +def _make_relax_case(): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS_STR_KEY): + key = 'PYTHONCASEOK' + else: + key = b'PYTHONCASEOK' + + def _relax_case(): + """True if filenames must be checked case-insensitively and ignore environment flags are not set.""" + return not sys.flags.ignore_environment and key in _os.environ + else: + def _relax_case(): + """True if filenames must be checked case-insensitively.""" + return False + return _relax_case + +_relax_case = _make_relax_case() -# CPython: Lib/importlib/_bootstrap_external.py:79 _pack_uint32 def _pack_uint32(x): """Convert a 32-bit integer to little-endian.""" return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little') -# CPython: Lib/importlib/_bootstrap_external.py:84 _unpack_uint64 def _unpack_uint64(data): """Convert 8 bytes in little-endian to an integer.""" assert len(data) == 8 return int.from_bytes(data, 'little') - -# CPython: Lib/importlib/_bootstrap_external.py:89 _unpack_uint32 def _unpack_uint32(data): """Convert 4 bytes in little-endian to an integer.""" assert len(data) == 4 return int.from_bytes(data, 'little') - -# CPython: Lib/importlib/_bootstrap_external.py:94 _unpack_uint16 def _unpack_uint16(data): """Convert 2 bytes in little-endian to an integer.""" assert len(data) == 2 return int.from_bytes(data, 'little') -# CPython: Lib/importlib/_bootstrap_external.py:200 _write_atomic +if _MS_WINDOWS: + def _path_join(*path_parts): + """Replacement for os.path.join().""" + if not path_parts: + return "" + if len(path_parts) == 1: + return path_parts[0] + root = "" + path = [] + for new_root, tail in map(_os._path_splitroot, path_parts): + if new_root.startswith(path_sep_tuple) or new_root.endswith(path_sep_tuple): + root = new_root.rstrip(path_separators) or root + path = [path_sep + tail] + elif new_root.endswith(':'): + if root.casefold() != new_root.casefold(): + # Drive relative paths have to be resolved by the OS, so we reset the + # tail but do not add a path_sep prefix. + root = new_root + path = [tail] + else: + path.append(tail) + else: + root = new_root or root + path.append(tail) + path = [p.rstrip(path_separators) for p in path if p] + if len(path) == 1 and not path[0]: + # Avoid losing the root's trailing separator when joining with nothing + return root + path_sep + return root + path_sep.join(path) + +else: + def _path_join(*path_parts): + """Replacement for os.path.join().""" + return path_sep.join([part.rstrip(path_separators) + for part in path_parts if part]) + + +def _path_split(path): + """Replacement for os.path.split().""" + i = max(path.rfind(p) for p in path_separators) + if i < 0: + return '', path + return path[:i], path[i + 1:] + + +def _path_stat(path): + """Stat the path. + + Made a separate function to make it easier to override in experiments + (e.g. cache stat results). + + """ + return _os.stat(path) + + +def _path_is_mode_type(path, mode): + """Test whether the path is the specified mode type.""" + try: + stat_info = _path_stat(path) + except OSError: + return False + return (stat_info.st_mode & 0o170000) == mode + + +def _path_isfile(path): + """Replacement for os.path.isfile.""" + return _path_is_mode_type(path, 0o100000) + + +def _path_isdir(path): + """Replacement for os.path.isdir.""" + if not path: + path = _os.getcwd() + return _path_is_mode_type(path, 0o040000) + + +if _MS_WINDOWS: + def _path_isabs(path): + """Replacement for os.path.isabs.""" + if not path: + return False + root = _os._path_splitroot(path)[0].replace('/', '\\') + return len(root) > 1 and (root.startswith('\\\\') or root.endswith('\\')) + +else: + def _path_isabs(path): + """Replacement for os.path.isabs.""" + return path.startswith(path_separators) + + +def _path_abspath(path): + """Replacement for os.path.abspath.""" + if not _path_isabs(path): + for sep in path_separators: + path = path.removeprefix(f".{sep}") + return _path_join(_os.getcwd(), path) + else: + return path + + def _write_atomic(path, data, mode=0o666): """Best-effort function to write data to a path atomically. Be prepared to handle a FileExistsError if concurrent writing of the temporary file is attempted.""" + # id() is used to generate a pseudo-random filename. path_tmp = f'{path}.{id(path)}' fd = _os.open(path_tmp, _os.O_EXCL | _os.O_CREAT | _os.O_WRONLY, mode & 0o666) try: - with open(fd, 'wb') as file: + # We first write data to a temporary file, and then use os.replace() to + # perform an atomic rename. + with _io.open(fd, 'wb') as file: file.write(data) _os.replace(path_tmp, path) except OSError: @@ -74,21 +219,169 @@ def _write_atomic(path, data, mode=0o666): raise -# CPython: Lib/importlib/_bootstrap_external.py:224 MAGIC_NUMBER +_code_type = type(_write_atomic.__code__) + MAGIC_NUMBER = _imp.pyc_magic_number_token.to_bytes(4, 'little') _PYCACHE = '__pycache__' _OPT = 'opt-' SOURCE_SUFFIXES = ['.py'] +if _MS_WINDOWS: + SOURCE_SUFFIXES.append('.pyw') + +EXTENSION_SUFFIXES = _imp.extension_suffixes() + BYTECODE_SUFFIXES = ['.pyc'] +# Deprecated. +DEBUG_BYTECODE_SUFFIXES = OPTIMIZED_BYTECODE_SUFFIXES = BYTECODE_SUFFIXES + +def cache_from_source(path, debug_override=None, *, optimization=None): + """Given the path to a .py file, return the path to its .pyc file. + + The .py file does not need to exist; this simply returns the path to the + .pyc file calculated as if the .py file were imported. + + The 'optimization' parameter controls the presumed optimization level of + the bytecode file. If 'optimization' is not None, the string representation + of the argument is taken and verified to be alphanumeric (else ValueError + is raised). + + The debug_override parameter is deprecated. If debug_override is not None, + a True value is the same as setting 'optimization' to the empty string + while a False value is equivalent to setting 'optimization' to '1'. + + If sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if debug_override is not None: + _warnings.warn('the debug_override parameter is deprecated; use ' + "'optimization' instead", DeprecationWarning) + if optimization is not None: + message = 'debug_override or optimization must be set to None' + raise TypeError(message) + optimization = '' if debug_override else 1 + path = _os.fspath(path) + head, tail = _path_split(path) + base, sep, rest = tail.rpartition('.') + tag = sys.implementation.cache_tag + if tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + almost_filename = ''.join([(base if base else rest), sep, tag]) + if optimization is None: + if sys.flags.optimize == 0: + optimization = '' + else: + optimization = sys.flags.optimize + optimization = str(optimization) + if optimization != '': + if not optimization.isalnum(): + raise ValueError(f'{optimization!r} is not alphanumeric') + almost_filename = f'{almost_filename}.{_OPT}{optimization}' + filename = almost_filename + BYTECODE_SUFFIXES[0] + if sys.pycache_prefix is not None: + # We need an absolute path to the py file to avoid the possibility of + # collisions within sys.pycache_prefix, if someone has two different + # `foo/bar.py` on their system and they import both of them using the + # same sys.pycache_prefix. Let's say sys.pycache_prefix is + # `C:\Bytecode`; the idea here is that if we get `Foo\Bar`, we first + # make it absolute (`C:\Somewhere\Foo\Bar`), then make it root-relative + # (`Somewhere\Foo\Bar`), so we end up placing the bytecode file in an + # unambiguous `C:\Bytecode\Somewhere\Foo\Bar\`. + head = _path_abspath(head) + + # Strip initial drive from a Windows path. We know we have an absolute + # path here, so the second part of the check rules out a POSIX path that + # happens to contain a colon at the second character. + # Slicing avoids issues with an empty (or short) `head`. + if head[1:2] == ':' and head[0:1] not in path_separators: + head = head[2:] + + # Strip initial path separator from `head` to complete the conversion + # back to a root-relative path before joining. + return _path_join( + sys.pycache_prefix, + head.lstrip(path_separators), + filename, + ) + return _path_join(head, _PYCACHE, filename) + + +def source_from_cache(path): + """Given the path to a .pyc. file, return the path to its .py file. + + The .pyc file does not need to exist; this simply returns the path to + the .py file calculated to correspond to the .pyc file. If path does + not conform to PEP 3147/488 format, ValueError will be raised. If + sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if sys.implementation.cache_tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + path = _os.fspath(path) + head, pycache_filename = _path_split(path) + found_in_pycache_prefix = False + if sys.pycache_prefix is not None: + stripped_path = sys.pycache_prefix.rstrip(path_separators) + if head.startswith(stripped_path + path_sep): + head = head[len(stripped_path):] + found_in_pycache_prefix = True + if not found_in_pycache_prefix: + head, pycache = _path_split(head) + if pycache != _PYCACHE: + raise ValueError(f'{_PYCACHE} not bottom-level directory in ' + f'{path!r}') + dot_count = pycache_filename.count('.') + if dot_count not in {2, 3}: + raise ValueError(f'expected only 2 or 3 dots in {pycache_filename!r}') + elif dot_count == 3: + optimization = pycache_filename.rsplit('.', 2)[-2] + if not optimization.startswith(_OPT): + raise ValueError("optimization portion of filename does not start " + f"with {_OPT!r}") + opt_level = optimization[len(_OPT):] + if not opt_level.isalnum(): + raise ValueError(f"optimization level {optimization!r} is not an " + "alphanumeric value") + base_filename = pycache_filename.partition('.')[0] + return _path_join(head, base_filename + SOURCE_SUFFIXES[0]) + + +def _get_sourcefile(bytecode_path): + """Convert a bytecode file path to a source path (if possible). + + This function exists purely for backwards-compatibility for + PyImport_ExecCodeModuleWithFilenames() in the C API. + + """ + if len(bytecode_path) == 0: + return None + rest, _, extension = bytecode_path.rpartition('.') + if not rest or extension.lower()[-3:-1] != 'py': + return bytecode_path + try: + source_path = source_from_cache(bytecode_path) + except (NotImplementedError, ValueError): + source_path = bytecode_path[:-1] + return source_path if _path_isfile(source_path) else bytecode_path + + +def _get_cached(filename): + if filename.endswith(tuple(SOURCE_SUFFIXES)): + try: + return cache_from_source(filename) + except NotImplementedError: + pass + elif filename.endswith(tuple(BYTECODE_SUFFIXES)): + return filename + else: + return None -# CPython: Lib/importlib/_bootstrap_external.py:381 _calc_mode def _calc_mode(path): """Calculate the mode permissions for a bytecode file.""" try: - mode = _os.stat(path).st_mode + mode = _path_stat(path).st_mode except OSError: mode = 0o666 # We always ensure write access so we can update cached files @@ -97,7 +390,37 @@ def _calc_mode(path): return mode -# CPython: Lib/importlib/_bootstrap_external.py:424 _classify_pyc +def _check_name(method): + """Decorator to verify that the module being requested matches the one the + loader can handle. + + The first argument (self) must define _name which the second argument is + compared against. If the comparison fails then ImportError is raised. + + """ + def _check_name_wrapper(self, name=None, *args, **kwargs): + if name is None: + name = self.name + elif self.name != name: + raise ImportError('loader for %s cannot handle %s' % + (self.name, name), name=name) + return method(self, name, *args, **kwargs) + + # FIXME: @_check_name is used to define class methods before the + # _bootstrap module is set by _set_bootstrap_module(). + if _bootstrap is not None: + _wrap = _bootstrap._wrap + else: + def _wrap(new, old): + for replace in ['__module__', '__name__', '__qualname__', '__doc__']: + if hasattr(old, replace): + setattr(new, replace, getattr(old, replace)) + new.__dict__.update(old.__dict__) + + _wrap(_check_name_wrapper, method) + return _check_name_wrapper + + def _classify_pyc(data, name, exc_details): """Perform basic validity checking of a pyc header and return the flags field, which determines how the pyc should be further validated against the source. @@ -117,9 +440,11 @@ def _classify_pyc(data, name, exc_details): magic = data[:4] if magic != MAGIC_NUMBER: message = f'bad magic number in {name!r}: {magic!r}' + _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) if len(data) < 16: message = f'reached EOF while reading pyc header of {name!r}' + _bootstrap._verbose_message('{}', message) raise EOFError(message) flags = _unpack_uint32(data[4:8]) # Only the first two flags are defined. @@ -129,22 +454,50 @@ def _classify_pyc(data, name, exc_details): return flags -# CPython: Lib/importlib/_bootstrap_external.py:457 _validate_timestamp_pyc def _validate_timestamp_pyc(data, source_mtime, source_size, name, exc_details): - """Validate a pyc against the source last-modified time.""" + """Validate a pyc against the source last-modified time. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_mtime* is the last modified timestamp of the source file. + + *source_size* is None or the size of the source file in bytes. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + + """ if _unpack_uint32(data[8:12]) != (source_mtime & 0xFFFFFFFF): message = f'bytecode is stale for {name!r}' + _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) if (source_size is not None and _unpack_uint32(data[12:16]) != (source_size & 0xFFFFFFFF)): raise ImportError(f'bytecode is stale for {name!r}', **exc_details) -# CPython: Lib/importlib/_bootstrap_external.py:485 _validate_hash_pyc def _validate_hash_pyc(data, source_hash, name, exc_details): """Validate a hash-based pyc by checking the real source hash against the one in the pyc header. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_hash* is the importlib.util.source_hash() of the source file. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + """ if data[8:16] != source_hash: raise ImportError( @@ -153,7 +506,19 @@ def _validate_hash_pyc(data, source_hash, name, exc_details): ) -# CPython: Lib/importlib/_bootstrap_external.py:522 _code_to_timestamp_pyc +def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None): + """Compile bytecode as found in a pyc.""" + code = marshal.loads(data) + if isinstance(code, _code_type): + _bootstrap._verbose_message('code object from {!r}', bytecode_path) + if source_path is not None: + _imp._fix_co_filename(code, source_path) + return code + else: + raise ImportError(f'Non-code object in {bytecode_path!r}', + name=name, path=bytecode_path) + + def _code_to_timestamp_pyc(code, mtime=0, source_size=0): "Produce the data for a timestamp-based pyc." data = bytearray(MAGIC_NUMBER) @@ -164,7 +529,6 @@ def _code_to_timestamp_pyc(code, mtime=0, source_size=0): return data -# CPython: Lib/importlib/_bootstrap_external.py:532 _code_to_hash_pyc def _code_to_hash_pyc(code, source_hash, checked=True): "Produce the data for a hash-based pyc." data = bytearray(MAGIC_NUMBER) @@ -176,216 +540,1023 @@ def _code_to_hash_pyc(code, source_hash, checked=True): return data -# CPython exposes this as importlib.util.source_hash. We thread it -# through the same _imp builtin _bootstrap_external relies on. -# -# CPython: Lib/importlib/util.py source_hash (re-export of _imp.source_hash) -def source_hash(source_bytes): - """Return the hash of *source_bytes* as bytes.""" - return _imp.source_hash(_RAW_MAGIC_NUMBER, source_bytes) +def decode_source(source_bytes): + """Decode bytes representing source code and return the string. + Universal newline support is used in the decoding. + """ + import tokenize # To avoid bootstrap issues. + source_bytes_readline = _io.BytesIO(source_bytes).readline + encoding = tokenize.detect_encoding(source_bytes_readline) + newline_decoder = _io.IncrementalNewlineDecoder(None, True) + return newline_decoder.decode(source_bytes.decode(encoding[0])) -# _RAW_MAGIC_NUMBER mirrors CPython: the integer form of MAGIC_NUMBER is -# fed straight back into _imp.source_hash as the SipHash key. Keeping -# the conversion in one place avoids endian-swap mistakes at call sites. -# -# CPython: Lib/importlib/_bootstrap_external.py:223 _RAW_MAGIC_NUMBER -_RAW_MAGIC_NUMBER = _imp.pyc_magic_number_token +# Module specifications ####################################################### -# CPython: Lib/importlib/_bootstrap_external.py:543 decode_source -def decode_source(source_bytes): - """Decode bytes representing source code and return the string. +_POPULATE = object() + + +def spec_from_file_location(name, location=None, *, loader=None, + submodule_search_locations=_POPULATE): + """Return a module spec based on a file location. + + To indicate that the module is a package, set + submodule_search_locations to a list of directory paths. An + empty list is sufficient, though its not otherwise useful to the + import system. + + The loader must take a spec as its only __init__() arg. - Universal newline support is used in the decoding. """ - # gopy doesn't have tokenize.detect_encoding wired through this path - # yet, so fall back to utf-8 (matching what test.support feeds in). - if isinstance(source_bytes, str): - return source_bytes - return source_bytes.decode('utf-8') + if location is None: + # The caller may simply want a partially populated location- + # oriented spec. So we set the location to a bogus value and + # fill in as much as we can. + location = '' + if hasattr(loader, 'get_filename'): + # ExecutionLoader + try: + location = loader.get_filename(name) + except ImportError: + pass + else: + location = _os.fspath(location) + try: + location = _path_abspath(location) + except OSError: + pass + + # If the location is on the filesystem, but doesn't actually exist, + # we could return None here, indicating that the location is not + # valid. However, we don't have a good way of testing since an + # indirect location (e.g. a zip file or URL) will look like a + # non-existent file relative to the filesystem. + + spec = _bootstrap.ModuleSpec(name, loader, origin=location) + spec._set_fileattr = True + + # Pick a loader if one wasn't provided. + if loader is None: + for loader_class, suffixes in _get_supported_file_loaders(): + if location.endswith(tuple(suffixes)): + loader = loader_class(name, location) + spec.loader = loader + break + else: + return None + + # Set submodule_search_paths appropriately. + if submodule_search_locations is _POPULATE: + # Check the loader. + if hasattr(loader, 'is_package'): + try: + is_package = loader.is_package(name) + except ImportError: + pass + else: + if is_package: + spec.submodule_search_locations = [] + else: + spec.submodule_search_locations = submodule_search_locations + if spec.submodule_search_locations == []: + if location: + dirname = _path_split(location)[0] + spec.submodule_search_locations.append(dirname) + return spec -# CPython: Lib/importlib/_bootstrap_external.py:912 FileLoader -class FileLoader: - """Base file loader class. - The gopy port skips the readers/finders machinery and keeps only the - file-access shape py_compile needs. +def _bless_my_loader(module_globals): + """Helper function for _warnings.c + + See GH#97850 for details. """ + # 2022-10-06(warsaw): For now, this helper is only used in _warnings.c and + # that use case only has the module globals. This function could be + # extended to accept either that or a module object. However, in the + # latter case, it would be better to raise certain exceptions when looking + # at a module, which should have either a __loader__ or __spec__.loader. + # For backward compatibility, it is possible that we'll get an empty + # dictionary for the module globals, and that cannot raise an exception. + if not isinstance(module_globals, dict): + return None + + missing = object() + loader = module_globals.get('__loader__', None) + spec = module_globals.get('__spec__', missing) + + if loader is None: + if spec is missing: + # If working with a module: + # raise AttributeError('Module globals is missing a __spec__') + return None + elif spec is None: + raise ValueError('Module globals is missing a __spec__.loader') + + spec_loader = getattr(spec, 'loader', missing) + + if spec_loader in (missing, None): + if loader is None: + exc = AttributeError if spec_loader is missing else ValueError + raise exc('Module globals is missing a __spec__.loader') + _warnings.warn( + 'Module globals is missing a __spec__.loader', + DeprecationWarning) + spec_loader = loader + + assert spec_loader is not None + if loader is not None and loader != spec_loader: + _warnings.warn( + 'Module globals; __loader__ != __spec__.loader', + DeprecationWarning) + return loader + + return spec_loader + + +# Loaders ##################################################################### + +class WindowsRegistryFinder: + + """Meta path finder for modules declared in the Windows registry.""" + + REGISTRY_KEY = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}') + REGISTRY_KEY_DEBUG = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}\\Debug') + DEBUG_BUILD = (_MS_WINDOWS and '_d.pyd' in EXTENSION_SUFFIXES) + + @staticmethod + def _open_registry(key): + try: + return winreg.OpenKey(winreg.HKEY_CURRENT_USER, key) + except OSError: + return winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key) + + @classmethod + def _search_registry(cls, fullname): + if cls.DEBUG_BUILD: + registry_key = cls.REGISTRY_KEY_DEBUG + else: + registry_key = cls.REGISTRY_KEY + key = registry_key.format(fullname=fullname, + sys_version='%d.%d' % sys.version_info[:2]) + try: + with cls._open_registry(key) as hkey: + filepath = winreg.QueryValue(hkey, '') + except OSError: + return None + return filepath + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + _warnings.warn('importlib.machinery.WindowsRegistryFinder is ' + 'deprecated; use site configuration instead. ' + 'Future versions of Python may not enable this ' + 'finder by default.', + DeprecationWarning, stacklevel=2) + + filepath = cls._search_registry(fullname) + if filepath is None: + return None + try: + _path_stat(filepath) + except OSError: + return None + for loader, suffixes in _get_supported_file_loaders(): + if filepath.endswith(tuple(suffixes)): + spec = _bootstrap.spec_from_loader(fullname, + loader(fullname, filepath), + origin=filepath) + return spec + + +class _LoaderBasics: + + """Base class of common code needed by both SourceLoader and + SourcelessFileLoader.""" + + def is_package(self, fullname): + """Concrete implementation of InspectLoader.is_package by checking if + the path returned by get_filename has a filename of '__init__.py'.""" + filename = _path_split(self.get_filename(fullname))[1] + filename_base = filename.rsplit('.', 1)[0] + tail_name = fullname.rpartition('.')[2] + return filename_base == '__init__' and tail_name != '__init__' + + def create_module(self, spec): + """Use default semantics for module creation.""" + + def exec_module(self, module): + """Execute the module.""" + code = self.get_code(module.__name__) + if code is None: + raise ImportError(f'cannot load module {module.__name__!r} when ' + 'get_code() returns None') + _bootstrap._call_with_frames_removed(exec, code, module.__dict__) + + def load_module(self, fullname): + """This method is deprecated.""" + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) + + +class SourceLoader(_LoaderBasics): + + def path_mtime(self, path): + """Optional method that returns the modification time (an int) for the + specified path (a str). + + Raises OSError when the path cannot be handled. + """ + raise OSError + + def path_stats(self, path): + """Optional method returning a metadata dict for the specified + path (a str). + + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + + Implementing this method allows the loader to read bytecode files. + Raises OSError when the path cannot be handled. + """ + return {'mtime': self.path_mtime(path)} + + def _cache_bytecode(self, source_path, cache_path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + + The source path is needed in order to correctly transfer permissions + """ + # For backwards compatibility, we delegate to set_data() + return self.set_data(cache_path, data) + + def set_data(self, path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + """ + + + def get_source(self, fullname): + """Concrete implementation of InspectLoader.get_source.""" + path = self.get_filename(fullname) + try: + source_bytes = self.get_data(path) + except OSError as exc: + raise ImportError('source not available through get_data()', + name=fullname) from exc + return decode_source(source_bytes) + + def source_to_code(self, data, path, *, _optimize=-1): + """Return the code object compiled from source. + + The 'data' argument can be any object type that compile() supports. + """ + return _bootstrap._call_with_frames_removed(compile, data, path, 'exec', + dont_inherit=True, optimize=_optimize) + + def get_code(self, fullname): + """Concrete implementation of InspectLoader.get_code. + + Reading of bytecode requires path_stats to be implemented. To write + bytecode, set_data must also be implemented. + + """ + source_path = self.get_filename(fullname) + source_mtime = None + source_bytes = None + source_hash = None + hash_based = False + check_source = True + try: + bytecode_path = cache_from_source(source_path) + except NotImplementedError: + bytecode_path = None + else: + try: + st = self.path_stats(source_path) + except OSError: + pass + else: + source_mtime = int(st['mtime']) + try: + data = self.get_data(bytecode_path) + except OSError: + pass + else: + exc_details = { + 'name': fullname, + 'path': bytecode_path, + } + try: + flags = _classify_pyc(data, fullname, exc_details) + bytes_data = memoryview(data)[16:] + hash_based = flags & 0b1 != 0 + if hash_based: + check_source = flags & 0b10 != 0 + if (_imp.check_hash_based_pycs != 'never' and + (check_source or + _imp.check_hash_based_pycs == 'always')): + source_bytes = self.get_data(source_path) + source_hash = _imp.source_hash( + _imp.pyc_magic_number_token, + source_bytes, + ) + _validate_hash_pyc(data, source_hash, fullname, + exc_details) + else: + _validate_timestamp_pyc( + data, + source_mtime, + st['size'], + fullname, + exc_details, + ) + except (ImportError, EOFError): + pass + else: + _bootstrap._verbose_message('{} matches {}', bytecode_path, + source_path) + return _compile_bytecode(bytes_data, name=fullname, + bytecode_path=bytecode_path, + source_path=source_path) + if source_bytes is None: + source_bytes = self.get_data(source_path) + code_object = self.source_to_code(source_bytes, source_path) + _bootstrap._verbose_message('code object from {}', source_path) + if (not sys.dont_write_bytecode and bytecode_path is not None and + source_mtime is not None): + if hash_based: + if source_hash is None: + source_hash = _imp.source_hash(_imp.pyc_magic_number_token, + source_bytes) + data = _code_to_hash_pyc(code_object, source_hash, check_source) + else: + data = _code_to_timestamp_pyc(code_object, source_mtime, + len(source_bytes)) + try: + self._cache_bytecode(source_path, bytecode_path, data) + except NotImplementedError: + pass + return code_object + + +class FileLoader: + + """Base file loader class which implements the loader protocol methods that + require file system usage.""" def __init__(self, fullname, path): + """Cache the module name and the path to the file found by the + finder.""" self.name = fullname self.path = path - def get_filename(self, fullname=None): - if fullname is not None and fullname != self.name: - raise ImportError( - f'loader for {self.name} cannot handle {fullname}', - name=fullname, - ) + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) + + def __hash__(self): + return hash(self.name) ^ hash(self.path) + + @_check_name + def load_module(self, fullname): + """Load a module from a file. + + This method is deprecated. Use exec_module() instead. + + """ + # The only reason for this method is for the name check. + # Issue #14857: Avoid the zero-argument form of super so the implementation + # of that form can be updated without breaking the frozen module. + return super(FileLoader, self).load_module(fullname) + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" return self.path def get_data(self, path): """Return the data from path as raw bytes.""" - with open(path, 'rb') as file: - return file.read() + if isinstance(self, (SourceLoader, SourcelessFileLoader, ExtensionFileLoader)): + with _io.open_code(str(path)) as file: + return file.read() + else: + with _io.FileIO(path, 'r') as file: + return file.read() + @_check_name + def get_resource_reader(self, module): + from importlib.readers import FileReader + return FileReader(self) -# CPython: Lib/importlib/_bootstrap_external.py:962 SourceFileLoader -class SourceFileLoader(FileLoader): - """Concrete loader for source files. Implements the slice of the - SourceLoader / FileLoader contract py_compile.compile() drives: - get_data, get_filename, source_to_code, path_stats. - """ - # CPython: Lib/importlib/_bootstrap_external.py:818 source_to_code - def source_to_code(self, data, path, *, _optimize=-1): - """Return the code object compiled from source.""" - return compile(data, path, 'exec', - dont_inherit=True, optimize=_optimize) +class SourceFileLoader(FileLoader, SourceLoader): + + """Concrete implementation of SourceLoader using the file system.""" - # CPython: Lib/importlib/_bootstrap_external.py:966 path_stats def path_stats(self, path): - st = _os.stat(path) + """Return the metadata for the path.""" + st = _path_stat(path) return {'mtime': st.st_mtime, 'size': st.st_size} - # CPython: Lib/importlib/_bootstrap_external.py:977 SourceFileLoader.get_code - def get_code(self, fullname=None): - if fullname is None: - fullname = self.name - source = self.get_data(self.get_filename(fullname)) - return self.source_to_code(source, self.path) + def _cache_bytecode(self, source_path, bytecode_path, data): + # Adapt between the two APIs + mode = _calc_mode(source_path) + return self.set_data(bytecode_path, data, _mode=mode) + + def set_data(self, path, data, *, _mode=0o666): + """Write bytes data to a file.""" + parent, filename = _path_split(path) + path_parts = [] + # Figure out what directories are missing. + while parent and not _path_isdir(parent): + parent, part = _path_split(parent) + path_parts.append(part) + # Create needed directories. + for part in reversed(path_parts): + parent = _path_join(parent, part) + try: + _os.mkdir(parent) + except FileExistsError: + # Probably another Python process already created the dir. + continue + except OSError as exc: + # Could be a permission error, read-only filesystem: just forget + # about writing the data. + _bootstrap._verbose_message('could not create {!r}: {!r}', + parent, exc) + return + try: + _write_atomic(path, data, _mode) + _bootstrap._verbose_message('created {!r}', path) + except OSError as exc: + # Same as above: just don't write the bytecode. + _bootstrap._verbose_message('could not create {!r}: {!r}', path, + exc) + + +class SourcelessFileLoader(FileLoader, _LoaderBasics): + + """Loader which handles sourceless file imports.""" + + def get_code(self, fullname): + path = self.get_filename(fullname) + data = self.get_data(path) + # Call _classify_pyc to do basic validation of the pyc but ignore the + # result. There's no source to check against. + exc_details = { + 'name': fullname, + 'path': path, + } + _classify_pyc(data, fullname, exc_details) + return _compile_bytecode( + memoryview(data)[16:], + name=fullname, + bytecode_path=path, + ) - # CPython: Lib/importlib/_bootstrap_external.py:886 SourceLoader.exec_module - def exec_module(self, module): - code = self.get_code(module.__name__) - exec(code, module.__dict__) + def get_source(self, fullname): + """Return None as there is no source code.""" + return None -# CPython: Lib/importlib/_bootstrap_external.py:101 _path_join -def _path_join(*path_parts): - """Replacement for os.path.join().""" - return path_sep.join([part.rstrip(path_separators) - for part in path_parts if part]) +class ExtensionFileLoader(FileLoader, _LoaderBasics): + """Loader for extension modules. -# CPython: Lib/importlib/_bootstrap_external.py:107 _path_split -def _path_split(path): - """Replacement for os.path.split().""" - i = max(path.rfind(p) for p in path_separators) - if i < 0: - return '', path - return path[:i], path[i + 1:] + The constructor is designed to work with FileFinder. + """ -# CPython: Lib/importlib/_bootstrap_external.py:202 _path_isabs -def _path_isabs(path): - """Replacement for os.path.isabs.""" - if not path: - return False - return path[0] in path_separators + def __init__(self, name, path): + self.name = name + self.path = path + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) -# CPython: Lib/importlib/_bootstrap_external.py:217 _path_abspath -def _path_abspath(path): - """Replacement for os.path.abspath.""" - if not _path_isabs(path): - for sep in path_separators: - path = path.removeprefix(f".{sep}") - return _path_join(_os.getcwd(), path) - else: - return path + def __hash__(self): + return hash(self.name) ^ hash(self.path) + def create_module(self, spec): + """Create an uninitialized extension module""" + module = _bootstrap._call_with_frames_removed( + _imp.create_dynamic, spec) + _bootstrap._verbose_message('extension module {!r} loaded from {!r}', + spec.name, self.path) + return module -# CPython: Lib/importlib/_bootstrap_external.py:239 cache_from_source -def cache_from_source(path, debug_override=None, *, optimization=None): - """Given the path to a .py file, return the path to its .pyc file. + def exec_module(self, module): + """Initialize an extension module""" + _bootstrap._call_with_frames_removed(_imp.exec_dynamic, module) + _bootstrap._verbose_message('extension module {!r} executed from {!r}', + self.name, self.path) + + def is_package(self, fullname): + """Return True if the extension module is a package.""" + file_name = _path_split(self.path)[1] + return any(file_name == '__init__' + suffix + for suffix in EXTENSION_SUFFIXES) + + def get_code(self, fullname): + """Return None as an extension module cannot create a code object.""" + return None + + def get_source(self, fullname): + """Return None as extension modules have no source code.""" + return None + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" + return self.path - The .py file does not need to exist; this simply returns the path to the - .pyc file calculated as if the .py file were imported. - The 'optimization' parameter controls the presumed optimization level of - the bytecode file. If 'optimization' is not None, the string representation - of the argument is taken and verified to be alphanumeric (else ValueError - is raised). +class _NamespacePath: + """Represents a namespace package's path. It uses the module name + to find its parent module, and from there it looks up the parent's + __path__. When this changes, the module's own path is recomputed, + using path_finder. For top-level modules, the parent module's path + is sys.path.""" + + # When invalidate_caches() is called, this epoch is incremented + # https://bugs.python.org/issue45703 + _epoch = 0 + + def __init__(self, name, path, path_finder): + self._name = name + self._path = path + self._last_parent_path = tuple(self._get_parent_path()) + self._last_epoch = self._epoch + self._path_finder = path_finder + + def _find_parent_path_names(self): + """Returns a tuple of (parent-module-name, parent-path-attr-name)""" + parent, dot, me = self._name.rpartition('.') + if dot == '': + # This is a top-level module. sys.path contains the parent path. + return 'sys', 'path' + # Not a top-level module. parent-module.__path__ contains the + # parent path. + return parent, '__path__' + + def _get_parent_path(self): + parent_module_name, path_attr_name = self._find_parent_path_names() + return getattr(sys.modules[parent_module_name], path_attr_name) + + def _recalculate(self): + # If the parent's path has changed, recalculate _path + parent_path = tuple(self._get_parent_path()) # Make a copy + if parent_path != self._last_parent_path or self._epoch != self._last_epoch: + spec = self._path_finder(self._name, parent_path) + # Note that no changes are made if a loader is returned, but we + # do remember the new parent path + if spec is not None and spec.loader is None: + if spec.submodule_search_locations: + self._path = spec.submodule_search_locations + self._last_parent_path = parent_path # Save the copy + self._last_epoch = self._epoch + return self._path + + def __iter__(self): + return iter(self._recalculate()) + + def __getitem__(self, index): + return self._recalculate()[index] + + def __setitem__(self, index, path): + self._path[index] = path + + def __len__(self): + return len(self._recalculate()) + + def __repr__(self): + return f'_NamespacePath({self._path!r})' + + def __contains__(self, item): + return item in self._recalculate() + + def append(self, item): + self._path.append(item) + + +# This class is actually exposed publicly in a namespace package's __loader__ +# attribute, so it should be available through a non-private name. +# https://github.com/python/cpython/issues/92054 +class NamespaceLoader: + def __init__(self, name, path, path_finder): + self._path = _NamespacePath(name, path, path_finder) + + def is_package(self, fullname): + return True + + def get_source(self, fullname): + return '' + + def get_code(self, fullname): + return compile('', '', 'exec', dont_inherit=True) + + def create_module(self, spec): + """Use default semantics for module creation.""" - The debug_override parameter is deprecated. If debug_override is not None, - a True value is the same as setting 'optimization' to the empty string - while a False value is equivalent to setting 'optimization' to '1'. + def exec_module(self, module): + pass + + def load_module(self, fullname): + """Load a namespace module. + + This method is deprecated. Use exec_module() instead. + + """ + # The import system never calls this method. + _bootstrap._verbose_message('namespace module loaded with path {!r}', + self._path) + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) + + def get_resource_reader(self, module): + from importlib.readers import NamespaceReader + return NamespaceReader(self._path) + + +# We use this exclusively in module_from_spec() for backward-compatibility. +_NamespaceLoader = NamespaceLoader + + +# Finders ##################################################################### + +class PathFinder: + + """Meta path finder for sys.path and package __path__ attributes.""" + + @staticmethod + def invalidate_caches(): + """Call the invalidate_caches() method on all path entry finders + stored in sys.path_importer_cache (where implemented).""" + for name, finder in list(sys.path_importer_cache.items()): + # Drop entry if finder name is a relative path. The current + # working directory may have changed. + if finder is None or not _path_isabs(name): + del sys.path_importer_cache[name] + elif hasattr(finder, 'invalidate_caches'): + finder.invalidate_caches() + # Also invalidate the caches of _NamespacePaths + # https://bugs.python.org/issue45703 + _NamespacePath._epoch += 1 + + from importlib.metadata import MetadataPathFinder + MetadataPathFinder.invalidate_caches() + + @staticmethod + def _path_hooks(path): + """Search sys.path_hooks for a finder for 'path'.""" + if sys.path_hooks is not None and not sys.path_hooks: + _warnings.warn('sys.path_hooks is empty', ImportWarning) + for hook in sys.path_hooks: + try: + return hook(path) + except ImportError: + continue + else: + return None + + @classmethod + def _path_importer_cache(cls, path): + """Get the finder for the path entry from sys.path_importer_cache. + + If the path entry is not in the cache, find the appropriate finder + and cache it. If no finder is available, store None. + + """ + if path == '': + try: + path = _os.getcwd() + except (FileNotFoundError, PermissionError): + # Don't cache the failure as the cwd can easily change to + # a valid directory later on. + return None + try: + finder = sys.path_importer_cache[path] + except KeyError: + finder = cls._path_hooks(path) + sys.path_importer_cache[path] = finder + return finder + + @classmethod + def _get_spec(cls, fullname, path, target=None): + """Find the loader or namespace_path for this module/package name.""" + # If this ends up being a namespace package, namespace_path is + # the list of paths that will become its __path__ + namespace_path = [] + for entry in path: + if not isinstance(entry, str): + continue + finder = cls._path_importer_cache(entry) + if finder is not None: + spec = finder.find_spec(fullname, target) + if spec is None: + continue + if spec.loader is not None: + return spec + portions = spec.submodule_search_locations + if portions is None: + raise ImportError('spec missing loader') + # This is possibly part of a namespace package. + # Remember these path entries (if any) for when we + # create a namespace package, and continue iterating + # on path. + namespace_path.extend(portions) + else: + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = namespace_path + return spec + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + """Try to find a spec for 'fullname' on sys.path or 'path'. + + The search is based on sys.path_hooks and sys.path_importer_cache. + """ + if path is None: + path = sys.path + spec = cls._get_spec(fullname, path, target) + if spec is None: + return None + elif spec.loader is None: + namespace_path = spec.submodule_search_locations + if namespace_path: + # We found at least one namespace path. Return a spec which + # can create the namespace package. + spec.origin = None + spec.submodule_search_locations = _NamespacePath(fullname, namespace_path, cls._get_spec) + return spec + else: + return None + else: + return spec + + @staticmethod + def find_distributions(*args, **kwargs): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + from importlib.metadata import MetadataPathFinder + return MetadataPathFinder.find_distributions(*args, **kwargs) + + +class FileFinder: + + """File-based finder. + + Interactions with the file system are cached for performance, being + refreshed when the directory the finder is handling has been modified. - If sys.implementation.cache_tag is None then NotImplementedError is raised. """ - if debug_override is not None: - if optimization is not None: - message = 'debug_override or optimization must be set to None' - raise TypeError(message) - optimization = '' if debug_override else 1 - path = _os.fspath(path) - head, tail = _path_split(path) - base, sep, rest = tail.rpartition('.') - tag = sys.implementation.cache_tag - if tag is None: - raise NotImplementedError('sys.implementation.cache_tag is None') - almost_filename = ''.join([(base if base else rest), sep, tag]) - if optimization is None: - if sys.flags.optimize == 0: - optimization = '' + + def __init__(self, path, *loader_details): + """Initialize with the path to search on and a variable number of + 2-tuples containing the loader and the file suffixes the loader + recognizes.""" + loaders = [] + for loader, suffixes in loader_details: + loaders.extend((suffix, loader) for suffix in suffixes) + self._loaders = loaders + # Base (directory) path + if not path or path == '.': + self.path = _os.getcwd() else: - optimization = sys.flags.optimize - optimization = str(optimization) - if optimization != '': - if not optimization.isalnum(): - raise ValueError(f'{optimization!r} is not alphanumeric') - almost_filename = f'{almost_filename}.{_OPT}{optimization}' - filename = almost_filename + BYTECODE_SUFFIXES[0] - if getattr(sys, 'pycache_prefix', None) is not None: - head = _path_abspath(head) - if head[1:2] == ':' and head[0:1] not in path_separators: - head = head[2:] - return _path_join( - sys.pycache_prefix, - head.lstrip(path_separators), - filename, + self.path = _path_abspath(path) + self._path_mtime = -1 + self._path_cache = set() + self._relaxed_path_cache = set() + + def invalidate_caches(self): + """Invalidate the directory mtime.""" + self._path_mtime = -1 + + def _get_spec(self, loader_class, fullname, path, smsl, target): + loader = loader_class(fullname, path) + return spec_from_file_location(fullname, path, loader=loader, + submodule_search_locations=smsl) + + def find_spec(self, fullname, target=None): + """Try to find a spec for the specified module. + + Returns the matching spec, or None if not found. + """ + is_namespace = False + tail_module = fullname.rpartition('.')[2] + try: + mtime = _path_stat(self.path or _os.getcwd()).st_mtime + except OSError: + mtime = -1 + if mtime != self._path_mtime: + self._fill_cache() + self._path_mtime = mtime + # tail_module keeps the original casing, for __file__ and friends + if _relax_case(): + cache = self._relaxed_path_cache + cache_module = tail_module.lower() + else: + cache = self._path_cache + cache_module = tail_module + # Check if the module is the name of a directory (and thus a package). + if cache_module in cache: + base_path = _path_join(self.path, tail_module) + for suffix, loader_class in self._loaders: + init_filename = '__init__' + suffix + full_path = _path_join(base_path, init_filename) + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, [base_path], target) + else: + # If a namespace package, return the path if we don't + # find a module in the next section. + is_namespace = _path_isdir(base_path) + # Check for a file w/ a proper suffix exists. + for suffix, loader_class in self._loaders: + try: + full_path = _path_join(self.path, tail_module + suffix) + except ValueError: + return None + _bootstrap._verbose_message('trying {}', full_path, verbosity=2) + if cache_module + suffix in cache: + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, + None, target) + if is_namespace: + _bootstrap._verbose_message('possible namespace for {}', base_path) + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = [base_path] + return spec + return None + + def _fill_cache(self): + """Fill the cache of potential modules and packages for this directory.""" + path = self.path + try: + contents = _os.listdir(path or _os.getcwd()) + except (FileNotFoundError, PermissionError, NotADirectoryError): + # Directory has either been removed, turned into a file, or made + # unreadable. + contents = [] + # We store two cached versions, to handle runtime changes of the + # PYTHONCASEOK environment variable. + if not sys.platform.startswith('win'): + self._path_cache = set(contents) + else: + # Windows users can import modules with case-insensitive file + # suffixes (for legacy reasons). Make the suffix lowercase here + # so it's done once instead of for every import. This is safe as + # the specified suffixes to check against are always specified in a + # case-sensitive manner. + lower_suffix_contents = set() + for item in contents: + name, dot, suffix = item.partition('.') + if dot: + new_name = f'{name}.{suffix.lower()}' + else: + new_name = name + lower_suffix_contents.add(new_name) + self._path_cache = lower_suffix_contents + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + self._relaxed_path_cache = {fn.lower() for fn in contents} + + @classmethod + def path_hook(cls, *loader_details): + """A class method which returns a closure to use on sys.path_hook + which will return an instance using the specified loaders and the path + called on the closure. + + If the path called on the closure is not a directory, ImportError is + raised. + + """ + def path_hook_for_FileFinder(path): + """Path hook for importlib.machinery.FileFinder.""" + if not _path_isdir(path): + raise ImportError('only directories are supported', path=path) + return cls(path, *loader_details) + + return path_hook_for_FileFinder + + def __repr__(self): + return f'FileFinder({self.path!r})' + + +class AppleFrameworkLoader(ExtensionFileLoader): + """A loader for modules that have been packaged as frameworks for + compatibility with Apple's iOS App Store policies. + """ + def create_module(self, spec): + # If the ModuleSpec has been created by the FileFinder, it will have + # been created with an origin pointing to the .fwork file. We need to + # redirect this to the location in the Frameworks folder, using the + # content of the .fwork file. + if spec.origin.endswith(".fwork"): + with _io.FileIO(spec.origin, 'r') as file: + framework_binary = file.read().decode().strip() + bundle_path = _path_split(sys.executable)[0] + spec.origin = _path_join(bundle_path, framework_binary) + + # If the loader is created based on the spec for a loaded module, the + # path will be pointing at the Framework location. If this occurs, + # get the original .fwork location to use as the module's __file__. + if self.path.endswith(".fwork"): + path = self.path + else: + with _io.FileIO(self.path + ".origin", 'r') as file: + origin = file.read().decode().strip() + bundle_path = _path_split(sys.executable)[0] + path = _path_join(bundle_path, origin) + + module = _bootstrap._call_with_frames_removed(_imp.create_dynamic, spec) + + _bootstrap._verbose_message( + "Apple framework extension module {!r} loaded from {!r} (path {!r})", + spec.name, + spec.origin, + path, ) - return _path_join(head, _PYCACHE, filename) + # Ensure that the __file__ points at the .fwork location + try: + module.__file__ = path + except AttributeError: + # Not important enough to report. + # (The error is also ignored in _bootstrap._init_module_attrs or + # import_run_extension in import.c) + pass -# CPython: Lib/importlib/_bootstrap_external.py:310 source_from_cache -def source_from_cache(path): - """Given the path to a .pyc. file, return the path to its .py file. + return module - The .pyc file does not need to exist; this simply returns the path to - the .py file calculated to correspond to the .pyc file. If path does - not conform to PEP 3147/488 format, ValueError will be raised. If - sys.implementation.cache_tag is None then NotImplementedError is raised. +# Import setup ############################################################### + +def _fix_up_module(ns, name, pathname, cpathname=None): + # This function is used by PyImport_ExecCodeModuleObject(). + loader = ns.get('__loader__') + spec = ns.get('__spec__') + if not loader: + if spec: + loader = spec.loader + elif pathname == cpathname: + loader = SourcelessFileLoader(name, pathname) + else: + loader = SourceFileLoader(name, pathname) + if not spec: + spec = spec_from_file_location(name, pathname, loader=loader) + if cpathname: + spec.cached = _path_abspath(cpathname) + try: + ns['__spec__'] = spec + ns['__loader__'] = loader + ns['__file__'] = pathname + ns['__cached__'] = cpathname + except Exception: + # Not important enough to report. + pass + + +def _get_supported_file_loaders(): + """Returns a list of file-based module loaders. + + Each item is a tuple (loader, suffixes). """ - if sys.implementation.cache_tag is None: - raise NotImplementedError('sys.implementation.cache_tag is None') - path = _os.fspath(path) - head, pycache_filename = _path_split(path) - found_in_pycache_prefix = False - if getattr(sys, 'pycache_prefix', None) is not None: - stripped_path = sys.pycache_prefix.rstrip(path_separators) - if head.startswith(stripped_path + path_sep): - head = head[len(stripped_path):] - found_in_pycache_prefix = True - if not found_in_pycache_prefix: - head, pycache = _path_split(head) - if pycache != _PYCACHE: - raise ValueError(f'{_PYCACHE} not bottom-level directory in ' - f'{path!r}') - dot_count = pycache_filename.count('.') - if dot_count not in {2, 3}: - raise ValueError(f'expected only 2 or 3 dots in {pycache_filename!r}') - elif dot_count == 3: - optimization = pycache_filename.rsplit('.', 2)[-2] - if not optimization.startswith(_OPT): - raise ValueError("optimization portion of filename does not start " - f"with {_OPT!r}") - opt_level = optimization[len(_OPT):] - if not opt_level.isalnum(): - raise ValueError(f"optimization level {opt_level!r} is not an " - "alphanumeric value") - base_filename = pycache_filename.partition('.')[0] - return _path_join(head, base_filename + SOURCE_SUFFIXES[0]) + extension_loaders = [] + if hasattr(_imp, 'create_dynamic'): + if sys.platform in {"ios", "tvos", "watchos"}: + extension_loaders = [(AppleFrameworkLoader, [ + suffix.replace(".so", ".fwork") + for suffix in _imp.extension_suffixes() + ])] + extension_loaders.append((ExtensionFileLoader, _imp.extension_suffixes())) + source = SourceFileLoader, SOURCE_SUFFIXES + bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES + return extension_loaders + [source, bytecode] + + +def _set_bootstrap_module(_bootstrap_module): + global _bootstrap + _bootstrap = _bootstrap_module + + +def _install(_bootstrap_module): + """Install the path-based import components.""" + _set_bootstrap_module(_bootstrap_module) + supported_loaders = _get_supported_file_loaders() + sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)]) + sys.meta_path.append(PathFinder) diff --git a/stdlib/importlib/abc.py b/stdlib/importlib/abc.py new file mode 100644 index 000000000..1e47495f6 --- /dev/null +++ b/stdlib/importlib/abc.py @@ -0,0 +1,234 @@ +"""Abstract base classes related to import.""" +from . import _bootstrap_external +from . import machinery +try: + import _frozen_importlib +except ImportError as exc: + if exc.name != '_frozen_importlib': + raise + _frozen_importlib = None +try: + import _frozen_importlib_external +except ImportError: + _frozen_importlib_external = _bootstrap_external +from ._abc import Loader +import abc + + +__all__ = [ + 'Loader', 'MetaPathFinder', 'PathEntryFinder', + 'ResourceLoader', 'InspectLoader', 'ExecutionLoader', + 'FileLoader', 'SourceLoader', +] + + +def _register(abstract_cls, *classes): + for cls in classes: + abstract_cls.register(cls) + if _frozen_importlib is not None: + try: + frozen_cls = getattr(_frozen_importlib, cls.__name__) + except AttributeError: + frozen_cls = getattr(_frozen_importlib_external, cls.__name__) + abstract_cls.register(frozen_cls) + + +class MetaPathFinder(metaclass=abc.ABCMeta): + + """Abstract base class for import finders on sys.meta_path.""" + + # We don't define find_spec() here since that would break + # hasattr checks we do to support backward compatibility. + + def invalidate_caches(self): + """An optional method for clearing the finder's cache, if any. + This method is used by importlib.invalidate_caches(). + """ + +_register(MetaPathFinder, machinery.BuiltinImporter, machinery.FrozenImporter, + machinery.PathFinder, machinery.WindowsRegistryFinder) + + +class PathEntryFinder(metaclass=abc.ABCMeta): + + """Abstract base class for path entry finders used by PathFinder.""" + + def invalidate_caches(self): + """An optional method for clearing the finder's cache, if any. + This method is used by PathFinder.invalidate_caches(). + """ + +_register(PathEntryFinder, machinery.FileFinder) + + +class ResourceLoader(Loader): + + """Abstract base class for loaders which can return data from their + back-end storage to facilitate reading data to perform an import. + + This ABC represents one of the optional protocols specified by PEP 302. + + For directly loading resources, use TraversableResources instead. This class + primarily exists for backwards compatibility with other ABCs in this module. + + """ + + @abc.abstractmethod + def get_data(self, path): + """Abstract method which when implemented should return the bytes for + the specified path. The path must be a str.""" + raise OSError + + +class InspectLoader(Loader): + + """Abstract base class for loaders which support inspection about the + modules they can load. + + This ABC represents one of the optional protocols specified by PEP 302. + + """ + + def is_package(self, fullname): + """Optional method which when implemented should return whether the + module is a package. The fullname is a str. Returns a bool. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + def get_code(self, fullname): + """Method which returns the code object for the module. + + The fullname is a str. Returns a types.CodeType if possible, else + returns None if a code object does not make sense + (e.g. built-in module). Raises ImportError if the module cannot be + found. + """ + source = self.get_source(fullname) + if source is None: + return None + return self.source_to_code(source) + + @abc.abstractmethod + def get_source(self, fullname): + """Abstract method which should return the source code for the + module. The fullname is a str. Returns a str. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + @staticmethod + def source_to_code(data, path=''): + """Compile 'data' into a code object. + + The 'data' argument can be anything that compile() can handle. The'path' + argument should be where the data was retrieved (when applicable).""" + return compile(data, path, 'exec', dont_inherit=True) + + exec_module = _bootstrap_external._LoaderBasics.exec_module + load_module = _bootstrap_external._LoaderBasics.load_module + +_register(InspectLoader, machinery.BuiltinImporter, machinery.FrozenImporter, machinery.NamespaceLoader) + + +class ExecutionLoader(InspectLoader): + + """Abstract base class for loaders that wish to support the execution of + modules as scripts. + + This ABC represents one of the optional protocols specified in PEP 302. + + """ + + @abc.abstractmethod + def get_filename(self, fullname): + """Abstract method which should return the value that __file__ is to be + set to. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + def get_code(self, fullname): + """Method to return the code object for fullname. + + Should return None if not applicable (e.g. built-in module). + Raise ImportError if the module cannot be found. + """ + source = self.get_source(fullname) + if source is None: + return None + try: + path = self.get_filename(fullname) + except ImportError: + return self.source_to_code(source) + else: + return self.source_to_code(source, path) + +_register( + ExecutionLoader, + machinery.ExtensionFileLoader, + machinery.AppleFrameworkLoader, +) + + +class FileLoader(_bootstrap_external.FileLoader, ResourceLoader, ExecutionLoader): + + """Abstract base class partially implementing the ResourceLoader and + ExecutionLoader ABCs.""" + +_register(FileLoader, machinery.SourceFileLoader, + machinery.SourcelessFileLoader) + + +class SourceLoader(_bootstrap_external.SourceLoader, ResourceLoader, ExecutionLoader): + + """Abstract base class for loading source code (and optionally any + corresponding bytecode). + + To support loading from source code, the abstractmethods inherited from + ResourceLoader and ExecutionLoader need to be implemented. To also support + loading from bytecode, the optional methods specified directly by this ABC + is required. + + Inherited abstractmethods not implemented in this ABC: + + * ResourceLoader.get_data + * ExecutionLoader.get_filename + + """ + + def path_mtime(self, path): + """Return the (int) modification time for the path (str).""" + import warnings + warnings.warn('SourceLoader.path_mtime is deprecated in favour of ' + 'SourceLoader.path_stats().', + DeprecationWarning, stacklevel=2) + if self.path_stats.__func__ is SourceLoader.path_stats: + raise OSError + return int(self.path_stats(path)['mtime']) + + def path_stats(self, path): + """Return a metadata dict for the source pointed to by the path (str). + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + """ + if self.path_mtime.__func__ is SourceLoader.path_mtime: + raise OSError + return {'mtime': self.path_mtime(path)} + + def set_data(self, path, data): + """Write the bytes to the path (if possible). + + Accepts a str path and data as bytes. + + Any needed intermediary directories are to be created. If for some + reason the file cannot be written because of permissions, fail + silently. + """ + +_register(SourceLoader, machinery.SourceFileLoader) diff --git a/stdlib/importlib/machinery.py b/stdlib/importlib/machinery.py index e593f2bb9..63d726445 100644 --- a/stdlib/importlib/machinery.py +++ b/stdlib/importlib/machinery.py @@ -1,67 +1,50 @@ -"""importlib.machinery: gopy-side stub. - -The CPython module re-exports loader / finder classes plus suffix -constants from ._bootstrap and ._bootstrap_external. gopy's import -system is implemented Go-side, so most loaders and finders aren't -needed at the Python boundary; the SourceFileLoader re-export is -necessary because py_compile.compile() drives it directly. - -When a future spec lands the full importlib bootstrap port, this file -becomes the byte-equal vendor of Lib/importlib/machinery.py. - -CPython: Lib/importlib/machinery.py -""" - -from importlib._bootstrap_external import ( - FileLoader, - SourceFileLoader, +"""The machinery of importlib: finders, loaders, hooks, etc.""" + +from ._bootstrap import ModuleSpec +from ._bootstrap import BuiltinImporter +from ._bootstrap import FrozenImporter +from ._bootstrap_external import ( + SOURCE_SUFFIXES, BYTECODE_SUFFIXES, EXTENSION_SUFFIXES, + DEBUG_BYTECODE_SUFFIXES as _DEBUG_BYTECODE_SUFFIXES, + OPTIMIZED_BYTECODE_SUFFIXES as _OPTIMIZED_BYTECODE_SUFFIXES ) - -SOURCE_SUFFIXES = ['.py'] -DEBUG_BYTECODE_SUFFIXES = ['.pyc'] -OPTIMIZED_BYTECODE_SUFFIXES = ['.pyc'] -BYTECODE_SUFFIXES = DEBUG_BYTECODE_SUFFIXES -EXTENSION_SUFFIXES = [] +from ._bootstrap_external import WindowsRegistryFinder +from ._bootstrap_external import PathFinder +from ._bootstrap_external import FileFinder +from ._bootstrap_external import SourceFileLoader +from ._bootstrap_external import SourcelessFileLoader +from ._bootstrap_external import ExtensionFileLoader +from ._bootstrap_external import AppleFrameworkLoader +from ._bootstrap_external import NamespaceLoader def all_suffixes(): - """Returns a list of all recognized module suffixes for this process.""" + """Returns a list of all recognized module suffixes for this process""" return SOURCE_SUFFIXES + BYTECODE_SUFFIXES + EXTENSION_SUFFIXES -class FileFinder: - """Stub: gopy's import system is Go-side; pkgutil registers an - iterator against FileFinder but it's only consulted when the - user walks a package, which the spec 1711 test path doesn't. - """ - - def __init__(self, path, *loader_details): - self.path = path - self._loaders = loader_details - +__all__ = ['AppleFrameworkLoader', 'BYTECODE_SUFFIXES', 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', 'FileFinder', 'FrozenImporter', 'ModuleSpec', + 'NamespaceLoader', 'OPTIMIZED_BYTECODE_SUFFIXES', 'PathFinder', + 'SOURCE_SUFFIXES', 'SourceFileLoader', 'SourcelessFileLoader', + 'WindowsRegistryFinder', 'all_suffixes'] -class ModuleSpec: - """Minimal stand-in for importlib.machinery.ModuleSpec.""" - def __init__(self, name, loader, *, origin=None, loader_state=None, - is_package=None): - self.name = name - self.loader = loader - self.origin = origin - self.loader_state = loader_state - self.submodule_search_locations = [] if is_package else None - self.has_location = origin is not None - self.cached = None +def __getattr__(name): + import warnings + if name == 'DEBUG_BYTECODE_SUFFIXES': + warnings.warn('importlib.machinery.DEBUG_BYTECODE_SUFFIXES is ' + 'deprecated; use importlib.machinery.BYTECODE_SUFFIXES ' + 'instead.', + DeprecationWarning, stacklevel=2) + return _DEBUG_BYTECODE_SUFFIXES + elif name == 'OPTIMIZED_BYTECODE_SUFFIXES': + warnings.warn('importlib.machinery.OPTIMIZED_BYTECODE_SUFFIXES is ' + 'deprecated; use importlib.machinery.BYTECODE_SUFFIXES ' + 'instead.', + DeprecationWarning, stacklevel=2) + return _OPTIMIZED_BYTECODE_SUFFIXES -__all__ = [ - 'BYTECODE_SUFFIXES', - 'DEBUG_BYTECODE_SUFFIXES', - 'EXTENSION_SUFFIXES', - 'FileLoader', - 'ModuleSpec', - 'OPTIMIZED_BYTECODE_SUFFIXES', - 'SOURCE_SUFFIXES', - 'SourceFileLoader', - 'all_suffixes', -] + raise AttributeError(f'module {__name__!r} has no attribute {name!r}') diff --git a/stdlib/importlib/metadata/__init__.py b/stdlib/importlib/metadata/__init__.py new file mode 100644 index 000000000..8ce62dd86 --- /dev/null +++ b/stdlib/importlib/metadata/__init__.py @@ -0,0 +1,1093 @@ +from __future__ import annotations + +import os +import re +import abc +import sys +import json +import email +import types +import inspect +import pathlib +import zipfile +import operator +import textwrap +import warnings +import functools +import itertools +import posixpath +import collections + +from . import _meta +from ._collections import FreezableDefaultDict, Pair +from ._functools import method_cache, pass_none +from ._itertools import always_iterable, unique_everseen +from ._meta import PackageMetadata, SimplePath + +from contextlib import suppress +from importlib import import_module +from importlib.abc import MetaPathFinder +from itertools import starmap +from typing import Any, Iterable, List, Mapping, Match, Optional, Set, cast + +__all__ = [ + 'Distribution', + 'DistributionFinder', + 'PackageMetadata', + 'PackageNotFoundError', + 'distribution', + 'distributions', + 'entry_points', + 'files', + 'metadata', + 'packages_distributions', + 'requires', + 'version', +] + + +class PackageNotFoundError(ModuleNotFoundError): + """The package was not found.""" + + def __str__(self) -> str: + return f"No package metadata was found for {self.name}" + + @property + def name(self) -> str: # type: ignore[override] + (name,) = self.args + return name + + +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line: str): + return line and not line.startswith('#') + + +class EntryPoint: + """An entry point as defined by Python packaging conventions. + + See `the packaging docs on entry points + `_ + for more information. + + >>> ep = EntryPoint( + ... name=None, group=None, value='package.module:attr [extra1, extra2]') + >>> ep.module + 'package.module' + >>> ep.attr + 'attr' + >>> ep.extras + ['extra1', 'extra2'] + """ + + pattern = re.compile( + r'(?P[\w.]+)\s*' + r'(:\s*(?P[\w.]+)\s*)?' + r'((?P\[.*\])\s*)?$' + ) + """ + A regular expression describing the syntax for an entry point, + which might look like: + + - module + - package.module + - package.module:attribute + - package.module:object.attribute + - package.module:attr [extra1, extra2] + + Other combinations are possible as well. + + The expression is lenient about whitespace around the ':', + following the attr, and following any extras. + """ + + name: str + value: str + group: str + + dist: Optional[Distribution] = None + + def __init__(self, name: str, value: str, group: str) -> None: + vars(self).update(name=name, value=value, group=group) + + def load(self) -> Any: + """Load the entry point from its definition. If only a module + is indicated by the value, return that module. Otherwise, + return the named object. + """ + match = cast(Match, self.pattern.match(self.value)) + module = import_module(match.group('module')) + attrs = filter(None, (match.group('attr') or '').split('.')) + return functools.reduce(getattr, attrs, module) + + @property + def module(self) -> str: + match = self.pattern.match(self.value) + assert match is not None + return match.group('module') + + @property + def attr(self) -> str: + match = self.pattern.match(self.value) + assert match is not None + return match.group('attr') + + @property + def extras(self) -> List[str]: + match = self.pattern.match(self.value) + assert match is not None + return re.findall(r'\w+', match.group('extras') or '') + + def _for(self, dist): + vars(self).update(dist=dist) + return self + + def matches(self, **params): + """ + EntryPoint matches the given parameters. + + >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]') + >>> ep.matches(group='foo') + True + >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]') + True + >>> ep.matches(group='foo', name='other') + False + >>> ep.matches() + True + >>> ep.matches(extras=['extra1', 'extra2']) + True + >>> ep.matches(module='bing') + True + >>> ep.matches(attr='bong') + True + """ + attrs = (getattr(self, param) for param in params) + return all(map(operator.eq, params.values(), attrs)) + + def _key(self): + return self.name, self.value, self.group + + def __lt__(self, other): + return self._key() < other._key() + + def __eq__(self, other): + return self._key() == other._key() + + def __setattr__(self, name, value): + raise AttributeError("EntryPoint objects are immutable.") + + def __repr__(self): + return ( + f'EntryPoint(name={self.name!r}, value={self.value!r}, ' + f'group={self.group!r})' + ) + + def __hash__(self) -> int: + return hash(self._key()) + + +class EntryPoints(tuple): + """ + An immutable collection of selectable EntryPoint objects. + """ + + __slots__ = () + + def __getitem__(self, name: str) -> EntryPoint: # type: ignore[override] + """ + Get the EntryPoint in self matching name. + """ + try: + return next(iter(self.select(name=name))) + except StopIteration: + raise KeyError(name) + + def __repr__(self): + """ + Repr with classname and tuple constructor to + signal that we deviate from regular tuple behavior. + """ + return '%s(%r)' % (self.__class__.__name__, tuple(self)) + + def select(self, **params) -> EntryPoints: + """ + Select entry points from self that match the + given parameters (typically group and/or name). + """ + return EntryPoints(ep for ep in self if ep.matches(**params)) + + @property + def names(self) -> Set[str]: + """ + Return the set of all names of all entry points. + """ + return {ep.name for ep in self} + + @property + def groups(self) -> Set[str]: + """ + Return the set of all groups of all entry points. + """ + return {ep.group for ep in self} + + @classmethod + def _from_text_for(cls, text, dist): + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @staticmethod + def _from_text(text): + return ( + EntryPoint(name=item.value.name, value=item.value.value, group=item.name) + for item in Sectioned.section_pairs(text or '') + ) + + +class PackagePath(pathlib.PurePosixPath): + """A reference to a path in a package""" + + hash: Optional[FileHash] + size: int + dist: Distribution + + def read_text(self, encoding: str = 'utf-8') -> str: # type: ignore[override] + return self.locate().read_text(encoding=encoding) + + def read_binary(self) -> bytes: + return self.locate().read_bytes() + + def locate(self) -> SimplePath: + """Return a path-like object for this path""" + return self.dist.locate_file(self) + + +class FileHash: + def __init__(self, spec: str) -> None: + self.mode, _, self.value = spec.partition('=') + + def __repr__(self) -> str: + return f'' + + +class DeprecatedNonAbstract: + # Required until Python 3.14 + def __new__(cls, *args, **kwargs): + all_names = { + name for subclass in inspect.getmro(cls) for name in vars(subclass) + } + abstract = { + name + for name in all_names + if getattr(getattr(cls, name), '__isabstractmethod__', False) + } + if abstract: + warnings.warn( + f"Unimplemented abstract methods {abstract}", + DeprecationWarning, + stacklevel=2, + ) + return super().__new__(cls) + + +class Distribution(DeprecatedNonAbstract): + """ + An abstract Python distribution package. + + Custom providers may derive from this class and define + the abstract methods to provide a concrete implementation + for their environment. Some providers may opt to override + the default implementation of some properties to bypass + the file-reading mechanism. + """ + + @abc.abstractmethod + def read_text(self, filename) -> Optional[str]: + """Attempt to load metadata file given by the name. + + Python distribution metadata is organized by blobs of text + typically represented as "files" in the metadata directory + (e.g. package-1.0.dist-info). These files include things + like: + + - METADATA: The distribution metadata including fields + like Name and Version and Description. + - entry_points.txt: A series of entry points as defined in + `the entry points spec `_. + - RECORD: A record of files according to + `this recording spec `_. + + A package may provide any set of files, including those + not listed here or none at all. + + :param filename: The name of the file in the distribution info. + :return: The text if found, otherwise None. + """ + + @abc.abstractmethod + def locate_file(self, path: str | os.PathLike[str]) -> SimplePath: + """ + Given a path to a file in this distribution, return a SimplePath + to it. + """ + + @classmethod + def from_name(cls, name: str) -> Distribution: + """Return the Distribution for the given package name. + + :param name: The name of the distribution package to search for. + :return: The Distribution instance (or subclass thereof) for the named + package, if found. + :raises PackageNotFoundError: When the named package's distribution + metadata cannot be found. + :raises ValueError: When an invalid value is supplied for name. + """ + if not name: + raise ValueError("A distribution name is required.") + try: + return next(iter(cls.discover(name=name))) + except StopIteration: + raise PackageNotFoundError(name) + + @classmethod + def discover( + cls, *, context: Optional[DistributionFinder.Context] = None, **kwargs + ) -> Iterable[Distribution]: + """Return an iterable of Distribution objects for all packages. + + Pass a ``context`` or pass keyword arguments for constructing + a context. + + :context: A ``DistributionFinder.Context`` object. + :return: Iterable of Distribution objects for packages matching + the context. + """ + if context and kwargs: + raise ValueError("cannot accept context and kwargs") + context = context or DistributionFinder.Context(**kwargs) + return itertools.chain.from_iterable( + resolver(context) for resolver in cls._discover_resolvers() + ) + + @staticmethod + def at(path: str | os.PathLike[str]) -> Distribution: + """Return a Distribution for the indicated metadata path. + + :param path: a string or path-like object + :return: a concrete Distribution instance for the path + """ + return PathDistribution(pathlib.Path(path)) + + @staticmethod + def _discover_resolvers(): + """Search the meta_path for resolvers (MetadataPathFinders).""" + declared = ( + getattr(finder, 'find_distributions', None) for finder in sys.meta_path + ) + return filter(None, declared) + + @property + def metadata(self) -> _meta.PackageMetadata: + """Return the parsed metadata for this Distribution. + + The returned object will have keys that name the various bits of + metadata per the + `Core metadata specifications `_. + + Custom providers may provide the METADATA file or override this + property. + """ + # deferred for performance (python/cpython#109829) + from . import _adapters + + opt_text = ( + self.read_text('METADATA') + or self.read_text('PKG-INFO') + # This last clause is here to support old egg-info files. Its + # effect is to just end up using the PathDistribution's self._path + # (which points to the egg-info file) attribute unchanged. + or self.read_text('') + ) + text = cast(str, opt_text) + return _adapters.Message(email.message_from_string(text)) + + @property + def name(self) -> str: + """Return the 'Name' metadata for the distribution package.""" + return self.metadata['Name'] + + @property + def _normalized_name(self): + """Return a normalized version of the name.""" + return Prepared.normalize(self.name) + + @property + def version(self) -> str: + """Return the 'Version' metadata for the distribution package.""" + return self.metadata['Version'] + + @property + def entry_points(self) -> EntryPoints: + """ + Return EntryPoints for this distribution. + + Custom providers may provide the ``entry_points.txt`` file + or override this property. + """ + return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) + + @property + def files(self) -> Optional[List[PackagePath]]: + """Files in this distribution. + + :return: List of PackagePath for this distribution or None + + Result is `None` if the metadata file that enumerates files + (i.e. RECORD for dist-info, or installed-files.txt or + SOURCES.txt for egg-info) is missing. + Result may be empty if the metadata exists but is empty. + + Custom providers are recommended to provide a "RECORD" file (in + ``read_text``) or override this property to allow for callers to be + able to resolve filenames provided by the package. + """ + + def make_file(name, hash=None, size_str=None): + result = PackagePath(name) + result.hash = FileHash(hash) if hash else None + result.size = int(size_str) if size_str else None + result.dist = self + return result + + @pass_none + def make_files(lines): + # Delay csv import, since Distribution.files is not as widely used + # as other parts of importlib.metadata + import csv + + return starmap(make_file, csv.reader(lines)) + + @pass_none + def skip_missing_files(package_paths): + return list(filter(lambda path: path.locate().exists(), package_paths)) + + return skip_missing_files( + make_files( + self._read_files_distinfo() + or self._read_files_egginfo_installed() + or self._read_files_egginfo_sources() + ) + ) + + def _read_files_distinfo(self): + """ + Read the lines of RECORD. + """ + text = self.read_text('RECORD') + return text and text.splitlines() + + def _read_files_egginfo_installed(self): + """ + Read installed-files.txt and return lines in a similar + CSV-parsable format as RECORD: each file must be placed + relative to the site-packages directory and must also be + quoted (since file names can contain literal commas). + + This file is written when the package is installed by pip, + but it might not be written for other installation methods. + Assume the file is accurate if it exists. + """ + text = self.read_text('installed-files.txt') + # Prepend the .egg-info/ subdir to the lines in this file. + # But this subdir is only available from PathDistribution's + # self._path. + subdir = getattr(self, '_path', None) + if not text or not subdir: + return + + paths = ( + (subdir / name) + .resolve() + .relative_to(self.locate_file('').resolve(), walk_up=True) + .as_posix() + for name in text.splitlines() + ) + return map('"{}"'.format, paths) + + def _read_files_egginfo_sources(self): + """ + Read SOURCES.txt and return lines in a similar CSV-parsable + format as RECORD: each file name must be quoted (since it + might contain literal commas). + + Note that SOURCES.txt is not a reliable source for what + files are installed by a package. This file is generated + for a source archive, and the files that are present + there (e.g. setup.py) may not correctly reflect the files + that are present after the package has been installed. + """ + text = self.read_text('SOURCES.txt') + return text and map('"{}"'.format, text.splitlines()) + + @property + def requires(self) -> Optional[List[str]]: + """Generated requirements specified for this Distribution""" + reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() + return reqs and list(reqs) + + def _read_dist_info_reqs(self): + return self.metadata.get_all('Requires-Dist') + + def _read_egg_info_reqs(self): + source = self.read_text('requires.txt') + return pass_none(self._deps_from_requires_text)(source) + + @classmethod + def _deps_from_requires_text(cls, source): + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) + + @staticmethod + def _convert_egg_info_reqs_to_simple_reqs(sections): + """ + Historically, setuptools would solicit and store 'extra' + requirements, including those with environment markers, + in separate sections. More modern tools expect each + dependency to be defined separately, with any relevant + extras and environment markers attached directly to that + requirement. This method converts the former to the + latter. See _test_deps_from_requires_text for an example. + """ + + def make_condition(name): + return name and f'extra == "{name}"' + + def quoted_marker(section): + section = section or '' + extra, sep, markers = section.partition(':') + if extra and markers: + markers = f'({markers})' + conditions = list(filter(None, [markers, make_condition(extra)])) + return '; ' + ' and '.join(conditions) if conditions else '' + + def url_req_space(req): + """ + PEP 508 requires a space between the url_spec and the quoted_marker. + Ref python/importlib_metadata#357. + """ + # '@' is uniquely indicative of a url_req. + return ' ' * ('@' in req) + + for section in sections: + space = url_req_space(section.value) + yield section.value + space + quoted_marker(section.name) + + @property + def origin(self): + return self._load_json('direct_url.json') + + def _load_json(self, filename): + return pass_none(json.loads)( + self.read_text(filename), + object_hook=lambda data: types.SimpleNamespace(**data), + ) + + +class DistributionFinder(MetaPathFinder): + """ + A MetaPathFinder capable of discovering installed distributions. + + Custom providers should implement this interface in order to + supply metadata. + """ + + class Context: + """ + Keyword arguments presented by the caller to + ``distributions()`` or ``Distribution.discover()`` + to narrow the scope of a search for distributions + in all DistributionFinders. + + Each DistributionFinder may expect any parameters + and should attempt to honor the canonical + parameters defined below when appropriate. + + This mechanism gives a custom provider a means to + solicit additional details from the caller beyond + "name" and "path" when searching distributions. + For example, imagine a provider that exposes suites + of packages in either a "public" or "private" ``realm``. + A caller may wish to query only for distributions in + a particular realm and could call + ``distributions(realm="private")`` to signal to the + custom provider to only include distributions from that + realm. + """ + + name = None + """ + Specific name for which a distribution finder should match. + A name of ``None`` matches all distributions. + """ + + def __init__(self, **kwargs): + vars(self).update(kwargs) + + @property + def path(self) -> List[str]: + """ + The sequence of directory path that a distribution finder + should search. + + Typically refers to Python installed package paths such as + "site-packages" directories and defaults to ``sys.path``. + """ + return vars(self).get('path', sys.path) + + @abc.abstractmethod + def find_distributions(self, context=Context()) -> Iterable[Distribution]: + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching the ``context``, + a DistributionFinder.Context instance. + """ + + +class FastPath: + """ + Micro-optimized class for searching a root for children. + + Root is a path on the file system that may contain metadata + directories either as natural directories or within a zip file. + + >>> FastPath('').children() + ['...'] + + FastPath objects are cached and recycled for any given root. + + >>> FastPath('foobar') is FastPath('foobar') + True + """ + + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + + def __init__(self, root): + self.root = root + + def joinpath(self, child): + return pathlib.Path(self.root, child) + + def children(self): + with suppress(Exception): + return os.listdir(self.root or '.') + with suppress(Exception): + return self.zip_children() + return [] + + def zip_children(self): + zip_path = zipfile.Path(self.root) + names = zip_path.root.namelist() + self.joinpath = zip_path.joinpath + + return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) + + def search(self, name): + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + """ + A micro-optimized class for searching a (fast) path for metadata. + """ + + def __init__(self, path: FastPath): + """ + Calculate all of the children representing metadata. + + From the children in the path, calculate early all of the + children that appear to represent metadata (infos) or legacy + metadata (eggs). + """ + + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared: Prepared): + """ + Yield all infos and eggs matching the Prepared query. + """ + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) + ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) + + +class Prepared: + """ + A prepared search query for metadata on a possibly-named package. + + Pre-calculates the normalization to prevent repeated operations. + + >>> none = Prepared(None) + >>> none.normalized + >>> none.legacy_normalized + >>> bool(none) + False + >>> sample = Prepared('Sample__Pkg-name.foo') + >>> sample.normalized + 'sample_pkg_name_foo' + >>> sample.legacy_normalized + 'sample__pkg_name.foo' + >>> bool(sample) + True + """ + + normalized = None + legacy_normalized = None + + def __init__(self, name: Optional[str]): + self.name = name + if name is None: + return + self.normalized = self.normalize(name) + self.legacy_normalized = self.legacy_normalize(name) + + @staticmethod + def normalize(name): + """ + PEP 503 normalization plus dashes as underscores. + """ + return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + + @staticmethod + def legacy_normalize(name): + """ + Normalize the package name as found in the convention in + older packaging tools versions and specs. + """ + return name.lower().replace('-', '_') + + def __bool__(self): + return bool(self.name) + + +class MetadataPathFinder(DistributionFinder): + @classmethod + def find_distributions( + cls, context=DistributionFinder.Context() + ) -> Iterable[PathDistribution]: + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + found = cls._search_paths(context.name, context.path) + return map(PathDistribution, found) + + @classmethod + def _search_paths(cls, name, paths): + """Find metadata directories in paths heuristically.""" + prepared = Prepared(name) + return itertools.chain.from_iterable( + path.search(prepared) for path in map(FastPath, paths) + ) + + @classmethod + def invalidate_caches(cls) -> None: + FastPath.__new__.cache_clear() + + +class PathDistribution(Distribution): + def __init__(self, path: SimplePath) -> None: + """Construct a distribution. + + :param path: SimplePath indicating the metadata directory. + """ + self._path = path + + def read_text(self, filename: str | os.PathLike[str]) -> Optional[str]: + with suppress( + FileNotFoundError, + IsADirectoryError, + KeyError, + NotADirectoryError, + PermissionError, + ): + return self._path.joinpath(filename).read_text(encoding='utf-8') + + return None + + read_text.__doc__ = Distribution.read_text.__doc__ + + def locate_file(self, path: str | os.PathLike[str]) -> SimplePath: + return self._path.parent / path + + @property + def _normalized_name(self): + """ + Performance optimization: where possible, resolve the + normalized name from the file system path. + """ + stem = os.path.basename(str(self._path)) + return ( + pass_none(Prepared.normalize)(self._name_from_stem(stem)) + or super()._normalized_name + ) + + @staticmethod + def _name_from_stem(stem): + """ + >>> PathDistribution._name_from_stem('foo-3.0.egg-info') + 'foo' + >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info') + 'CherryPy' + >>> PathDistribution._name_from_stem('face.egg-info') + 'face' + >>> PathDistribution._name_from_stem('foo.bar') + """ + filename, ext = os.path.splitext(stem) + if ext not in ('.dist-info', '.egg-info'): + return + name, sep, rest = filename.partition('-') + return name + + +def distribution(distribution_name: str) -> Distribution: + """Get the ``Distribution`` instance for the named package. + + :param distribution_name: The name of the distribution package as a string. + :return: A ``Distribution`` instance (or subclass thereof). + """ + return Distribution.from_name(distribution_name) + + +def distributions(**kwargs) -> Iterable[Distribution]: + """Get all ``Distribution`` instances in the current environment. + + :return: An iterable of ``Distribution`` instances. + """ + return Distribution.discover(**kwargs) + + +def metadata(distribution_name: str) -> _meta.PackageMetadata: + """Get the metadata for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: A PackageMetadata containing the parsed metadata. + """ + return Distribution.from_name(distribution_name).metadata + + +def version(distribution_name: str) -> str: + """Get the version string for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: The version string for the package as defined in the package's + "Version" metadata key. + """ + return distribution(distribution_name).version + + +_unique = functools.partial( + unique_everseen, + key=operator.attrgetter('_normalized_name'), +) +""" +Wrapper for ``distributions`` to return unique distributions by name. +""" + + +def entry_points(**params) -> EntryPoints: + """Return EntryPoint objects for all installed packages. + + Pass selection parameters (group or name) to filter the + result to entry points matching those properties (see + EntryPoints.select()). + + :return: EntryPoints for all installed packages. + """ + eps = itertools.chain.from_iterable( + dist.entry_points for dist in _unique(distributions()) + ) + return EntryPoints(eps).select(**params) + + +def files(distribution_name: str) -> Optional[List[PackagePath]]: + """Return a list of files for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: List of files composing the distribution. + """ + return distribution(distribution_name).files + + +def requires(distribution_name: str) -> Optional[List[str]]: + """ + Return a list of requirements for the named package. + + :return: An iterable of requirements, suitable for + packaging.requirement.Requirement. + """ + return distribution(distribution_name).requires + + +def packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. + + >>> import collections.abc + >>> pkgs = packages_distributions() + >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) + True + """ + pkg_to_dist = collections.defaultdict(list) + for dist in distributions(): + for pkg in _top_level_declared(dist) or _top_level_inferred(dist): + pkg_to_dist[pkg].append(dist.metadata['Name']) + return dict(pkg_to_dist) + + +def _top_level_declared(dist): + return (dist.read_text('top_level.txt') or '').split() + + +def _topmost(name: PackagePath) -> Optional[str]: + """ + Return the top-most parent as long as there is a parent. + """ + top, *rest = name.parts + return top if rest else None + + +def _get_toplevel_name(name: PackagePath) -> str: + """ + Infer a possibly importable module name from a name presumed on + sys.path. + + >>> _get_toplevel_name(PackagePath('foo.py')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo.pyc')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo/__init__.py')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo.pth')) + 'foo.pth' + >>> _get_toplevel_name(PackagePath('foo.dist-info')) + 'foo.dist-info' + """ + return _topmost(name) or ( + # python/typeshed#10328 + inspect.getmodulename(name) # type: ignore + or str(name) + ) + + +def _top_level_inferred(dist): + opt_names = set(map(_get_toplevel_name, always_iterable(dist.files))) + + def importable_name(name): + return '.' not in name + + return filter(importable_name, opt_names) diff --git a/stdlib/importlib/metadata/_adapters.py b/stdlib/importlib/metadata/_adapters.py new file mode 100644 index 000000000..591168808 --- /dev/null +++ b/stdlib/importlib/metadata/_adapters.py @@ -0,0 +1,89 @@ +import functools +import warnings +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +# Do not remove prior to 2024-01-01 or Python 3.14 +_warn = functools.partial( + warnings.warn, + "Implicit None on return values is deprecated and will raise KeyErrors.", + DeprecationWarning, + stacklevel=2, +) + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + 'Dynamic', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def __getitem__(self, item): + """ + Warn users that a ``KeyError`` can be expected when a + missing key is supplied. Ref python/importlib_metadata#371. + """ + res = super().__getitem__(item) + if res is None: + _warn() + return res + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/stdlib/importlib/metadata/_collections.py b/stdlib/importlib/metadata/_collections.py new file mode 100644 index 000000000..cf0954e1a --- /dev/null +++ b/stdlib/importlib/metadata/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/stdlib/importlib/metadata/_functools.py b/stdlib/importlib/metadata/_functools.py new file mode 100644 index 000000000..71f66bd03 --- /dev/null +++ b/stdlib/importlib/metadata/_functools.py @@ -0,0 +1,104 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper + + +# From jaraco.functools 3.3 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper diff --git a/stdlib/importlib/metadata/_itertools.py b/stdlib/importlib/metadata/_itertools.py new file mode 100644 index 000000000..d4ca9b914 --- /dev/null +++ b/stdlib/importlib/metadata/_itertools.py @@ -0,0 +1,73 @@ +from itertools import filterfalse + + +def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +# copied from more_itertools 8.8 +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/stdlib/importlib/metadata/_meta.py b/stdlib/importlib/metadata/_meta.py new file mode 100644 index 000000000..1927d0f62 --- /dev/null +++ b/stdlib/importlib/metadata/_meta.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import os +from typing import Protocol +from typing import Any, Dict, Iterator, List, Optional, TypeVar, Union, overload + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: ... # pragma: no cover + + def __contains__(self, item: str) -> bool: ... # pragma: no cover + + def __getitem__(self, key: str) -> str: ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: ... # pragma: no cover + + @overload + def get( + self, name: str, failobj: None = None + ) -> Optional[str]: ... # pragma: no cover + + @overload + def get(self, name: str, failobj: _T) -> Union[str, _T]: ... # pragma: no cover + + # overload per python/importlib_metadata#435 + @overload + def get_all( + self, name: str, failobj: None = None + ) -> Optional[List[Any]]: ... # pragma: no cover + + @overload + def get_all(self, name: str, failobj: _T) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ + + +class SimplePath(Protocol): + """ + A minimal subset of pathlib.Path required by Distribution. + """ + + def joinpath( + self, other: Union[str, os.PathLike[str]] + ) -> SimplePath: ... # pragma: no cover + + def __truediv__( + self, other: Union[str, os.PathLike[str]] + ) -> SimplePath: ... # pragma: no cover + + @property + def parent(self) -> SimplePath: ... # pragma: no cover + + def read_text(self, encoding=None) -> str: ... # pragma: no cover + + def read_bytes(self) -> bytes: ... # pragma: no cover + + def exists(self) -> bool: ... # pragma: no cover diff --git a/stdlib/importlib/metadata/_text.py b/stdlib/importlib/metadata/_text.py new file mode 100644 index 000000000..c88cfbb23 --- /dev/null +++ b/stdlib/importlib/metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/stdlib/importlib/metadata/diagnose.py b/stdlib/importlib/metadata/diagnose.py new file mode 100644 index 000000000..e405471ac --- /dev/null +++ b/stdlib/importlib/metadata/diagnose.py @@ -0,0 +1,21 @@ +import sys + +from . import Distribution + + +def inspect(path): + print("Inspecting", path) + dists = list(Distribution.discover(path=[path])) + if not dists: + return + print("Found", len(dists), "packages:", end=' ') + print(', '.join(dist.name for dist in dists)) + + +def run(): + for path in sys.path: + inspect(path) + + +if __name__ == '__main__': + run() diff --git a/stdlib/importlib/readers.py b/stdlib/importlib/readers.py new file mode 100644 index 000000000..df7fb92e5 --- /dev/null +++ b/stdlib/importlib/readers.py @@ -0,0 +1,12 @@ +""" +Compatibility shim for .resources.readers as found on Python 3.10. + +Consumers that can rely on Python 3.11 should use the other +module directly. +""" + +from .resources.readers import ( + FileReader, ZipReader, MultiplexedPath, NamespaceReader, +) + +__all__ = ['FileReader', 'ZipReader', 'MultiplexedPath', 'NamespaceReader'] diff --git a/stdlib/importlib/resources/__init__.py b/stdlib/importlib/resources/__init__.py new file mode 100644 index 000000000..723c9f9eb --- /dev/null +++ b/stdlib/importlib/resources/__init__.py @@ -0,0 +1,43 @@ +""" +Read resources contained within a package. + +This codebase is shared between importlib.resources in the stdlib +and importlib_resources in PyPI. See +https://github.com/python/importlib_metadata/wiki/Development-Methodology +for more detail. +""" + +from ._common import ( + as_file, + files, + Package, + Anchor, +) + +from ._functional import ( + contents, + is_resource, + open_binary, + open_text, + path, + read_binary, + read_text, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Anchor', + 'ResourceReader', + 'as_file', + 'files', + 'contents', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/stdlib/importlib/resources/_adapters.py b/stdlib/importlib/resources/_adapters.py new file mode 100644 index 000000000..50688fbb6 --- /dev/null +++ b/stdlib/importlib/resources/_adapters.py @@ -0,0 +1,168 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError(f"Invalid mode value '{mode}', only 'r' and 'rb' are supported") + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. + """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/stdlib/importlib/resources/_common.py b/stdlib/importlib/resources/_common.py new file mode 100644 index 000000000..4e9014c45 --- /dev/null +++ b/stdlib/importlib/resources/_common.py @@ -0,0 +1,211 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib +import inspect +import warnings +import itertools + +from typing import Union, Optional, cast +from .abc import ResourceReader, Traversable + +Package = Union[types.ModuleType, str] +Anchor = Package + + +def package_to_anchor(func): + """ + Replace 'package' parameter as 'anchor' and warn about the change. + + Other errors should fall through. + + >>> files('a', 'b') + Traceback (most recent call last): + TypeError: files() takes from 0 to 1 positional arguments but 2 were given + + Remove this compatibility in Python 3.14. + """ + undefined = object() + + @functools.wraps(func) + def wrapper(anchor=undefined, package=undefined): + if package is not undefined: + if anchor is not undefined: + return func(anchor, package) + warnings.warn( + "First parameter to files is renamed to 'anchor'", + DeprecationWarning, + stacklevel=2, + ) + return func(package) + elif anchor is undefined: + return func() + return func(anchor) + + return wrapper + + +@package_to_anchor +def files(anchor: Optional[Anchor] = None) -> Traversable: + """ + Get a Traversable resource for an anchor. + """ + return from_package(resolve(anchor)) + + +def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]: + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore[union-attr] + if reader is None: + return None + return reader(spec.name) # type: ignore[union-attr] + + +@functools.singledispatch +def resolve(cand: Optional[Anchor]) -> types.ModuleType: + return cast(types.ModuleType, cand) + + +@resolve.register +def _(cand: str) -> types.ModuleType: + return importlib.import_module(cand) + + +@resolve.register +def _(cand: None) -> types.ModuleType: + return resolve(_infer_caller().f_globals['__name__']) + + +def _infer_caller(): + """ + Walk the stack and find the frame of the first caller not in this module. + """ + + def is_this_file(frame_info): + return frame_info.filename == stack[0].filename + + def is_wrapper(frame_info): + return frame_info.function == 'wrapper' + + stack = inspect.stack() + not_this_file = itertools.filterfalse(is_this_file, stack) + # also exclude 'wrapper' due to singledispatch in the call stack + callers = itertools.filterfalse(is_wrapper, not_this_file) + return next(callers).frame + + +def from_package(package: types.ModuleType): + """ + Return a Traversable object for the given package. + + """ + # deferred for performance (python/cpython#109829) + from ._adapters import wrap_spec + + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile( + reader, + suffix='', + # gh-93353: Keep a reference to call os.remove() in late Python + # finalization. + *, + _os_remove=os.remove, +): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + _os_remove(raw_path) + except FileNotFoundError: + pass + + +def _temp_file(path): + return _tempfile(path.read_bytes, suffix=path.name) + + +def _is_present_dir(path: Traversable) -> bool: + """ + Some Traversables implement ``is_dir()`` to raise an + exception (i.e. ``FileNotFoundError``) when the + directory doesn't exist. This function wraps that call + to always return a boolean and only return True + if there's a dir and it exists. + """ + with contextlib.suppress(FileNotFoundError): + return path.is_dir() + return False + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _temp_dir(path) if _is_present_dir(path) else _temp_file(path) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path + + +@contextlib.contextmanager +def _temp_path(dir: tempfile.TemporaryDirectory): + """ + Wrap tempfile.TemporaryDirectory to return a pathlib object. + """ + with dir as result: + yield pathlib.Path(result) + + +@contextlib.contextmanager +def _temp_dir(path): + """ + Given a traversable dir, recursively replicate the whole tree + to the file system in a context manager. + """ + assert path.is_dir() + with _temp_path(tempfile.TemporaryDirectory()) as temp_dir: + yield _write_contents(temp_dir, path) + + +def _write_contents(target, source): + child = target.joinpath(source.name) + if source.is_dir(): + child.mkdir() + for item in source.iterdir(): + _write_contents(child, item) + else: + child.write_bytes(source.read_bytes()) + return child diff --git a/stdlib/importlib/resources/_functional.py b/stdlib/importlib/resources/_functional.py new file mode 100644 index 000000000..f59416f2d --- /dev/null +++ b/stdlib/importlib/resources/_functional.py @@ -0,0 +1,81 @@ +"""Simplified function-based API for importlib.resources""" + +import warnings + +from ._common import files, as_file + + +_MISSING = object() + + +def open_binary(anchor, *path_names): + """Open for binary reading the *resource* within *package*.""" + return _get_resource(anchor, path_names).open('rb') + + +def open_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Open for text reading the *resource* within *package*.""" + encoding = _get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.open('r', encoding=encoding, errors=errors) + + +def read_binary(anchor, *path_names): + """Read and return contents of *resource* within *package* as bytes.""" + return _get_resource(anchor, path_names).read_bytes() + + +def read_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Read and return contents of *resource* within *package* as str.""" + encoding = _get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.read_text(encoding=encoding, errors=errors) + + +def path(anchor, *path_names): + """Return the path to the *resource* as an actual file system path.""" + return as_file(_get_resource(anchor, path_names)) + + +def is_resource(anchor, *path_names): + """Return ``True`` if there is a resource named *name* in the package, + + Otherwise returns ``False``. + """ + return _get_resource(anchor, path_names).is_file() + + +def contents(anchor, *path_names): + """Return an iterable over the named resources within the package. + + The iterable returns :class:`str` resources (e.g. files). + The iterable does not recurse into subdirectories. + """ + warnings.warn( + "importlib.resources.contents is deprecated. " + "Use files(anchor).iterdir() instead.", + DeprecationWarning, + stacklevel=1, + ) + return (resource.name for resource in _get_resource(anchor, path_names).iterdir()) + + +def _get_encoding_arg(path_names, encoding): + # For compatibility with versions where *encoding* was a positional + # argument, it needs to be given explicitly when there are multiple + # *path_names*. + # This limitation can be removed in Python 3.15. + if encoding is _MISSING: + if len(path_names) > 1: + raise TypeError( + "'encoding' argument required with multiple path names", + ) + else: + return 'utf-8' + return encoding + + +def _get_resource(anchor, path_names): + if anchor is None: + raise TypeError("anchor must be module or string, got None") + return files(anchor).joinpath(*path_names) diff --git a/stdlib/importlib/resources/_itertools.py b/stdlib/importlib/resources/_itertools.py new file mode 100644 index 000000000..7b775ef5a --- /dev/null +++ b/stdlib/importlib/resources/_itertools.py @@ -0,0 +1,38 @@ +# from more_itertools 9.0 +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value diff --git a/stdlib/importlib/resources/abc.py b/stdlib/importlib/resources/abc.py new file mode 100644 index 000000000..6750a7aaf --- /dev/null +++ b/stdlib/importlib/resources/abc.py @@ -0,0 +1,173 @@ +import abc +import io +import itertools +import os +import pathlib +from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional +from typing import runtime_checkable, Protocol +from typing import Union + + +StrPath = Union[str, os.PathLike[str]] + +__all__ = ["ResourceReader", "Traversable", "TraversableResources"] + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. + """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +class TraversalError(Exception): + pass + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + Any exceptions that occur when accessing the backing resource + may propagate unaltered. + """ + + @abc.abstractmethod + def iterdir(self) -> Iterator["Traversable"]: + """ + Yield Traversable objects in self + """ + + def read_bytes(self) -> bytes: + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding: Optional[str] = None) -> str: + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + def joinpath(self, *descendants: StrPath) -> "Traversable": + """ + Return Traversable resolved with any descendants applied. + + Each descendant should be a path segment relative to self + and each may contain multiple levels separated by + ``posixpath.sep`` (``/``). + """ + if not descendants: + return self + names = itertools.chain.from_iterable( + path.parts for path in map(pathlib.PurePosixPath, descendants) + ) + target = next(names) + matches = ( + traversable for traversable in self.iterdir() if traversable.name == target + ) + try: + match = next(matches) + except StopIteration: + raise TraversalError( + "Target not found during traversal.", target, list(names) + ) + return match.joinpath(*names) + + def __truediv__(self, child: StrPath) -> "Traversable": + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. + """ + + @abc.abstractmethod + def files(self) -> "Traversable": + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource: StrPath) -> io.BufferedReader: + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource: Any) -> NoReturn: + raise FileNotFoundError(resource) + + def is_resource(self, path: StrPath) -> bool: + return self.files().joinpath(path).is_file() + + def contents(self) -> Iterator[str]: + return (item.name for item in self.files().iterdir()) diff --git a/stdlib/importlib/resources/readers.py b/stdlib/importlib/resources/readers.py new file mode 100644 index 000000000..70fc7e2b9 --- /dev/null +++ b/stdlib/importlib/resources/readers.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import collections +import contextlib +import itertools +import pathlib +import operator +import re +import warnings +import zipfile +from collections.abc import Iterator + +from . import abc + +from ._itertools import only + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + self.prefix = loader.prefix.replace('\\', '/') + if loader.is_package(module): + _, _, name = module.rpartition('.') + self.prefix += name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + """ + Workaround for `zipfile.Path.is_file` returning true + for non-existent paths. + """ + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return zipfile.Path(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(_ensure_traversable, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + children = (child for path in self._paths for child in path.iterdir()) + by_name = operator.attrgetter('name') + groups = itertools.groupby(sorted(children, key=by_name), key=by_name) + return map(self._follow, (locs for name, locs in groups)) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, *descendants): + try: + return super().joinpath(*descendants) + except abc.TraversalError: + # One of the paths did not resolve (a directory does not exist). + # Just return something that will not exist. + return self._paths[0].joinpath(*descendants) + + @classmethod + def _follow(cls, children): + """ + Construct a MultiplexedPath if needed. + + If children contains a sole element, return it. + Otherwise, return a MultiplexedPath of the items. + Unless one of the items is not a Directory, then return the first. + """ + subdirs, one_dir, one_file = itertools.tee(children, 3) + + try: + return only(one_dir) + except ValueError: + try: + return cls(*subdirs) + except NotADirectoryError: + return next(one_file) + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*filter(bool, map(self._resolve, namespace_path))) + + @classmethod + def _resolve(cls, path_str) -> abc.Traversable | None: + r""" + Given an item from a namespace path, resolve it to a Traversable. + + path_str might be a directory on the filesystem or a path to a + zipfile plus the path within the zipfile, e.g. ``/foo/bar`` or + ``/foo/baz.zip/inner_dir`` or ``foo\baz.zip\inner_dir\sub``. + + path_str might also be a sentinel used by editable packages to + trigger other behaviors (see python/importlib_resources#311). + In that case, return None. + """ + dirs = (cand for cand in cls._candidate_paths(path_str) if cand.is_dir()) + return next(dirs, None) + + @classmethod + def _candidate_paths(cls, path_str: str) -> Iterator[abc.Traversable]: + yield pathlib.Path(path_str) + yield from cls._resolve_zip_path(path_str) + + @staticmethod + def _resolve_zip_path(path_str: str): + for match in reversed(list(re.finditer(r'[\\/]', path_str))): + with contextlib.suppress( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + PermissionError, + ): + inner = path_str[match.end() :].replace('\\', '/') + '/' + yield zipfile.Path(path_str[: match.start()], inner.lstrip('/')) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +def _ensure_traversable(path): + """ + Convert deprecated string arguments to traversables (pathlib.Path). + + Remove with Python 3.15. + """ + if not isinstance(path, str): + return path + + warnings.warn( + "String arguments are deprecated. Pass a Traversable instead.", + DeprecationWarning, + stacklevel=3, + ) + + return pathlib.Path(path) diff --git a/stdlib/importlib/resources/simple.py b/stdlib/importlib/resources/simple.py new file mode 100644 index 000000000..2e75299b1 --- /dev/null +++ b/stdlib/importlib/resources/simple.py @@ -0,0 +1,106 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @property + @abc.abstractmethod + def package(self) -> str: + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self) -> List['SimpleReader']: + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self) -> List[str]: + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource: str) -> BinaryIO: + """ + Obtain a File-like for a named resource. + """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader: SimpleReader): + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent: ResourceContainer, name: str): + self.parent = parent + self.name = name # type: ignore[misc] + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(stream, *args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. + """ + + def files(self): + return ResourceContainer(self) diff --git a/stdlib/importlib/simple.py b/stdlib/importlib/simple.py new file mode 100644 index 000000000..845bb9036 --- /dev/null +++ b/stdlib/importlib/simple.py @@ -0,0 +1,14 @@ +""" +Compatibility shim for .resources.simple as found on Python 3.10. + +Consumers that can rely on Python 3.11 should use the other +module directly. +""" + +from .resources.simple import ( + SimpleReader, ResourceHandle, ResourceContainer, TraversableReader, +) + +__all__ = [ + 'SimpleReader', 'ResourceHandle', 'ResourceContainer', 'TraversableReader', +] diff --git a/stdlib/importlib/util.py b/stdlib/importlib/util.py index aadfcae60..2b564e9b5 100644 --- a/stdlib/importlib/util.py +++ b/stdlib/importlib/util.py @@ -1,207 +1,279 @@ -"""importlib.util: gopy stub for the parts pkgutil/unittest.mock need. - -CPython's Lib/importlib/util.py re-exports symbols from the import -machinery's _bootstrap and _bootstrap_external modules, which gopy -doesn't fully ship. The pkgutil/unittest.mock load path only references -MAGIC_NUMBER at module load (inside a function body) plus find_spec -later; resolve_name doesn't touch util at all. Until spec 1711 Phase -9 wires the full importlib port this stub keeps the import chain -green. - -CPython: Lib/importlib/util.py -""" - -import os +"""Utility code for constructing importers, etc.""" +from ._abc import Loader +from ._bootstrap import module_from_spec +from ._bootstrap import _resolve_name +from ._bootstrap import spec_from_loader +from ._bootstrap import _find_spec +from ._bootstrap_external import MAGIC_NUMBER +from ._bootstrap_external import cache_from_source +from ._bootstrap_external import decode_source +from ._bootstrap_external import source_from_cache +from ._bootstrap_external import spec_from_file_location + +import _imp import sys import types -from importlib._bootstrap_external import ( - MAGIC_NUMBER, - cache_from_source, - decode_source, - source_from_cache, - source_hash, -) +def source_hash(source_bytes): + "Return the hash of *source_bytes* as used in hash-based pyc files." + return _imp.source_hash(_imp.pyc_magic_number_token, source_bytes) -class _SourceFileLoader: - """Minimal SourceFileLoader: reads the .py file and compiles it. - CPython: Lib/importlib/_bootstrap_external.py:962 SourceFileLoader - """ - - def __init__(self, name, path): - self.name = name - self.path = path - - def get_filename(self, fullname=None): - return self.path +def resolve_name(name, package): + """Resolve a relative module name to an absolute one.""" + if not name.startswith('.'): + return name + elif not package: + raise ImportError(f'no package specified for {repr(name)} ' + '(required for relative module names)') + level = 0 + for character in name: + if character != '.': + break + level += 1 + return _resolve_name(name[level:], package, level) - def get_source(self, fullname=None): - with open(self.path, "rb") as f: - data = f.read() - try: - return data.decode("utf-8") - except UnicodeDecodeError: - return data.decode("latin-1") - def get_code(self, fullname): - source = self.get_source(fullname) - return compile(source, self.path, "exec") +def _find_spec_from_path(name, path=None): + """Return the spec for the specified module. + First, sys.modules is checked to see if the module was already imported. If + so, then sys.modules[name].__spec__ is returned. If that happens to be + set to None, then ValueError is raised. If the module is not in + sys.modules, then sys.meta_path is searched for a suitable spec with the + value of 'path' given to the finders. None is returned if no spec could + be found. -class _ModuleSpec: - """Stripped-down ModuleSpec mirroring importlib.machinery.ModuleSpec. + Dotted names do not have their parent packages implicitly imported. You will + most likely need to explicitly import all parent packages in the proper + order for a submodule to get the correct spec. - CPython: Lib/importlib/_bootstrap.py:392 ModuleSpec """ - - def __init__(self, name, loader, *, origin=None, is_package=False): - self.name = name - self.loader = loader - self.origin = origin - self.submodule_search_locations = [] if is_package else None - self.has_location = origin is not None - self.cached = None - self.parent = name.rpartition(".")[0] if is_package else name.rpartition(".")[0] - - -def _resolve_search_paths(name): - parent, _, _ = name.rpartition(".") - if not parent: - return sys.path - pkg = sys.modules.get(parent) - if pkg is None: - try: - __import__(parent) - except ImportError: + if name not in sys.modules: + return _find_spec(name, path) + else: + module = sys.modules[name] + if module is None: return None - pkg = sys.modules.get(parent) - if pkg is None: - return None - return getattr(pkg, "__path__", None) + try: + spec = module.__spec__ + except AttributeError: + raise ValueError(f'{name}.__spec__ is not set') from None + else: + if spec is None: + raise ValueError(f'{name}.__spec__ is None') + return spec def find_spec(name, package=None): - """Locate name on sys.path (or the parent package's __path__) and - return a ModuleSpec the caller can drive through .loader.get_code(). + """Return the spec for the specified module. - CPython: Lib/importlib/util.py:90 find_spec - """ - if name.startswith("."): - if package is None: - raise ValueError("relative module name requires package") - name = resolve_name(name, package) - if name in sys.modules: - mod = sys.modules[name] - spec = getattr(mod, "__spec__", None) - if spec is not None: - return spec - search = _resolve_search_paths(name) - if search is None: - return None - tail = name.rpartition(".")[2] - for entry in search: - directory = entry if entry else "." - pkg_init = os.path.join(directory, tail, "__init__.py") - if os.path.isfile(pkg_init): - loader = _SourceFileLoader(name, pkg_init) - spec = _ModuleSpec(name, loader, origin=pkg_init, is_package=True) - spec.submodule_search_locations = [os.path.join(directory, tail)] - return spec - mod_file = os.path.join(directory, tail + ".py") - if os.path.isfile(mod_file): - return _ModuleSpec(name, _SourceFileLoader(name, mod_file), - origin=mod_file) - return None + First, sys.modules is checked to see if the module was already imported. If + so, then sys.modules[name].__spec__ is returned. If that happens to be + set to None, then ValueError is raised. If the module is not in + sys.modules, then sys.meta_path is searched for a suitable spec with the + value of 'path' given to the finders. None is returned if no spec could + be found. + If the name is for submodule (contains a dot), the parent module is + automatically imported. -def module_from_spec(spec): - """Create a new module based on spec and spec.loader.create_module. + The name and package arguments work the same as importlib.import_module(). + In other words, relative module names (with leading dots) work. - CPython: Lib/importlib/_bootstrap.py:571 module_from_spec - """ - import types - module = None - if hasattr(spec.loader, 'create_module'): - module = spec.loader.create_module(spec) - if module is None: - module = types.ModuleType(spec.name) - module.__loader__ = spec.loader - module.__spec__ = spec - module.__package__ = spec.name.rpartition('.')[0] - if spec.origin is not None: - module.__file__ = spec.origin - if spec.submodule_search_locations is not None: - module.__path__ = list(spec.submodule_search_locations) - return module - - -def spec_from_loader(name, loader, *, origin=None, is_package=None): - """Return a ModuleSpec based on a loader. - - CPython: Lib/importlib/util.py:44 spec_from_loader """ - if origin is None and hasattr(loader, 'get_filename'): - try: - origin = loader.get_filename(name) - except (ImportError, AttributeError): - pass - if is_package is None: - if hasattr(loader, 'is_package'): + fullname = resolve_name(name, package) if name.startswith('.') else name + if fullname not in sys.modules: + parent_name = fullname.rpartition('.')[0] + if parent_name: + parent = __import__(parent_name, fromlist=['__path__']) try: - is_package = loader.is_package(name) - except ImportError: - is_package = False + parent_path = parent.__path__ + except AttributeError as e: + raise ModuleNotFoundError( + f"__path__ attribute not found on {parent_name!r} " + f"while trying to find {fullname!r}", name=fullname) from e else: - is_package = False - return _ModuleSpec(name, loader, origin=origin, is_package=bool(is_package)) + parent_path = None + return _find_spec(fullname, parent_path) + else: + module = sys.modules[fullname] + if module is None: + return None + try: + spec = module.__spec__ + except AttributeError: + raise ValueError(f'{name}.__spec__ is not set') from None + else: + if spec is None: + raise ValueError(f'{name}.__spec__ is None') + return spec -def spec_from_file_location(name, location=None, *, loader=None, - submodule_search_locations=None): - """Return a ModuleSpec for the specified module, using file location. +# Normally we would use contextlib.contextmanager. However, this module +# is imported by runpy, which means we want to avoid any unnecessary +# dependencies. Thus we use a class. - CPython: Lib/importlib/util.py:132 spec_from_file_location - """ - if location is None and loader is None: - return None - if loader is None and location is not None: - loader = _SourceFileLoader(name, str(location)) - origin = str(location) if location is not None else getattr(loader, 'path', None) - is_package = submodule_search_locations is not None - spec = _ModuleSpec(name, loader, origin=origin, is_package=is_package) - if submodule_search_locations is not None: - spec.submodule_search_locations = list(submodule_search_locations) - return spec +class _incompatible_extension_module_restrictions: + """A context manager that can temporarily skip the compatibility check. + NOTE: This function is meant to accommodate an unusual case; one + which is likely to eventually go away. There's is a pretty good + chance this is not what you were looking for. -def resolve_name(name, package): - """Resolve a relative module name to an absolute one.""" - if not name.startswith('.'): - return name - if not package: - raise ImportError(f'no package specified for {name!r} ' - '(required for relative module names)') - level = 0 - for character in name: - if character != '.': - break - level += 1 - return _resolve_name(name[level:], package, level) + WARNING: Using this function to disable the check can lead to + unexpected behavior and even crashes. It should only be used during + extension module development. + If "disable_check" is True then the compatibility check will not + happen while the context manager is active. Otherwise the check + *will* happen. -def _resolve_name(name, package, level): - bits = package.rsplit('.', level - 1) - if len(bits) < level: - raise ImportError('attempted relative import beyond top-level package') - base = bits[0] - return f'{base}.{name}' if name else base + Normally, extensions that do not support multiple interpreters + may not be imported in a subinterpreter. That implies modules + that do not implement multi-phase init or that explicitly of out. + Likewise for modules import in a subinterpreter with its own GIL + when the extension does not support a per-interpreter GIL. This + implies the module does not have a Py_mod_multiple_interpreters slot + set to Py_MOD_PER_INTERPRETER_GIL_SUPPORTED. -class LazyLoader: - """Stub: not used by the unittest.mock import chain.""" + In both cases, this context manager may be used to temporarily + disable the check for compatible extension modules. + + You can get the same effect as this function by implementing the + basic interface of multi-phase init (PEP 489) and lying about + support for multiple interpreters (or per-interpreter GIL). + """ + + def __init__(self, *, disable_check): + self.disable_check = bool(disable_check) + + def __enter__(self): + self.old = _imp._override_multi_interp_extensions_check(self.override) + return self + + def __exit__(self, *args): + old = self.old + del self.old + _imp._override_multi_interp_extensions_check(old) + + @property + def override(self): + return -1 if self.disable_check else 1 + + +class _LazyModule(types.ModuleType): + + """A subclass of the module type which triggers loading upon attribute access.""" + + def __getattribute__(self, attr): + """Trigger the load of the module and return the attribute.""" + __spec__ = object.__getattribute__(self, '__spec__') + loader_state = __spec__.loader_state + with loader_state['lock']: + # Only the first thread to get the lock should trigger the load + # and reset the module's class. The rest can now getattr(). + if object.__getattribute__(self, '__class__') is _LazyModule: + __class__ = loader_state['__class__'] + + # Reentrant calls from the same thread must be allowed to proceed without + # triggering the load again. + # exec_module() and self-referential imports are the primary ways this can + # happen, but in any case we must return something to avoid deadlock. + if loader_state['is_loading']: + return __class__.__getattribute__(self, attr) + loader_state['is_loading'] = True + + __dict__ = __class__.__getattribute__(self, '__dict__') + + # All module metadata must be gathered from __spec__ in order to avoid + # using mutated values. + # Get the original name to make sure no object substitution occurred + # in sys.modules. + original_name = __spec__.name + # Figure out exactly what attributes were mutated between the creation + # of the module and now. + attrs_then = loader_state['__dict__'] + attrs_now = __dict__ + attrs_updated = {} + for key, value in attrs_now.items(): + # Code that set an attribute may have kept a reference to the + # assigned object, making identity more important than equality. + if key not in attrs_then: + attrs_updated[key] = value + elif id(attrs_now[key]) != id(attrs_then[key]): + attrs_updated[key] = value + __spec__.loader.exec_module(self) + # If exec_module() was used directly there is no guarantee the module + # object was put into sys.modules. + if original_name in sys.modules: + if id(self) != id(sys.modules[original_name]): + raise ValueError(f"module object for {original_name!r} " + "substituted in sys.modules during a lazy " + "load") + # Update after loading since that's what would happen in an eager + # loading situation. + __dict__.update(attrs_updated) + # Finally, stop triggering this method, if the module did not + # already update its own __class__. + if isinstance(self, _LazyModule): + object.__setattr__(self, '__class__', __class__) + + return getattr(self, attr) + + def __delattr__(self, attr): + """Trigger the load and then perform the deletion.""" + # To trigger the load and raise an exception if the attribute + # doesn't exist. + self.__getattribute__(attr) + delattr(self, attr) + + +class LazyLoader(Loader): + + """A loader that creates a module which defers loading until attribute access.""" + + @staticmethod + def __check_eager_loader(loader): + if not hasattr(loader, 'exec_module'): + raise TypeError('loader must define exec_module()') @classmethod def factory(cls, loader): - raise NotImplementedError("importlib.util.LazyLoader is unavailable in gopy") + """Construct a callable which returns the eager loader made lazy.""" + cls.__check_eager_loader(loader) + return lambda *args, **kwargs: cls(loader(*args, **kwargs)) + + def __init__(self, loader): + self.__check_eager_loader(loader) + self.loader = loader + + def create_module(self, spec): + return self.loader.create_module(spec) + + def exec_module(self, module): + """Make the module load lazily.""" + # Threading is only needed for lazy loading, and importlib.util can + # be pulled in at interpreter startup, so defer until needed. + import threading + module.__spec__.loader = self.loader + module.__loader__ = self.loader + # Don't need to worry about deep-copying as trying to set an attribute + # on an object would have triggered the load, + # e.g. ``module.__spec__.loader = None`` would trigger a load from + # trying to access module.__spec__. + loader_state = {} + loader_state['__dict__'] = module.__dict__.copy() + loader_state['__class__'] = module.__class__ + loader_state['lock'] = threading.RLock() + loader_state['is_loading'] = False + module.__spec__.loader_state = loader_state + module.__class__ = _LazyModule + + +__all__ = ['LazyLoader', 'Loader', 'MAGIC_NUMBER', + 'cache_from_source', 'decode_source', 'find_spec', + 'module_from_spec', 'resolve_name', 'source_from_cache', + 'source_hash', 'spec_from_file_location', 'spec_from_loader'] diff --git a/stdlib/modulefinder.py b/stdlib/modulefinder.py new file mode 100644 index 000000000..ac478ee7f --- /dev/null +++ b/stdlib/modulefinder.py @@ -0,0 +1,671 @@ +"""Find modules used by a script, using introspection.""" + +import dis +import importlib._bootstrap_external +import importlib.machinery +import marshal +import os +import io +import sys + +# Old imp constants: + +_SEARCH_ERROR = 0 +_PY_SOURCE = 1 +_PY_COMPILED = 2 +_C_EXTENSION = 3 +_PKG_DIRECTORY = 5 +_C_BUILTIN = 6 +_PY_FROZEN = 7 + +# Modulefinder does a good job at simulating Python's, but it can not +# handle __path__ modifications packages make at runtime. Therefore there +# is a mechanism whereby you can register extra paths in this map for a +# package, and it will be honored. + +# Note this is a mapping is lists of paths. +packagePathMap = {} + +# A Public interface +def AddPackagePath(packagename, path): + packagePathMap.setdefault(packagename, []).append(path) + +replacePackageMap = {} + +# This ReplacePackage mechanism allows modulefinder to work around +# situations in which a package injects itself under the name +# of another package into sys.modules at runtime by calling +# ReplacePackage("real_package_name", "faked_package_name") +# before running ModuleFinder. + +def ReplacePackage(oldname, newname): + replacePackageMap[oldname] = newname + + +def _find_module(name, path=None): + """An importlib reimplementation of imp.find_module (for our purposes).""" + + # It's necessary to clear the caches for our Finder first, in case any + # modules are being added/deleted/modified at runtime. In particular, + # test_modulefinder.py changes file tree contents in a cache-breaking way: + + importlib.machinery.PathFinder.invalidate_caches() + + spec = importlib.machinery.PathFinder.find_spec(name, path) + + if spec is None: + raise ImportError("No module named {name!r}".format(name=name), name=name) + + # Some special cases: + + if spec.loader is importlib.machinery.BuiltinImporter: + return None, None, ("", "", _C_BUILTIN) + + if spec.loader is importlib.machinery.FrozenImporter: + return None, None, ("", "", _PY_FROZEN) + + file_path = spec.origin + + if spec.loader.is_package(name): + return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) + + if isinstance(spec.loader, importlib.machinery.SourceFileLoader): + kind = _PY_SOURCE + + elif isinstance( + spec.loader, ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.AppleFrameworkLoader, + ) + ): + kind = _C_EXTENSION + + elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): + kind = _PY_COMPILED + + else: # Should never happen. + return None, None, ("", "", _SEARCH_ERROR) + + file = io.open_code(file_path) + suffix = os.path.splitext(file_path)[-1] + + return file, file_path, (suffix, "rb", kind) + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} + + def __repr__(self): + s = "Module(%r" % (self.__name__,) + if self.__file__ is not None: + s = s + ", %r" % (self.__file__,) + if self.__path__ is not None: + s = s + ", %r" % (self.__path__,) + s = s + ")" + return s + +class ModuleFinder: + + def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + self.excludes = excludes if excludes is not None else [] + self.replace_paths = replace_paths if replace_paths is not None else [] + self.processed_paths = [] # Used in debugging only + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print(" ", end=' ') + print(str, end=' ') + for arg in args: + print(repr(arg), end=' ') + print() + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + self.msg(*args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + self.msg(*args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + with io.open_code(pathname) as fp: + stuff = ("", "rb", _PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = os.path.split(pathname) + name, ext = os.path.splitext(name) + with io.open_code(pathname) as fp: + stuff = (ext, "rb", _PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None, level=-1): + self.msg(3, "import_hook", name, caller, fromlist, level) + parent = self.determine_parent(caller, level=level) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + return None + + def determine_parent(self, caller, level=-1): + self.msgin(4, "determine_parent", caller, level) + if not caller or level == 0: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if level >= 1: # relative import + if caller.__path__: + level -= 1 + if level == 0: + parent = self.modules[pname] + assert parent is caller + self.msgout(4, "determine_parent ->", parent) + return parent + if pname.count(".") < level: + raise ImportError("relative importpath too deep") + pname = ".".join(pname.split(".")[:-level]) + parent = self.modules[pname] + self.msgout(4, "determine_parent ->", parent) + return parent + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, "determine_parent ->", parent) + return parent + if '.' in pname: + i = pname.rfind('.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' in name: + i = name.find('.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError("No module named " + qname) + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = tail.find('.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + mname) + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError("No module named " + subname) + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. + # But we must also collect Python extension modules - although + # we cannot separate normal dlls from Python extensions. + suffixes = [] + suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] + suffixes += importlib.machinery.SOURCE_SUFFIXES[:] + suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] + for dir in m.__path__: + try: + names = os.listdir(dir) + except OSError: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if fqname in self.badmodules: + self.msgout(3, "import_module -> None") + return None + if parent and parent.__path__ is None: + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__, parent) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: + fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, file_info): + suffix, mode, type = file_info + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == _PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == _PY_SOURCE: + co = compile(fp.read(), pathname, 'exec') + elif type == _PY_COMPILED: + try: + data = fp.read() + importlib._bootstrap_external._classify_pyc(data, fqname, {}) + except ImportError as exc: + self.msgout(2, "raise ImportError: " + str(exc), pathname) + raise + co = marshal.loads(memoryview(data)[16:]) + else: + co = None + m = self.add_module(fqname) + m.__file__ = pathname + if co: + if self.replace_paths: + co = self.replace_paths_in_code(co) + m.__code__ = co + self.scan_code(co, m) + self.msgout(2, "load_module ->", m) + return m + + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + if caller: + self.badmodules[name][caller.__name__] = 1 + else: + self.badmodules[name]["-"] = 1 + + def _safe_import_hook(self, name, caller, fromlist, level=-1): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller, level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + except SyntaxError as msg: + self.msg(2, "SyntaxError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + fullname = name + "." + sub + if fullname in self.badmodules: + self._add_badmodule(fullname, caller) + continue + try: + self.import_hook(name, caller, [sub], level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(fullname, caller) + + def scan_opcodes(self, co): + # Scan the code, and yield 'interesting' opcode combinations + for name in dis._find_store_names(co): + yield "store", (name,) + for name, level, fromlist in dis._find_imports(co): + if level == 0: # absolute import + yield "absolute_import", (fromlist, name) + else: # relative import + yield "relative_import", (level, fromlist, name) + + def scan_code(self, co, m): + code = co.co_code + scanner = self.scan_opcodes + for what, args in scanner(co): + if what == "store": + name, = args + m.globalnames[name] = 1 + elif what == "absolute_import": + fromlist, name = args + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist, level=0) + if have_star: + # We've encountered an "import *". If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 + elif what == "relative_import": + level, fromlist, name = args + if name: + self._safe_import_hook(name, m, fromlist, level=level) + else: + parent = self.determine_parent(m, level=level) + self._safe_import_hook(parent.__name__, None, fromlist, level=0) + else: + # We don't expect anything else from the generator. + raise RuntimeError(what) + + for c in co.co_consts: + if isinstance(c, type(co)): + self.scan_code(c, m) + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + newname = replacePackageMap.get(fqname) + if newname: + fqname = newname + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + + # As per comment at top of file, simulate runtime __path__ additions. + m.__path__ = m.__path__ + packagePathMap.get(fqname, []) + + fp, buf, stuff = self.find_module("__init__", m.__path__) + try: + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + finally: + if fp: + fp.close() + + def add_module(self, fqname): + if fqname in self.modules: + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path, parent=None): + if parent is not None: + # assert path is not None + fullname = parent.__name__+'.'+name + else: + fullname = name + if fullname in self.excludes: + self.msgout(3, "find_module -> Excluded", fullname) + raise ImportError(name) + + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", _C_BUILTIN)) + + path = self.path + + return _find_module(name, path) + + def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ + print() + print(" %-25s %s" % ("Name", "File")) + print(" %-25s %s" % ("----", "----")) + # Print modules found + keys = sorted(self.modules.keys()) + for key in keys: + m = self.modules[key] + if m.__path__: + print("P", end=' ') + else: + print("m", end=' ') + print("%-25s" % key, m.__file__ or "") + + # Print missing modules + missing, maybe = self.any_missing_maybe() + if missing: + print() + print("Missing modules:") + for name in missing: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print() + print("Submodules that appear to be missing, but could also be", end=' ') + print("global names in the parent package:") + for name in maybe: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + + def any_missing(self): + """Return a list of modules that appear to be missing. Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ + missing = [] + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing. + missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe + + def replace_paths_in_code(self, co): + new_filename = original_filename = os.path.normpath(co.co_filename) + for f, r in self.replace_paths: + if original_filename.startswith(f): + new_filename = r + original_filename[len(f):] + break + + if self.debug and original_filename not in self.processed_paths: + if new_filename != original_filename: + self.msgout(2, "co_filename %r changed to %r" \ + % (original_filename,new_filename,)) + else: + self.msgout(2, "co_filename %r remains unchanged" \ + % (original_filename,)) + self.processed_paths.append(original_filename) + + consts = list(co.co_consts) + for i in range(len(consts)): + if isinstance(consts[i], type(co)): + consts[i] = self.replace_paths_in_code(consts[i]) + + return co.replace(co_consts=tuple(consts), co_filename=new_filename) + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") + except getopt.error as msg: + print(msg) + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + exclude = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + a.split(os.pathsep) + if o == '-q': + debug = 0 + if o == '-x': + exclude.append(a) + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print("path:") + for item in path: + print(" ", repr(item)) + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug, exclude) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + mf.report() + return mf # for -i debugging + + +if __name__ == '__main__': + try: + mf = test() + except KeyboardInterrupt: + print("\n[interrupted]") diff --git a/stdlib/pyclbr.py b/stdlib/pyclbr.py new file mode 100644 index 000000000..37f86995d --- /dev/null +++ b/stdlib/pyclbr.py @@ -0,0 +1,314 @@ +"""Parse a Python module and describe its classes and functions. + +Parse enough of a Python file to recognize imports and class and +function definitions, and to find out the superclasses of a class. + +The interface consists of a single function: + readmodule_ex(module, path=None) +where module is the name of a Python module, and path is an optional +list of directories where the module is to be searched. If present, +path is prepended to the system search path sys.path. The return value +is a dictionary. The keys of the dictionary are the names of the +classes and functions defined in the module (including classes that are +defined via the from XXX import YYY construct). The values are +instances of classes Class and Function. One special key/value pair is +present for packages: the key '__path__' has a list as its value which +contains the package search path. + +Classes and Functions have a common superclass: _Object. Every instance +has the following attributes: + module -- name of the module; + name -- name of the object; + file -- file in which the object is defined; + lineno -- line in the file where the object's definition starts; + end_lineno -- line in the file where the object's definition ends; + parent -- parent of this object, if any; + children -- nested objects contained in this object. +The 'children' attribute is a dictionary mapping names to objects. + +Instances of Function describe functions with the attributes from _Object, +plus the following: + is_async -- if a function is defined with an 'async' prefix + +Instances of Class describe classes with the attributes from _Object, +plus the following: + super -- list of super classes (Class instances if possible); + methods -- mapping of method names to beginning line numbers. +If the name of a super class is not recognized, the corresponding +entry in the list of super classes is not a class instance but a +string giving the name of the super class. Since import statements +are recognized and imported modules are scanned as well, this +shouldn't happen often. +""" + +import ast +import sys +import importlib.util + +__all__ = ["readmodule", "readmodule_ex", "Class", "Function"] + +_modules = {} # Initialize cache of modules we've seen. + + +class _Object: + "Information about Python class or function." + def __init__(self, module, name, file, lineno, end_lineno, parent): + self.module = module + self.name = name + self.file = file + self.lineno = lineno + self.end_lineno = end_lineno + self.parent = parent + self.children = {} + if parent is not None: + parent.children[name] = self + + +# Odd Function and Class signatures are for back-compatibility. +class Function(_Object): + "Information about a Python function, including methods." + def __init__(self, module, name, file, lineno, + parent=None, is_async=False, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.is_async = is_async + if isinstance(parent, Class): + parent.methods[name] = lineno + + +class Class(_Object): + "Information about a Python class." + def __init__(self, module, name, super_, file, lineno, + parent=None, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.super = super_ or [] + self.methods = {} + + +# These 2 functions are used in these tests +# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py +def _nest_function(ob, func_name, lineno, end_lineno, is_async=False): + "Return a Function after nesting within ob." + return Function(ob.module, func_name, ob.file, lineno, + parent=ob, is_async=is_async, end_lineno=end_lineno) + +def _nest_class(ob, class_name, lineno, end_lineno, super=None): + "Return a Class after nesting within ob." + return Class(ob.module, class_name, super, ob.file, lineno, + parent=ob, end_lineno=end_lineno) + + +def readmodule(module, path=None): + """Return Class objects for the top-level classes in module. + + This is the original interface, before Functions were added. + """ + + res = {} + for key, value in _readmodule(module, path or []).items(): + if isinstance(value, Class): + res[key] = value + return res + +def readmodule_ex(module, path=None): + """Return a dictionary with all functions and classes in module. + + Search for module in PATH + sys.path. + If possible, include imported superclasses. + Do this by reading source, without importing (and executing) it. + """ + return _readmodule(module, path or []) + + +def _readmodule(module, path, inpackage=None): + """Do the hard work for readmodule[_ex]. + + If inpackage is given, it must be the dotted name of the package in + which we are searching for a submodule, and then PATH must be the + package search path; otherwise, we are searching for a top-level + module, and path is combined with sys.path. + """ + # Compute the full module name (prepending inpackage if set). + if inpackage is not None: + fullmodule = "%s.%s" % (inpackage, module) + else: + fullmodule = module + + # Check in the cache. + if fullmodule in _modules: + return _modules[fullmodule] + + # Initialize the dict for this module's contents. + tree = {} + + # Check if it is a built-in module; we don't do much for these. + if module in sys.builtin_module_names and inpackage is None: + _modules[module] = tree + return tree + + # Check for a dotted module name. + i = module.rfind('.') + if i >= 0: + package = module[:i] + submodule = module[i+1:] + parent = _readmodule(package, path, inpackage) + if inpackage is not None: + package = "%s.%s" % (inpackage, package) + if not '__path__' in parent: + raise ImportError('No package named {}'.format(package)) + return _readmodule(submodule, parent['__path__'], package) + + # Search the path for the module. + f = None + if inpackage is not None: + search_path = path + else: + search_path = path + sys.path + spec = importlib.util._find_spec_from_path(fullmodule, search_path) + if spec is None: + raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule) + _modules[fullmodule] = tree + # Is module a package? + if spec.submodule_search_locations is not None: + tree['__path__'] = spec.submodule_search_locations + try: + source = spec.loader.get_source(fullmodule) + except (AttributeError, ImportError): + # If module is not Python source, we cannot do anything. + return tree + else: + if source is None: + return tree + + fname = spec.loader.get_filename(fullmodule) + return _create_tree(fullmodule, path, fname, source, tree, inpackage) + + +class _ModuleBrowser(ast.NodeVisitor): + def __init__(self, module, path, file, tree, inpackage): + self.path = path + self.tree = tree + self.file = file + self.module = module + self.inpackage = inpackage + self.stack = [] + + def visit_ClassDef(self, node): + bases = [] + for base in node.bases: + name = ast.unparse(base) + if name in self.tree: + # We know this super class. + bases.append(self.tree[name]) + elif len(names := name.split(".")) > 1: + # Super class form is module.class: + # look in module for class. + *_, module, class_ = names + if module in _modules: + bases.append(_modules[module].get(class_, name)) + else: + bases.append(name) + + parent = self.stack[-1] if self.stack else None + class_ = Class(self.module, node.name, bases, self.file, node.lineno, + parent=parent, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = class_ + self.stack.append(class_) + self.generic_visit(node) + self.stack.pop() + + def visit_FunctionDef(self, node, *, is_async=False): + parent = self.stack[-1] if self.stack else None + function = Function(self.module, node.name, self.file, node.lineno, + parent, is_async, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = function + self.stack.append(function) + self.generic_visit(node) + self.stack.pop() + + def visit_AsyncFunctionDef(self, node): + self.visit_FunctionDef(node, is_async=True) + + def visit_Import(self, node): + if node.col_offset != 0: + return + + for module in node.names: + try: + try: + _readmodule(module.name, self.path, self.inpackage) + except ImportError: + _readmodule(module.name, []) + except (ImportError, SyntaxError): + # If we can't find or parse the imported module, + # too bad -- don't die here. + continue + + def visit_ImportFrom(self, node): + if node.col_offset != 0: + return + try: + module = "." * node.level + if node.module: + module += node.module + module = _readmodule(module, self.path, self.inpackage) + except (ImportError, SyntaxError): + return + + for name in node.names: + if name.name in module: + self.tree[name.asname or name.name] = module[name.name] + elif name.name == "*": + for import_name, import_value in module.items(): + if import_name.startswith("_"): + continue + self.tree[import_name] = import_value + + +def _create_tree(fullmodule, path, fname, source, tree, inpackage): + mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage) + mbrowser.visit(ast.parse(source)) + return mbrowser.tree + + +def _main(): + "Print module output (default this file) for quick visual check." + import os + try: + mod = sys.argv[1] + except: + mod = __file__ + if os.path.exists(mod): + path = [os.path.dirname(mod)] + mod = os.path.basename(mod) + if mod.lower().endswith(".py"): + mod = mod[:-3] + else: + path = [] + tree = readmodule_ex(mod, path) + lineno_key = lambda a: getattr(a, 'lineno', 0) + objs = sorted(tree.values(), key=lineno_key, reverse=True) + indent_level = 2 + while objs: + obj = objs.pop() + if isinstance(obj, list): + # Value is a __path__ key. + continue + if not hasattr(obj, 'indent'): + obj.indent = 0 + + if isinstance(obj, _Object): + new_objs = sorted(obj.children.values(), + key=lineno_key, reverse=True) + for ob in new_objs: + ob.indent = obj.indent + indent_level + objs.extend(new_objs) + if isinstance(obj, Class): + print("{}class {} {} {}" + .format(' ' * obj.indent, obj.name, obj.super, obj.lineno)) + elif isinstance(obj, Function): + print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno)) + +if __name__ == "__main__": + _main() diff --git a/stdlib/site.py b/stdlib/site.py new file mode 100644 index 000000000..aeb7c6cfc --- /dev/null +++ b/stdlib/site.py @@ -0,0 +1,779 @@ +"""Append module search paths for third-party packages to sys.path. + +**************************************************************** +* This module is automatically imported during initialization. * +**************************************************************** + +This will append site-specific paths to the module search path. On +Unix (including Mac OSX), it starts with sys.prefix and +sys.exec_prefix (if different) and appends +lib/python/site-packages. +On other platforms (such as Windows), it tries each of the +prefixes directly, as well as with lib/site-packages appended. The +resulting directories, if they exist, are appended to sys.path, and +also inspected for path configuration files. + +If a file named "pyvenv.cfg" exists one directory above sys.executable, +sys.prefix and sys.exec_prefix are set to that directory and +it is also checked for site-packages (sys.base_prefix and +sys.base_exec_prefix will always be the "real" prefixes of the Python +installation). If "pyvenv.cfg" (a bootstrap configuration file) contains +the key "include-system-site-packages" set to anything other than "false" +(case-insensitive), the system-level prefixes will still also be +searched for site-packages; otherwise they won't. + +All of the resulting site-specific directories, if they exist, are +appended to sys.path, and also inspected for path configuration +files. + +A path configuration file is a file whose name has the form +.pth; its contents are additional directories (one per line) +to be added to sys.path. Non-existing directories (or +non-directories) are never added to sys.path; no directory is added to +sys.path more than once. Blank lines and lines beginning with +'#' are skipped. Lines starting with 'import' are executed. + +For example, suppose sys.prefix and sys.exec_prefix are set to +/usr/local and there is a directory /usr/local/lib/python2.5/site-packages +with three subdirectories, foo, bar and spam, and two path +configuration files, foo.pth and bar.pth. Assume foo.pth contains the +following: + + # foo package configuration + foo + bar + bletch + +and bar.pth contains: + + # bar package configuration + bar + +Then the following directories are added to sys.path, in this order: + + /usr/local/lib/python2.5/site-packages/bar + /usr/local/lib/python2.5/site-packages/foo + +Note that bletch is omitted because it doesn't exist; bar precedes foo +because bar.pth comes alphabetically before foo.pth; and spam is +omitted because it is not mentioned in either path configuration file. + +The readline module is also automatically configured to enable +completion for systems that support it. This can be overridden in +sitecustomize, usercustomize or PYTHONSTARTUP. Starting Python in +isolated mode (-I) disables automatic readline configuration. + +After these operations, an attempt is made to import a module +named sitecustomize, which can perform arbitrary additional +site-specific customizations. If this import fails with an +ImportError exception, it is silently ignored. +""" + +import sys +import os +import builtins +import _sitebuiltins +import _io as io +import stat +import errno + +# Prefixes for site-packages; add additional prefixes like /usr/local here +PREFIXES = [sys.prefix, sys.exec_prefix] +# Enable per user site-packages directory +# set it to False to disable the feature or True to force the feature +ENABLE_USER_SITE = None + +# for distutils.commands.install +# These values are initialized by the getuserbase() and getusersitepackages() +# functions, through the main() function when Python starts. +USER_SITE = None +USER_BASE = None + + +def _trace(message): + if sys.flags.verbose: + print(message, file=sys.stderr) + + +def _warn(*args, **kwargs): + import warnings + + warnings.warn(*args, **kwargs) + + +def makepath(*paths): + dir = os.path.join(*paths) + try: + dir = os.path.abspath(dir) + except OSError: + pass + return dir, os.path.normcase(dir) + + +def abs_paths(): + """Set all module __file__ and __cached__ attributes to an absolute path""" + for m in set(sys.modules.values()): + loader_module = None + try: + loader_module = m.__loader__.__module__ + except AttributeError: + try: + loader_module = m.__spec__.loader.__module__ + except AttributeError: + pass + if loader_module not in {'_frozen_importlib', '_frozen_importlib_external'}: + continue # don't mess with a PEP 302-supplied __file__ + try: + m.__file__ = os.path.abspath(m.__file__) + except (AttributeError, OSError, TypeError): + pass + try: + m.__cached__ = os.path.abspath(m.__cached__) + except (AttributeError, OSError, TypeError): + pass + + +def removeduppaths(): + """ Remove duplicate entries from sys.path along with making them + absolute""" + # This ensures that the initial path provided by the interpreter contains + # only absolute pathnames, even if we're running from the build directory. + L = [] + known_paths = set() + for dir in sys.path: + # Filter out duplicate paths (on case-insensitive file systems also + # if they only differ in case); turn relative paths into absolute + # paths. + dir, dircase = makepath(dir) + if dircase not in known_paths: + L.append(dir) + known_paths.add(dircase) + sys.path[:] = L + return known_paths + + +def _init_pathinfo(): + """Return a set containing all existing file system items from sys.path.""" + d = set() + for item in sys.path: + try: + if os.path.exists(item): + _, itemcase = makepath(item) + d.add(itemcase) + except TypeError: + continue + return d + + +def addpackage(sitedir, name, known_paths): + """Process a .pth file within the site-packages directory: + For each line in the file, either combine it with sitedir to a path + and add that to known_paths, or execute it if it starts with 'import '. + """ + if known_paths is None: + known_paths = _init_pathinfo() + reset = True + else: + reset = False + fullname = os.path.join(sitedir, name) + try: + st = os.lstat(fullname) + except OSError: + return + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or + (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): + _trace(f"Skipping hidden .pth file: {fullname!r}") + return + _trace(f"Processing .pth file: {fullname!r}") + try: + with io.open_code(fullname) as f: + pth_content = f.read() + except OSError: + return + + try: + # Accept BOM markers in .pth files as we do in source files + # (Windows PowerShell 5.1 makes it hard to emit UTF-8 files without a BOM) + pth_content = pth_content.decode("utf-8-sig") + except UnicodeDecodeError: + # Fallback to locale encoding for backward compatibility. + # We will deprecate this fallback in the future. + import locale + pth_content = pth_content.decode(locale.getencoding()) + _trace(f"Cannot read {fullname!r} as UTF-8. " + f"Using fallback encoding {locale.getencoding()!r}") + + for n, line in enumerate(pth_content.splitlines(), 1): + if line.startswith("#"): + continue + if line.strip() == "": + continue + try: + if line.startswith(("import ", "import\t")): + exec(line) + continue + line = line.rstrip() + dir, dircase = makepath(sitedir, line) + if dircase not in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + except Exception as exc: + print(f"Error processing line {n:d} of {fullname}:\n", + file=sys.stderr) + import traceback + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' '+line, file=sys.stderr) + print("\nRemainder of file ignored", file=sys.stderr) + break + if reset: + known_paths = None + return known_paths + + +def addsitedir(sitedir, known_paths=None): + """Add 'sitedir' argument to sys.path if missing and handle .pth files in + 'sitedir'""" + _trace(f"Adding directory: {sitedir!r}") + if known_paths is None: + known_paths = _init_pathinfo() + reset = True + else: + reset = False + sitedir, sitedircase = makepath(sitedir) + if not sitedircase in known_paths: + sys.path.append(sitedir) # Add path component + known_paths.add(sitedircase) + try: + names = os.listdir(sitedir) + except OSError: + return + names = [name for name in names + if name.endswith(".pth") and not name.startswith(".")] + for name in sorted(names): + addpackage(sitedir, name, known_paths) + if reset: + known_paths = None + return known_paths + + +def check_enableusersite(): + """Check if user site directory is safe for inclusion + + The function tests for the command line flag (including environment var), + process uid/gid equal to effective uid/gid. + + None: Disabled for security reasons + False: Disabled by user (command line option) + True: Safe and enabled + """ + if sys.flags.no_user_site: + return False + + if hasattr(os, "getuid") and hasattr(os, "geteuid"): + # check process uid == effective uid + if os.geteuid() != os.getuid(): + return None + if hasattr(os, "getgid") and hasattr(os, "getegid"): + # check process gid == effective gid + if os.getegid() != os.getgid(): + return None + + return True + + +# NOTE: sysconfig and it's dependencies are relatively large but site module +# needs very limited part of them. +# To speedup startup time, we have copy of them. +# +# See https://bugs.python.org/issue29585 + +# Copy of sysconfig._get_implementation() +def _get_implementation(): + return 'Python' + +# Copy of sysconfig._getuserbase() +def _getuserbase(): + env_base = os.environ.get("PYTHONUSERBASE", None) + if env_base: + return env_base + + # Emscripten, iOS, tvOS, VxWorks, WASI, and watchOS have no home directories + if sys.platform in {"emscripten", "ios", "tvos", "vxworks", "wasi", "watchos"}: + return None + + def joinuser(*args): + return os.path.expanduser(os.path.join(*args)) + + if os.name == "nt": + base = os.environ.get("APPDATA") or "~" + return joinuser(base, _get_implementation()) + + if sys.platform == "darwin" and sys._framework: + return joinuser("~", "Library", sys._framework, + "%d.%d" % sys.version_info[:2]) + + return joinuser("~", ".local") + + +# Same to sysconfig.get_path('purelib', os.name+'_user') +def _get_path(userbase): + version = sys.version_info + if hasattr(sys, 'abiflags') and 't' in sys.abiflags: + abi_thread = 't' + else: + abi_thread = '' + + implementation = _get_implementation() + implementation_lower = implementation.lower() + if os.name == 'nt': + ver_nodot = sys.winver.replace('.', '') + return f'{userbase}\\{implementation}{ver_nodot}\\site-packages' + + if sys.platform == 'darwin' and sys._framework: + return f'{userbase}/lib/{implementation_lower}/site-packages' + + return f'{userbase}/lib/python{version[0]}.{version[1]}{abi_thread}/site-packages' + + +def getuserbase(): + """Returns the `user base` directory path. + + The `user base` directory can be used to store data. If the global + variable ``USER_BASE`` is not initialized yet, this function will also set + it. + """ + global USER_BASE + if USER_BASE is None: + USER_BASE = _getuserbase() + return USER_BASE + + +def getusersitepackages(): + """Returns the user-specific site-packages directory path. + + If the global variable ``USER_SITE`` is not initialized yet, this + function will also set it. + """ + global USER_SITE, ENABLE_USER_SITE + userbase = getuserbase() # this will also set USER_BASE + + if USER_SITE is None: + if userbase is None: + ENABLE_USER_SITE = False # disable user site and return None + else: + USER_SITE = _get_path(userbase) + + return USER_SITE + +def addusersitepackages(known_paths): + """Add a per user site-package to sys.path + + Each user has its own python directory with site-packages in the + home directory. + """ + # get the per user site-package path + # this call will also make sure USER_BASE and USER_SITE are set + _trace("Processing user site-packages") + user_site = getusersitepackages() + + if ENABLE_USER_SITE and os.path.isdir(user_site): + addsitedir(user_site, known_paths) + return known_paths + +def getsitepackages(prefixes=None): + """Returns a list containing all global site-packages directories. + + For each directory present in ``prefixes`` (or the global ``PREFIXES``), + this function will find its `site-packages` subdirectory depending on the + system environment, and will return a list of full paths. + """ + sitepackages = [] + seen = set() + + if prefixes is None: + prefixes = PREFIXES + + for prefix in prefixes: + if not prefix or prefix in seen: + continue + seen.add(prefix) + + implementation = _get_implementation().lower() + ver = sys.version_info + if hasattr(sys, 'abiflags') and 't' in sys.abiflags: + abi_thread = 't' + else: + abi_thread = '' + if os.sep == '/': + libdirs = [sys.platlibdir] + if sys.platlibdir != "lib": + libdirs.append("lib") + + for libdir in libdirs: + path = os.path.join(prefix, libdir, + f"{implementation}{ver[0]}.{ver[1]}{abi_thread}", + "site-packages") + sitepackages.append(path) + else: + sitepackages.append(prefix) + sitepackages.append(os.path.join(prefix, "Lib", "site-packages")) + return sitepackages + +def addsitepackages(known_paths, prefixes=None): + """Add site-packages to sys.path""" + _trace("Processing global site-packages") + for sitedir in getsitepackages(prefixes): + if os.path.isdir(sitedir): + addsitedir(sitedir, known_paths) + + return known_paths + +def setquit(): + """Define new builtins 'quit' and 'exit'. + + These are objects which make the interpreter exit when called. + The repr of each object contains a hint at how it works. + + """ + if os.sep == '\\': + eof = 'Ctrl-Z plus Return' + else: + eof = 'Ctrl-D (i.e. EOF)' + + builtins.quit = _sitebuiltins.Quitter('quit', eof) + builtins.exit = _sitebuiltins.Quitter('exit', eof) + + +def setcopyright(): + """Set 'copyright' and 'credits' in builtins""" + builtins.copyright = _sitebuiltins._Printer("copyright", sys.copyright) + builtins.credits = _sitebuiltins._Printer("credits", """\ +Thanks to CWI, CNRI, BeOpen, Zope Corporation, the Python Software +Foundation, and a cast of thousands for supporting Python +development. See www.python.org for more information.""") + files, dirs = [], [] + # Not all modules are required to have a __file__ attribute. See + # PEP 420 for more details. + here = getattr(sys, '_stdlib_dir', None) + if not here and hasattr(os, '__file__'): + here = os.path.dirname(os.__file__) + if here: + files.extend(["LICENSE.txt", "LICENSE"]) + dirs.extend([os.path.join(here, os.pardir), here, os.curdir]) + builtins.license = _sitebuiltins._Printer( + "license", + "See https://www.python.org/psf/license/", + files, dirs) + + +def sethelper(): + builtins.help = _sitebuiltins._Helper() + + +def gethistoryfile(): + """Check if the PYTHON_HISTORY environment variable is set and define + it as the .python_history file. If PYTHON_HISTORY is not set, use the + default .python_history file. + """ + if not sys.flags.ignore_environment: + history = os.environ.get("PYTHON_HISTORY") + if history: + return history + return os.path.join(os.path.expanduser('~'), + '.python_history') + + +def enablerlcompleter(): + """Enable default readline configuration on interactive prompts, by + registering a sys.__interactivehook__. + """ + sys.__interactivehook__ = register_readline + + +def register_readline(): + """Configure readline completion on interactive prompts. + + If the readline module can be imported, the hook will set the Tab key + as completion key and register ~/.python_history as history file. + This can be overridden in the sitecustomize or usercustomize module, + or in a PYTHONSTARTUP file. + """ + if not sys.flags.ignore_environment: + PYTHON_BASIC_REPL = os.getenv("PYTHON_BASIC_REPL") + else: + PYTHON_BASIC_REPL = False + + import atexit + + try: + try: + import readline + except ImportError: + readline = None + else: + import rlcompleter # noqa: F401 + except ImportError: + return + + try: + if PYTHON_BASIC_REPL: + CAN_USE_PYREPL = False + else: + original_path = sys.path + sys.path = [p for p in original_path if p != ''] + try: + import _pyrepl.readline + if os.name == "nt": + import _pyrepl.windows_console + console_errors = (_pyrepl.windows_console._error,) + else: + import _pyrepl.unix_console + console_errors = _pyrepl.unix_console._error + from _pyrepl.main import CAN_USE_PYREPL + finally: + sys.path = original_path + except ImportError: + return + + if readline is not None: + # Reading the initialization (config) file may not be enough to set a + # completion key, so we set one first and then read the file. + if readline.backend == 'editline': + readline.parse_and_bind('bind ^I rl_complete') + else: + readline.parse_and_bind('tab: complete') + + try: + readline.read_init_file() + except OSError: + # An OSError here could have many causes, but the most likely one + # is that there's no .inputrc file (or .editrc file in the case of + # Mac OS X + libedit) in the expected location. In that case, we + # want to ignore the exception. + pass + + if readline is None or readline.get_current_history_length() == 0: + # If no history was loaded, default to .python_history, + # or PYTHON_HISTORY. + # The guard is necessary to avoid doubling history size at + # each interpreter exit when readline was already configured + # through a PYTHONSTARTUP hook, see: + # http://bugs.python.org/issue5845#msg198636 + history = gethistoryfile() + + if CAN_USE_PYREPL: + readline_module = _pyrepl.readline + exceptions = (OSError, *console_errors) + else: + if readline is None: + return + readline_module = readline + exceptions = OSError + + try: + readline_module.read_history_file(history) + except exceptions: + pass + + def write_history(): + try: + readline_module.write_history_file(history) + except FileNotFoundError, PermissionError: + # home directory does not exist or is not writable + # https://bugs.python.org/issue19891 + pass + except OSError: + if errno.EROFS: + pass # gh-128066: read-only file system + else: + raise + + atexit.register(write_history) + + +def venv(known_paths): + global PREFIXES, ENABLE_USER_SITE + + env = os.environ + if sys.platform == 'darwin' and '__PYVENV_LAUNCHER__' in env: + executable = sys._base_executable = os.environ['__PYVENV_LAUNCHER__'] + else: + executable = sys.executable + exe_dir = os.path.dirname(os.path.abspath(executable)) + site_prefix = os.path.dirname(exe_dir) + sys._home = None + conf_basename = 'pyvenv.cfg' + candidate_conf = next( + ( + conffile for conffile in ( + os.path.join(exe_dir, conf_basename), + os.path.join(site_prefix, conf_basename) + ) + if os.path.isfile(conffile) + ), + None + ) + + if candidate_conf: + virtual_conf = candidate_conf + system_site = "true" + # Issue 25185: Use UTF-8, as that's what the venv module uses when + # writing the file. + with open(virtual_conf, encoding='utf-8') as f: + for line in f: + if '=' in line: + key, _, value = line.partition('=') + key = key.strip().lower() + value = value.strip() + if key == 'include-system-site-packages': + system_site = value.lower() + elif key == 'home': + sys._home = value + + if sys.prefix != site_prefix: + _warn(f'Unexpected value in sys.prefix, expected {site_prefix}, got {sys.prefix}', RuntimeWarning) + if sys.exec_prefix != site_prefix: + _warn(f'Unexpected value in sys.exec_prefix, expected {site_prefix}, got {sys.exec_prefix}', RuntimeWarning) + + # Doing this here ensures venv takes precedence over user-site + addsitepackages(known_paths, [sys.prefix]) + + if system_site == "true": + PREFIXES += [sys.base_prefix, sys.base_exec_prefix] + else: + ENABLE_USER_SITE = False + + return known_paths + + +def execsitecustomize(): + """Run custom site specific code, if available.""" + try: + try: + import sitecustomize # noqa: F401 + except ImportError as exc: + if exc.name == 'sitecustomize': + pass + else: + raise + except Exception as err: + if sys.flags.verbose: + sys.excepthook(*sys.exc_info()) + else: + sys.stderr.write( + "Error in sitecustomize; set PYTHONVERBOSE for traceback:\n" + "%s: %s\n" % + (err.__class__.__name__, err)) + + +def execusercustomize(): + """Run custom user specific code, if available.""" + try: + try: + import usercustomize # noqa: F401 + except ImportError as exc: + if exc.name == 'usercustomize': + pass + else: + raise + except Exception as err: + if sys.flags.verbose: + sys.excepthook(*sys.exc_info()) + else: + sys.stderr.write( + "Error in usercustomize; set PYTHONVERBOSE for traceback:\n" + "%s: %s\n" % + (err.__class__.__name__, err)) + + +def main(): + """Add standard site-specific directories to the module search path. + + This function is called automatically when this module is imported, + unless the python interpreter was started with the -S flag. + """ + global ENABLE_USER_SITE + + orig_path = sys.path[:] + known_paths = removeduppaths() + if orig_path != sys.path: + # removeduppaths() might make sys.path absolute. + # fix __file__ and __cached__ of already imported modules too. + abs_paths() + + known_paths = venv(known_paths) + if ENABLE_USER_SITE is None: + ENABLE_USER_SITE = check_enableusersite() + known_paths = addusersitepackages(known_paths) + known_paths = addsitepackages(known_paths) + setquit() + setcopyright() + sethelper() + if not sys.flags.isolated: + enablerlcompleter() + execsitecustomize() + if ENABLE_USER_SITE: + execusercustomize() + +# Prevent extending of sys.path when python was started with -S and +# site is imported later. +if not sys.flags.no_site: + main() + +def _script(): + help = """\ + %s [--user-base] [--user-site] + + Without arguments print some useful information + With arguments print the value of USER_BASE and/or USER_SITE separated + by '%s'. + + Exit codes with --user-base or --user-site: + 0 - user site directory is enabled + 1 - user site directory is disabled by user + 2 - user site directory is disabled by super user + or for security reasons + >2 - unknown error + """ + args = sys.argv[1:] + if not args: + user_base = getuserbase() + user_site = getusersitepackages() + print("sys.path = [") + for dir in sys.path: + print(" %r," % (dir,)) + print("]") + def exists(path): + if path is not None and os.path.isdir(path): + return "exists" + else: + return "doesn't exist" + print(f"USER_BASE: {user_base!r} ({exists(user_base)})") + print(f"USER_SITE: {user_site!r} ({exists(user_site)})") + print(f"ENABLE_USER_SITE: {ENABLE_USER_SITE!r}") + sys.exit(0) + + buffer = [] + if '--user-base' in args: + buffer.append(USER_BASE) + if '--user-site' in args: + buffer.append(USER_SITE) + + if buffer: + print(os.pathsep.join(buffer)) + if ENABLE_USER_SITE: + sys.exit(0) + elif ENABLE_USER_SITE is False: + sys.exit(1) + elif ENABLE_USER_SITE is None: + sys.exit(2) + else: + sys.exit(3) + else: + import textwrap + print(textwrap.dedent(help % (sys.argv[0], os.pathsep))) + sys.exit(10) + +if __name__ == '__main__': + _script() diff --git a/stdlib/sre_compile.py b/stdlib/sre_compile.py new file mode 100644 index 000000000..f9da61e64 --- /dev/null +++ b/stdlib/sre_compile.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _compiler as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/sre_constants.py b/stdlib/sre_constants.py new file mode 100644 index 000000000..fa09d0442 --- /dev/null +++ b/stdlib/sre_constants.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _constants as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/sre_parse.py b/stdlib/sre_parse.py new file mode 100644 index 000000000..25a3f557d --- /dev/null +++ b/stdlib/sre_parse.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _parser as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/test/lock_tests.py b/stdlib/test/lock_tests.py new file mode 100644 index 000000000..fb11f4828 --- /dev/null +++ b/stdlib/test/lock_tests.py @@ -0,0 +1,1260 @@ +""" +Various tests for synchronization primitives. +""" + +import gc +import sys +import time +from _thread import start_new_thread, TIMEOUT_MAX +import threading +import unittest +import weakref + +from test import support +from test.support import threading_helper + + +requires_fork = unittest.skipUnless(support.has_fork_support, + "platform doesn't support fork " + "(no _at_fork_reinit method)") + + +def wait_threads_blocked(nthread): + # Arbitrary sleep to wait until N threads are blocked, + # like waiting for a lock. + time.sleep(0.010 * nthread) + + +class Bunch(object): + """ + A bunch of threads. + """ + def __init__(self, func, nthread, wait_before_exit=False): + """ + Construct a bunch of `nthread` threads running the same function `func`. + If `wait_before_exit` is True, the threads won't terminate until + do_finish() is called. + """ + self.func = func + self.nthread = nthread + self.started = [] + self.finished = [] + self.exceptions = [] + self._can_exit = not wait_before_exit + self._wait_thread = None + + def task(self): + tid = threading.get_ident() + self.started.append(tid) + try: + self.func() + except BaseException as exc: + self.exceptions.append(exc) + finally: + self.finished.append(tid) + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if self._can_exit: + break + + def __enter__(self): + self._wait_thread = threading_helper.wait_threads_exit(support.SHORT_TIMEOUT) + self._wait_thread.__enter__() + + try: + for _ in range(self.nthread): + start_new_thread(self.task, ()) + except: + self._can_exit = True + raise + + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(self.started) >= self.nthread: + break + + return self + + def __exit__(self, exc_type, exc_value, traceback): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(self.finished) >= self.nthread: + break + + # Wait until threads completely exit according to _thread._count() + self._wait_thread.__exit__(None, None, None) + + # Break reference cycle + exceptions = self.exceptions + self.exceptions = None + if exceptions: + raise ExceptionGroup(f"{self.func} threads raised exceptions", + exceptions) + + def do_finish(self): + self._can_exit = True + + +class BaseTestCase(unittest.TestCase): + def setUp(self): + self._threads = threading_helper.threading_setup() + + def tearDown(self): + threading_helper.threading_cleanup(*self._threads) + support.reap_children() + + def assertTimeout(self, actual, expected): + # The waiting and/or time.monotonic() can be imprecise, which + # is why comparing to the expected value would sometimes fail + # (especially under Windows). + self.assertGreaterEqual(actual, expected * 0.6) + # Test nothing insane happened + self.assertLess(actual, expected * 10.0) + + +class BaseLockTests(BaseTestCase): + """ + Tests for both recursive and non-recursive locks. + """ + + def wait_phase(self, phase, expected): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(phase) >= expected: + break + self.assertEqual(len(phase), expected) + + def test_constructor(self): + lock = self.locktype() + del lock + + def test_repr(self): + lock = self.locktype() + self.assertRegex(repr(lock), "") + del lock + + def test_locked_repr(self): + lock = self.locktype() + lock.acquire() + self.assertRegex(repr(lock), "") + del lock + + def test_acquire_destroy(self): + lock = self.locktype() + lock.acquire() + del lock + + def test_acquire_release(self): + lock = self.locktype() + lock.acquire() + lock.release() + del lock + + def test_try_acquire(self): + lock = self.locktype() + self.assertTrue(lock.acquire(False)) + lock.release() + + def test_try_acquire_contended(self): + lock = self.locktype() + lock.acquire() + result = [] + def f(): + result.append(lock.acquire(False)) + with Bunch(f, 1): + pass + self.assertFalse(result[0]) + lock.release() + + def test_acquire_contended(self): + lock = self.locktype() + lock.acquire() + def f(): + lock.acquire() + lock.release() + + N = 5 + with Bunch(f, N) as bunch: + # Threads block on lock.acquire() + wait_threads_blocked(N) + self.assertEqual(len(bunch.finished), 0) + + # Threads unblocked + lock.release() + + self.assertEqual(len(bunch.finished), N) + + def test_with(self): + lock = self.locktype() + def f(): + lock.acquire() + lock.release() + + def with_lock(err=None): + with lock: + if err is not None: + raise err + + # Acquire the lock, do nothing, with releases the lock + with lock: + pass + + # Check that the lock is unacquired + with Bunch(f, 1): + pass + + # Acquire the lock, raise an exception, with releases the lock + with self.assertRaises(TypeError): + with lock: + raise TypeError + + # Check that the lock is unacquired even if after an exception + # was raised in the previous "with lock:" block + with Bunch(f, 1): + pass + + def test_thread_leak(self): + # The lock shouldn't leak a Thread instance when used from a foreign + # (non-threading) thread. + lock = self.locktype() + def f(): + lock.acquire() + lock.release() + + # We run many threads in the hope that existing threads ids won't + # be recycled. + with Bunch(f, 15): + pass + + def test_timeout(self): + lock = self.locktype() + # Can't set timeout if not blocking + self.assertRaises(ValueError, lock.acquire, False, 1) + # Invalid timeout values + self.assertRaises(ValueError, lock.acquire, timeout=-100) + self.assertRaises(OverflowError, lock.acquire, timeout=1e100) + self.assertRaises(OverflowError, lock.acquire, timeout=TIMEOUT_MAX + 1) + # TIMEOUT_MAX is ok + lock.acquire(timeout=TIMEOUT_MAX) + lock.release() + t1 = time.monotonic() + self.assertTrue(lock.acquire(timeout=5)) + t2 = time.monotonic() + # Just a sanity test that it didn't actually wait for the timeout. + self.assertLess(t2 - t1, 5) + results = [] + def f(): + t1 = time.monotonic() + results.append(lock.acquire(timeout=0.5)) + t2 = time.monotonic() + results.append(t2 - t1) + with Bunch(f, 1): + pass + self.assertFalse(results[0]) + self.assertTimeout(results[1], 0.5) + + def test_weakref_exists(self): + lock = self.locktype() + ref = weakref.ref(lock) + self.assertIsNotNone(ref()) + + def test_weakref_deleted(self): + lock = self.locktype() + ref = weakref.ref(lock) + del lock + gc.collect() # For PyPy or other GCs. + self.assertIsNone(ref()) + + +class LockTests(BaseLockTests): + """ + Tests for non-recursive, weak locks + (which can be acquired and released from different threads). + """ + def test_reacquire(self): + # Lock needs to be released before re-acquiring. + lock = self.locktype() + phase = [] + + def f(): + lock.acquire() + phase.append(None) + lock.acquire() + phase.append(None) + + with threading_helper.wait_threads_exit(): + # Thread blocked on lock.acquire() + start_new_thread(f, ()) + self.wait_phase(phase, 1) + + # Thread unblocked + lock.release() + self.wait_phase(phase, 2) + + def test_different_thread(self): + # Lock can be released from a different thread. + lock = self.locktype() + lock.acquire() + def f(): + lock.release() + with Bunch(f, 1): + pass + lock.acquire() + lock.release() + + def test_state_after_timeout(self): + # Issue #11618: check that lock is in a proper state after a + # (non-zero) timeout. + lock = self.locktype() + lock.acquire() + self.assertFalse(lock.acquire(timeout=0.01)) + lock.release() + self.assertFalse(lock.locked()) + self.assertTrue(lock.acquire(blocking=False)) + + @requires_fork + def test_at_fork_reinit(self): + def use_lock(lock): + # make sure that the lock still works normally + # after _at_fork_reinit() + lock.acquire() + lock.release() + + # unlocked + lock = self.locktype() + lock._at_fork_reinit() + use_lock(lock) + + # locked: _at_fork_reinit() resets the lock to the unlocked state + lock2 = self.locktype() + lock2.acquire() + lock2._at_fork_reinit() + use_lock(lock2) + + +class RLockTests(BaseLockTests): + """ + Tests for recursive locks. + """ + def test_repr_count(self): + # see gh-134322: check that count values are correct: + # when a rlock is just created, + # in a second thread when rlock is acquired in the main thread. + lock = self.locktype() + self.assertIn("count=0", repr(lock)) + self.assertIn("") + evt.set() + self.assertRegex(repr(evt), r"<\w+\.Event at .*: set>") + + +class ConditionTests(BaseTestCase): + """ + Tests for condition variables. + """ + + def test_acquire(self): + cond = self.condtype() + # Be default we have an RLock: the condition can be acquired multiple + # times. + cond.acquire() + cond.acquire() + cond.release() + cond.release() + lock = threading.Lock() + cond = self.condtype(lock) + cond.acquire() + self.assertFalse(lock.acquire(False)) + cond.release() + self.assertTrue(lock.acquire(False)) + self.assertFalse(cond.acquire(False)) + lock.release() + with cond: + self.assertFalse(lock.acquire(False)) + + def test_unacquired_wait(self): + cond = self.condtype() + self.assertRaises(RuntimeError, cond.wait) + + def test_unacquired_notify(self): + cond = self.condtype() + self.assertRaises(RuntimeError, cond.notify) + + def _check_notify(self, cond): + # Note that this test is sensitive to timing. If the worker threads + # don't execute in a timely fashion, the main thread may think they + # are further along then they are. The main thread therefore issues + # wait_threads_blocked() statements to try to make sure that it doesn't + # race ahead of the workers. + # Secondly, this test assumes that condition variables are not subject + # to spurious wakeups. The absence of spurious wakeups is an implementation + # detail of Condition Variables in current CPython, but in general, not + # a guaranteed property of condition variables as a programming + # construct. In particular, it is possible that this can no longer + # be conveniently guaranteed should their implementation ever change. + ready = [] + results1 = [] + results2 = [] + phase_num = 0 + def f(): + cond.acquire() + ready.append(phase_num) + result = cond.wait() + + cond.release() + results1.append((result, phase_num)) + + cond.acquire() + ready.append(phase_num) + + result = cond.wait() + cond.release() + results2.append((result, phase_num)) + + N = 5 + with Bunch(f, N): + # first wait, to ensure all workers settle into cond.wait() before + # we continue. See issues #8799 and #30727. + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= N: + break + + ready.clear() + self.assertEqual(results1, []) + + # Notify 3 threads at first + count1 = 3 + cond.acquire() + cond.notify(count1) + wait_threads_blocked(count1) + + # Phase 1 + phase_num = 1 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) >= count1: + break + + self.assertEqual(results1, [(True, 1)] * count1) + self.assertEqual(results2, []) + + # Wait until awaken workers are blocked on cond.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= count1 : + break + + # Notify 5 threads: they might be in their first or second wait + cond.acquire() + cond.notify(5) + wait_threads_blocked(N) + + # Phase 2 + phase_num = 2 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= (N + count1): + break + + count2 = N - count1 + self.assertEqual(results1, [(True, 1)] * count1 + [(True, 2)] * count2) + self.assertEqual(results2, [(True, 2)] * count1) + + # Make sure all workers settle into cond.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= N: + break + + # Notify all threads: they are all in their second wait + cond.acquire() + cond.notify_all() + wait_threads_blocked(N) + + # Phase 3 + phase_num = 3 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results2) >= N: + break + self.assertEqual(results1, [(True, 1)] * count1 + [(True, 2)] * count2) + self.assertEqual(results2, [(True, 2)] * count1 + [(True, 3)] * count2) + + def test_notify(self): + cond = self.condtype() + self._check_notify(cond) + # A second time, to check internal state is still ok. + self._check_notify(cond) + + def test_timeout(self): + cond = self.condtype() + timeout = 0.5 + results = [] + def f(): + cond.acquire() + t1 = time.monotonic() + result = cond.wait(timeout) + t2 = time.monotonic() + cond.release() + results.append((t2 - t1, result)) + + N = 5 + with Bunch(f, N): + pass + self.assertEqual(len(results), N) + + for dt, result in results: + self.assertTimeout(dt, timeout) + # Note that conceptually (that"s the condition variable protocol) + # a wait() may succeed even if no one notifies us and before any + # timeout occurs. Spurious wakeups can occur. + # This makes it hard to verify the result value. + # In practice, this implementation has no spurious wakeups. + self.assertFalse(result) + + def test_waitfor(self): + cond = self.condtype() + state = 0 + def f(): + with cond: + result = cond.wait_for(lambda: state == 4) + self.assertTrue(result) + self.assertEqual(state, 4) + + with Bunch(f, 1): + for i in range(4): + time.sleep(0.010) + with cond: + state += 1 + cond.notify() + + def test_waitfor_timeout(self): + cond = self.condtype() + state = 0 + success = [] + def f(): + with cond: + dt = time.monotonic() + result = cond.wait_for(lambda : state==4, timeout=0.1) + dt = time.monotonic() - dt + self.assertFalse(result) + self.assertTimeout(dt, 0.1) + success.append(None) + + with Bunch(f, 1): + # Only increment 3 times, so state == 4 is never reached. + for i in range(3): + time.sleep(0.010) + with cond: + state += 1 + cond.notify() + + self.assertEqual(len(success), 1) + + +class BaseSemaphoreTests(BaseTestCase): + """ + Common tests for {bounded, unbounded} semaphore objects. + """ + + def test_constructor(self): + self.assertRaises(ValueError, self.semtype, value = -1) + self.assertRaises(ValueError, self.semtype, value = -sys.maxsize) + + def test_acquire(self): + sem = self.semtype(1) + sem.acquire() + sem.release() + sem = self.semtype(2) + sem.acquire() + sem.acquire() + sem.release() + sem.release() + + def test_acquire_destroy(self): + sem = self.semtype() + sem.acquire() + del sem + + def test_acquire_contended(self): + sem_value = 7 + sem = self.semtype(sem_value) + sem.acquire() + + sem_results = [] + results1 = [] + results2 = [] + phase_num = 0 + + def func(): + sem_results.append(sem.acquire()) + results1.append(phase_num) + + sem_results.append(sem.acquire()) + results2.append(phase_num) + + def wait_count(count): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= count: + break + + N = 10 + with Bunch(func, N): + # Phase 0 + count1 = sem_value - 1 + wait_count(count1) + self.assertEqual(results1 + results2, [0] * count1) + + # Phase 1 + phase_num = 1 + for i in range(sem_value): + sem.release() + count2 = sem_value + wait_count(count1 + count2) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2) + + # Phase 2 + phase_num = 2 + count3 = (sem_value - 1) + for i in range(count3): + sem.release() + wait_count(count1 + count2 + count3) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2 + [2] * count3) + # The semaphore is still locked + self.assertFalse(sem.acquire(False)) + + # Final release, to let the last thread finish + count4 = 1 + sem.release() + + self.assertEqual(sem_results, + [True] * (count1 + count2 + count3 + count4)) + + def test_multirelease(self): + sem_value = 7 + sem = self.semtype(sem_value) + sem.acquire() + + results1 = [] + results2 = [] + phase_num = 0 + def func(): + sem.acquire() + results1.append(phase_num) + + sem.acquire() + results2.append(phase_num) + + def wait_count(count): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= count: + break + + with Bunch(func, 10): + # Phase 0 + count1 = sem_value - 1 + wait_count(count1) + self.assertEqual(results1 + results2, [0] * count1) + + # Phase 1 + phase_num = 1 + count2 = sem_value + sem.release(count2) + wait_count(count1 + count2) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2) + + # Phase 2 + phase_num = 2 + count3 = sem_value - 1 + sem.release(count3) + wait_count(count1 + count2 + count3) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2 + [2] * count3) + # The semaphore is still locked + self.assertFalse(sem.acquire(False)) + + # Final release, to let the last thread finish + sem.release() + + def test_try_acquire(self): + sem = self.semtype(2) + self.assertTrue(sem.acquire(False)) + self.assertTrue(sem.acquire(False)) + self.assertFalse(sem.acquire(False)) + sem.release() + self.assertTrue(sem.acquire(False)) + + def test_try_acquire_contended(self): + sem = self.semtype(4) + sem.acquire() + results = [] + def f(): + results.append(sem.acquire(False)) + results.append(sem.acquire(False)) + with Bunch(f, 5): + pass + # There can be a thread switch between acquiring the semaphore and + # appending the result, therefore results will not necessarily be + # ordered. + self.assertEqual(sorted(results), [False] * 7 + [True] * 3 ) + + def test_acquire_timeout(self): + sem = self.semtype(2) + self.assertRaises(ValueError, sem.acquire, False, timeout=1.0) + self.assertTrue(sem.acquire(timeout=0.005)) + self.assertTrue(sem.acquire(timeout=0.005)) + self.assertFalse(sem.acquire(timeout=0.005)) + sem.release() + self.assertTrue(sem.acquire(timeout=0.005)) + t = time.monotonic() + self.assertFalse(sem.acquire(timeout=0.5)) + dt = time.monotonic() - t + self.assertTimeout(dt, 0.5) + + def test_default_value(self): + # The default initial value is 1. + sem = self.semtype() + sem.acquire() + def f(): + sem.acquire() + sem.release() + + with Bunch(f, 1) as bunch: + # Thread blocked on sem.acquire() + wait_threads_blocked(1) + self.assertFalse(bunch.finished) + + # Thread unblocked + sem.release() + + def test_with(self): + sem = self.semtype(2) + def _with(err=None): + with sem: + self.assertTrue(sem.acquire(False)) + sem.release() + with sem: + self.assertFalse(sem.acquire(False)) + if err: + raise err + _with() + self.assertTrue(sem.acquire(False)) + sem.release() + self.assertRaises(TypeError, _with, TypeError) + self.assertTrue(sem.acquire(False)) + sem.release() + +class SemaphoreTests(BaseSemaphoreTests): + """ + Tests for unbounded semaphores. + """ + + def test_release_unacquired(self): + # Unbounded releases are allowed and increment the semaphore's value + sem = self.semtype(1) + sem.release() + sem.acquire() + sem.acquire() + sem.release() + + def test_repr(self): + sem = self.semtype(3) + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=3>") + sem.acquire() + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=2>") + sem.release() + sem.release() + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=4>") + + +class BoundedSemaphoreTests(BaseSemaphoreTests): + """ + Tests for bounded semaphores. + """ + + def test_release_unacquired(self): + # Cannot go past the initial value + sem = self.semtype() + self.assertRaises(ValueError, sem.release) + sem.acquire() + sem.release() + self.assertRaises(ValueError, sem.release) + + def test_repr(self): + sem = self.semtype(3) + self.assertRegex(repr(sem), r"<\w+\.BoundedSemaphore at .*: value=3/3>") + sem.acquire() + self.assertRegex(repr(sem), r"<\w+\.BoundedSemaphore at .*: value=2/3>") + + +class BarrierTests(BaseTestCase): + """ + Tests for Barrier objects. + """ + N = 5 + defaultTimeout = 2.0 + + def setUp(self): + self.barrier = self.barriertype(self.N, timeout=self.defaultTimeout) + + def tearDown(self): + self.barrier.abort() + + def run_threads(self, f): + with Bunch(f, self.N): + pass + + def multipass(self, results, n): + m = self.barrier.parties + self.assertEqual(m, self.N) + for i in range(n): + results[0].append(True) + self.assertEqual(len(results[1]), i * m) + self.barrier.wait() + results[1].append(True) + self.assertEqual(len(results[0]), (i + 1) * m) + self.barrier.wait() + self.assertEqual(self.barrier.n_waiting, 0) + self.assertFalse(self.barrier.broken) + + def test_constructor(self): + self.assertRaises(ValueError, self.barriertype, parties=0) + self.assertRaises(ValueError, self.barriertype, parties=-1) + + def test_barrier(self, passes=1): + """ + Test that a barrier is passed in lockstep + """ + results = [[],[]] + def f(): + self.multipass(results, passes) + self.run_threads(f) + + def test_barrier_10(self): + """ + Test that a barrier works for 10 consecutive runs + """ + return self.test_barrier(10) + + def test_wait_return(self): + """ + test the return value from barrier.wait + """ + results = [] + def f(): + r = self.barrier.wait() + results.append(r) + + self.run_threads(f) + self.assertEqual(sum(results), sum(range(self.N))) + + def test_action(self): + """ + Test the 'action' callback + """ + results = [] + def action(): + results.append(True) + barrier = self.barriertype(self.N, action) + def f(): + barrier.wait() + self.assertEqual(len(results), 1) + + self.run_threads(f) + + def test_abort(self): + """ + Test that an abort will put the barrier in a broken state + """ + results1 = [] + results2 = [] + def f(): + try: + i = self.barrier.wait() + if i == self.N//2: + raise RuntimeError + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + except RuntimeError: + self.barrier.abort() + pass + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertTrue(self.barrier.broken) + + def test_reset(self): + """ + Test that a 'reset' on a barrier frees the waiting threads + """ + results1 = [] + results2 = [] + results3 = [] + def f(): + i = self.barrier.wait() + if i == self.N//2: + # Wait until the other threads are all in the barrier. + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if self.barrier.n_waiting >= (self.N - 1): + break + self.barrier.reset() + else: + try: + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + # Now, pass the barrier again + self.barrier.wait() + results3.append(True) + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertEqual(len(results3), self.N) + + + def test_abort_and_reset(self): + """ + Test that a barrier can be reset after being broken. + """ + results1 = [] + results2 = [] + results3 = [] + barrier2 = self.barriertype(self.N) + def f(): + try: + i = self.barrier.wait() + if i == self.N//2: + raise RuntimeError + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + except RuntimeError: + self.barrier.abort() + pass + # Synchronize and reset the barrier. Must synchronize first so + # that everyone has left it when we reset, and after so that no + # one enters it before the reset. + if barrier2.wait() == self.N//2: + self.barrier.reset() + barrier2.wait() + self.barrier.wait() + results3.append(True) + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertEqual(len(results3), self.N) + + def test_timeout(self): + """ + Test wait(timeout) + """ + def f(): + i = self.barrier.wait() + if i == self.N // 2: + # One thread is late! + time.sleep(self.defaultTimeout / 2) + # Default timeout is 2.0, so this is shorter. + self.assertRaises(threading.BrokenBarrierError, + self.barrier.wait, self.defaultTimeout / 4) + self.run_threads(f) + + def test_default_timeout(self): + """ + Test the barrier's default timeout + """ + timeout = 0.100 + barrier = self.barriertype(2, timeout=timeout) + def f(): + self.assertRaises(threading.BrokenBarrierError, + barrier.wait) + + start_time = time.monotonic() + with Bunch(f, 1): + pass + dt = time.monotonic() - start_time + self.assertGreaterEqual(dt, timeout) + + def test_single_thread(self): + b = self.barriertype(1) + b.wait() + b.wait() + + def test_repr(self): + barrier = self.barriertype(3) + timeout = support.LONG_TIMEOUT + self.assertRegex(repr(barrier), r"<\w+\.Barrier at .*: waiters=0/3>") + def f(): + barrier.wait(timeout) + + N = 2 + with Bunch(f, N): + # Threads blocked on barrier.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if barrier.n_waiting >= N: + break + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: waiters=2/3>") + + # Threads unblocked + barrier.wait(timeout) + + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: waiters=0/3>") + + # Abort the barrier + barrier.abort() + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: broken>") diff --git a/stdlib/test/pyclbr_input.py b/stdlib/test/pyclbr_input.py new file mode 100644 index 000000000..5535edbfa --- /dev/null +++ b/stdlib/test/pyclbr_input.py @@ -0,0 +1,85 @@ +"""Test cases for test_pyclbr.py""" + +def f(): pass + +class Other(object): + @classmethod + def foo(c): pass + + def om(self): pass + +class B (object): + def bm(self): pass + +class C (B): + d = 10 + + # This one is correctly considered by both test_pyclbr.py and pyclbr.py + # as a non-method of C. + foo = Other().foo + + # This causes test_pyclbr.py to fail, but only because the + # introspection-based is_method() code in the test can't + # distinguish between this and a genuine method function like m(). + # + # The pyclbr.py module gets this right as it parses the text. + om = Other.om + f = f + + def m(self): pass + + @staticmethod + def sm(self): pass + + @classmethod + def cm(self): pass + +# Check that mangling is correctly handled + +class a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class ___: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass diff --git a/stdlib/test/relimport.py b/stdlib/test/relimport.py new file mode 100644 index 000000000..50aa497f7 --- /dev/null +++ b/stdlib/test/relimport.py @@ -0,0 +1 @@ +from .test_import import * diff --git a/stdlib/test/test_doctest/__init__.py b/stdlib/test/test_doctest/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_doctest/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_doctest/decorator_mod.py b/stdlib/test/test_doctest/decorator_mod.py new file mode 100644 index 000000000..9f1068884 --- /dev/null +++ b/stdlib/test/test_doctest/decorator_mod.py @@ -0,0 +1,10 @@ +# This module is used in `doctest_lineno.py`. +import functools + + +def decorator(f): + @functools.wraps(f) + def inner(): + return f() + + return inner diff --git a/stdlib/test/test_doctest/doctest_aliases.py b/stdlib/test/test_doctest/doctest_aliases.py new file mode 100644 index 000000000..30cefafa8 --- /dev/null +++ b/stdlib/test/test_doctest/doctest_aliases.py @@ -0,0 +1,13 @@ +# Used by test_doctest.py. + +class TwoNames: + '''f() and g() are two names for the same method''' + + def f(self): + ''' + >>> print(TwoNames().f()) + f + ''' + return 'f' + + g = f # define an alias for f diff --git a/stdlib/test/test_doctest/doctest_lineno.py b/stdlib/test/test_doctest/doctest_lineno.py new file mode 100644 index 000000000..0bd402e98 --- /dev/null +++ b/stdlib/test/test_doctest/doctest_lineno.py @@ -0,0 +1,107 @@ +# This module is used in `test_doctest`. +# It must not have a docstring. + +def func_with_docstring(): + """Some unrelated info.""" + + +def func_without_docstring(): + pass + + +def func_with_doctest(): + """ + This function really contains a test case. + + >>> func_with_doctest.__name__ + 'func_with_doctest' + """ + return 3 + + +class ClassWithDocstring: + """Some unrelated class information.""" + + +class ClassWithoutDocstring: + pass + + +class ClassWithDoctest: + """This class really has a test case in it. + + >>> ClassWithDoctest.__name__ + 'ClassWithDoctest' + """ + + +class MethodWrapper: + def method_with_docstring(self): + """Method with a docstring.""" + + def method_without_docstring(self): + pass + + def method_with_doctest(self): + """ + This has a doctest! + >>> MethodWrapper.method_with_doctest.__name__ + 'method_with_doctest' + """ + + @classmethod + def classmethod_with_doctest(cls): + """ + This has a doctest! + >>> MethodWrapper.classmethod_with_doctest.__name__ + 'classmethod_with_doctest' + """ + + @property + def property_with_doctest(self): + """ + This has a doctest! + >>> MethodWrapper.property_with_doctest.__name__ + 'property_with_doctest' + """ + +# https://github.com/python/cpython/issues/99433 +str_wrapper = object().__str__ + + +# https://github.com/python/cpython/issues/115392 +from test.test_doctest.decorator_mod import decorator + +@decorator +@decorator +def func_with_docstring_wrapped(): + """Some unrelated info.""" + + +# https://github.com/python/cpython/issues/136914 +import functools + + +@functools.cache +def cached_func_with_doctest(value): + """ + >>> cached_func_with_doctest(1) + -1 + """ + return -value + + +@functools.cache +def cached_func_without_docstring(value): + return value + 1 + + +class ClassWithACachedProperty: + + @functools.cached_property + def cached(self): + """ + >>> X().cached + -1 + """ + return 0 diff --git a/stdlib/test/test_doctest/sample_doctest.py b/stdlib/test/test_doctest/sample_doctest.py new file mode 100644 index 000000000..049f737a0 --- /dev/null +++ b/stdlib/test/test_doctest/sample_doctest.py @@ -0,0 +1,76 @@ +"""This is a sample module that doesn't really test anything all that + interesting. + +It simply has a few tests, some of which succeed and some of which fail. + +It's important that the numbers remain constant as another test is +testing the running of these tests. + + +>>> 2+2 +4 +""" + + +def foo(): + """ + + >>> 2+2 + 5 + + >>> 2+2 + 4 + """ + +def bar(): + """ + + >>> 2+2 + 4 + """ + +def test_silly_setup(): + """ + + >>> import test.test_doctest.test_doctest + >>> test.test_doctest.test_doctest.sillySetup + True + """ + +def w_blank(): + """ + >>> if 1: + ... print('a') + ... print() + ... print('b') + a + + b + """ + +x = 1 +def x_is_one(): + """ + >>> x + 1 + """ + +def y_is_one(): + """ + >>> y + 1 + """ + +__test__ = {'good': """ + >>> 42 + 42 + """, + 'bad': """ + >>> 42 + 666 + """, + } + +def test_suite(): + import doctest + return doctest.DocTestSuite() diff --git a/stdlib/test/test_doctest/sample_doctest_errors.py b/stdlib/test/test_doctest/sample_doctest_errors.py new file mode 100644 index 000000000..4a6f07af2 --- /dev/null +++ b/stdlib/test/test_doctest/sample_doctest_errors.py @@ -0,0 +1,46 @@ +"""This is a sample module used for testing doctest. + +This module includes various scenarios involving errors. + +>>> 2 + 2 +5 +>>> 1/0 +1 +""" + +def g(): + [][0] # line 12 + +def errors(): + """ + >>> 2 + 2 + 5 + >>> 1/0 + 1 + >>> def f(): + ... 2 + '2' + ... + >>> f() + 1 + >>> g() + 1 + """ + +def syntax_error(): + """ + >>> 2+*3 + 5 + """ + +__test__ = { + 'bad': """ + >>> 2 + 2 + 5 + >>> 1/0 + 1 + """, +} + +def test_suite(): + import doctest + return doctest.DocTestSuite() diff --git a/stdlib/test/test_doctest/sample_doctest_no_docstrings.py b/stdlib/test/test_doctest/sample_doctest_no_docstrings.py new file mode 100644 index 000000000..e4201edbc --- /dev/null +++ b/stdlib/test/test_doctest/sample_doctest_no_docstrings.py @@ -0,0 +1,12 @@ +# This is a sample module used for testing doctest. +# +# This module is for testing how doctest handles a module with no +# docstrings. + + +class Foo(object): + + # A class with no docstring. + + def __init__(self): + pass diff --git a/stdlib/test/test_doctest/sample_doctest_no_doctests.py b/stdlib/test/test_doctest/sample_doctest_no_doctests.py new file mode 100644 index 000000000..7daa57231 --- /dev/null +++ b/stdlib/test/test_doctest/sample_doctest_no_doctests.py @@ -0,0 +1,15 @@ +"""This is a sample module used for testing doctest. + +This module is for testing how doctest handles a module with docstrings +but no doctest examples. + +""" + + +class Foo(object): + """A docstring with no doctest examples. + + """ + + def __init__(self): + pass diff --git a/stdlib/test/test_doctest/sample_doctest_skip.py b/stdlib/test/test_doctest/sample_doctest_skip.py new file mode 100644 index 000000000..1b83dec1f --- /dev/null +++ b/stdlib/test/test_doctest/sample_doctest_skip.py @@ -0,0 +1,49 @@ +"""This is a sample module used for testing doctest. + +This module includes various scenarios involving skips. +""" + +def no_skip_pass(): + """ + >>> 2 + 2 + 4 + """ + +def no_skip_fail(): + """ + >>> 2 + 2 + 5 + """ + +def single_skip(): + """ + >>> 2 + 2 # doctest: +SKIP + 4 + """ + +def double_skip(): + """ + >>> 2 + 2 # doctest: +SKIP + 4 + >>> 3 + 3 # doctest: +SKIP + 6 + """ + +def partial_skip_pass(): + """ + >>> 2 + 2 # doctest: +SKIP + 4 + >>> 3 + 3 + 6 + """ + +def partial_skip_fail(): + """ + >>> 2 + 2 # doctest: +SKIP + 4 + >>> 2 + 2 + 5 + """ + +def no_examples(): + """A docstring with no examples should not be counted as run or skipped.""" diff --git a/stdlib/test/test_doctest/test_doctest.py b/stdlib/test/test_doctest/test_doctest.py new file mode 100644 index 000000000..00e6126b6 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest.py @@ -0,0 +1,3863 @@ +""" +Test script for doctest. +""" + +from test import support +from test.support import import_helper +import doctest +import functools +import os +import sys +import importlib +import importlib.abc +import importlib.util +import unittest +import tempfile +import types +import contextlib + + +def doctest_skip_if(condition): + def decorator(func): + if condition and support.HAVE_DOCSTRINGS: + func.__doc__ = ">>> pass # doctest: +SKIP" + return func + return decorator + + +# NOTE: There are some additional tests relating to interaction with +# zipimport in the test_zipimport_support test module. +# There are also related tests in `test_doctest2` module. + +###################################################################### +## Sample Objects (used by test cases) +###################################################################### + +def sample_func(v): + """ + Blah blah + + >>> print(sample_func(22)) + 44 + + Yee ha! + """ + return v+v + +class SampleClass: + """ + >>> print(1) + 1 + + >>> # comments get ignored. so are empty PS1 and PS2 prompts: + >>> + ... + + Multiline example: + >>> sc = SampleClass(3) + >>> for i in range(10): + ... sc = sc.double() + ... print(' ', sc.get(), sep='', end='') + 6 12 24 48 96 192 384 768 1536 3072 + """ + def __init__(self, val): + """ + >>> print(SampleClass(12).get()) + 12 + """ + self.val = val + + def double(self): + """ + >>> print(SampleClass(12).double().get()) + 24 + """ + return SampleClass(self.val + self.val) + + def get(self): + """ + >>> print(SampleClass(-5).get()) + -5 + """ + return self.val + + def setter(self, val): + """ + >>> s = SampleClass(-5) + >>> s.setter(1) + >>> print(s.val) + 1 + """ + self.val = val + + def a_staticmethod(v): + """ + >>> print(SampleClass.a_staticmethod(10)) + 11 + """ + return v+1 + a_staticmethod = staticmethod(a_staticmethod) + + def a_classmethod(cls, v): + """ + >>> print(SampleClass.a_classmethod(10)) + 12 + >>> print(SampleClass(0).a_classmethod(10)) + 12 + """ + return v+2 + a_classmethod = classmethod(a_classmethod) + + a_property = property(get, setter, doc=""" + >>> print(SampleClass(22).a_property) + 22 + """) + + a_class_attribute = 42 + + @functools.cached_property + def a_cached_property(self): + """ + >>> print(SampleClass(29).get()) + 29 + """ + return "hello" + + class NestedClass: + """ + >>> x = SampleClass.NestedClass(5) + >>> y = x.square() + >>> print(y.get()) + 25 + """ + def __init__(self, val=0): + """ + >>> print(SampleClass.NestedClass().get()) + 0 + """ + self.val = val + def square(self): + return SampleClass.NestedClass(self.val*self.val) + def get(self): + return self.val + +class SampleNewStyleClass(object): + r""" + >>> print('1\n2\n3') + 1 + 2 + 3 + """ + def __init__(self, val): + """ + >>> print(SampleNewStyleClass(12).get()) + 12 + """ + self.val = val + + def double(self): + """ + >>> print(SampleNewStyleClass(12).double().get()) + 24 + """ + return SampleNewStyleClass(self.val + self.val) + + def get(self): + """ + >>> print(SampleNewStyleClass(-5).get()) + -5 + """ + return self.val + +###################################################################### +## Test Cases +###################################################################### + +def test_Example(): r""" +Unit tests for the `Example` class. + +Example is a simple container class that holds: + - `source`: A source string. + - `want`: An expected output string. + - `exc_msg`: An expected exception message string (or None if no + exception is expected). + - `lineno`: A line number (within the docstring). + - `indent`: The example's indentation in the input string. + - `options`: An option dictionary, mapping option flags to True or + False. + +These attributes are set by the constructor. `source` and `want` are +required; the other attributes all have default values: + + >>> example = doctest.Example('print(1)', '1\n') + >>> (example.source, example.want, example.exc_msg, + ... example.lineno, example.indent, example.options) + ('print(1)\n', '1\n', None, 0, 0, {}) + +The first three attributes (`source`, `want`, and `exc_msg`) may be +specified positionally; the remaining arguments should be specified as +keyword arguments: + + >>> exc_msg = 'IndexError: pop from an empty list' + >>> example = doctest.Example('[].pop()', '', exc_msg, + ... lineno=5, indent=4, + ... options={doctest.ELLIPSIS: True}) + >>> (example.source, example.want, example.exc_msg, + ... example.lineno, example.indent, example.options) + ('[].pop()\n', '', 'IndexError: pop from an empty list\n', 5, 4, {8: True}) + +The constructor normalizes the `source` string to end in a newline: + + Source spans a single line: no terminating newline. + >>> e = doctest.Example('print(1)', '1\n') + >>> e.source, e.want + ('print(1)\n', '1\n') + + >>> e = doctest.Example('print(1)\n', '1\n') + >>> e.source, e.want + ('print(1)\n', '1\n') + + Source spans multiple lines: require terminating newline. + >>> e = doctest.Example('print(1);\nprint(2)\n', '1\n2\n') + >>> e.source, e.want + ('print(1);\nprint(2)\n', '1\n2\n') + + >>> e = doctest.Example('print(1);\nprint(2)', '1\n2\n') + >>> e.source, e.want + ('print(1);\nprint(2)\n', '1\n2\n') + + Empty source string (which should never appear in real examples) + >>> e = doctest.Example('', '') + >>> e.source, e.want + ('\n', '') + +The constructor normalizes the `want` string to end in a newline, +unless it's the empty string: + + >>> e = doctest.Example('print(1)', '1\n') + >>> e.source, e.want + ('print(1)\n', '1\n') + + >>> e = doctest.Example('print(1)', '1') + >>> e.source, e.want + ('print(1)\n', '1\n') + + >>> e = doctest.Example('print', '') + >>> e.source, e.want + ('print\n', '') + +The constructor normalizes the `exc_msg` string to end in a newline, +unless it's `None`: + + Message spans one line + >>> exc_msg = 'IndexError: pop from an empty list' + >>> e = doctest.Example('[].pop()', '', exc_msg) + >>> e.exc_msg + 'IndexError: pop from an empty list\n' + + >>> exc_msg = 'IndexError: pop from an empty list\n' + >>> e = doctest.Example('[].pop()', '', exc_msg) + >>> e.exc_msg + 'IndexError: pop from an empty list\n' + + Message spans multiple lines + >>> exc_msg = 'ValueError: 1\n 2' + >>> e = doctest.Example('raise ValueError("1\n 2")', '', exc_msg) + >>> e.exc_msg + 'ValueError: 1\n 2\n' + + >>> exc_msg = 'ValueError: 1\n 2\n' + >>> e = doctest.Example('raise ValueError("1\n 2")', '', exc_msg) + >>> e.exc_msg + 'ValueError: 1\n 2\n' + + Empty (but non-None) exception message (which should never appear + in real examples) + >>> exc_msg = '' + >>> e = doctest.Example('raise X()', '', exc_msg) + >>> e.exc_msg + '\n' + +Compare `Example`: + >>> example = doctest.Example('print 1', '1\n') + >>> same_example = doctest.Example('print 1', '1\n') + >>> other_example = doctest.Example('print 42', '42\n') + >>> example == same_example + True + >>> example != same_example + False + >>> hash(example) == hash(same_example) + True + >>> example == other_example + False + >>> example != other_example + True +""" + +def test_DocTest(): r""" +Unit tests for the `DocTest` class. + +DocTest is a collection of examples, extracted from a docstring, along +with information about where the docstring comes from (a name, +filename, and line number). The docstring is parsed by the `DocTest` +constructor: + + >>> docstring = ''' + ... >>> print(12) + ... 12 + ... + ... Non-example text. + ... + ... >>> print('another\\example') + ... another + ... example + ... ''' + >>> globs = {} # globals to run the test in. + >>> parser = doctest.DocTestParser() + >>> test = parser.get_doctest(docstring, globs, 'some_test', + ... 'some_file', 20) + >>> print(test) + + >>> len(test.examples) + 2 + >>> e1, e2 = test.examples + >>> (e1.source, e1.want, e1.lineno) + ('print(12)\n', '12\n', 1) + >>> (e2.source, e2.want, e2.lineno) + ("print('another\\example')\n", 'another\nexample\n', 6) + +Source information (name, filename, and line number) is available as +attributes on the doctest object: + + >>> (test.name, test.filename, test.lineno) + ('some_test', 'some_file', 20) + +The line number of an example within its containing file is found by +adding the line number of the example and the line number of its +containing test: + + >>> test.lineno + e1.lineno + 21 + >>> test.lineno + e2.lineno + 26 + +If the docstring contains inconsistent leading whitespace in the +expected output of an example, then `DocTest` will raise a ValueError: + + >>> docstring = r''' + ... >>> print('bad\nindentation') + ... bad + ... indentation + ... ''' + >>> parser.get_doctest(docstring, globs, 'some_test', 'filename', 0) + Traceback (most recent call last): + ValueError: line 4 of the docstring for some_test has inconsistent leading whitespace: 'indentation' + +If the docstring contains inconsistent leading whitespace on +continuation lines, then `DocTest` will raise a ValueError: + + >>> docstring = r''' + ... >>> print(('bad indentation', + ... ... 2)) + ... ('bad', 'indentation') + ... ''' + >>> parser.get_doctest(docstring, globs, 'some_test', 'filename', 0) + Traceback (most recent call last): + ValueError: line 2 of the docstring for some_test has inconsistent leading whitespace: '... 2))' + +If there's no blank space after a PS1 prompt ('>>>'), then `DocTest` +will raise a ValueError: + + >>> docstring = '>>>print(1)\n1' + >>> parser.get_doctest(docstring, globs, 'some_test', 'filename', 0) + Traceback (most recent call last): + ValueError: line 1 of the docstring for some_test lacks blank after >>>: '>>>print(1)' + +If there's no blank space after a PS2 prompt ('...'), then `DocTest` +will raise a ValueError: + + >>> docstring = '>>> if 1:\n...print(1)\n1' + >>> parser.get_doctest(docstring, globs, 'some_test', 'filename', 0) + Traceback (most recent call last): + ValueError: line 2 of the docstring for some_test lacks blank after ...: '...print(1)' + +Compare `DocTest`: + + >>> docstring = ''' + ... >>> print 12 + ... 12 + ... ''' + >>> test = parser.get_doctest(docstring, globs, 'some_test', + ... 'some_test', 20) + >>> same_test = parser.get_doctest(docstring, globs, 'some_test', + ... 'some_test', 20) + >>> test == same_test + True + >>> test != same_test + False + >>> hash(test) == hash(same_test) + True + >>> docstring = ''' + ... >>> print 42 + ... 42 + ... ''' + >>> other_test = parser.get_doctest(docstring, globs, 'other_test', + ... 'other_file', 10) + >>> test == other_test + False + >>> test != other_test + True + >>> test < other_test + False + >>> other_test < test + True + +Test comparison with lineno None on one side + + >>> no_lineno = parser.get_doctest(docstring, globs, 'some_test', + ... 'some_test', None) + >>> test.lineno is None + False + >>> no_lineno.lineno is None + True + >>> test < no_lineno + False + >>> no_lineno < test + True + +Compare `DocTestCase`: + + >>> DocTestCase = doctest.DocTestCase + >>> test_case = DocTestCase(test) + >>> same_test_case = DocTestCase(same_test) + >>> other_test_case = DocTestCase(other_test) + >>> test_case == same_test_case + True + >>> test_case != same_test_case + False + >>> hash(test_case) == hash(same_test_case) + True + >>> test == other_test_case + False + >>> test != other_test_case + True + +""" + +class test_DocTestFinder: + def basics(): r""" +Unit tests for the `DocTestFinder` class. + +DocTestFinder is used to extract DocTests from an object's docstring +and the docstrings of its contained objects. It can be used with +modules, functions, classes, methods, staticmethods, classmethods, and +properties. + +Finding Tests in Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a function whose docstring contains examples, DocTestFinder.find() +will return a single test (for that function's docstring): + + >>> finder = doctest.DocTestFinder() + +We'll simulate a __file__ attr that ends in pyc: + + >>> from test.test_doctest import test_doctest + >>> old = test_doctest.__file__ + >>> test_doctest.__file__ = 'test_doctest.pyc' + + >>> tests = finder.find(sample_func) + + >>> print(tests) # doctest: +ELLIPSIS + [] + +The exact name depends on how test_doctest was invoked, so allow for +leading path components. + + >>> tests[0].filename # doctest: +ELLIPSIS + '...test_doctest.py' + + >>> test_doctest.__file__ = old + + + >>> e = tests[0].examples[0] + >>> (e.source, e.want, e.lineno) + ('print(sample_func(22))\n', '44\n', 3) + +By default, tests are created for objects with no docstring: + + >>> def no_docstring(v): + ... pass + >>> finder.find(no_docstring) + [] + +However, the optional argument `exclude_empty` to the DocTestFinder +constructor can be used to exclude tests for objects with empty +docstrings: + + >>> def no_docstring(v): + ... pass + >>> excl_empty_finder = doctest.DocTestFinder(exclude_empty=True) + >>> excl_empty_finder.find(no_docstring) + [] + +If the function has a docstring with no examples, then a test with no +examples is returned. (This lets `DocTestRunner` collect statistics +about which functions have no tests -- but is that useful? And should +an empty test also be created when there's no docstring?) + + >>> def no_examples(v): + ... ''' no doctest examples ''' + >>> finder.find(no_examples) # doctest: +ELLIPSIS + [] + +Finding Tests in Classes +~~~~~~~~~~~~~~~~~~~~~~~~ +For a class, DocTestFinder will create a test for the class's +docstring, and will recursively explore its contents, including +methods, classmethods, staticmethods, properties, and nested classes. + + >>> finder = doctest.DocTestFinder() + >>> tests = finder.find(SampleClass) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 3 SampleClass + 3 SampleClass.NestedClass + 1 SampleClass.NestedClass.__init__ + 1 SampleClass.__init__ + 1 SampleClass.a_cached_property + 2 SampleClass.a_classmethod + 1 SampleClass.a_property + 1 SampleClass.a_staticmethod + 1 SampleClass.double + 1 SampleClass.get + 3 SampleClass.setter + +New-style classes are also supported: + + >>> tests = finder.find(SampleNewStyleClass) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 1 SampleNewStyleClass + 1 SampleNewStyleClass.__init__ + 1 SampleNewStyleClass.double + 1 SampleNewStyleClass.get + +Finding Tests in Modules +~~~~~~~~~~~~~~~~~~~~~~~~ +For a module, DocTestFinder will create a test for the class's +docstring, and will recursively explore its contents, including +functions, classes, and the `__test__` dictionary, if it exists: + + >>> # A module + >>> import types + >>> m = types.ModuleType('some_module') + >>> def triple(val): + ... ''' + ... >>> print(triple(11)) + ... 33 + ... ''' + ... return val*3 + >>> m.__dict__.update({ + ... 'sample_func': sample_func, + ... 'SampleClass': SampleClass, + ... '__doc__': ''' + ... Module docstring. + ... >>> print('module') + ... module + ... ''', + ... '__test__': { + ... 'd': '>>> print(6)\n6\n>>> print(7)\n7\n', + ... 'c': triple}}) + + >>> finder = doctest.DocTestFinder() + >>> # Use module=test_doctest, to prevent doctest from + >>> # ignoring the objects since they weren't defined in m. + >>> from test.test_doctest import test_doctest + >>> tests = finder.find(m, module=test_doctest) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 1 some_module + 3 some_module.SampleClass + 3 some_module.SampleClass.NestedClass + 1 some_module.SampleClass.NestedClass.__init__ + 1 some_module.SampleClass.__init__ + 1 some_module.SampleClass.a_cached_property + 2 some_module.SampleClass.a_classmethod + 1 some_module.SampleClass.a_property + 1 some_module.SampleClass.a_staticmethod + 1 some_module.SampleClass.double + 1 some_module.SampleClass.get + 3 some_module.SampleClass.setter + 1 some_module.__test__.c + 2 some_module.__test__.d + 1 some_module.sample_func + +However, doctest will ignore imported objects from other modules +(without proper `module=`): + + >>> import types + >>> m = types.ModuleType('poluted_namespace') + >>> m.__dict__.update({ + ... 'sample_func': sample_func, + ... 'SampleClass': SampleClass, + ... }) + + >>> finder = doctest.DocTestFinder() + >>> finder.find(m) + [] + +Duplicate Removal +~~~~~~~~~~~~~~~~~ +If a single object is listed twice (under different names), then tests +will only be generated for it once: + + >>> from test.test_doctest import doctest_aliases + >>> assert doctest_aliases.TwoNames.f + >>> assert doctest_aliases.TwoNames.g + >>> tests = excl_empty_finder.find(doctest_aliases) + >>> print(len(tests)) + 2 + >>> print(tests[0].name) + test.test_doctest.doctest_aliases.TwoNames + + TwoNames.f and TwoNames.g are bound to the same object. + We can't guess which will be found in doctest's traversal of + TwoNames.__dict__ first, so we have to allow for either. + + >>> tests[1].name.split('.')[-1] in ['f', 'g'] + True + +Empty Tests +~~~~~~~~~~~ +By default, an object with no doctests doesn't create any tests: + + >>> tests = doctest.DocTestFinder().find(SampleClass) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 3 SampleClass + 3 SampleClass.NestedClass + 1 SampleClass.NestedClass.__init__ + 1 SampleClass.__init__ + 1 SampleClass.a_cached_property + 2 SampleClass.a_classmethod + 1 SampleClass.a_property + 1 SampleClass.a_staticmethod + 1 SampleClass.double + 1 SampleClass.get + 3 SampleClass.setter + +By default, that excluded objects with no doctests. exclude_empty=False +tells it to include (empty) tests for objects with no doctests. This feature +is really to support backward compatibility in what doctest.master.summarize() +displays. + + >>> tests = doctest.DocTestFinder(exclude_empty=False).find(SampleClass) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 3 SampleClass + 3 SampleClass.NestedClass + 1 SampleClass.NestedClass.__init__ + 0 SampleClass.NestedClass.get + 0 SampleClass.NestedClass.square + 1 SampleClass.__init__ + 1 SampleClass.a_cached_property + 2 SampleClass.a_classmethod + 1 SampleClass.a_property + 1 SampleClass.a_staticmethod + 1 SampleClass.double + 1 SampleClass.get + 3 SampleClass.setter + +When used with `exclude_empty=False` we are also interested in line numbers +of doctests that are empty. +It used to be broken for quite some time until `bpo-28249`. + + >>> from test.test_doctest import doctest_lineno + >>> tests = doctest.DocTestFinder(exclude_empty=False).find(doctest_lineno) + >>> for t in tests: + ... print('%5s %s' % (t.lineno, t.name)) + None test.test_doctest.doctest_lineno + None test.test_doctest.doctest_lineno.ClassWithACachedProperty + 102 test.test_doctest.doctest_lineno.ClassWithACachedProperty.cached + 22 test.test_doctest.doctest_lineno.ClassWithDocstring + 30 test.test_doctest.doctest_lineno.ClassWithDoctest + None test.test_doctest.doctest_lineno.ClassWithoutDocstring + None test.test_doctest.doctest_lineno.MethodWrapper + 53 test.test_doctest.doctest_lineno.MethodWrapper.classmethod_with_doctest + 39 test.test_doctest.doctest_lineno.MethodWrapper.method_with_docstring + 45 test.test_doctest.doctest_lineno.MethodWrapper.method_with_doctest + None test.test_doctest.doctest_lineno.MethodWrapper.method_without_docstring + 61 test.test_doctest.doctest_lineno.MethodWrapper.property_with_doctest + 86 test.test_doctest.doctest_lineno.cached_func_with_doctest + None test.test_doctest.doctest_lineno.cached_func_without_docstring + 4 test.test_doctest.doctest_lineno.func_with_docstring + 77 test.test_doctest.doctest_lineno.func_with_docstring_wrapped + 12 test.test_doctest.doctest_lineno.func_with_doctest + None test.test_doctest.doctest_lineno.func_without_docstring + +Turning off Recursion +~~~~~~~~~~~~~~~~~~~~~ +DocTestFinder can be told not to look for tests in contained objects +using the `recurse` flag: + + >>> tests = doctest.DocTestFinder(recurse=False).find(SampleClass) + >>> for t in tests: + ... print('%2s %s' % (len(t.examples), t.name)) + 3 SampleClass + +Line numbers +~~~~~~~~~~~~ +DocTestFinder finds the line number of each example: + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... + ... some text + ... + ... >>> # examples are not created for comments & bare prompts. + ... >>> + ... ... + ... + ... >>> for x in range(10): + ... ... print(x, end=' ') + ... 0 1 2 3 4 5 6 7 8 9 + ... >>> x//2 + ... 6 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> [e.lineno for e in test.examples] + [1, 9, 12] +""" + + if int.__doc__: # simple check for --without-doc-strings, skip if lacking + def non_Python_modules(): r""" + +Finding Doctests in Modules Not Written in Python +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +DocTestFinder can also find doctests in most modules not written in Python. +We'll use builtins as an example, since it almost certainly isn't written in +plain ol' Python and is guaranteed to be available. + + >>> import builtins + >>> tests = doctest.DocTestFinder().find(builtins) + >>> 750 < len(tests) < 800 # approximate number of objects with docstrings + True + >>> real_tests = [t for t in tests if len(t.examples) > 0] + >>> len(real_tests) # objects that actually have doctests + 14 + >>> for t in real_tests: + ... print('{} {}'.format(len(t.examples), t.name)) + ... + 1 builtins.bin + 5 builtins.bytearray.hex + 5 builtins.bytes.hex + 3 builtins.float.as_integer_ratio + 2 builtins.float.fromhex + 2 builtins.float.hex + 1 builtins.hex + 1 builtins.int + 3 builtins.int.as_integer_ratio + 2 builtins.int.bit_count + 2 builtins.int.bit_length + 5 builtins.memoryview.hex + 1 builtins.oct + 1 builtins.zip + +Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio', +'float.hex', and 'int.bit_length' are methods; 'float.fromhex' is a classmethod, +and 'int' is a type. +""" + + +class TestDocTest(unittest.TestCase): + + def test_run(self): + test = ''' + >>> 1 + 1 + 11 + >>> 2 + 3 # doctest: +SKIP + "23" + >>> 5 + 7 + 57 + ''' + + def myfunc(): + pass + myfunc.__doc__ = test + + # test DocTestFinder.run() + test = doctest.DocTestFinder().find(myfunc)[0] + with support.captured_stdout(): + with support.captured_stderr(): + results = doctest.DocTestRunner(verbose=False).run(test) + + # test TestResults + self.assertIsInstance(results, doctest.TestResults) + self.assertEqual(results.failed, 2) + self.assertEqual(results.attempted, 3) + self.assertEqual(results.skipped, 1) + self.assertEqual(tuple(results), (2, 3)) + x, y = results + self.assertEqual((x, y), (2, 3)) + + +class TestDocTestFinder(unittest.TestCase): + + def test_issue35753(self): + # This import of `call` should trigger issue35753 when + # DocTestFinder.find() is called due to inspect.unwrap() failing, + # however with a patched doctest this should succeed. + from unittest.mock import call + dummy_module = types.ModuleType("dummy") + dummy_module.__dict__['inject_call'] = call + finder = doctest.DocTestFinder() + self.assertEqual(finder.find(dummy_module), []) + + def test_empty_namespace_package(self): + pkg_name = 'doctest_empty_pkg' + with tempfile.TemporaryDirectory() as parent_dir: + pkg_dir = os.path.join(parent_dir, pkg_name) + os.mkdir(pkg_dir) + sys.path.append(parent_dir) + try: + mod = importlib.import_module(pkg_name) + finally: + import_helper.forget(pkg_name) + sys.path.pop() + + include_empty_finder = doctest.DocTestFinder(exclude_empty=False) + exclude_empty_finder = doctest.DocTestFinder(exclude_empty=True) + + self.assertEqual(len(include_empty_finder.find(mod)), 1) + self.assertEqual(len(exclude_empty_finder.find(mod)), 0) + +def test_DocTestParser(): r""" +Unit tests for the `DocTestParser` class. + +DocTestParser is used to parse docstrings containing doctest examples. + +The `parse` method divides a docstring into examples and intervening +text: + + >>> s = ''' + ... >>> x, y = 2, 3 # no output expected + ... >>> if 1: + ... ... print(x) + ... ... print(y) + ... 2 + ... 3 + ... + ... Some text. + ... >>> x+y + ... 5 + ... ''' + >>> parser = doctest.DocTestParser() + >>> for piece in parser.parse(s): + ... if isinstance(piece, doctest.Example): + ... print('Example:', (piece.source, piece.want, piece.lineno)) + ... else: + ... print(' Text:', repr(piece)) + Text: '\n' + Example: ('x, y = 2, 3 # no output expected\n', '', 1) + Text: '' + Example: ('if 1:\n print(x)\n print(y)\n', '2\n3\n', 2) + Text: '\nSome text.\n' + Example: ('x+y\n', '5\n', 9) + Text: '' + +The `get_examples` method returns just the examples: + + >>> for piece in parser.get_examples(s): + ... print((piece.source, piece.want, piece.lineno)) + ('x, y = 2, 3 # no output expected\n', '', 1) + ('if 1:\n print(x)\n print(y)\n', '2\n3\n', 2) + ('x+y\n', '5\n', 9) + +The `get_doctest` method creates a Test from the examples, along with the +given arguments: + + >>> test = parser.get_doctest(s, {}, 'name', 'filename', lineno=5) + >>> (test.name, test.filename, test.lineno) + ('name', 'filename', 5) + >>> for piece in test.examples: + ... print((piece.source, piece.want, piece.lineno)) + ('x, y = 2, 3 # no output expected\n', '', 1) + ('if 1:\n print(x)\n print(y)\n', '2\n3\n', 2) + ('x+y\n', '5\n', 9) +""" + +class test_DocTestRunner: + def basics(): r""" +Unit tests for the `DocTestRunner` class. + +DocTestRunner is used to run DocTest test cases, and to accumulate +statistics. Here's a simple DocTest case we can use: + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... >>> print(x) + ... 12 + ... >>> x//2 + ... 6 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + +The main DocTestRunner interface is the `run` method, which runs a +given DocTest case in a given namespace (globs). It returns a tuple +`(f,t)`, where `f` is the number of failed tests and `t` is the number +of tried tests. + + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=3) + +If any example produces incorrect output, then the test runner reports +the failure and proceeds to the next example: + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... >>> print(x) + ... 14 + ... >>> x//2 + ... 6 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=True).run(test) + ... # doctest: +ELLIPSIS + Trying: + x = 12 + Expecting nothing + ok + Trying: + print(x) + Expecting: + 14 + ********************************************************************** + File ..., line 4, in f + Failed example: + print(x) + Expected: + 14 + Got: + 12 + Trying: + x//2 + Expecting: + 6 + ok + TestResults(failed=1, attempted=3) + + >>> _colorize.COLORIZE = save_colorize +""" + def verbose_flag(): r""" +The `verbose` flag makes the test runner generate more detailed +output: + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... >>> print(x) + ... 12 + ... >>> x//2 + ... 6 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + + >>> doctest.DocTestRunner(verbose=True).run(test) + Trying: + x = 12 + Expecting nothing + ok + Trying: + print(x) + Expecting: + 12 + ok + Trying: + x//2 + Expecting: + 6 + ok + TestResults(failed=0, attempted=3) + +If the `verbose` flag is unspecified, then the output will be verbose +iff `-v` appears in sys.argv: + + >>> # Save the real sys.argv list. + >>> old_argv = sys.argv + + >>> # If -v does not appear in sys.argv, then output isn't verbose. + >>> sys.argv = ['test'] + >>> doctest.DocTestRunner().run(test) + TestResults(failed=0, attempted=3) + + >>> # If -v does appear in sys.argv, then output is verbose. + >>> sys.argv = ['test', '-v'] + >>> doctest.DocTestRunner().run(test) + Trying: + x = 12 + Expecting nothing + ok + Trying: + print(x) + Expecting: + 12 + ok + Trying: + x//2 + Expecting: + 6 + ok + TestResults(failed=0, attempted=3) + + >>> # Restore sys.argv + >>> sys.argv = old_argv + +In the remaining examples, the test runner's verbosity will be +explicitly set, to ensure that the test behavior is consistent. + """ + def exceptions(): r""" +Tests of `DocTestRunner`'s exception handling. + +An expected exception is specified with a traceback message. The +lines between the first line and the type/value may be omitted or +replaced with any other string: + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... >>> print(x//0) + ... Traceback (most recent call last): + ... ZeroDivisionError: division by zero + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +An example may not generate output before it raises an exception; if +it does, then the traceback message will not be recognized as +signaling an expected exception, so the example will be reported as an +unexpected exception: + + >>> def f(x): + ... ''' + ... >>> x = 12 + ... >>> print('pre-exception output', x//0) + ... pre-exception output + ... Traceback (most recent call last): + ... ZeroDivisionError: division by zero + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 4, in f + Failed example: + print('pre-exception output', x//0) + Exception raised: + ... + ZeroDivisionError: division by zero + TestResults(failed=1, attempted=2) + +Exception messages may contain newlines: + + >>> def f(x): + ... r''' + ... >>> raise ValueError('multi\nline\nmessage') + ... Traceback (most recent call last): + ... ValueError: multi + ... line + ... message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + +If an exception is expected, but an exception with the wrong type or +message is raised, then it is reported as a failure: + + >>> def f(x): + ... r''' + ... >>> raise ValueError('message') + ... Traceback (most recent call last): + ... ValueError: wrong message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + raise ValueError('message') + Expected: + Traceback (most recent call last): + ValueError: wrong message + Got: + Traceback (most recent call last): + ... + ValueError: message + TestResults(failed=1, attempted=1) + +However, IGNORE_EXCEPTION_DETAIL can be used to allow a mismatch in the +detail: + + >>> def f(x): + ... r''' + ... >>> raise ValueError('message') #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... ValueError: wrong message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + +IGNORE_EXCEPTION_DETAIL also ignores difference in exception formatting +between Python versions. For example, in Python 2.x, the module path of +the exception is not in the output, but this will fail under Python 3: + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException('message') + ... Traceback (most recent call last): + ... HTTPException: message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 4, in f + Failed example: + raise HTTPException('message') + Expected: + Traceback (most recent call last): + HTTPException: message + Got: + Traceback (most recent call last): + ... + http.client.HTTPException: message + TestResults(failed=1, attempted=2) + +But in Python 3 the module path is included, and therefore a test must look +like the following test to succeed in Python 3. But that test will fail under +Python 2. + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException('message') + ... Traceback (most recent call last): + ... http.client.HTTPException: message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +However, with IGNORE_EXCEPTION_DETAIL, the module name of the exception +(or its unexpected absence) will be ignored: + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException('message') #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... HTTPException: message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +The module path will be completely ignored, so two different module paths will +still pass if IGNORE_EXCEPTION_DETAIL is given. This is intentional, so it can +be used when exceptions have changed module. + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException('message') #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... foo.bar.HTTPException: message + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +But IGNORE_EXCEPTION_DETAIL does not allow a mismatch in the exception type: + + >>> def f(x): + ... r''' + ... >>> raise ValueError('message') #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... TypeError: wrong type + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + raise ValueError('message') #doctest: +IGNORE_EXCEPTION_DETAIL + Expected: + Traceback (most recent call last): + TypeError: wrong type + Got: + Traceback (most recent call last): + ... + ValueError: message + TestResults(failed=1, attempted=1) + +If the exception does not have a message, you can still use +IGNORE_EXCEPTION_DETAIL to normalize the modules between Python 2 and 3: + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException() #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... foo.bar.HTTPException + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +Note that a trailing colon doesn't matter either: + + >>> def f(x): + ... r''' + ... >>> from http.client import HTTPException + ... >>> raise HTTPException() #doctest: +IGNORE_EXCEPTION_DETAIL + ... Traceback (most recent call last): + ... foo.bar.HTTPException: + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +If an exception is raised but not expected, then it is reported as an +unexpected exception: + + >>> def f(x): + ... r''' + ... >>> 1//0 + ... 0 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + 1//0 + Exception raised: + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + TestResults(failed=1, attempted=1) + + >>> _colorize.COLORIZE = save_colorize +""" + def displayhook(): r""" +Test that changing sys.displayhook doesn't matter for doctest. + + >>> import sys + >>> orig_displayhook = sys.displayhook + >>> def my_displayhook(x): + ... print('hi!') + >>> sys.displayhook = my_displayhook + >>> def f(): + ... ''' + ... >>> 3 + ... 3 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> r = doctest.DocTestRunner(verbose=False).run(test) + >>> post_displayhook = sys.displayhook + + We need to restore sys.displayhook now, so that we'll be able to test + results. + + >>> sys.displayhook = orig_displayhook + + Ok, now we can check that everything is ok. + + >>> r + TestResults(failed=0, attempted=1) + >>> post_displayhook is my_displayhook + True +""" + def optionflags(): r""" +Tests of `DocTestRunner`'s option flag handling. + +Several option flags can be used to customize the behavior of the test +runner. These are defined as module constants in doctest, and passed +to the DocTestRunner constructor (multiple constants should be ORed +together). + +The DONT_ACCEPT_TRUE_FOR_1 flag disables matches between True/False +and 1/0: + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> def f(x): + ... '>>> True\n1\n' + + >>> # Without the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + + >>> # With the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.DONT_ACCEPT_TRUE_FOR_1 + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + True + Expected: + 1 + Got: + True + TestResults(failed=1, attempted=1) + +The DONT_ACCEPT_BLANKLINE flag disables the match between blank lines +and the '' marker: + + >>> def f(x): + ... '>>> print("a\\n\\nb")\na\n\nb\n' + + >>> # Without the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + + >>> # With the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.DONT_ACCEPT_BLANKLINE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print("a\n\nb") + Expected: + a + + b + Got: + a + + b + TestResults(failed=1, attempted=1) + +The NORMALIZE_WHITESPACE flag causes all sequences of whitespace to be +treated as equal: + + >>> def f(x): + ... '\n>>> print(1, 2, 3)\n 1 2\n 3' + + >>> # Without the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print(1, 2, 3) + Expected: + 1 2 + 3 + Got: + 1 2 3 + TestResults(failed=1, attempted=1) + + >>> # With the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.NORMALIZE_WHITESPACE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + TestResults(failed=0, attempted=1) + + An example from the docs: + >>> print(list(range(20))) #doctest: +NORMALIZE_WHITESPACE + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + +The ELLIPSIS flag causes ellipsis marker ("...") in the expected +output to match any substring in the actual output: + + >>> def f(x): + ... '>>> print(list(range(15)))\n[0, 1, 2, ..., 14]\n' + + >>> # Without the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(15))) + Expected: + [0, 1, 2, ..., 14] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + TestResults(failed=1, attempted=1) + + >>> # With the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.ELLIPSIS + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + TestResults(failed=0, attempted=1) + + ... also matches nothing: + + >>> if 1: + ... for i in range(100): + ... print(i**2, end=' ') #doctest: +ELLIPSIS + ... print('!') + 0 1...4...9 16 ... 36 49 64 ... 9801 ! + + ... can be surprising; e.g., this test passes: + + >>> if 1: #doctest: +ELLIPSIS + ... for i in range(20): + ... print(i, end=' ') + ... print(20) + 0 1 2 ...1...2...0 + + Examples from the docs: + + >>> print(list(range(20))) # doctest:+ELLIPSIS + [0, 1, ..., 18, 19] + + >>> print(list(range(20))) # doctest: +ELLIPSIS + ... # doctest: +NORMALIZE_WHITESPACE + [0, 1, ..., 18, 19] + +The SKIP flag causes an example to be skipped entirely. I.e., the +example is not run. It can be useful in contexts where doctest +examples serve as both documentation and test cases, and an example +should be included for documentation purposes, but should not be +checked (e.g., because its output is random, or depends on resources +which would be unavailable.) The SKIP flag can also be used for +'commenting out' broken examples. + + >>> import unavailable_resource # doctest: +SKIP + >>> unavailable_resource.do_something() # doctest: +SKIP + >>> unavailable_resource.blow_up() # doctest: +SKIP + Traceback (most recent call last): + ... + UncheckedBlowUpError: Nobody checks me. + + >>> import random + >>> print(random.random()) # doctest: +SKIP + 0.721216923889 + +The REPORT_UDIFF flag causes failures that involve multi-line expected +and actual outputs to be displayed using a unified diff: + + >>> def f(x): + ... r''' + ... >>> print('\n'.join('abcdefg')) + ... a + ... B + ... c + ... d + ... f + ... g + ... h + ... ''' + + >>> # Without the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print('\n'.join('abcdefg')) + Expected: + a + B + c + d + f + g + h + Got: + a + b + c + d + e + f + g + TestResults(failed=1, attempted=1) + + >>> # With the flag: + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_UDIFF + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print('\n'.join('abcdefg')) + Differences (unified diff with -expected +actual): + @@ -1,7 +1,7 @@ + a + -B + +b + c + d + +e + f + g + -h + TestResults(failed=1, attempted=1) + +The REPORT_CDIFF flag causes failures that involve multi-line expected +and actual outputs to be displayed using a context diff: + + >>> # Reuse f() from the REPORT_UDIFF example, above. + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_CDIFF + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print('\n'.join('abcdefg')) + Differences (context diff with expected followed by actual): + *************** + *** 1,7 **** + a + ! B + c + d + f + g + - h + --- 1,7 ---- + a + ! b + c + d + + e + f + g + TestResults(failed=1, attempted=1) + + +The REPORT_NDIFF flag causes failures to use the difflib.Differ algorithm +used by the popular ndiff.py utility. This does intraline difference +marking, as well as interline differences. + + >>> def f(x): + ... r''' + ... >>> print("a b c d e f g h i j k l m") + ... a b c d e f g h i j k 1 m + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_NDIFF + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print("a b c d e f g h i j k l m") + Differences (ndiff with -expected +actual): + - a b c d e f g h i j k 1 m + ? ^ + + a b c d e f g h i j k l m + ? + ++ ^ + TestResults(failed=1, attempted=1) + +The REPORT_ONLY_FIRST_FAILURE suppresses result output after the first +failing example: + + >>> def f(x): + ... r''' + ... >>> print(1) # first success + ... 1 + ... >>> print(2) # first failure + ... 200 + ... >>> print(3) # second failure + ... 300 + ... >>> print(4) # second success + ... 4 + ... >>> print(5) # third failure + ... 500 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_ONLY_FIRST_FAILURE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 5, in f + Failed example: + print(2) # first failure + Expected: + 200 + Got: + 2 + TestResults(failed=3, attempted=5) + +However, output from `report_start` is not suppressed: + + >>> doctest.DocTestRunner(verbose=True, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + Trying: + print(1) # first success + Expecting: + 1 + ok + Trying: + print(2) # first failure + Expecting: + 200 + ********************************************************************** + File ..., line 5, in f + Failed example: + print(2) # first failure + Expected: + 200 + Got: + 2 + TestResults(failed=3, attempted=5) + +The FAIL_FAST flag causes the runner to exit after the first failing example, +so subsequent examples are not even attempted: + + >>> flags = doctest.FAIL_FAST + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 5, in f + Failed example: + print(2) # first failure + Expected: + 200 + Got: + 2 + TestResults(failed=1, attempted=2) + +Specifying both FAIL_FAST and REPORT_ONLY_FIRST_FAILURE is equivalent to +FAIL_FAST only: + + >>> flags = doctest.FAIL_FAST | doctest.REPORT_ONLY_FIRST_FAILURE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 5, in f + Failed example: + print(2) # first failure + Expected: + 200 + Got: + 2 + TestResults(failed=1, attempted=2) + +For the purposes of both REPORT_ONLY_FIRST_FAILURE and FAIL_FAST, unexpected +exceptions count as failures: + + >>> def f(x): + ... r''' + ... >>> print(1) # first success + ... 1 + ... >>> raise ValueError(2) # first failure + ... 200 + ... >>> print(3) # second failure + ... 300 + ... >>> print(4) # second success + ... 4 + ... >>> print(5) # third failure + ... 500 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_ONLY_FIRST_FAILURE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 5, in f + Failed example: + raise ValueError(2) # first failure + Exception raised: + ... + ValueError: 2 + TestResults(failed=3, attempted=5) + >>> flags = doctest.FAIL_FAST + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 5, in f + Failed example: + raise ValueError(2) # first failure + Exception raised: + ... + ValueError: 2 + TestResults(failed=1, attempted=2) + +New option flags can also be registered, via register_optionflag(). Here +we reach into doctest's internals a bit. + + >>> unlikely = "UNLIKELY_OPTION_NAME" + >>> unlikely in doctest.OPTIONFLAGS_BY_NAME + False + >>> new_flag_value = doctest.register_optionflag(unlikely) + >>> unlikely in doctest.OPTIONFLAGS_BY_NAME + True + +Before 2.4.4/2.5, registering a name more than once erroneously created +more than one flag value. Here we verify that's fixed: + + >>> redundant_flag_value = doctest.register_optionflag(unlikely) + >>> redundant_flag_value == new_flag_value + True + +Clean up. + >>> del doctest.OPTIONFLAGS_BY_NAME[unlikely] + >>> _colorize.COLORIZE = save_colorize + + """ + + def option_directives(): r""" +Tests of `DocTestRunner`'s option directive mechanism. + +Option directives can be used to turn option flags on or off for a +single example. To turn an option on for an example, follow that +example with a comment of the form ``# doctest: +OPTION``: + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> def f(x): r''' + ... >>> print(list(range(10))) # should fail: no ellipsis + ... [0, 1, ..., 9] + ... + ... >>> print(list(range(10))) # doctest: +ELLIPSIS + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(10))) # should fail: no ellipsis + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=1, attempted=2) + +To turn an option off for an example, follow that example with a +comment of the form ``# doctest: -OPTION``: + + >>> def f(x): r''' + ... >>> print(list(range(10))) + ... [0, 1, ..., 9] + ... + ... >>> # should fail: no ellipsis + ... >>> print(list(range(10))) # doctest: -ELLIPSIS + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False, + ... optionflags=doctest.ELLIPSIS).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 6, in f + Failed example: + print(list(range(10))) # doctest: -ELLIPSIS + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=1, attempted=2) + +Option directives affect only the example that they appear with; they +do not change the options for surrounding examples: + + >>> def f(x): r''' + ... >>> print(list(range(10))) # Should fail: no ellipsis + ... [0, 1, ..., 9] + ... + ... >>> print(list(range(10))) # doctest: +ELLIPSIS + ... [0, 1, ..., 9] + ... + ... >>> print(list(range(10))) # Should fail: no ellipsis + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(10))) # Should fail: no ellipsis + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + ********************************************************************** + File ..., line 8, in f + Failed example: + print(list(range(10))) # Should fail: no ellipsis + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=2, attempted=3) + +Multiple options may be modified by a single option directive. They +may be separated by whitespace, commas, or both: + + >>> def f(x): r''' + ... >>> print(list(range(10))) # Should fail + ... [0, 1, ..., 9] + ... >>> print(list(range(10))) # Should succeed + ... ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(10))) # Should fail + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=1, attempted=2) + + >>> def f(x): r''' + ... >>> print(list(range(10))) # Should fail + ... [0, 1, ..., 9] + ... >>> print(list(range(10))) # Should succeed + ... ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(10))) # Should fail + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=1, attempted=2) + + >>> def f(x): r''' + ... >>> print(list(range(10))) # Should fail + ... [0, 1, ..., 9] + ... >>> print(list(range(10))) # Should succeed + ... ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 2, in f + Failed example: + print(list(range(10))) # Should fail + Expected: + [0, 1, ..., 9] + Got: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + TestResults(failed=1, attempted=2) + +The option directive may be put on the line following the source, as +long as a continuation prompt is used: + + >>> def f(x): r''' + ... >>> print(list(range(10))) + ... ... # doctest: +ELLIPSIS + ... [0, 1, ..., 9] + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + +For examples with multi-line source, the option directive may appear +at the end of any line: + + >>> def f(x): r''' + ... >>> for x in range(10): # doctest: +ELLIPSIS + ... ... print(' ', x, end='', sep='') + ... 0 1 2 ... 9 + ... + ... >>> for x in range(10): + ... ... print(' ', x, end='', sep='') # doctest: +ELLIPSIS + ... 0 1 2 ... 9 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=2) + +If more than one line of an example with multi-line source has an +option directive, then they are combined: + + >>> def f(x): r''' + ... Should fail (option directive not on the last line): + ... >>> for x in range(10): # doctest: +ELLIPSIS + ... ... print(x, end=' ') # doctest: +NORMALIZE_WHITESPACE + ... 0 1 2...9 + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + TestResults(failed=0, attempted=1) + +It is an error to have a comment of the form ``# doctest:`` that is +*not* followed by words of the form ``+OPTION`` or ``-OPTION``, where +``OPTION`` is an option that has been registered with +`register_option`: + + >>> # Error: Option not registered + >>> s = '>>> print(12) #doctest: +BADOPTION' + >>> test = doctest.DocTestParser().get_doctest(s, {}, 's', 's.py', 0) + Traceback (most recent call last): + ValueError: line 1 of the doctest for s has an invalid option: '+BADOPTION' + + >>> # Error: No + or - prefix + >>> s = '>>> print(12) #doctest: ELLIPSIS' + >>> test = doctest.DocTestParser().get_doctest(s, {}, 's', 's.py', 0) + Traceback (most recent call last): + ValueError: line 1 of the doctest for s has an invalid option: 'ELLIPSIS' + +It is an error to use an option directive on a line that contains no +source: + + >>> s = '>>> # doctest: +ELLIPSIS' + >>> test = doctest.DocTestParser().get_doctest(s, {}, 's', 's.py', 0) + Traceback (most recent call last): + ValueError: line 0 of the doctest for s has an option directive on a line with no example: '# doctest: +ELLIPSIS' + + >>> _colorize.COLORIZE = save_colorize +""" + +def test_testsource(): r""" +Unit tests for `testsource()`. + +The testsource() function takes a module and a name, finds the (first) +test with that name in that module, and converts it to a script. The +example code is converted to regular Python code. The surrounding +words and expected output are converted to comments: + + >>> from test.test_doctest import test_doctest + >>> name = 'test.test_doctest.test_doctest.sample_func' + >>> print(doctest.testsource(test_doctest, name)) + # Blah blah + # + print(sample_func(22)) + # Expected: + ## 44 + # + # Yee ha! + + + >>> name = 'test.test_doctest.test_doctest.SampleNewStyleClass' + >>> print(doctest.testsource(test_doctest, name)) + print('1\n2\n3') + # Expected: + ## 1 + ## 2 + ## 3 + + + >>> name = 'test.test_doctest.test_doctest.SampleClass.a_classmethod' + >>> print(doctest.testsource(test_doctest, name)) + print(SampleClass.a_classmethod(10)) + # Expected: + ## 12 + print(SampleClass(0).a_classmethod(10)) + # Expected: + ## 12 + +""" + +def test_debug(): r""" + +Create a docstring that we want to debug: + + >>> s = ''' + ... >>> x = 12 + ... >>> print(x) + ... 12 + ... ''' + +Create some fake stdin input, to feed to the debugger: + + >>> from test.support.pty_helper import FakeInput + >>> real_stdin = sys.stdin + >>> sys.stdin = FakeInput(['next', 'print(x)', 'continue']) + +Run the debugger on the docstring, and then restore sys.stdin. + + >>> try: doctest.debug_src(s) + ... finally: sys.stdin = real_stdin + > (1)() + (Pdb) next + 12 + --Return-- + > (1)()->None + (Pdb) print(x) + 12 + (Pdb) continue + +""" + +if not hasattr(sys, 'gettrace') or not sys.gettrace(): + def test_pdb_set_trace(): + """Using pdb.set_trace from a doctest. + + You can use pdb.set_trace from a doctest. To do so, you must + retrieve the set_trace function from the pdb module at the time + you use it. The doctest module changes sys.stdout so that it can + capture program output. It also temporarily replaces pdb.set_trace + with a version that restores stdout. This is necessary for you to + see debugger output. + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> doc = ''' + ... >>> x = 42 + ... >>> raise Exception('clé') + ... Traceback (most recent call last): + ... Exception: clé + ... >>> import pdb; pdb.set_trace() + ... ''' + >>> parser = doctest.DocTestParser() + >>> test = parser.get_doctest(doc, {}, "foo-bar@baz", "foo-bar@baz.py", 0) + >>> runner = doctest.DocTestRunner(verbose=False) + + To demonstrate this, we'll create a fake standard input that + captures our debugger input: + + >>> from test.support.pty_helper import FakeInput + >>> real_stdin = sys.stdin + >>> sys.stdin = FakeInput([ + ... 'print(x)', # print data defined by the example + ... 'continue', # stop debugging + ... '']) + + >>> try: runner.run(test) + ... finally: sys.stdin = real_stdin + > (1)() + -> import pdb; pdb.set_trace() + (Pdb) print(x) + 42 + (Pdb) continue + TestResults(failed=0, attempted=3) + + You can also put pdb.set_trace in a function called from a test: + + >>> def calls_set_trace(): + ... y=2 + ... import pdb; pdb.set_trace() + + >>> doc = ''' + ... >>> x=1 + ... >>> calls_set_trace() + ... ''' + >>> test = parser.get_doctest(doc, globals(), "foo-bar@baz", "foo-bar@baz.py", 0) + >>> real_stdin = sys.stdin + >>> sys.stdin = FakeInput([ + ... 'print(y)', # print data defined in the function + ... 'up', # out of function + ... 'print(x)', # print data defined by the example + ... 'continue', # stop debugging + ... '']) + + >>> try: + ... runner.run(test) + ... finally: + ... sys.stdin = real_stdin + > (3)calls_set_trace() + -> import pdb; pdb.set_trace() + (Pdb) print(y) + 2 + (Pdb) up + > (1)() + -> calls_set_trace() + (Pdb) print(x) + 1 + (Pdb) continue + TestResults(failed=0, attempted=2) + + During interactive debugging, source code is shown, even for + doctest examples: + + >>> doc = ''' + ... >>> def f(x): + ... ... g(x*2) + ... >>> def g(x): + ... ... print(x+3) + ... ... import pdb; pdb.set_trace() + ... >>> f(3) + ... ''' + >>> test = parser.get_doctest(doc, globals(), "foo-bar@baz", "foo-bar@baz.py", 0) + >>> real_stdin = sys.stdin + >>> sys.stdin = FakeInput([ + ... 'step', # return event of g + ... 'list', # list source from example 2 + ... 'next', # return from g() + ... 'list', # list source from example 1 + ... 'next', # return from f() + ... 'list', # list source from example 3 + ... 'continue', # stop debugging + ... '']) + >>> try: runner.run(test) + ... finally: sys.stdin = real_stdin + ... # doctest: +NORMALIZE_WHITESPACE + > (3)g() + -> import pdb; pdb.set_trace() + (Pdb) step + --Return-- + > (3)g()->None + -> import pdb; pdb.set_trace() + (Pdb) list + 1 def g(x): + 2 print(x+3) + 3 -> import pdb; pdb.set_trace() + [EOF] + (Pdb) next + --Return-- + > (2)f()->None + -> g(x*2) + (Pdb) list + 1 def f(x): + 2 -> g(x*2) + [EOF] + (Pdb) next + --Return-- + > (1)()->None + -> f(3) + (Pdb) list + 1 -> f(3) + [EOF] + (Pdb) continue + ********************************************************************** + File "foo-bar@baz.py", line 7, in foo-bar@baz + Failed example: + f(3) + Expected nothing + Got: + 9 + TestResults(failed=1, attempted=3) + + >>> _colorize.COLORIZE = save_colorize + """ + + def test_pdb_set_trace_nested(): + """This illustrates more-demanding use of set_trace with nested functions. + + >>> class C(object): + ... def calls_set_trace(self): + ... y = 1 + ... import pdb; pdb.set_trace() + ... self.f1() + ... y = 2 + ... def f1(self): + ... x = 1 + ... self.f2() + ... x = 2 + ... def f2(self): + ... z = 1 + ... z = 2 + + >>> calls_set_trace = C().calls_set_trace + + >>> doc = ''' + ... >>> a = 1 + ... >>> calls_set_trace() + ... ''' + >>> parser = doctest.DocTestParser() + >>> runner = doctest.DocTestRunner(verbose=False) + >>> test = parser.get_doctest(doc, globals(), "foo-bar@baz", "foo-bar@baz.py", 0) + >>> from test.support.pty_helper import FakeInput + >>> real_stdin = sys.stdin + >>> sys.stdin = FakeInput([ + ... 'step', + ... 'print(y)', # print data defined in the function + ... 'step', 'step', 'step', 'step', 'step', 'step', 'print(z)', + ... 'up', 'print(x)', + ... 'up', 'print(y)', + ... 'up', 'print(foo)', + ... 'continue', # stop debugging + ... '']) + + >>> try: + ... runner.run(test) + ... finally: + ... sys.stdin = real_stdin + ... # doctest: +REPORT_NDIFF + > (4)calls_set_trace() + -> import pdb; pdb.set_trace() + (Pdb) step + > (5)calls_set_trace() + -> self.f1() + (Pdb) print(y) + 1 + (Pdb) step + --Call-- + > (7)f1() + -> def f1(self): + (Pdb) step + > (8)f1() + -> x = 1 + (Pdb) step + > (9)f1() + -> self.f2() + (Pdb) step + --Call-- + > (11)f2() + -> def f2(self): + (Pdb) step + > (12)f2() + -> z = 1 + (Pdb) step + > (13)f2() + -> z = 2 + (Pdb) print(z) + 1 + (Pdb) up + > (9)f1() + -> self.f2() + (Pdb) print(x) + 1 + (Pdb) up + > (5)calls_set_trace() + -> self.f1() + (Pdb) print(y) + 1 + (Pdb) up + > (1)() + -> calls_set_trace() + (Pdb) print(foo) + *** NameError: name 'foo' is not defined + (Pdb) continue + TestResults(failed=0, attempted=2) + """ + +def test_DocTestSuite(): + """DocTestSuite creates a unittest test suite from a doctest. + + We create a Suite by providing a module. A module can be provided + by passing a module object: + + >>> import unittest + >>> import test.test_doctest.sample_doctest + >>> suite = doctest.DocTestSuite(test.test_doctest.sample_doctest) + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> for tst, _ in result.failures: + ... print(tst) + bad (test.test_doctest.sample_doctest.__test__) + foo (test.test_doctest.sample_doctest) + test_silly_setup (test.test_doctest.sample_doctest) + y_is_one (test.test_doctest.sample_doctest) + + We can also supply the module by name: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest') + >>> result = suite.run(unittest.TestResult()) + >>> result + + + The module need not contain any doctest examples: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest_no_doctests') + >>> suite.run(unittest.TestResult()) + + + The module need not contain any docstrings either: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest_no_docstrings') + >>> suite.run(unittest.TestResult()) + + + If all examples in a docstring are skipped, unittest will report it as a + skipped test: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest_skip') + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> len(result.skipped) + 2 + >>> for tst, _ in result.skipped: + ... print(tst) + double_skip (test.test_doctest.sample_doctest_skip) + single_skip (test.test_doctest.sample_doctest_skip) + >>> for tst, _ in result.failures: + ... print(tst) + no_skip_fail (test.test_doctest.sample_doctest_skip) + partial_skip_fail (test.test_doctest.sample_doctest_skip) + + We can use the current module: + + >>> suite = test.test_doctest.sample_doctest.test_suite() + >>> suite.run(unittest.TestResult()) + + + We can also provide a DocTestFinder: + + >>> finder = doctest.DocTestFinder() + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', + ... test_finder=finder) + >>> suite.run(unittest.TestResult()) + + + The DocTestFinder need not return any tests: + + >>> finder = doctest.DocTestFinder() + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest_no_docstrings', + ... test_finder=finder) + >>> suite.run(unittest.TestResult()) + + + We can supply global variables. If we pass globs, they will be + used instead of the module globals. Here we'll pass an empty + globals, triggering an extra error: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', globs={}) + >>> suite.run(unittest.TestResult()) + + + Alternatively, we can provide extra globals. Here we'll make an + error go away by providing an extra global variable: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', + ... extraglobs={'y': 1}) + >>> suite.run(unittest.TestResult()) + + + You can pass option flags. Here we'll cause an extra error + by disabling the blank-line feature: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', + ... optionflags=doctest.DONT_ACCEPT_BLANKLINE) + >>> suite.run(unittest.TestResult()) + + + You can supply setUp and tearDown functions: + + >>> def setUp(t): + ... from test.test_doctest import test_doctest + ... test_doctest.sillySetup = True + + >>> def tearDown(t): + ... from test.test_doctest import test_doctest + ... del test_doctest.sillySetup + + Here, we installed a silly variable that the test expects: + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', + ... setUp=setUp, tearDown=tearDown) + >>> suite.run(unittest.TestResult()) + + + But the tearDown restores sanity: + + >>> from test.test_doctest import test_doctest + >>> test_doctest.sillySetup + Traceback (most recent call last): + ... + AttributeError: module 'test.test_doctest.test_doctest' has no attribute 'sillySetup' + + The setUp and tearDown functions are passed test objects. Here + we'll use the setUp function to supply the missing variable y: + + >>> def setUp(test): + ... test.globs['y'] = 1 + + >>> suite = doctest.DocTestSuite('test.test_doctest.sample_doctest', setUp=setUp) + >>> suite.run(unittest.TestResult()) + + + Here, we didn't need to use a tearDown function because we + modified the test globals, which are a copy of the + sample_doctest module dictionary. The test globals are + automatically cleared for us after a test. + """ + +def test_DocTestSuite_errors(): + """Tests for error reporting in DocTestSuite. + + >>> import unittest + >>> import test.test_doctest.sample_doctest_errors as mod + >>> suite = doctest.DocTestSuite(mod) + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors + File "...sample_doctest_errors.py", line 0, in sample_doctest_errors + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.failures[1][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.__test__.bad + File "...sample_doctest_errors.py", line unknown line number, in bad + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> print(result.failures[2][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.errors + File "...sample_doctest_errors.py", line 14, in errors + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors + Failed example: + g() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + g() + ~^^ + File "...sample_doctest_errors.py", line 12, in g + [][0] # line 12 + ~~^^^ + IndexError: list index out of range + + >>> print(result.failures[3][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test.test_doctest.sample_doctest_errors.syntax_error + File "...sample_doctest_errors.py", line 29, in syntax_error + + ---------------------------------------------------------------------- + File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error + Failed example: + 2+*3 + Exception raised: + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + + """ + +def test_DocFileSuite(): + """We can test tests found in text files using a DocFileSuite. + + We create a suite by providing the names of one or more text + files that include examples: + + >>> import unittest + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt') + >>> suite.run(unittest.TestResult()) + + + The test files are looked for in the directory containing the + calling module. A package keyword argument can be provided to + specify a different relative location. + + >>> import unittest + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... package='test.test_doctest') + >>> suite.run(unittest.TestResult()) + + + '/' should be used as a path separator. It will be converted + to a native separator at run time: + + >>> suite = doctest.DocFileSuite('../test_doctest/test_doctest.txt') + >>> suite.run(unittest.TestResult()) + + + If DocFileSuite is used from an interactive session, then files + are resolved relative to the directory of sys.argv[0]: + + >>> import types, os.path + >>> from test.test_doctest import test_doctest + >>> save_argv = sys.argv + >>> sys.argv = [test_doctest.__file__] + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... package=types.ModuleType('__main__')) + >>> sys.argv = save_argv + + By setting `module_relative=False`, os-specific paths may be + used (including absolute paths and paths relative to the + working directory): + + >>> # Get the absolute path of the test package. + >>> test_doctest_path = os.path.abspath(test_doctest.__file__) + >>> test_pkg_path = os.path.split(test_doctest_path)[0] + + >>> # Use it to find the absolute path of test_doctest.txt. + >>> test_file = os.path.join(test_pkg_path, 'test_doctest.txt') + + >>> suite = doctest.DocFileSuite(test_file, module_relative=False) + >>> suite.run(unittest.TestResult()) + + + It is an error to specify `package` when `module_relative=False`: + + >>> suite = doctest.DocFileSuite(test_file, module_relative=False, + ... package='test') + Traceback (most recent call last): + ValueError: Package may only be specified for module-relative paths. + + If all examples in a file are skipped, unittest will report it as a + skipped test: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest4.txt', + ... 'test_doctest_skip.txt', + ... 'test_doctest_skip2.txt') + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> len(result.skipped) + 1 + >>> for tst, _ in result.skipped: # doctest: +ELLIPSIS + ... print('=', tst) + = ...test_doctest_skip.txt + + You can specify initial global variables: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... globs={'favorite_color': 'blue'}) + >>> suite.run(unittest.TestResult()) + + + In this case, we supplied a missing favorite color. You can + provide doctest options: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... optionflags=doctest.DONT_ACCEPT_BLANKLINE, + ... globs={'favorite_color': 'blue'}) + >>> suite.run(unittest.TestResult()) + + + And, you can provide setUp and tearDown functions: + + >>> def setUp(t): + ... from test.test_doctest import test_doctest + ... test_doctest.sillySetup = True + + >>> def tearDown(t): + ... from test.test_doctest import test_doctest + ... del test_doctest.sillySetup + + Here, we installed a silly variable that the test expects: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... setUp=setUp, tearDown=tearDown) + >>> suite.run(unittest.TestResult()) + + + But the tearDown restores sanity: + + >>> from test.test_doctest import test_doctest + >>> test_doctest.sillySetup + Traceback (most recent call last): + ... + AttributeError: module 'test.test_doctest.test_doctest' has no attribute 'sillySetup' + + The setUp and tearDown functions are passed test objects. + Here, we'll use a setUp function to set the favorite color in + test_doctest.txt: + + >>> def setUp(test): + ... test.globs['favorite_color'] = 'blue' + + >>> suite = doctest.DocFileSuite('test_doctest.txt', setUp=setUp) + >>> suite.run(unittest.TestResult()) + + + Here, we didn't need to use a tearDown function because we + modified the test globals. The test globals are + automatically cleared for us after a test. + + Tests in a file run using `DocFileSuite` can also access the + `__file__` global, which is set to the name of the file + containing the tests: + + >>> suite = doctest.DocFileSuite('test_doctest3.txt') + >>> suite.run(unittest.TestResult()) + + + If the tests contain non-ASCII characters, we have to specify which + encoding the file is encoded with. We do so by using the `encoding` + parameter: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... 'test_doctest2.txt', + ... 'test_doctest4.txt', + ... encoding='utf-8') + >>> suite.run(unittest.TestResult()) + + """ + +def test_DocFileSuite_errors(): + """Tests for error reporting in DocTestSuite. + + >>> import unittest + >>> suite = doctest.DocFileSuite('test_doctest_errors.txt') + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test_doctest_errors.txt + File "...test_doctest_errors.txt", line 0 + + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ---------------------------------------------------------------------- + File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt + Failed example: + 2+*3 + Exception raised: + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + + """ + +def test_trailing_space_in_test(): + """ + Trailing spaces in expected output are significant: + + >>> x, y = 'foo', '' + >>> print(x, y) + foo \n + """ + +class Wrapper: + def __init__(self, func): + self.func = func + functools.update_wrapper(self, func) + + def __call__(self, *args, **kwargs): + self.func(*args, **kwargs) + +@Wrapper +def wrapped(): + """ + Docstrings in wrapped functions must be detected as well. + + >>> 'one other test' + 'one other test' + """ + +def test_look_in_unwrapped(): + """ + Ensure that wrapped doctests work correctly. + + >>> import doctest + >>> doctest.run_docstring_examples( + ... wrapped, {}, name=wrapped.__name__, verbose=True) + Finding tests in wrapped + Trying: + 'one other test' + Expecting: + 'one other test' + ok + """ + +@doctest_skip_if(support.check_impl_detail(cpython=False)) +def test_wrapped_c_func(): + """ + # https://github.com/python/cpython/issues/117692 + >>> import binascii + >>> from test.test_doctest.decorator_mod import decorator + + >>> c_func_wrapped = decorator(binascii.b2a_hex) + >>> tests = doctest.DocTestFinder(exclude_empty=False).find(c_func_wrapped) + >>> for test in tests: + ... print(test.lineno, test.name) + None b2a_hex + """ + +def test_unittest_reportflags(): + """Default unittest reporting flags can be set to control reporting + + Here, we'll set the REPORT_ONLY_FIRST_FAILURE option so we see + only the first failure of each test. First, we'll look at the + output without the flag. The file test_doctest.txt file has two + tests. They both fail if blank lines are disabled: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... optionflags=doctest.DONT_ACCEPT_BLANKLINE) + >>> import unittest + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test_doctest.txt + ... + Failed example: + favorite_color + ... + Failed example: + if 1: + ... + + Note that we see both failures displayed. + + >>> old = doctest.set_unittest_reportflags( + ... doctest.REPORT_ONLY_FIRST_FAILURE) + + Now, when we run the test: + + >>> result = suite.run(unittest.TestResult()) + >>> result + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test_doctest.txt + ... + Failed example: + favorite_color + Exception raised: + ... + NameError: name 'favorite_color' is not defined + + + We get only the first failure. + + If we give any reporting options when we set up the tests, + however: + + >>> suite = doctest.DocFileSuite('test_doctest.txt', + ... optionflags=doctest.DONT_ACCEPT_BLANKLINE | doctest.REPORT_NDIFF) + + Then the default eporting options are ignored: + + >>> result = suite.run(unittest.TestResult()) + >>> result + + + *NOTE*: These doctest are intentionally not placed in raw string to depict + the trailing whitespace using `\x20` in the diff below. + + >>> print(result.failures[0][1]) # doctest: +ELLIPSIS + AssertionError: Failed doctest test for test_doctest.txt + ... + Failed example: + favorite_color + ... + Failed example: + if 1: + print('a') + print() + print('b') + Differences (ndiff with -expected +actual): + a + - + +\x20 + b + + + + Test runners can restore the formatting flags after they run: + + >>> ignored = doctest.set_unittest_reportflags(old) + + """ + +def test_testfile(): r""" +Tests for the `testfile()` function. This function runs all the +doctest examples in a given file. In its simple invocation, it is +called with the name of a file, which is taken to be relative to the +calling module. The return value is (#failures, #tests). + +We don't want color or `-v` in sys.argv for these tests. + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> save_argv = sys.argv + >>> if '-v' in sys.argv: + ... sys.argv = [arg for arg in save_argv if arg != '-v'] + + + >>> doctest.testfile('test_doctest.txt') # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 6, in test_doctest.txt + Failed example: + favorite_color + Exception raised: + ... + NameError: name 'favorite_color' is not defined + ********************************************************************** + 1 item had failures: + 1 of 2 in test_doctest.txt + ***Test Failed*** 1 failure. + TestResults(failed=1, attempted=2) + >>> doctest.master = None # Reset master. + +(Note: we'll be clearing doctest.master after each call to +`doctest.testfile`, to suppress warnings about multiple tests with the +same name.) + +Globals may be specified with the `globs` and `extraglobs` parameters: + + >>> globs = {'favorite_color': 'blue'} + >>> doctest.testfile('test_doctest.txt', globs=globs) + TestResults(failed=0, attempted=2) + >>> doctest.master = None # Reset master. + + >>> extraglobs = {'favorite_color': 'red'} + >>> doctest.testfile('test_doctest.txt', globs=globs, + ... extraglobs=extraglobs) # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 6, in test_doctest.txt + Failed example: + favorite_color + Expected: + 'blue' + Got: + 'red' + ********************************************************************** + 1 item had failures: + 1 of 2 in test_doctest.txt + ***Test Failed*** 1 failure. + TestResults(failed=1, attempted=2) + >>> doctest.master = None # Reset master. + +The file may be made relative to a given module or package, using the +optional `module_relative` parameter: + + >>> doctest.testfile('test_doctest.txt', globs=globs, + ... module_relative='test') + TestResults(failed=0, attempted=2) + >>> doctest.master = None # Reset master. + +Verbosity can be increased with the optional `verbose` parameter: + + >>> doctest.testfile('test_doctest.txt', globs=globs, verbose=True) + Trying: + favorite_color + Expecting: + 'blue' + ok + Trying: + if 1: + print('a') + print() + print('b') + Expecting: + a + + b + ok + 1 item passed all tests: + 2 tests in test_doctest.txt + 2 tests in 1 item. + 2 passed. + Test passed. + TestResults(failed=0, attempted=2) + >>> doctest.master = None # Reset master. + +The name of the test may be specified with the optional `name` +parameter: + + >>> doctest.testfile('test_doctest.txt', name='newname') + ... # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 6, in newname + ... + TestResults(failed=1, attempted=2) + >>> doctest.master = None # Reset master. + +The summary report may be suppressed with the optional `report` +parameter: + + >>> doctest.testfile('test_doctest.txt', report=False) + ... # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 6, in test_doctest.txt + Failed example: + favorite_color + Exception raised: + ... + NameError: name 'favorite_color' is not defined + TestResults(failed=1, attempted=2) + >>> doctest.master = None # Reset master. + +The optional keyword argument `raise_on_error` can be used to raise an +exception on the first error (which may be useful for postmortem +debugging): + + >>> doctest.testfile('test_doctest.txt', raise_on_error=True) + ... # doctest: +ELLIPSIS + Traceback (most recent call last): + doctest.UnexpectedException: ... + >>> doctest.master = None # Reset master. + +If the tests contain non-ASCII characters, the tests might fail, since +it's unknown which encoding is used. The encoding can be specified +using the optional keyword argument `encoding`: + + >>> doctest.testfile('test_doctest4.txt', encoding='latin-1') # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 7, in test_doctest4.txt + Failed example: + '...' + Expected: + 'f\xf6\xf6' + Got: + 'f\xc3\xb6\xc3\xb6' + ********************************************************************** + ... + ********************************************************************** + 1 item had failures: + 2 of 2 in test_doctest4.txt + ***Test Failed*** 2 failures. + TestResults(failed=2, attempted=2) + >>> doctest.master = None # Reset master. + + >>> doctest.testfile('test_doctest4.txt', encoding='utf-8') + TestResults(failed=0, attempted=2) + >>> doctest.master = None # Reset master. + +Test the verbose output: + + >>> doctest.testfile('test_doctest4.txt', encoding='utf-8', verbose=True) + Trying: + 'föö' + Expecting: + 'f\xf6\xf6' + ok + Trying: + 'bąr' + Expecting: + 'b\u0105r' + ok + 1 item passed all tests: + 2 tests in test_doctest4.txt + 2 tests in 1 item. + 2 passed. + Test passed. + TestResults(failed=0, attempted=2) + >>> doctest.master = None # Reset master. + >>> sys.argv = save_argv + >>> _colorize.COLORIZE = save_colorize +""" + +def test_testfile_errors(): r""" +Tests for error reporting in the testfile() function. + + >>> doctest.testfile('test_doctest_errors.txt', verbose=False) # doctest: +ELLIPSIS + ********************************************************************** + File "...test_doctest_errors.txt", line 4, in test_doctest_errors.txt + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...test_doctest_errors.txt", line 6, in test_doctest_errors.txt + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...test_doctest_errors.txt", line 11, in test_doctest_errors.txt + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ********************************************************************** + File "...test_doctest_errors.txt", line 13, in test_doctest_errors.txt + Failed example: + 2+*3 + Exception raised: + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + ********************************************************************** + 1 item had failures: + 4 of 5 in test_doctest_errors.txt + ***Test Failed*** 4 failures. + TestResults(failed=4, attempted=5) +""" + +class TestImporter(importlib.abc.MetaPathFinder): + + def find_spec(self, fullname, path, target=None): + return importlib.util.spec_from_file_location(fullname, path, loader=self) + + def get_data(self, path): + with open(path, mode='rb') as f: + return f.read() + + def exec_module(self, module): + raise ImportError + + def create_module(self, spec): + return None + +class TestHook: + + def __init__(self, pathdir): + self.sys_path = sys.path[:] + self.meta_path = sys.meta_path[:] + self.path_hooks = sys.path_hooks[:] + sys.path.append(pathdir) + sys.path_importer_cache.clear() + self.modules_before = sys.modules.copy() + self.importer = TestImporter() + sys.meta_path.append(self.importer) + + def remove(self): + sys.path[:] = self.sys_path + sys.meta_path[:] = self.meta_path + sys.path_hooks[:] = self.path_hooks + sys.path_importer_cache.clear() + sys.modules.clear() + sys.modules.update(self.modules_before) + + +@contextlib.contextmanager +def test_hook(pathdir): + hook = TestHook(pathdir) + try: + yield hook + finally: + hook.remove() + + +def test_lineendings(): r""" +*nix systems use \n line endings, while Windows systems use \r\n, and +old Mac systems used \r, which Python still recognizes as a line ending. Python +handles this using universal newline mode for reading files. Let's make +sure doctest does so (issue 8473) by creating temporary test files using each +of the three line disciplines. At least one will not match either the universal +newline \n or os.linesep for the platform the test is run on. + +Windows line endings first: + + >>> import tempfile, os + >>> fn = tempfile.mktemp() + >>> with open(fn, 'wb') as f: + ... f.write(b'Test:\r\n\r\n >>> x = 1 + 1\r\n\r\nDone.\r\n') + 35 + >>> doctest.testfile(fn, module_relative=False, verbose=False) + TestResults(failed=0, attempted=1) + >>> os.remove(fn) + +And now *nix line endings: + + >>> fn = tempfile.mktemp() + >>> with open(fn, 'wb') as f: + ... f.write(b'Test:\n\n >>> x = 1 + 1\n\nDone.\n') + 30 + >>> doctest.testfile(fn, module_relative=False, verbose=False) + TestResults(failed=0, attempted=1) + >>> os.remove(fn) + +And finally old Mac line endings: + + >>> fn = tempfile.mktemp() + >>> with open(fn, 'wb') as f: + ... f.write(b'Test:\r\r >>> x = 1 + 1\r\rDone.\r') + 30 + >>> doctest.testfile(fn, module_relative=False, verbose=False) + TestResults(failed=0, attempted=1) + >>> os.remove(fn) + +Now we test with a package loader that has a get_data method, since that +bypasses the standard universal newline handling so doctest has to do the +newline conversion itself; let's make sure it does so correctly (issue 1812). +We'll write a file inside the package that has all three kinds of line endings +in it, and use a package hook to install a custom loader; on any platform, +at least one of the line endings will raise a ValueError for inconsistent +whitespace if doctest does not correctly do the newline conversion. + + >>> from test.support import os_helper + >>> import shutil + >>> dn = tempfile.mkdtemp() + >>> pkg = os.path.join(dn, "doctest_testpkg") + >>> os.mkdir(pkg) + >>> os_helper.create_empty_file(os.path.join(pkg, "__init__.py")) + >>> fn = os.path.join(pkg, "doctest_testfile.txt") + >>> with open(fn, 'wb') as f: + ... f.write( + ... b'Test:\r\n\r\n' + ... b' >>> x = 1 + 1\r\n\r\n' + ... b'Done.\r\n' + ... b'Test:\n\n' + ... b' >>> x = 1 + 1\n\n' + ... b'Done.\n' + ... b'Test:\r\r' + ... b' >>> x = 1 + 1\r\r' + ... b'Done.\r' + ... ) + 95 + >>> with test_hook(dn): + ... doctest.testfile("doctest_testfile.txt", package="doctest_testpkg", verbose=False) + TestResults(failed=0, attempted=3) + >>> shutil.rmtree(dn) + +""" + +def test_testmod(): r""" +Tests for the testmod function. More might be useful, but for now we're just +testing the case raised by Issue 6195, where trying to doctest a C module would +fail with a UnicodeDecodeError because doctest tried to read the "source" lines +out of the binary module. + + >>> import unicodedata + >>> doctest.testmod(unicodedata, verbose=False) + TestResults(failed=0, attempted=0) +""" + +def test_testmod_errors(): r""" +Tests for error reporting in the testmod() function. + + >>> import test.test_doctest.sample_doctest_errors as mod + >>> doctest.testmod(mod, verbose=False) # doctest: +ELLIPSIS + ********************************************************************** + File "...sample_doctest_errors.py", line 5, in test.test_doctest.sample_doctest_errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line 7, in test.test_doctest.sample_doctest_errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line 16, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 2 + 2 + Expected: + 5 + Got: + 4 + ********************************************************************** + File "...sample_doctest_errors.py", line 18, in test.test_doctest.sample_doctest_errors.errors + Failed example: + 1/0 + Exception raised: + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + ********************************************************************** + File "...sample_doctest_errors.py", line 23, in test.test_doctest.sample_doctest_errors.errors + Failed example: + f() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + f() + ~^^ + File "", line 2, in f + 2 + '2' + ~~^~~~~ + TypeError: ... + ********************************************************************** + File "...sample_doctest_errors.py", line 25, in test.test_doctest.sample_doctest_errors.errors + Failed example: + g() + Exception raised: + Traceback (most recent call last): + File "", line 1, in + g() + ~^^ + File "...sample_doctest_errors.py", line 12, in g + [][0] # line 12 + ~~^^^ + IndexError: list index out of range + ********************************************************************** + File "...sample_doctest_errors.py", line 31, in test.test_doctest.sample_doctest_errors.syntax_error + Failed example: + 2+*3 + Exception raised: + File "", line 1 + 2+*3 + ^ + SyntaxError: invalid syntax + ********************************************************************** + 4 items had failures: + 2 of 2 in test.test_doctest.sample_doctest_errors + 2 of 2 in test.test_doctest.sample_doctest_errors.__test__.bad + 4 of 5 in test.test_doctest.sample_doctest_errors.errors + 1 of 1 in test.test_doctest.sample_doctest_errors.syntax_error + ***Test Failed*** 9 failures. + TestResults(failed=9, attempted=10) +""" + +try: + os.fsencode("foo-bär@baz.py") + supports_unicode = True +except UnicodeEncodeError: + # Skip the test: the filesystem encoding is unable to encode the filename + supports_unicode = False + +if supports_unicode: + def test_unicode(): """ +Check doctest with a non-ascii filename: + + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> doc = ''' + ... >>> raise Exception('clé') + ... ''' + ... + >>> parser = doctest.DocTestParser() + >>> test = parser.get_doctest(doc, {}, "foo-bär@baz", "foo-bär@baz.py", 0) + >>> test + + >>> runner = doctest.DocTestRunner(verbose=False) + >>> runner.run(test) # doctest: +ELLIPSIS + ********************************************************************** + File "foo-bär@baz.py", line 2, in foo-bär@baz + Failed example: + raise Exception('clé') + Exception raised: + Traceback (most recent call last): + File "", line 1, in + raise Exception('clé') + Exception: clé + TestResults(failed=1, attempted=1) + + >>> _colorize.COLORIZE = save_colorize + """ + + +@doctest_skip_if(not support.has_subprocess_support) +def test_CLI(): r""" +The doctest module can be used to run doctests against an arbitrary file. +These tests test this CLI functionality. + +We'll use the support module's script_helpers for this, and write a test files +to a temp dir to run the command against. Due to a current limitation in +script_helpers, though, we need a little utility function to turn the returned +output into something we can doctest against: + + >>> def normalize(s): + ... return '\n'.join(s.decode().splitlines()) + +With those preliminaries out of the way, we'll start with a file with two +simple tests and no errors. We'll run both the unadorned doctest command, and +the verbose version, and then check the output: + + >>> from test.support import script_helper + >>> from test.support.os_helper import temp_dir + >>> with temp_dir() as tmpdir: + ... fn = os.path.join(tmpdir, 'myfile.doc') + ... with open(fn, 'w', encoding='utf-8') as f: + ... _ = f.write('This is a very simple test file.\n') + ... _ = f.write(' >>> 1 + 1\n') + ... _ = f.write(' 2\n') + ... _ = f.write(' >>> "a"\n') + ... _ = f.write(" 'a'\n") + ... _ = f.write('\n') + ... _ = f.write('And that is it.\n') + ... rc1, out1, err1 = script_helper.assert_python_ok( + ... '-m', 'doctest', fn) + ... rc2, out2, err2 = script_helper.assert_python_ok( + ... '-m', 'doctest', '-v', fn) + +With no arguments and passing tests, we should get no output: + + >>> rc1, out1, err1 + (0, b'', b'') + +With the verbose flag, we should see the test output, but no error output: + + >>> rc2, err2 + (0, b'') + >>> print(normalize(out2)) + Trying: + 1 + 1 + Expecting: + 2 + ok + Trying: + "a" + Expecting: + 'a' + ok + 1 item passed all tests: + 2 tests in myfile.doc + 2 tests in 1 item. + 2 passed. + Test passed. + +Now we'll write a couple files, one with three tests, the other a python module +with two tests, both of the files having "errors" in the tests that can be made +non-errors by applying the appropriate doctest options to the run (ELLIPSIS in +the first file, NORMALIZE_WHITESPACE in the second). This combination will +allow thoroughly testing the -f and -o flags, as well as the doctest command's +ability to process more than one file on the command line and, since the second +file ends in '.py', its handling of python module files (as opposed to straight +text files). + + >>> from test.support import script_helper + >>> from test.support.os_helper import temp_dir + >>> with temp_dir() as tmpdir: + ... fn = os.path.join(tmpdir, 'myfile.doc') + ... with open(fn, 'w', encoding="utf-8") as f: + ... _ = f.write('This is another simple test file.\n') + ... _ = f.write(' >>> 1 + 1\n') + ... _ = f.write(' 2\n') + ... _ = f.write(' >>> "abcdef"\n') + ... _ = f.write(" 'a...f'\n") + ... _ = f.write(' >>> "ajkml"\n') + ... _ = f.write(" 'a...l'\n") + ... _ = f.write('\n') + ... _ = f.write('And that is it.\n') + ... fn2 = os.path.join(tmpdir, 'myfile2.py') + ... with open(fn2, 'w', encoding='utf-8') as f: + ... _ = f.write('def test_func():\n') + ... _ = f.write(' \"\"\"\n') + ... _ = f.write(' This is simple python test function.\n') + ... _ = f.write(' >>> 1 + 1\n') + ... _ = f.write(' 2\n') + ... _ = f.write(' >>> "abc def"\n') + ... _ = f.write(" 'abc def'\n") + ... _ = f.write("\n") + ... _ = f.write(' \"\"\"\n') + ... rc1, out1, err1 = script_helper.assert_python_failure( + ... '-m', 'doctest', fn, fn2) + ... rc2, out2, err2 = script_helper.assert_python_ok( + ... '-m', 'doctest', '-o', 'ELLIPSIS', fn) + ... rc3, out3, err3 = script_helper.assert_python_ok( + ... '-m', 'doctest', '-o', 'ELLIPSIS', + ... '-o', 'NORMALIZE_WHITESPACE', fn, fn2) + ... rc4, out4, err4 = script_helper.assert_python_failure( + ... '-m', 'doctest', '-f', fn, fn2) + ... rc5, out5, err5 = script_helper.assert_python_ok( + ... '-m', 'doctest', '-v', '-o', 'ELLIPSIS', + ... '-o', 'NORMALIZE_WHITESPACE', fn, fn2) + +Our first test run will show the errors from the first file (doctest stops if a +file has errors). Note that doctest test-run error output appears on stdout, +not stderr: + + >>> rc1, err1 + (1, b'') + >>> print(normalize(out1)) # doctest: +ELLIPSIS + ********************************************************************** + File "...myfile.doc", line 4, in myfile.doc + Failed example: + "abcdef" + Expected: + 'a...f' + Got: + 'abcdef' + ********************************************************************** + File "...myfile.doc", line 6, in myfile.doc + Failed example: + "ajkml" + Expected: + 'a...l' + Got: + 'ajkml' + ********************************************************************** + 1 item had failures: + 2 of 3 in myfile.doc + ***Test Failed*** 2 failures. + +With -o ELLIPSIS specified, the second run, against just the first file, should +produce no errors, and with -o NORMALIZE_WHITESPACE also specified, neither +should the third, which ran against both files: + + >>> rc2, out2, err2 + (0, b'', b'') + >>> rc3, out3, err3 + (0, b'', b'') + +The fourth run uses FAIL_FAST, so we should see only one error: + + >>> rc4, err4 + (1, b'') + >>> print(normalize(out4)) # doctest: +ELLIPSIS + ********************************************************************** + File "...myfile.doc", line 4, in myfile.doc + Failed example: + "abcdef" + Expected: + 'a...f' + Got: + 'abcdef' + ********************************************************************** + 1 item had failures: + 1 of 2 in myfile.doc + ***Test Failed*** 1 failure. + +The fifth test uses verbose with the two options, so we should get verbose +success output for the tests in both files: + + >>> rc5, err5 + (0, b'') + >>> print(normalize(out5)) + Trying: + 1 + 1 + Expecting: + 2 + ok + Trying: + "abcdef" + Expecting: + 'a...f' + ok + Trying: + "ajkml" + Expecting: + 'a...l' + ok + 1 item passed all tests: + 3 tests in myfile.doc + 3 tests in 1 item. + 3 passed. + Test passed. + Trying: + 1 + 1 + Expecting: + 2 + ok + Trying: + "abc def" + Expecting: + 'abc def' + ok + 1 item had no tests: + myfile2 + 1 item passed all tests: + 2 tests in myfile2.test_func + 2 tests in 2 items. + 2 passed. + Test passed. + +We should also check some typical error cases. + +Invalid file name: + + >>> rc, out, err = script_helper.assert_python_failure( + ... '-m', 'doctest', 'nosuchfile') + >>> rc, out + (1, b'') + >>> # The exact error message changes depending on the platform. + >>> print(normalize(err)) # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + FileNotFoundError: [Errno ...] ...nosuchfile... + +Invalid doctest option: + + >>> rc, out, err = script_helper.assert_python_failure( + ... '-m', 'doctest', '-o', 'nosuchoption') + >>> rc, out + (2, b'') + >>> print(normalize(err)) # doctest: +ELLIPSIS + usage...invalid...nosuchoption... + +""" + +def test_no_trailing_whitespace_stripping(): + r""" + The fancy reports had a bug for a long time where any trailing whitespace on + the reported diff lines was stripped, making it impossible to see the + differences in line reported as different that differed only in the amount of + trailing whitespace. The whitespace still isn't particularly visible unless + you use NDIFF, but at least it is now there to be found. + + *NOTE*: This snippet was intentionally put inside a raw string to get rid of + leading whitespace error in executing the example below + + >>> def f(x): + ... r''' + ... >>> print('\n'.join(['a ', 'b'])) + ... a + ... b + ... ''' + """ + """ + *NOTE*: These doctest are not placed in raw string to depict the trailing whitespace + using `\x20` + + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_NDIFF + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print('\n'.join(['a ', 'b'])) + Differences (ndiff with -expected +actual): + - a + + a + b + TestResults(failed=1, attempted=1) + + *NOTE*: `\x20` is for checking the trailing whitespace on the +a line above. + We cannot use actual spaces there, as a commit hook prevents from committing + patches that contain trailing whitespace. More info on Issue 24746. + """ + + +def test_run_doctestsuite_multiple_times(): + """ + It was not possible to run the same DocTestSuite multiple times + http://bugs.python.org/issue2604 + http://bugs.python.org/issue9736 + + >>> import unittest + >>> import test.test_doctest.sample_doctest + >>> suite = doctest.DocTestSuite(test.test_doctest.sample_doctest) + >>> suite.run(unittest.TestResult()) + + >>> suite.run(unittest.TestResult()) + + """ + + +def test_exception_with_note(note): + """ + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> test_exception_with_note('Note') + Traceback (most recent call last): + ... + ValueError: Text + Note + + >>> test_exception_with_note('Note') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Text + Note + + >>> test_exception_with_note('''Note + ... multiline + ... example''') + Traceback (most recent call last): + ValueError: Text + Note + multiline + example + + Different note will fail the test: + + >>> def f(x): + ... r''' + ... >>> exc = ValueError('message') + ... >>> exc.add_note('note') + ... >>> raise exc + ... Traceback (most recent call last): + ... ValueError: message + ... wrong note + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 5, in f + Failed example: + raise exc + Expected: + Traceback (most recent call last): + ValueError: message + wrong note + Got: + Traceback (most recent call last): + ... + ValueError: message + note + TestResults(failed=1, attempted=...) + + >>> _colorize.COLORIZE = save_colorize + """ + exc = ValueError('Text') + exc.add_note(note) + raise exc + + +def test_exception_with_multiple_notes(): + """ + >>> test_exception_with_multiple_notes() + Traceback (most recent call last): + ... + ValueError: Text + One + Two + """ + exc = ValueError('Text') + exc.add_note('One') + exc.add_note('Two') + raise exc + + +def test_syntax_error_with_note(cls, multiline=False): + """ + >>> test_syntax_error_with_note(SyntaxError) + Traceback (most recent call last): + ... + SyntaxError: error + Note + + >>> test_syntax_error_with_note(SyntaxError) + Traceback (most recent call last): + SyntaxError: error + Note + + >>> test_syntax_error_with_note(SyntaxError) + Traceback (most recent call last): + ... + File "x.py", line 23 + bad syntax + SyntaxError: error + Note + + >>> test_syntax_error_with_note(IndentationError) + Traceback (most recent call last): + ... + IndentationError: error + Note + + >>> test_syntax_error_with_note(TabError, multiline=True) + Traceback (most recent call last): + ... + TabError: error + Note + Line + """ + exc = cls("error", ("x.py", 23, None, "bad syntax")) + exc.add_note('Note\nLine' if multiline else 'Note') + raise exc + + +def test_syntax_error_subclass_from_stdlib(): + """ + `ParseError` is a subclass of `SyntaxError`, but it is not a builtin: + + >>> test_syntax_error_subclass_from_stdlib() + Traceback (most recent call last): + ... + xml.etree.ElementTree.ParseError: error + error + Note + Line + """ + from xml.etree.ElementTree import ParseError + exc = ParseError("error\nerror") + exc.add_note('Note\nLine') + raise exc + + +def test_syntax_error_with_incorrect_expected_note(): + """ + >>> import _colorize + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False + + >>> def f(x): + ... r''' + ... >>> exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) + ... >>> exc.add_note('note1') + ... >>> exc.add_note('note2') + ... >>> raise exc + ... Traceback (most recent call last): + ... SyntaxError: error + ... wrong note + ... ''' + >>> test = doctest.DocTestFinder().find(f)[0] + >>> doctest.DocTestRunner(verbose=False).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File "...", line 6, in f + Failed example: + raise exc + Expected: + Traceback (most recent call last): + SyntaxError: error + wrong note + Got: + Traceback (most recent call last): + ... + SyntaxError: error + note1 + note2 + TestResults(failed=1, attempted=...) + + >>> _colorize.COLORIZE = save_colorize + """ + + +def load_tests(loader, tests, pattern): + tests.addTest(doctest.DocTestSuite(doctest)) + tests.addTest(doctest.DocTestSuite()) + return tests + + +if __name__ == '__main__': + unittest.main(module='test.test_doctest.test_doctest') diff --git a/stdlib/test/test_doctest/test_doctest.txt b/stdlib/test/test_doctest/test_doctest.txt new file mode 100644 index 000000000..23446d1d2 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest.txt @@ -0,0 +1,17 @@ +This is a sample doctest in a text file. + +In this example, we'll rely on a global variable being set for us +already: + + >>> favorite_color + 'blue' + +We can make this fail by disabling the blank-line feature. + + >>> if 1: + ... print('a') + ... print() + ... print('b') + a + + b diff --git a/stdlib/test/test_doctest/test_doctest2.py b/stdlib/test/test_doctest/test_doctest2.py new file mode 100644 index 000000000..ab8a06967 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest2.py @@ -0,0 +1,126 @@ +"""A module to test whether doctest recognizes some 2.2 features, +like static and class methods. + +>>> print('yup') # 1 +yup + +We include some (random) encoded (utf-8) text in the text surrounding +the example. It should be ignored: + +ЉЊЈЁЂ + +""" + +import sys +import unittest +if sys.flags.optimize >= 2: + raise unittest.SkipTest("Cannot test docstrings with -O2") + +class C(object): + """Class C. + + >>> print(C()) # 2 + 42 + + + We include some (random) encoded (utf-8) text in the text surrounding + the example. It should be ignored: + + ЉЊЈЁЂ + + """ + + def __init__(self): + """C.__init__. + + >>> print(C()) # 3 + 42 + """ + + def __str__(self): + """ + >>> print(C()) # 4 + 42 + """ + return "42" + + class D(object): + """A nested D class. + + >>> print("In D!") # 5 + In D! + """ + + def nested(self): + """ + >>> print(3) # 6 + 3 + """ + + def getx(self): + """ + >>> c = C() # 7 + >>> c.x = 12 # 8 + >>> print(c.x) # 9 + -12 + """ + return -self._x + + def setx(self, value): + """ + >>> c = C() # 10 + >>> c.x = 12 # 11 + >>> print(c.x) # 12 + -12 + """ + self._x = value + + x = property(getx, setx, doc="""\ + >>> c = C() # 13 + >>> c.x = 12 # 14 + >>> print(c.x) # 15 + -12 + """) + + @staticmethod + def statm(): + """ + A static method. + + >>> print(C.statm()) # 16 + 666 + >>> print(C().statm()) # 17 + 666 + """ + return 666 + + @classmethod + def clsm(cls, val): + """ + A class method. + + >>> print(C.clsm(22)) # 18 + 22 + >>> print(C().clsm(23)) # 19 + 23 + """ + return val + + +class Test(unittest.TestCase): + def test_testmod(self): + import doctest, sys + EXPECTED = 19 + f, t = doctest.testmod(sys.modules[__name__]) + if f: + self.fail("%d of %d doctests failed" % (f, t)) + if t != EXPECTED: + self.fail("expected %d tests to run, not %d" % (EXPECTED, t)) + + +# Pollute the namespace with a bunch of imported functions and classes, +# to make sure they don't get tested. +from doctest import * + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_doctest/test_doctest2.txt b/stdlib/test/test_doctest/test_doctest2.txt new file mode 100644 index 000000000..76dab94a9 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest2.txt @@ -0,0 +1,14 @@ +This is a sample doctest in a text file. + +In this example, we'll rely on some silly setup: + + >>> import test.test_doctest.test_doctest + >>> test.test_doctest.test_doctest.sillySetup + True + +This test also has some (random) encoded (utf-8) unicode text: + + ЉЊЈЁЂ + +This doesn't cause a problem in the tect surrounding the examples, but +we include it here (in this test text file) to make sure. :) diff --git a/stdlib/test/test_doctest/test_doctest3.txt b/stdlib/test/test_doctest/test_doctest3.txt new file mode 100644 index 000000000..dd8557e57 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest3.txt @@ -0,0 +1,5 @@ + +Here we check that `__file__` is provided: + + >>> type(__file__) + diff --git a/stdlib/test/test_doctest/test_doctest4.txt b/stdlib/test/test_doctest/test_doctest4.txt new file mode 100644 index 000000000..0428e6f96 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest4.txt @@ -0,0 +1,11 @@ +This is a sample doctest in a text file that contains non-ASCII characters. +This file is encoded using UTF-8. + +In order to get this test to pass, we have to manually specify the +encoding. + + >>> 'föö' + 'f\xf6\xf6' + + >>> 'bąr' + 'b\u0105r' diff --git a/stdlib/test/test_doctest/test_doctest_errors.txt b/stdlib/test/test_doctest/test_doctest_errors.txt new file mode 100644 index 000000000..93c3c106e --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest_errors.txt @@ -0,0 +1,14 @@ +This is a sample doctest in a text file, in which all examples fail +or raise an exception. + + >>> 2 + 2 + 5 + >>> 1/0 + 1 + >>> def f(): + ... 2 + '2' + ... + >>> f() + 1 + >>> 2+*3 + 5 diff --git a/stdlib/test/test_doctest/test_doctest_skip.txt b/stdlib/test/test_doctest/test_doctest_skip.txt new file mode 100644 index 000000000..06c23d06e --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest_skip.txt @@ -0,0 +1,6 @@ +This is a sample doctest in a text file, in which all examples are skipped. + + >>> 2 + 2 # doctest: +SKIP + 5 + >>> 2 + 2 # doctest: +SKIP + 4 diff --git a/stdlib/test/test_doctest/test_doctest_skip2.txt b/stdlib/test/test_doctest/test_doctest_skip2.txt new file mode 100644 index 000000000..85e4938c3 --- /dev/null +++ b/stdlib/test/test_doctest/test_doctest_skip2.txt @@ -0,0 +1,6 @@ +This is a sample doctest in a text file, in which some examples are skipped. + + >>> 2 + 2 # doctest: +SKIP + 5 + >>> 2 + 2 + 4 diff --git a/stdlib/test/test_import/__init__.py b/stdlib/test/test_import/__init__.py new file mode 100644 index 000000000..2e1c6d72f --- /dev/null +++ b/stdlib/test/test_import/__init__.py @@ -0,0 +1,3407 @@ +import builtins +import errno +import glob +import json +import importlib.util +from importlib._bootstrap_external import _get_sourcefile +from importlib.machinery import ( + AppleFrameworkLoader, + BuiltinImporter, + ExtensionFileLoader, + FrozenImporter, + SourceFileLoader, +) +import marshal +import os +import py_compile +import random +import shutil +import stat +import subprocess +import sys +import textwrap +import threading +import time +import types +import unittest +from unittest import mock +import _imp + +from test.support import os_helper +from test.support import ( + STDLIB_DIR, + swap_attr, + swap_item, + cpython_only, + is_apple_mobile, + is_emscripten, + is_wasm32, + run_in_subinterp, + run_in_subinterp_with_config, + Py_TRACE_REFS, + requires_gil_enabled, + Py_GIL_DISABLED, + no_rerun, + force_not_colorized_test_class, + catch_unraisable_exception +) +from test.support.import_helper import ( + forget, make_legacy_pyc, unlink, unload, ready_to_import, + DirsOnSysPath, CleanImport, import_module) +from test.support.os_helper import ( + TESTFN, rmtree, temp_umask, TESTFN_UNENCODABLE) +from test.support import script_helper +from test.support import threading_helper +from test.test_importlib.util import uncache +from types import ModuleType +try: + import _testsinglephase +except ImportError: + _testsinglephase = None +try: + import _testmultiphase +except ImportError: + _testmultiphase = None +try: + import _interpreters +except ModuleNotFoundError: + _interpreters = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + + +skip_if_dont_write_bytecode = unittest.skipIf( + sys.dont_write_bytecode, + "test meaningful only when writing bytecode") + + +def _require_loader(module, loader, skip): + if isinstance(module, str): + module = __import__(module) + + MODULE_KINDS = { + BuiltinImporter: 'built-in', + ExtensionFileLoader: 'extension', + AppleFrameworkLoader: 'framework extension', + FrozenImporter: 'frozen', + SourceFileLoader: 'pure Python', + } + + expected = loader + assert isinstance(expected, type), expected + expected = MODULE_KINDS[expected] + + actual = module.__spec__.loader + if not isinstance(actual, type): + actual = type(actual) + actual = MODULE_KINDS[actual] + + if actual != expected: + err = f'expected module to be {expected}, got {module.__spec__}' + if skip: + raise unittest.SkipTest(err) + raise Exception(err) + return module + +def require_builtin(module, *, skip=False): + module = _require_loader(module, BuiltinImporter, skip) + assert module.__spec__.origin == 'built-in', module.__spec__ + +def require_extension(module, *, skip=False): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + _require_loader(module, AppleFrameworkLoader, skip) + else: + _require_loader(module, ExtensionFileLoader, skip) + +def require_frozen(module, *, skip=True): + module = _require_loader(module, FrozenImporter, skip) + assert module.__spec__.origin == 'frozen', module.__spec__ + +def require_pure_python(module, *, skip=False): + _require_loader(module, SourceFileLoader, skip) + +def create_extension_loader(modname, filename): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + return AppleFrameworkLoader(modname, filename) + else: + return ExtensionFileLoader(modname, filename) + +def import_extension_from_file(modname, filename, *, put_in_sys_modules=True): + loader = create_extension_loader(modname, filename) + spec = importlib.util.spec_from_loader(modname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + if put_in_sys_modules: + sys.modules[modname] = module + return module + + +def remove_files(name): + for f in (name + ".py", + name + ".pyc", + name + ".pyw", + name + "$py.class"): + unlink(f) + rmtree('__pycache__') + + +if _testsinglephase is not None: + def restore__testsinglephase(*, _orig=_testsinglephase): + # We started with the module imported and want to restore + # it to its nominal state. + sys.modules.pop('_testsinglephase', None) + _orig._clear_globals() + origin = _orig.__spec__.origin + _testinternalcapi.clear_extension('_testsinglephase', origin) + import _testsinglephase + + +def requires_singlephase_init(meth): + """Decorator to skip if single-phase init modules are not supported.""" + if not isinstance(meth, type): + def meth(self, _meth=meth): + try: + return _meth(self) + finally: + restore__testsinglephase() + meth = cpython_only(meth) + msg = "gh-117694: free-threaded build does not currently support single-phase init modules in sub-interpreters" + meth = requires_gil_enabled(msg)(meth) + return unittest.skipIf(_testsinglephase is None, + 'test requires _testsinglephase module')(meth) + + +def requires_subinterpreters(meth): + """Decorator to skip a test if subinterpreters are not supported.""" + return unittest.skipIf(_interpreters is None, + 'subinterpreters required')(meth) + + +class ModuleSnapshot(types.SimpleNamespace): + """A representation of a module for testing. + + Fields: + + * id - the module's object ID + * module - the actual module or an adequate substitute + * __file__ + * __spec__ + * name + * origin + * ns - a copy (dict) of the module's __dict__ (or None) + * ns_id - the object ID of the module's __dict__ + * cached - the sys.modules[mod.__spec__.name] entry (or None) + * cached_id - the object ID of the sys.modules entry (or None) + + In cases where the value is not available (e.g. due to serialization), + the value will be None. + """ + _fields = tuple('id module ns ns_id cached cached_id'.split()) + + @classmethod + def from_module(cls, mod): + name = mod.__spec__.name + cached = sys.modules.get(name) + return cls( + id=id(mod), + module=mod, + ns=types.SimpleNamespace(**mod.__dict__), + ns_id=id(mod.__dict__), + cached=cached, + cached_id=id(cached), + ) + + SCRIPT = textwrap.dedent(''' + {imports} + + name = {name!r} + + {prescript} + + mod = {name} + + {body} + + {postscript} + ''') + IMPORTS = textwrap.dedent(''' + import sys + ''').strip() + SCRIPT_BODY = textwrap.dedent(''' + # Capture the snapshot data. + cached = sys.modules.get(name) + snapshot = dict( + id=id(mod), + module=dict( + __file__=mod.__file__, + __spec__=dict( + name=mod.__spec__.name, + origin=mod.__spec__.origin, + ), + ), + ns=None, + ns_id=id(mod.__dict__), + cached=None, + cached_id=id(cached) if cached else None, + ) + ''').strip() + CLEANUP_SCRIPT = textwrap.dedent(''' + # Clean up the module. + sys.modules.pop(name, None) + ''').strip() + + @classmethod + def build_script(cls, name, *, + prescript=None, + import_first=False, + postscript=None, + postcleanup=False, + ): + if postcleanup is True: + postcleanup = cls.CLEANUP_SCRIPT + elif isinstance(postcleanup, str): + postcleanup = textwrap.dedent(postcleanup).strip() + postcleanup = cls.CLEANUP_SCRIPT + os.linesep + postcleanup + else: + postcleanup = '' + prescript = textwrap.dedent(prescript).strip() if prescript else '' + postscript = textwrap.dedent(postscript).strip() if postscript else '' + + if postcleanup: + if postscript: + postscript = postscript + os.linesep * 2 + postcleanup + else: + postscript = postcleanup + + if import_first: + prescript += textwrap.dedent(f''' + + # Now import the module. + assert name not in sys.modules + import {name}''') + + return cls.SCRIPT.format( + imports=cls.IMPORTS.strip(), + name=name, + prescript=prescript.strip(), + body=cls.SCRIPT_BODY.strip(), + postscript=postscript, + ) + + @classmethod + def parse(cls, text): + raw = json.loads(text) + mod = raw['module'] + mod['__spec__'] = types.SimpleNamespace(**mod['__spec__']) + raw['module'] = types.SimpleNamespace(**mod) + return cls(**raw) + + @classmethod + def from_subinterp(cls, name, interpid=None, *, pipe=None, **script_kwds): + if pipe is not None: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + pipe = os.pipe() + try: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + finally: + r, w = pipe + os.close(r) + os.close(w) + + @classmethod + def _from_subinterp(cls, name, interpid, pipe, script_kwargs): + r, w = pipe + + # Build the script. + postscript = textwrap.dedent(f''' + # Send the result over the pipe. + import json + import os + os.write({w}, json.dumps(snapshot).encode()) + + ''') + _postscript = script_kwargs.get('postscript') + if _postscript: + _postscript = textwrap.dedent(_postscript).lstrip() + postscript += _postscript + script_kwargs['postscript'] = postscript.strip() + script = cls.build_script(name, **script_kwargs) + + # Run the script. + if interpid is None: + ret = run_in_subinterp(script) + if ret != 0: + raise AssertionError(f'{ret} != 0') + else: + _interpreters.run_string(interpid, script) + + # Parse the results. + text = os.read(r, 1000) + return cls.parse(text.decode()) + + +@force_not_colorized_test_class +class ImportTests(unittest.TestCase): + + def setUp(self): + remove_files(TESTFN) + importlib.invalidate_caches() + + def tearDown(self): + unload(TESTFN) + + def test_import_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + import something_that_should_not_exist_anywhere + + def test_from_import_missing_module_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + from something_that_should_not_exist_anywhere import blah + + def test_from_import_missing_attr_raises_ImportError(self): + with self.assertRaises(ImportError): + from importlib import something_that_should_not_exist_anywhere + + def test_from_import_missing_attr_has_name_and_path(self): + with CleanImport('os'): + import os + with self.assertRaises(ImportError) as cm: + from os import i_dont_exist + self.assertEqual(cm.exception.name, 'os') + self.assertEqual(cm.exception.path, os.__file__) + self.assertRegex(str(cm.exception), r"cannot import name 'i_dont_exist' from 'os' \(.*os.py\)") + + @cpython_only + def test_from_import_missing_attr_has_name_and_so_path(self): + _testcapi = import_module("_testcapi") + with self.assertRaises(ImportError) as cm: + from _testcapi import i_dont_exist + self.assertEqual(cm.exception.name, '_testcapi') + if hasattr(_testcapi, "__file__"): + # The path on the exception is strictly the spec origin, not the + # module's __file__. For most cases, these are the same; but on + # iOS, the Framework relocation process results in the exception + # being raised from the spec location. + self.assertEqual(cm.exception.path, _testcapi.__spec__.origin) + self.assertRegex( + str(cm.exception), + r"cannot import name 'i_dont_exist' from '_testcapi' \(.*(\.(so|pyd))?\)" + ) + else: + self.assertEqual( + str(cm.exception), + "cannot import name 'i_dont_exist' from '_testcapi' (unknown location)" + ) + + def test_from_import_missing_attr_has_name(self): + with self.assertRaises(ImportError) as cm: + # _warning has no path as it's a built-in module. + from _warning import i_dont_exist + self.assertEqual(cm.exception.name, '_warning') + self.assertIsNone(cm.exception.path) + + def test_from_import_missing_attr_path_is_canonical(self): + with self.assertRaises(ImportError) as cm: + from os.path import i_dont_exist + self.assertIn(cm.exception.name, {'posixpath', 'ntpath'}) + self.assertIsNotNone(cm.exception) + + def test_from_import_star_invalid_type(self): + import re + with ready_to_import() as (name, path): + with open(path, 'w', encoding='utf-8') as f: + f.write("__all__ = [b'invalid_type']") + globals = {} + with self.assertRaisesRegex( + TypeError, f"{re.escape(name)}\\.__all__ must be str" + ): + exec(f"from {name} import *", globals) + self.assertNotIn(b"invalid_type", globals) + with ready_to_import() as (name, path): + with open(path, 'w', encoding='utf-8') as f: + f.write("globals()[b'invalid_type'] = object()") + globals = {} + with self.assertRaisesRegex( + TypeError, f"{re.escape(name)}\\.__dict__ must be str" + ): + exec(f"from {name} import *", globals) + self.assertNotIn(b"invalid_type", globals) + + def test_case_sensitivity(self): + # Brief digression to test that import is case-sensitive: if we got + # this far, we know for sure that "random" exists. + with self.assertRaises(ImportError): + import RAnDoM + + def test_double_const(self): + # Importing double_const checks that float constants + # serialized by marshal as PYC files don't lose precision + # (SF bug 422177). + from test.test_import.data import double_const + unload('test.test_import.data.double_const') + from test.test_import.data import double_const # noqa: F811 + + def test_import(self): + def test_with_extension(ext): + # The extension is normally ".py", perhaps ".pyw". + source = TESTFN + ext + pyc = TESTFN + ".pyc" + + with open(source, "w", encoding='utf-8') as f: + print("# This tests Python's ability to import a", + ext, "file.", file=f) + a = random.randrange(1000) + b = random.randrange(1000) + print("a =", a, file=f) + print("b =", b, file=f) + + if TESTFN in sys.modules: + del sys.modules[TESTFN] + importlib.invalidate_caches() + try: + try: + mod = __import__(TESTFN) + except ImportError as err: + self.fail("import from %s failed: %s" % (ext, err)) + + self.assertEqual(mod.a, a, + "module loaded (%s) but contents invalid" % mod) + self.assertEqual(mod.b, b, + "module loaded (%s) but contents invalid" % mod) + finally: + forget(TESTFN) + unlink(source) + unlink(pyc) + + sys.path.insert(0, os.curdir) + try: + test_with_extension(".py") + if sys.platform.startswith("win"): + for ext in [".PY", ".Py", ".pY", ".pyw", ".PYW", ".pYw"]: + test_with_extension(ext) + finally: + del sys.path[0] + + def test_module_with_large_stack(self, module='longlist'): + # Regression test for http://bugs.python.org/issue561858. + filename = module + '.py' + + # Create a file with a list of 65000 elements. + with open(filename, 'w', encoding='utf-8') as f: + f.write('d = [\n') + for i in range(65000): + f.write('"",\n') + f.write(']') + + try: + # Compile & remove .py file; we only need .pyc. + # Bytecode must be relocated from the PEP 3147 bytecode-only location. + py_compile.compile(filename) + finally: + unlink(filename) + + # Need to be able to load from current dir. + sys.path.append('') + importlib.invalidate_caches() + + namespace = {} + try: + make_legacy_pyc(filename) + # This used to crash. + exec('import ' + module, None, namespace) + finally: + # Cleanup. + del sys.path[-1] + unlink(filename + 'c') + unlink(filename + 'o') + + # Remove references to the module (unload the module) + namespace.clear() + try: + del sys.modules[module] + except KeyError: + pass + + def test_failing_import_sticks(self): + source = TESTFN + ".py" + with open(source, "w", encoding='utf-8') as f: + print("a = 1/0", file=f) + + # New in 2.4, we shouldn't be able to import that no matter how often + # we try. + sys.path.insert(0, os.curdir) + importlib.invalidate_caches() + if TESTFN in sys.modules: + del sys.modules[TESTFN] + try: + for i in [1, 2, 3]: + self.assertRaises(ZeroDivisionError, __import__, TESTFN) + self.assertNotIn(TESTFN, sys.modules, + "damaged module in sys.modules on %i try" % i) + finally: + del sys.path[0] + remove_files(TESTFN) + + def test_import_name_binding(self): + # import x.y.z binds x in the current namespace + import test as x + import test.support + self.assertIs(x, test, x.__name__) + self.assertHasAttr(test.support, "__file__") + + # import x.y.z as w binds z as w + import test.support as y + self.assertIs(y, test.support, y.__name__) + + def test_issue31286(self): + # import in a 'finally' block resulted in SystemError + try: + x = ... + finally: + import test.support.script_helper as x + + # import in a 'while' loop resulted in stack overflow + i = 0 + while i < 10: + import test.support.script_helper as x + i += 1 + + # import in a 'for' loop resulted in segmentation fault + for i in range(2): + import test.support.script_helper as x # noqa: F811 + + def test_failing_reload(self): + # A failing reload should leave the module object in sys.modules. + source = TESTFN + os.extsep + "py" + with open(source, "w", encoding='utf-8') as f: + f.write("a = 1\nb=2\n") + + sys.path.insert(0, os.curdir) + try: + mod = __import__(TESTFN) + self.assertIn(TESTFN, sys.modules) + self.assertEqual(mod.a, 1, "module has wrong attribute values") + self.assertEqual(mod.b, 2, "module has wrong attribute values") + + # On WinXP, just replacing the .py file wasn't enough to + # convince reload() to reparse it. Maybe the timestamp didn't + # move enough. We force it to get reparsed by removing the + # compiled file too. + remove_files(TESTFN) + + # Now damage the module. + with open(source, "w", encoding='utf-8') as f: + f.write("a = 10\nb=20//0\n") + + self.assertRaises(ZeroDivisionError, importlib.reload, mod) + # But we still expect the module to be in sys.modules. + mod = sys.modules.get(TESTFN) + self.assertIsNotNone(mod, "expected module to be in sys.modules") + + # We should have replaced a w/ 10, but the old b value should + # stick. + self.assertEqual(mod.a, 10, "module has wrong attribute values") + self.assertEqual(mod.b, 2, "module has wrong attribute values") + + finally: + del sys.path[0] + remove_files(TESTFN) + unload(TESTFN) + + @skip_if_dont_write_bytecode + def test_file_to_source(self): + # check if __file__ points to the source file where available + source = TESTFN + ".py" + with open(source, "w", encoding='utf-8') as f: + f.write("test = None\n") + + sys.path.insert(0, os.curdir) + try: + mod = __import__(TESTFN) + self.assertEndsWith(mod.__file__, '.py') + os.remove(source) + del sys.modules[TESTFN] + make_legacy_pyc(source) + importlib.invalidate_caches() + mod = __import__(TESTFN) + base, ext = os.path.splitext(mod.__file__) + self.assertEqual(ext, '.pyc') + finally: + del sys.path[0] + remove_files(TESTFN) + if TESTFN in sys.modules: + del sys.modules[TESTFN] + + def test_import_by_filename(self): + path = os.path.abspath(TESTFN) + encoding = sys.getfilesystemencoding() + try: + path.encode(encoding) + except UnicodeEncodeError: + self.skipTest('path is not encodable to {}'.format(encoding)) + with self.assertRaises(ImportError) as c: + __import__(path) + + def test_import_in_del_does_not_crash(self): + # Issue 4236 + testfn = script_helper.make_script('', TESTFN, textwrap.dedent("""\ + import sys + class C: + def __del__(self): + import importlib + sys.argv.insert(0, C()) + """)) + script_helper.assert_python_ok(testfn) + + @skip_if_dont_write_bytecode + def test_timestamp_overflow(self): + # A modification timestamp larger than 2**32 should not be a problem + # when importing a module (issue #11235). + sys.path.insert(0, os.curdir) + try: + source = TESTFN + ".py" + compiled = importlib.util.cache_from_source(source) + with open(source, 'w', encoding='utf-8') as f: + pass + try: + os.utime(source, (2 ** 33 - 5, 2 ** 33 - 5)) + except OverflowError: + self.skipTest("cannot set modification time to large integer") + except OSError as e: + if e.errno not in (getattr(errno, 'EOVERFLOW', None), + getattr(errno, 'EINVAL', None)): + raise + self.skipTest("cannot set modification time to large integer ({})".format(e)) + __import__(TESTFN) + # The pyc file was created. + os.stat(compiled) + finally: + del sys.path[0] + remove_files(TESTFN) + + def test_bogus_fromlist(self): + try: + __import__('http', fromlist=['blah']) + except ImportError: + self.fail("fromlist must allow bogus names") + + @cpython_only + def test_delete_builtins_import(self): + args = ["-c", "del __builtins__.__import__; import os"] + popen = script_helper.spawn_python(*args) + stdout, stderr = popen.communicate() + self.assertIn(b"ImportError", stdout) + + def test_from_import_message_for_nonexistent_module(self): + with self.assertRaisesRegex(ImportError, "^No module named 'bogus'"): + from bogus import foo + + def test_from_import_message_for_existing_module(self): + with self.assertRaisesRegex(ImportError, "^cannot import name 'bogus'"): + from re import bogus + + def test_from_import_AttributeError(self): + # Issue #24492: trying to import an attribute that raises an + # AttributeError should lead to an ImportError. + class AlwaysAttributeError: + def __getattr__(self, _): + raise AttributeError + + module_name = 'test_from_import_AttributeError' + self.addCleanup(unload, module_name) + sys.modules[module_name] = AlwaysAttributeError() + with self.assertRaises(ImportError) as cm: + from test_from_import_AttributeError import does_not_exist + + self.assertEqual(str(cm.exception), + "cannot import name 'does_not_exist' from '' (unknown location)") + + @cpython_only + def test_issue31492(self): + # There shouldn't be an assertion failure in case of failing to import + # from a module with a bad __name__ attribute, or in case of failing + # to access an attribute of such a module. + with swap_attr(os, '__name__', None): + with self.assertRaises(ImportError): + from os import does_not_exist + + with self.assertRaises(AttributeError): + os.does_not_exist + + @threading_helper.requires_working_threading() + def test_concurrency(self): + # bpo 38091: this is a hack to slow down the code that calls + # has_deadlock(); the logic was itself sometimes deadlocking. + def delay_has_deadlock(frame, event, arg): + if event == 'call' and frame.f_code.co_name == 'has_deadlock': + time.sleep(0.1) + + sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'data')) + try: + exc = None + def run(): + sys.settrace(delay_has_deadlock) + event.wait() + try: + import package + except BaseException as e: + nonlocal exc + exc = e + sys.settrace(None) + + for i in range(10): + event = threading.Event() + threads = [threading.Thread(target=run) for x in range(2)] + try: + with threading_helper.start_threads(threads, event.set): + time.sleep(0) + finally: + sys.modules.pop('package', None) + sys.modules.pop('package.submodule', None) + if exc is not None: + raise exc + finally: + del sys.path[0] + + @unittest.skipUnless(sys.platform == "win32", "Windows-specific") + def test_dll_dependency_import(self): + from _winapi import GetModuleFileName + dllname = GetModuleFileName(sys.dllhandle) + pydname = importlib.util.find_spec("_sqlite3").origin + depname = os.path.join( + os.path.dirname(pydname), + "sqlite3{}.dll".format("_d" if "_d" in pydname else "")) + + with os_helper.temp_dir() as tmp: + tmp2 = os.path.join(tmp, "DLLs") + os.mkdir(tmp2) + + pyexe = os.path.join(tmp, os.path.basename(sys.executable)) + shutil.copy(sys.executable, pyexe) + shutil.copy(dllname, tmp) + for f in glob.glob(os.path.join(glob.escape(sys.prefix), "vcruntime*.dll")): + shutil.copy(f, tmp) + + shutil.copy(pydname, tmp2) + + env = None + env = {k.upper(): os.environ[k] for k in os.environ} + env["PYTHONPATH"] = tmp2 + ";" + STDLIB_DIR + + # Test 1: import with added DLL directory + subprocess.check_call([ + pyexe, "-Sc", ";".join([ + "import os", + "p = os.add_dll_directory({!r})".format( + os.path.dirname(depname)), + "import _sqlite3", + "p.close" + ])], + stderr=subprocess.STDOUT, + env=env, + cwd=os.path.dirname(pyexe)) + + # Test 2: import with DLL adjacent to PYD + shutil.copy(depname, tmp2) + subprocess.check_call([pyexe, "-Sc", "import _sqlite3"], + stderr=subprocess.STDOUT, + env=env, + cwd=os.path.dirname(pyexe)) + + def test_issue105979(self): + # this used to crash + with self.assertRaises(ImportError) as cm: + _imp.get_frozen_object("x", b"6\'\xd5Cu\x12") + self.assertIn("Frozen object named 'x' is invalid", + str(cm.exception)) + + def test_frozen_module_from_import_error(self): + with self.assertRaises(ImportError) as cm: + from os import this_will_never_exist + self.assertIn( + f"cannot import name 'this_will_never_exist' from 'os' ({os.__file__})", + str(cm.exception), + ) + with self.assertRaises(ImportError) as cm: + from sys import this_will_never_exist + self.assertIn( + "cannot import name 'this_will_never_exist' from 'sys' (unknown location)", + str(cm.exception), + ) + + scripts = [ + """ +import os +os.__spec__.has_location = False +os.__file__ = [] +from os import this_will_never_exist +""", + """ +import os +os.__spec__.has_location = False +del os.__file__ +from os import this_will_never_exist +""", + """ +import os +os.__spec__.origin = [] +os.__file__ = [] +from os import this_will_never_exist +""" + ] + for script in scripts: + with self.subTest(script=script): + expected_error = ( + b"cannot import name 'this_will_never_exist' " + b"from 'os' (unknown location)" + ) + popen = script_helper.spawn_python("-c", script) + stdout, stderr = popen.communicate() + self.assertIn(expected_error, stdout) + + def test_non_module_from_import_error(self): + prefix = """ +import sys +class NotAModule: ... +nm = NotAModule() +nm.symbol = 123 +sys.modules["not_a_module"] = nm +from not_a_module import symbol +""" + scripts = [ + prefix + "from not_a_module import missing_symbol", + prefix + "nm.__spec__ = []\nfrom not_a_module import missing_symbol", + ] + for script in scripts: + with self.subTest(script=script): + expected_error = ( + b"ImportError: cannot import name 'missing_symbol' from " + b"'' (unknown location)" + ) + popen = script_helper.spawn_python("-c", script) + stdout, stderr = popen.communicate() + self.assertIn(expected_error, stdout) + + def test_script_shadowing_stdlib(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no error at all when using -P + popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') + + tmp_child = os.path.join(tmp, "child") + os.mkdir(tmp_child) + + # test the logic with different cwd + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + def test_package_shadowing_stdlib_module(self): + script_errors = [ + ( + "fractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + os.mkdir(os.path.join(tmp, "fractions")) + with open( + os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8' + ) as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write("import fractions; fractions.shadowing_module\n") + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*[\\/]fractions[\\/]+__init__.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'main', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no shadowing at all when using -P + popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") + + def test_script_shadowing_third_party(self): + script_errors = [ + ( + "import numpy\nnumpy.array", + rb"AttributeError: module 'numpy' has no attribute 'array'" + ), + ( + "from numpy import array", + rb"ImportError: cannot import name 'array' from 'numpy'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*numpy.py' if it has the " + rb"same name as a library you intended to import\)\s+\z" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_maybe_not_shadowing_third_party(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write("this_script_does_not_attempt_to_import_numpy = True") + + expected_error = ( + rb"AttributeError: module 'numpy' has no attribute 'attr'\s+\z" + ) + popen = script_helper.spawn_python('-c', 'import numpy; numpy.attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + expected_error = ( + rb"ImportError: cannot import name 'attr' from 'numpy' \(.*\)\s+\z" + ) + popen = script_helper.spawn_python('-c', 'from numpy import attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_shadowing_stdlib_edge_cases(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + + # Unhashable str subclass + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + fractions.Fraction +except TypeError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertIn(b"unhashable type: 'substr'", stdout.rstrip()) + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + from fractions import Fraction +except TypeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertIn(b"unhashable type: 'substr'", stdout.rstrip()) + + # Various issues with sys module + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +sys.path = [0] +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +sys.path = [0] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + # Various issues with origin + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +del fractions.__spec__.origin +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +fractions.__spec__.origin = [] +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +del fractions.__spec__.origin +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +fractions.__spec__.origin = [] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + @unittest.skipIf(sys.platform == 'win32', 'Cannot delete cwd on Windows') + @unittest.skipIf(sys.platform == 'sunos5', 'Cannot delete cwd on Solaris/Illumos') + @unittest.skipIf(sys.platform.startswith('aix'), 'Cannot delete cwd on AIX') + def test_script_shadowing_stdlib_cwd_failure(self): + with os_helper.temp_dir() as tmp: + subtmp = os.path.join(tmp, "subtmp") + os.mkdir(subtmp) + with open(os.path.join(subtmp, "main.py"), "w", encoding='utf-8') as f: + f.write(f""" +import sys +assert sys.path[0] == '' + +import os +import shutil +shutil.rmtree(os.getcwd()) + +os.does_not_exist +""") + # Use -c to ensure sys.path[0] is "" + popen = script_helper.spawn_python("-c", "import main", cwd=subtmp) + stdout, stderr = popen.communicate() + expected_error = rb"AttributeError: module 'os' has no attribute 'does_not_exist'" + self.assertRegex(stdout, expected_error) + + def test_script_shadowing_stdlib_sys_path_modification(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write('import sys; sys.path.insert(0, "this_folder_does_not_exist")\n') + f.write(script) + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_create_dynamic_null(self): + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec: + name = "a\x00b" + origin = "abc" + _imp.create_dynamic(Spec()) + + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec2: + name = "abc" + origin = "a\x00b" + _imp.create_dynamic(Spec2()) + + +@skip_if_dont_write_bytecode +class FilePermissionTests(unittest.TestCase): + # tests for file mode on cached .pyc files + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @unittest.skipIf( + is_wasm32, + "Emscripten's/WASI's umask is a stub." + ) + def test_creation_mode(self): + mask = 0o022 + with temp_umask(mask), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + module = __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + # Check that the umask is respected, and the executable bits + # aren't set. + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), + oct(0o666 & ~mask)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @os_helper.skip_unless_working_chmod + def test_cached_mode_issue_2051(self): + # permissions of .pyc should match those of .py, regardless of mask + mode = 0o600 + with temp_umask(0o022), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + os.chmod(path, mode) + __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), oct(mode)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @os_helper.skip_unless_working_chmod + def test_cached_readonly(self): + mode = 0o400 + with temp_umask(0o022), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + os.chmod(path, mode) + __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + expected = mode | 0o200 # Account for fix for issue #6074 + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), oct(expected)) + + def test_pyc_always_writable(self): + # Initially read-only .pyc files on Windows used to cause problems + # with later updates, see issue #6074 for details + with ready_to_import() as (name, path): + # Write a Python file, make it read-only and import it + with open(path, 'w', encoding='utf-8') as f: + f.write("x = 'original'\n") + # Tweak the mtime of the source to ensure pyc gets updated later + s = os.stat(path) + os.utime(path, (s.st_atime, s.st_mtime-100000000)) + os.chmod(path, 0o400) + m = __import__(name) + self.assertEqual(m.x, 'original') + # Change the file and then reimport it + os.chmod(path, 0o600) + with open(path, 'w', encoding='utf-8') as f: + f.write("x = 'rewritten'\n") + unload(name) + importlib.invalidate_caches() + m = __import__(name) + self.assertEqual(m.x, 'rewritten') + # Now delete the source file and check the pyc was rewritten + unlink(path) + unload(name) + importlib.invalidate_caches() + bytecode_only = path + "c" + os.rename(importlib.util.cache_from_source(path), bytecode_only) + m = __import__(name) + self.assertEqual(m.x, 'rewritten') + + +class PycRewritingTests(unittest.TestCase): + # Test that the `co_filename` attribute on code objects always points + # to the right file, even when various things happen (e.g. both the .py + # and the .pyc file are renamed). + + module_name = "unlikely_module_name" + module_source = """ +import sys +code_filename = sys._getframe().f_code.co_filename +module_filename = __file__ +constant = 1000 +def func(): + pass +func_filename = func.__code__.co_filename +""" + dir_name = os.path.abspath(TESTFN) + file_name = os.path.join(dir_name, module_name) + os.extsep + "py" + compiled_name = importlib.util.cache_from_source(file_name) + + def setUp(self): + self.sys_path = sys.path[:] + self.orig_module = sys.modules.pop(self.module_name, None) + os.mkdir(self.dir_name) + with open(self.file_name, "w", encoding='utf-8') as f: + f.write(self.module_source) + sys.path.insert(0, self.dir_name) + importlib.invalidate_caches() + + def tearDown(self): + sys.path[:] = self.sys_path + if self.orig_module is not None: + sys.modules[self.module_name] = self.orig_module + else: + unload(self.module_name) + unlink(self.file_name) + unlink(self.compiled_name) + rmtree(self.dir_name) + + def import_module(self): + ns = globals() + __import__(self.module_name, ns, ns) + return sys.modules[self.module_name] + + def test_basics(self): + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + del sys.modules[self.module_name] + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + + def test_incorrect_code_name(self): + py_compile.compile(self.file_name, dfile="another_module.py") + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + + def test_module_without_source(self): + target = "another_module.py" + py_compile.compile(self.file_name, dfile=target) + os.remove(self.file_name) + pyc_file = make_legacy_pyc(self.file_name) + importlib.invalidate_caches() + mod = self.import_module() + self.assertEqual(mod.module_filename, pyc_file) + self.assertEqual(mod.code_filename, target) + self.assertEqual(mod.func_filename, target) + + def test_foreign_code(self): + py_compile.compile(self.file_name) + with open(self.compiled_name, "rb") as f: + header = f.read(16) + code = marshal.load(f) + constants = list(code.co_consts) + foreign_code = importlib.import_module.__code__ + pos = constants.index(1000) + constants[pos] = foreign_code + code = code.replace(co_consts=tuple(constants)) + with open(self.compiled_name, "wb") as f: + f.write(header) + marshal.dump(code, f) + mod = self.import_module() + self.assertEqual(mod.constant.co_filename, foreign_code.co_filename) + + +class PathsTests(unittest.TestCase): + SAMPLES = ('test', 'test\u00e4\u00f6\u00fc\u00df', 'test\u00e9\u00e8', + 'test\u00b0\u00b3\u00b2') + path = TESTFN + + def setUp(self): + os.mkdir(self.path) + self.syspath = sys.path[:] + + def tearDown(self): + rmtree(self.path) + sys.path[:] = self.syspath + + # Regression test for http://bugs.python.org/issue1293. + def test_trailing_slash(self): + with open(os.path.join(self.path, 'test_trailing_slash.py'), + 'w', encoding='utf-8') as f: + f.write("testdata = 'test_trailing_slash'") + sys.path.append(self.path+'/') + mod = __import__("test_trailing_slash") + self.assertEqual(mod.testdata, 'test_trailing_slash') + unload("test_trailing_slash") + + # Regression test for http://bugs.python.org/issue3677. + @unittest.skipUnless(sys.platform == 'win32', 'Windows-specific') + def test_UNC_path(self): + with open(os.path.join(self.path, 'test_unc_path.py'), 'w') as f: + f.write("testdata = 'test_unc_path'") + importlib.invalidate_caches() + # Create the UNC path, like \\myhost\c$\foo\bar. + path = os.path.abspath(self.path) + import socket + hn = socket.gethostname() + drive = path[0] + unc = "\\\\%s\\%s$"%(hn, drive) + unc += path[2:] + try: + os.listdir(unc) + except OSError as e: + if e.errno in (errno.EPERM, errno.EACCES, errno.ENOENT): + # See issue #15338 + self.skipTest("cannot access administrative share %r" % (unc,)) + raise + sys.path.insert(0, unc) + try: + mod = __import__("test_unc_path") + except ImportError as e: + self.fail("could not import 'test_unc_path' from %r: %r" + % (unc, e)) + self.assertEqual(mod.testdata, 'test_unc_path') + self.assertStartsWith(mod.__file__, unc) + unload("test_unc_path") + + +class RelativeImportTests(unittest.TestCase): + + def tearDown(self): + unload("test.relimport") + setUp = tearDown + + def test_relimport_star(self): + # This will import * from .test_import. + from .. import relimport + self.assertHasAttr(relimport, "RelativeImportTests") + + def test_issue3221(self): + # Note for mergers: the 'absolute' tests from the 2.x branch + # are missing in Py3k because implicit relative imports are + # a thing of the past + # + # Regression test for http://bugs.python.org/issue3221. + def check_relative(): + exec("from . import relimport", ns) + + # Check relative import OK with __package__ and __name__ correct + ns = dict(__package__='test', __name__='test.notarealmodule') + check_relative() + + # Check relative import OK with only __name__ wrong + ns = dict(__package__='test', __name__='notarealpkg.notarealmodule') + check_relative() + + # Check relative import fails with only __package__ wrong + ns = dict(__package__='foo', __name__='test.notarealmodule') + self.assertRaises(ModuleNotFoundError, check_relative) + + # Check relative import fails with __package__ and __name__ wrong + ns = dict(__package__='foo', __name__='notarealpkg.notarealmodule') + self.assertRaises(ModuleNotFoundError, check_relative) + + # Check relative import fails with package set to a non-string + ns = dict(__package__=object()) + self.assertRaises(TypeError, check_relative) + + def test_parentless_import_shadowed_by_global(self): + # Test as if this were done from the REPL where this error most commonly occurs (bpo-37409). + script_helper.assert_python_failure('-W', 'ignore', '-c', + "foo = 1; from . import foo") + + def test_absolute_import_without_future(self): + # If explicit relative import syntax is used, then do not try + # to perform an absolute import in the face of failure. + # Issue #7902. + with self.assertRaises(ImportError): + from .os import sep + self.fail("explicit relative import triggered an " + "implicit absolute import") + + def test_import_from_non_package(self): + path = os.path.join(os.path.dirname(__file__), 'data', 'package2') + with uncache('submodule1', 'submodule2'), DirsOnSysPath(path): + with self.assertRaises(ImportError): + import submodule1 + self.assertNotIn('submodule1', sys.modules) + self.assertNotIn('submodule2', sys.modules) + + def test_import_from_unloaded_package(self): + with uncache('package2', 'package2.submodule1', 'package2.submodule2'), \ + DirsOnSysPath(os.path.join(os.path.dirname(__file__), 'data')): + import package2.submodule1 + package2.submodule1.submodule2 + + def test_rebinding(self): + # The same data is also used for testing pkgutil.resolve_name() + # in test_pkgutil and mock.patch in test_unittest. + path = os.path.join(os.path.dirname(__file__), 'data') + with uncache('package3', 'package3.submodule'), DirsOnSysPath(path): + from package3 import submodule + self.assertEqual(submodule.attr, 'rebound') + import package3.submodule as submodule + self.assertEqual(submodule.attr, 'rebound') + with uncache('package3', 'package3.submodule'), DirsOnSysPath(path): + import package3.submodule as submodule + self.assertEqual(submodule.attr, 'rebound') + from package3 import submodule + self.assertEqual(submodule.attr, 'rebound') + + def test_rebinding2(self): + path = os.path.join(os.path.dirname(__file__), 'data') + with uncache('package4', 'package4.submodule'), DirsOnSysPath(path): + import package4.submodule as submodule + self.assertEqual(submodule.attr, 'submodule') + from package4 import submodule + self.assertEqual(submodule.attr, 'submodule') + with uncache('package4', 'package4.submodule'), DirsOnSysPath(path): + from package4 import submodule + self.assertEqual(submodule.attr, 'origin') + import package4.submodule as submodule + self.assertEqual(submodule.attr, 'submodule') + + +class OverridingImportBuiltinTests(unittest.TestCase): + def test_override_builtin(self): + # Test that overriding builtins.__import__ can bypass sys.modules. + import os + + def foo(): + import os + return os + self.assertEqual(foo(), os) # Quick sanity check. + + with swap_attr(builtins, "__import__", lambda *x: 5): + self.assertEqual(foo(), 5) + + # Test what happens when we shadow __import__ in globals(); this + # currently does not impact the import process, but if this changes, + # other code will need to change, so keep this test as a tripwire. + with swap_item(globals(), "__import__", lambda *x: 5): + self.assertEqual(foo(), os) + + +class PycacheTests(unittest.TestCase): + # Test the various PEP 3147/488-related behaviors. + + def _clean(self): + forget(TESTFN) + rmtree('__pycache__') + unlink(self.source) + + def setUp(self): + self.source = TESTFN + '.py' + self._clean() + with open(self.source, 'w', encoding='utf-8') as fp: + print('# This is a test file written by test_import.py', file=fp) + sys.path.insert(0, os.curdir) + importlib.invalidate_caches() + + def tearDown(self): + assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]' + del sys.path[0] + self._clean() + + @skip_if_dont_write_bytecode + def test_import_pyc_path(self): + self.assertFalse(os.path.exists('__pycache__')) + __import__(TESTFN) + self.assertTrue(os.path.exists('__pycache__')) + pyc_path = importlib.util.cache_from_source(self.source) + self.assertTrue(os.path.exists(pyc_path), + 'bytecode file {!r} for {!r} does not ' + 'exist'.format(pyc_path, TESTFN)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @skip_if_dont_write_bytecode + @os_helper.skip_unless_working_chmod + @os_helper.skip_if_dac_override + @unittest.skipIf(is_emscripten, "umask is a stub") + def test_unwritable_directory(self): + # When the umask causes the new __pycache__ directory to be + # unwritable, the import still succeeds but no .pyc file is written. + with temp_umask(0o222): + __import__(TESTFN) + self.assertTrue(os.path.exists('__pycache__')) + pyc_path = importlib.util.cache_from_source(self.source) + self.assertFalse(os.path.exists(pyc_path), + 'bytecode file {!r} for {!r} ' + 'exists'.format(pyc_path, TESTFN)) + + @skip_if_dont_write_bytecode + def test_missing_source(self): + # With PEP 3147 cache layout, removing the source but leaving the pyc + # file does not satisfy the import. + __import__(TESTFN) + pyc_file = importlib.util.cache_from_source(self.source) + self.assertTrue(os.path.exists(pyc_file)) + os.remove(self.source) + forget(TESTFN) + importlib.invalidate_caches() + self.assertRaises(ImportError, __import__, TESTFN) + + @skip_if_dont_write_bytecode + def test_missing_source_legacy(self): + # Like test_missing_source() except that for backward compatibility, + # when the pyc file lives where the py file would have been (and named + # without the tag), it is importable. The __file__ of the imported + # module is the pyc location. + __import__(TESTFN) + # pyc_file gets removed in _clean() via tearDown(). + pyc_file = make_legacy_pyc(self.source) + os.remove(self.source) + unload(TESTFN) + importlib.invalidate_caches() + m = __import__(TESTFN) + try: + self.assertEqual(m.__file__, + os.path.join(os.getcwd(), os.path.relpath(pyc_file))) + finally: + os.remove(pyc_file) + + def test___cached__(self): + # Modules now also have an __cached__ that points to the pyc file. + m = __import__(TESTFN) + pyc_file = importlib.util.cache_from_source(TESTFN + '.py') + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), pyc_file)) + + @skip_if_dont_write_bytecode + def test___cached___legacy_pyc(self): + # Like test___cached__() except that for backward compatibility, + # when the pyc file lives where the py file would have been (and named + # without the tag), it is importable. The __cached__ of the imported + # module is the pyc location. + __import__(TESTFN) + # pyc_file gets removed in _clean() via tearDown(). + pyc_file = make_legacy_pyc(self.source) + os.remove(self.source) + unload(TESTFN) + importlib.invalidate_caches() + m = __import__(TESTFN) + self.assertEqual(m.__cached__, + os.path.join(os.getcwd(), os.path.relpath(pyc_file))) + + @skip_if_dont_write_bytecode + def test_package___cached__(self): + # Like test___cached__ but for packages. + def cleanup(): + rmtree('pep3147') + unload('pep3147.foo') + unload('pep3147') + os.mkdir('pep3147') + self.addCleanup(cleanup) + # Touch the __init__.py + with open(os.path.join('pep3147', '__init__.py'), 'wb'): + pass + with open(os.path.join('pep3147', 'foo.py'), 'wb'): + pass + importlib.invalidate_caches() + m = __import__('pep3147.foo') + init_pyc = importlib.util.cache_from_source( + os.path.join('pep3147', '__init__.py')) + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), init_pyc)) + foo_pyc = importlib.util.cache_from_source(os.path.join('pep3147', 'foo.py')) + self.assertEqual(sys.modules['pep3147.foo'].__cached__, + os.path.join(os.getcwd(), foo_pyc)) + + def test_package___cached___from_pyc(self): + # Like test___cached__ but ensuring __cached__ when imported from a + # PEP 3147 pyc file. + def cleanup(): + rmtree('pep3147') + unload('pep3147.foo') + unload('pep3147') + os.mkdir('pep3147') + self.addCleanup(cleanup) + # Touch the __init__.py + with open(os.path.join('pep3147', '__init__.py'), 'wb'): + pass + with open(os.path.join('pep3147', 'foo.py'), 'wb'): + pass + importlib.invalidate_caches() + m = __import__('pep3147.foo') + unload('pep3147.foo') + unload('pep3147') + importlib.invalidate_caches() + m = __import__('pep3147.foo') + init_pyc = importlib.util.cache_from_source( + os.path.join('pep3147', '__init__.py')) + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), init_pyc)) + foo_pyc = importlib.util.cache_from_source(os.path.join('pep3147', 'foo.py')) + self.assertEqual(sys.modules['pep3147.foo'].__cached__, + os.path.join(os.getcwd(), foo_pyc)) + + def test_recompute_pyc_same_second(self): + # Even when the source file doesn't change timestamp, a change in + # source size is enough to trigger recomputation of the pyc file. + __import__(TESTFN) + unload(TESTFN) + with open(self.source, 'a', encoding='utf-8') as fp: + print("x = 5", file=fp) + m = __import__(TESTFN) + self.assertEqual(m.x, 5) + + +class TestSymbolicallyLinkedPackage(unittest.TestCase): + package_name = 'sample' + tagged = package_name + '-tagged' + + def setUp(self): + os_helper.rmtree(self.tagged) + os_helper.rmtree(self.package_name) + self.orig_sys_path = sys.path[:] + + # create a sample package; imagine you have a package with a tag and + # you want to symbolically link it from its untagged name. + os.mkdir(self.tagged) + self.addCleanup(os_helper.rmtree, self.tagged) + init_file = os.path.join(self.tagged, '__init__.py') + os_helper.create_empty_file(init_file) + assert os.path.exists(init_file) + + # now create a symlink to the tagged package + # sample -> sample-tagged + os.symlink(self.tagged, self.package_name, target_is_directory=True) + self.addCleanup(os_helper.unlink, self.package_name) + importlib.invalidate_caches() + + self.assertEqual(os.path.isdir(self.package_name), True) + + assert os.path.isfile(os.path.join(self.package_name, '__init__.py')) + + def tearDown(self): + sys.path[:] = self.orig_sys_path + + # regression test for issue6727 + @unittest.skipUnless( + not hasattr(sys, 'getwindowsversion') + or sys.getwindowsversion() >= (6, 0), + "Windows Vista or later required") + @os_helper.skip_unless_symlink + def test_symlinked_dir_importable(self): + # make sure sample can only be imported from the current directory. + sys.path[:] = ['.'] + assert os.path.exists(self.package_name) + assert os.path.exists(os.path.join(self.package_name, '__init__.py')) + + # Try to import the package + importlib.import_module(self.package_name) + + +@cpython_only +class ImportlibBootstrapTests(unittest.TestCase): + # These tests check that importlib is bootstrapped. + + def test_frozen_importlib(self): + mod = sys.modules['_frozen_importlib'] + self.assertTrue(mod) + + def test_frozen_importlib_is_bootstrap(self): + from importlib import _bootstrap + mod = sys.modules['_frozen_importlib'] + self.assertIs(mod, _bootstrap) + self.assertEqual(mod.__name__, 'importlib._bootstrap') + self.assertEqual(mod.__package__, 'importlib') + self.assertEndsWith(mod.__file__, '_bootstrap.py') + + def test_frozen_importlib_external_is_bootstrap_external(self): + from importlib import _bootstrap_external + mod = sys.modules['_frozen_importlib_external'] + self.assertIs(mod, _bootstrap_external) + self.assertEqual(mod.__name__, 'importlib._bootstrap_external') + self.assertEqual(mod.__package__, 'importlib') + self.assertEndsWith(mod.__file__, '_bootstrap_external.py') + + def test_there_can_be_only_one(self): + # Issue #15386 revealed a tricky loophole in the bootstrapping + # This test is technically redundant, since the bug caused importing + # this test module to crash completely, but it helps prove the point + from importlib import machinery + mod = sys.modules['_frozen_importlib'] + self.assertIs(machinery.ModuleSpec, mod.ModuleSpec) + + +@cpython_only +class GetSourcefileTests(unittest.TestCase): + + """Test importlib._bootstrap_external._get_sourcefile() as used by the C API. + + Because of the peculiarities of the need of this function, the tests are + knowingly whitebox tests. + + """ + + def test_get_sourcefile(self): + # Given a valid bytecode path, return the path to the corresponding + # source file if it exists. + with mock.patch('importlib._bootstrap_external._path_isfile') as _path_isfile: + _path_isfile.return_value = True + path = TESTFN + '.pyc' + expect = TESTFN + '.py' + self.assertEqual(_get_sourcefile(path), expect) + + def test_get_sourcefile_no_source(self): + # Given a valid bytecode path without a corresponding source path, + # return the original bytecode path. + with mock.patch('importlib._bootstrap_external._path_isfile') as _path_isfile: + _path_isfile.return_value = False + path = TESTFN + '.pyc' + self.assertEqual(_get_sourcefile(path), path) + + def test_get_sourcefile_bad_ext(self): + # Given a path with an invalid bytecode extension, return the + # bytecode path passed as the argument. + path = TESTFN + '.bad_ext' + self.assertEqual(_get_sourcefile(path), path) + + +class ImportTracebackTests(unittest.TestCase): + + def setUp(self): + os.mkdir(TESTFN) + self.old_path = sys.path[:] + sys.path.insert(0, TESTFN) + + def tearDown(self): + sys.path[:] = self.old_path + rmtree(TESTFN) + + def create_module(self, mod, contents, ext=".py"): + fname = os.path.join(TESTFN, mod + ext) + with open(fname, "w", encoding='utf-8') as f: + f.write(contents) + self.addCleanup(unload, mod) + importlib.invalidate_caches() + return fname + + def assert_traceback(self, tb, files): + deduped_files = [] + while tb: + code = tb.tb_frame.f_code + fn = code.co_filename + if not deduped_files or fn != deduped_files[-1]: + deduped_files.append(fn) + tb = tb.tb_next + self.assertEqual(len(deduped_files), len(files), deduped_files) + for fn, pat in zip(deduped_files, files): + self.assertIn(pat, fn) + + def test_nonexistent_module(self): + try: + # assertRaises() clears __traceback__ + import nonexistent_xyzzy + except ImportError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__]) + + def test_nonexistent_module_nested(self): + self.create_module("foo", "import nonexistent_xyzzy") + try: + import foo + except ImportError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py']) + + def test_exec_failure(self): + self.create_module("foo", "1/0") + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py']) + + def test_exec_failure_nested(self): + self.create_module("foo", "import bar") + self.create_module("bar", "1/0") + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py', 'bar.py']) + + # A few more examples from issue #15425 + def test_syntax_error(self): + self.create_module("foo", "invalid syntax is invalid") + try: + import foo + except SyntaxError as e: + tb = e.__traceback__ + else: + self.fail("SyntaxError should have been raised") + self.assert_traceback(tb, [__file__]) + + def _setup_broken_package(self, parent, child): + pkg_name = "_parent_foo" + self.addCleanup(unload, pkg_name) + pkg_path = os.path.join(TESTFN, pkg_name) + os.mkdir(pkg_path) + # Touch the __init__.py + init_path = os.path.join(pkg_path, '__init__.py') + with open(init_path, 'w', encoding='utf-8') as f: + f.write(parent) + bar_path = os.path.join(pkg_path, 'bar.py') + with open(bar_path, 'w', encoding='utf-8') as f: + f.write(child) + importlib.invalidate_caches() + return init_path, bar_path + + def test_broken_submodule(self): + init_path, bar_path = self._setup_broken_package("", "1/0") + try: + import _parent_foo.bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, bar_path]) + + def test_broken_from(self): + init_path, bar_path = self._setup_broken_package("", "1/0") + try: + from _parent_foo import bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__, bar_path]) + + def test_broken_parent(self): + init_path, bar_path = self._setup_broken_package("1/0", "") + try: + import _parent_foo.bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, init_path]) + + def test_broken_parent_from(self): + init_path, bar_path = self._setup_broken_package("1/0", "") + try: + from _parent_foo import bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, init_path]) + + @cpython_only + def test_import_bug(self): + # We simulate a bug in importlib and check that it's not stripped + # away from the traceback. + self.create_module("foo", "") + importlib = sys.modules['_frozen_importlib_external'] + if 'load_module' in vars(importlib.SourceLoader): + old_exec_module = importlib.SourceLoader.exec_module + else: + old_exec_module = None + try: + def exec_module(*args): + 1/0 + importlib.SourceLoader.exec_module = exec_module + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'imports.modules_by_index was set for the module). + self.assertEqual(snap.lookedup_id, snap.id) + self.assertEqual(snap.cached_id, snap.id) + with self.assertRaises(AttributeError): + snap.spam + else: + self.assertIs(snap.lookedup, mod) + self.assertIs(snap.cached, mod) + + def check_direct(self, loaded): + # The module has its own PyModuleDef, with a matching name. + self.assertEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_indirect(self, loaded, orig): + # The module re-uses another's PyModuleDef, with a different name. + assert orig is not loaded.module + assert orig.__name__ != loaded.name + self.assertNotEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_basic(self, loaded, expected_init_count): + # m_size == -1 + # The module loads fresh the first time and copies m_copy after. + snap = loaded.snapshot + self.assertIsNot(snap.state_initialized, None) + self.assertIsInstance(snap.init_count, int) + self.assertGreater(snap.init_count, 0) + self.assertEqual(snap.init_count, expected_init_count) + + def check_with_reinit(self, loaded): + # m_size >= 0 + # The module loads fresh every time. + pass + + def check_fresh(self, loaded): + """ + The module had not been loaded before (at least since fully reset). + """ + snap = loaded.snapshot + # The module's init func was run. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # _PyRuntime.imports.extensions was set. + self.assertEqual(snap.init_count, 1) + # The global state was initialized. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + + def check_semi_fresh(self, loaded, base, prev): + """ + The module had been loaded before and then reset + (but the module global state wasn't). + """ + snap = loaded.snapshot + # The module's init func was run again. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertNotEqual(snap.id, prev.snapshot.id) + self.assertEqual(snap.init_count, prev.snapshot.init_count + 1) + # The global state was updated. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertNotEqual(snap.state_initialized, + base.snapshot.state_initialized) + self.assertNotEqual(snap.state_initialized, + prev.snapshot.state_initialized) + + def check_copied(self, loaded, base): + """ + The module had been loaded before and never reset. + """ + snap = loaded.snapshot + # The module's init func was not run again. + # The interpreter copied m_copy, as set by the other interpreter, + # with objects owned by the other interpreter. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertEqual(snap.init_count, base.snapshot.init_count) + # The global state was not updated since the init func did not run. + # The module attrs were not directly initialized from that state. + # The state and module attrs still match the previous loading. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertEqual(snap.state_initialized, + base.snapshot.state_initialized) + + ######################### + # the tests + + def test_cleared_globals(self): + loaded = self.load(self.NAME) + _testsinglephase = loaded.module + init_before = _testsinglephase.state_initialized() + + _testsinglephase._clear_globals() + init_after = _testsinglephase.state_initialized() + init_count = _testsinglephase.initialized_count() + + self.assertGreater(init_before, 0) + self.assertEqual(init_after, 0) + self.assertEqual(init_count, -1) + + def test_variants(self): + # Exercise the most meaningful variants described in Python/import.c. + self.maxDiff = None + + # Check the "basic" module. + + name = self.NAME + expected_init_count = 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + basic = loaded.module + + # Check its indirect variants. + + name = f'{self.NAME}_basic_wrapper' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_indirect(loaded, basic) + self.check_basic(loaded, expected_init_count) + + # Currently PyState_AddModule() always replaces the cached module. + self.assertIs(basic.look_up_self(), loaded.module) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # The cached module shouldn't change after this point. + basic_lookedup = loaded.module + + # Check its direct variant. + + name = f'{self.NAME}_basic_copy' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the non-basic variant that has no state. + + name = f'{self.NAME}_with_reinit' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIs(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the basic variant that has state. + + name = f'{self.NAME}_with_state' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + self.addCleanup(loaded.module._clear_module_state) + + self.check_common(loaded) + self.assertIsNot(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + def test_basic_reloaded(self): + # m_copy is copied into the existing module object. + # Global state is not changed. + self.maxDiff = None + + for name in [ + self.NAME, # the "basic" module + f'{self.NAME}_basic_wrapper', # the indirect variant + f'{self.NAME}_basic_copy', # the direct variant + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIs(reloaded.module, loaded.module) + self.assertIs(reloaded.module.__dict__, loaded.module.__dict__) + # It only happens to be the same but that's good enough here. + # We really just want to verify that the re-loaded attrs + # didn't change. + self.assertIs(reloaded.snapshot.lookedup, + loaded.snapshot.lookedup) + self.assertEqual(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + self.assertEqual(reloaded.snapshot.init_count, + loaded.snapshot.init_count) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + def test_with_reinit_reloaded(self): + # The module's m_init func is run again. + self.maxDiff = None + + # Keep a reference around. + basic = self.load(self.NAME) + + for name, has_state in [ + (f'{self.NAME}_with_reinit', False), # m_size == 0 + (f'{self.NAME}_with_state', True), # m_size > 0 + ]: + self.add_module_cleanup(name) + with self.subTest(name=name, has_state=has_state): + loaded = self.load(name) + if has_state: + self.addCleanup(loaded.module._clear_module_state) + + reloaded = self.re_load(name, loaded.module) + if has_state: + self.addCleanup(reloaded.module._clear_module_state) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIsNot(reloaded.module, loaded.module) + self.assertNotEqual(reloaded.module.__dict__, + loaded.module.__dict__) + self.assertIs(reloaded.snapshot.lookedup, reloaded.module) + if loaded.snapshot.state_initialized is None: + self.assertIs(reloaded.snapshot.state_initialized, None) + else: + self.assertGreater(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") + def test_check_state_first(self): + for variant in ['', '_with_reinit', '_with_state']: + name = f'{self.NAME}{variant}_check_cache_first' + with self.subTest(name): + mod = self._load_dynamic(name, self.ORIGIN) + self.assertEqual(mod.__name__, name) + sys.modules.pop(name, None) + _testinternalcapi.clear_extension(name, self.ORIGIN) + + # Currently, for every single-phrase init module loaded + # in multiple interpreters, those interpreters share a + # PyModuleDef for that object, which can be a problem. + # Also, we test with a single-phase module that has global state, + # which is shared by all interpreters. + + @no_rerun(reason="module state is not cleared (see gh-140657)") + @requires_subinterpreters + def test_basic_multiple_interpreters_main_no_reset(self): + # without resetting; already loaded in main interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + main_loaded = self.load(self.NAME) + _testsinglephase = main_loaded.module + # Attrs set after loading are not in m_copy. + _testsinglephase.spam = 'spam, spam, spam, spam, eggs, and spam' + + self.check_common(main_loaded) + self.check_fresh(main_loaded) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # At this point: + # * alive in 1 interpreter (main) + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy was copied from the main interpreter (was NULL) + # * module's global state was initialized + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp() + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 1 interpreter (main) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (cleared when the interpreter was destroyed) + # (was from main interpreter) + # * module's global state was updated, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 2 interpreters (main, interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 3 interpreters (main, interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @no_rerun(reason="rerun not possible; module state is never cleared (see gh-102251)") + @requires_subinterpreters + def test_basic_multiple_interpreters_deleted_no_reset(self): + # without resetting; already loaded in a deleted interpreter + + if Py_TRACE_REFS: + # It's a Py_TRACE_REFS build. + # This test breaks interpreter isolation a little, + # which causes problems on Py_TRACE_REF builds. + raise unittest.SkipTest('crashes on Py_TRACE_REFS builds') + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # First, load in the main interpreter but then completely clear it. + loaded_main = self.load(self.NAME) + loaded_main.module._clear_globals() + _testinternalcapi.clear_extension(self.NAME, self.ORIGIN) + + # At this point: + # * alive in 0 interpreters + # * module def loaded already + # * module def was in _PyRuntime.imports.extensions, but cleared + # * mod init func ran for the first time (since reset, at least) + # * m_copy was set, but cleared (was NULL) + # * module's global state was initialized but cleared + + # Start with an interpreter that gets destroyed right away. + base = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''') + self.check_common(base) + self.check_fresh(base) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset) + # * m_copy is still set (owned by main interpreter) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded_interp1 = self.import_in_subinterp(interpid1) + self.check_common(loaded_interp1) + self.check_copied(loaded_interp1, base) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched + + # Use a subinterpreter while the previous one is still alive. + loaded_interp2 = self.import_in_subinterp(interpid2) + self.check_common(loaded_interp2) + self.check_copied(loaded_interp2, loaded_interp1) + + # At this point: + # * alive in 2 interpreters (interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched + + @requires_subinterpreters + def test_basic_multiple_interpreters_reset_each(self): + # resetting between each interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''', + postcleanup=True, + ) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy is NULL (cleared when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was initialized, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 2 interpreters (interp2, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was initialized, not reset + + +@cpython_only +class TestMagicNumber(unittest.TestCase): + def test_magic_number_endianness(self): + magic_number_bytes = _imp.pyc_magic_number_token.to_bytes(4, 'little') + self.assertEqual(magic_number_bytes[2:], b'\r\n') + # Starting with Python 3.11, Python 3.n starts with magic number 2900+50n. + magic_number = int.from_bytes(magic_number_bytes[:2], 'little') + start = 2900 + sys.version_info.minor * 50 + self.assertIn(magic_number, range(start, start + 50)) + + +if __name__ == '__main__': + # Test needs to be a package, so we can do relative imports. + unittest.main() diff --git a/stdlib/test/test_import/__main__.py b/stdlib/test/test_import/__main__.py new file mode 100644 index 000000000..24f02a171 --- /dev/null +++ b/stdlib/test/test_import/__main__.py @@ -0,0 +1,3 @@ +import unittest + +unittest.main('test.test_import') diff --git a/stdlib/test/test_import/data/circular_imports/basic.py b/stdlib/test/test_import/data/circular_imports/basic.py new file mode 100644 index 000000000..3e41e395d --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/basic.py @@ -0,0 +1,2 @@ +"""Circular imports through direct, relative imports.""" +from . import basic2 diff --git a/stdlib/test/test_import/data/circular_imports/basic2.py b/stdlib/test/test_import/data/circular_imports/basic2.py new file mode 100644 index 000000000..00bd2f29f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/basic2.py @@ -0,0 +1 @@ +from . import basic diff --git a/stdlib/test/test_import/data/circular_imports/binding.py b/stdlib/test/test_import/data/circular_imports/binding.py new file mode 100644 index 000000000..1fbf929ab --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/binding.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.binding2 as binding2 diff --git a/stdlib/test/test_import/data/circular_imports/binding2.py b/stdlib/test/test_import/data/circular_imports/binding2.py new file mode 100644 index 000000000..3d6693769 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/binding2.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.binding as binding diff --git a/stdlib/test/test_import/data/circular_imports/from_cycle1.py b/stdlib/test/test_import/data/circular_imports/from_cycle1.py new file mode 100644 index 000000000..aacfd5f46 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/from_cycle1.py @@ -0,0 +1,2 @@ +from .from_cycle2 import a +b = 1 diff --git a/stdlib/test/test_import/data/circular_imports/from_cycle2.py b/stdlib/test/test_import/data/circular_imports/from_cycle2.py new file mode 100644 index 000000000..62a66e1cf --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/from_cycle2.py @@ -0,0 +1,2 @@ +from .from_cycle1 import b +a = 1 diff --git a/stdlib/test/test_import/data/circular_imports/import_cycle.py b/stdlib/test/test_import/data/circular_imports/import_cycle.py new file mode 100644 index 000000000..cd9507b5f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/import_cycle.py @@ -0,0 +1,3 @@ +import test.test_import.data.circular_imports.import_cycle as m + +m.some_attribute diff --git a/stdlib/test/test_import/data/circular_imports/indirect.py b/stdlib/test/test_import/data/circular_imports/indirect.py new file mode 100644 index 000000000..6925788d6 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/indirect.py @@ -0,0 +1 @@ +from . import basic, basic2 diff --git a/stdlib/test/test_import/data/circular_imports/rebinding.py b/stdlib/test/test_import/data/circular_imports/rebinding.py new file mode 100644 index 000000000..2b7737555 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/rebinding.py @@ -0,0 +1,3 @@ +"""Test the binding of names when a circular import shares the same name as an +attribute.""" +from .rebinding2 import util diff --git a/stdlib/test/test_import/data/circular_imports/rebinding2.py b/stdlib/test/test_import/data/circular_imports/rebinding2.py new file mode 100644 index 000000000..57a9e6945 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/rebinding2.py @@ -0,0 +1,3 @@ +from .subpkg import util +from . import rebinding +util = util.util diff --git a/stdlib/test/test_import/data/circular_imports/singlephase.py b/stdlib/test/test_import/data/circular_imports/singlephase.py new file mode 100644 index 000000000..05618bc72 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/singlephase.py @@ -0,0 +1,13 @@ +"""Circular import involving a single-phase-init extension. + +This module is imported from the _testsinglephase_circular module from +_testsinglephase, and imports that module again. +""" + +import importlib +import _testsinglephase +from test.test_import import import_extension_from_file + +name = '_testsinglephase_circular' +filename = _testsinglephase.__file__ +mod = import_extension_from_file(name, filename) diff --git a/stdlib/test/test_import/data/circular_imports/source.py b/stdlib/test/test_import/data/circular_imports/source.py new file mode 100644 index 000000000..f10409904 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/source.py @@ -0,0 +1,2 @@ +from . import use +spam = 1 diff --git a/stdlib/test/test_import/data/circular_imports/subpackage.py b/stdlib/test/test_import/data/circular_imports/subpackage.py new file mode 100644 index 000000000..7b412f76f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpackage.py @@ -0,0 +1,2 @@ +"""Circular import involving a sub-package.""" +from .subpkg import subpackage2 diff --git a/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py b/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py new file mode 100644 index 000000000..17b893a1a --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py @@ -0,0 +1,2 @@ +#from .util import util +from .. import subpackage diff --git a/stdlib/test/test_import/data/circular_imports/subpkg/util.py b/stdlib/test/test_import/data/circular_imports/subpkg/util.py new file mode 100644 index 000000000..343bd843b --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg/util.py @@ -0,0 +1,2 @@ +def util(): + pass diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/__init__.py b/stdlib/test/test_import/data/circular_imports/subpkg2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py new file mode 100644 index 000000000..9745f60a7 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.subpkg2.parent.child diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py new file mode 100644 index 000000000..1995a3730 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py @@ -0,0 +1,3 @@ +import test.test_import.data.circular_imports.subpkg2.parent + +test.test_import.data.circular_imports.subpkg2.parent diff --git a/stdlib/test/test_import/data/circular_imports/use.py b/stdlib/test/test_import/data/circular_imports/use.py new file mode 100644 index 000000000..418f9e268 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/use.py @@ -0,0 +1,2 @@ +from . import source +source.spam diff --git a/stdlib/test/test_import/data/circular_imports/util.py b/stdlib/test/test_import/data/circular_imports/util.py new file mode 100644 index 000000000..343bd843b --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/util.py @@ -0,0 +1,2 @@ +def util(): + pass diff --git a/stdlib/test/test_import/data/double_const.py b/stdlib/test/test_import/data/double_const.py new file mode 100644 index 000000000..67852aaf9 --- /dev/null +++ b/stdlib/test/test_import/data/double_const.py @@ -0,0 +1,30 @@ +from test.support import TestFailed + +# A test for SF bug 422177: manifest float constants varied way too much in +# precision depending on whether Python was loading a module for the first +# time, or reloading it from a precompiled .pyc. The "expected" failure +# mode is that when test_import imports this after all .pyc files have been +# erased, it passes, but when test_import imports this from +# double_const.pyc, it fails. This indicates a woeful loss of precision in +# the marshal format for doubles. It's also possible that repr() doesn't +# produce enough digits to get reasonable precision for this box. + +PI = 3.14159265358979324 +TWOPI = 6.28318530717958648 + +PI_str = "3.14159265358979324" +TWOPI_str = "6.28318530717958648" + +# Verify that the double x is within a few bits of eval(x_str). +def check_ok(x, x_str): + assert x > 0.0 + x2 = eval(x_str) + assert x2 > 0.0 + diff = abs(x - x2) + # If diff is no larger than 3 ULP (wrt x2), then diff/8 is no larger + # than 0.375 ULP, so adding diff/8 to x2 should have no effect. + if x2 + (diff / 8.) != x2: + raise TestFailed("Manifest const %s lost too much precision " % x_str) + +check_ok(PI, PI_str) +check_ok(TWOPI, TWOPI_str) diff --git a/stdlib/test/test_import/data/package/__init__.py b/stdlib/test/test_import/data/package/__init__.py new file mode 100644 index 000000000..a4f2bc340 --- /dev/null +++ b/stdlib/test/test_import/data/package/__init__.py @@ -0,0 +1,2 @@ +import package.submodule +package.submodule diff --git a/stdlib/test/test_import/data/package/submodule.py b/stdlib/test/test_import/data/package/submodule.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/package2/submodule1.py b/stdlib/test/test_import/data/package2/submodule1.py new file mode 100644 index 000000000..0698ed6de --- /dev/null +++ b/stdlib/test/test_import/data/package2/submodule1.py @@ -0,0 +1,3 @@ +import sys +sys.modules.pop(__package__, None) +from . import submodule2 diff --git a/stdlib/test/test_import/data/package2/submodule2.py b/stdlib/test/test_import/data/package2/submodule2.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/package3/__init__.py b/stdlib/test/test_import/data/package3/__init__.py new file mode 100644 index 000000000..7033c22a7 --- /dev/null +++ b/stdlib/test/test_import/data/package3/__init__.py @@ -0,0 +1,2 @@ +"""Rebinding the package attribute after importing the module.""" +from .submodule import submodule diff --git a/stdlib/test/test_import/data/package3/submodule.py b/stdlib/test/test_import/data/package3/submodule.py new file mode 100644 index 000000000..cd7b30db1 --- /dev/null +++ b/stdlib/test/test_import/data/package3/submodule.py @@ -0,0 +1,7 @@ +attr = 'submodule' +class A: + attr = 'submodule' +class submodule: + attr = 'rebound' + class B: + attr = 'rebound' diff --git a/stdlib/test/test_import/data/package4/__init__.py b/stdlib/test/test_import/data/package4/__init__.py new file mode 100644 index 000000000..d8af60ab3 --- /dev/null +++ b/stdlib/test/test_import/data/package4/__init__.py @@ -0,0 +1,5 @@ +"""Binding the package attribute without importing the module.""" +class submodule: + attr = 'origin' + class B: + attr = 'origin' diff --git a/stdlib/test/test_import/data/package4/submodule.py b/stdlib/test/test_import/data/package4/submodule.py new file mode 100644 index 000000000..c861417ae --- /dev/null +++ b/stdlib/test/test_import/data/package4/submodule.py @@ -0,0 +1,3 @@ +attr = 'submodule' +class A: + attr = 'submodule' diff --git a/stdlib/test/test_import/data/unwritable/__init__.py b/stdlib/test/test_import/data/unwritable/__init__.py new file mode 100644 index 000000000..da4ddb3d0 --- /dev/null +++ b/stdlib/test/test_import/data/unwritable/__init__.py @@ -0,0 +1,12 @@ +import sys + +class MyMod(object): + __slots__ = ['__builtins__', '__cached__', '__doc__', + '__file__', '__loader__', '__name__', + '__package__', '__path__', '__spec__'] + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, globals()[attr]) + + +sys.modules[__name__] = MyMod() diff --git a/stdlib/test/test_import/data/unwritable/x.py b/stdlib/test/test_import/data/unwritable/x.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/__init__.py b/stdlib/test/test_importlib/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/__main__.py b/stdlib/test/test_importlib/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/abc.py b/stdlib/test/test_importlib/abc.py new file mode 100644 index 000000000..5d4b95876 --- /dev/null +++ b/stdlib/test/test_importlib/abc.py @@ -0,0 +1,93 @@ +import abc + + +class FinderTests(metaclass=abc.ABCMeta): + + """Basic tests for a finder to pass.""" + + @abc.abstractmethod + def test_module(self): + # Test importing a top-level module. + pass + + @abc.abstractmethod + def test_package(self): + # Test importing a package. + pass + + @abc.abstractmethod + def test_module_in_package(self): + # Test importing a module contained within a package. + # A value for 'path' should be used if for a meta_path finder. + pass + + @abc.abstractmethod + def test_package_in_package(self): + # Test importing a subpackage. + # A value for 'path' should be used if for a meta_path finder. + pass + + @abc.abstractmethod + def test_package_over_module(self): + # Test that packages are chosen over modules. + pass + + @abc.abstractmethod + def test_failure(self): + # Test trying to find a module that cannot be handled. + pass + + +class LoaderTests(metaclass=abc.ABCMeta): + + @abc.abstractmethod + def test_module(self): + """A module should load without issue. + + After the loader returns the module should be in sys.modules. + + Attributes to verify: + + * __file__ + * __loader__ + * __name__ + * No __path__ + + """ + pass + + @abc.abstractmethod + def test_package(self): + """Loading a package should work. + + After the loader returns the module should be in sys.modules. + + Attributes to verify: + + * __name__ + * __file__ + * __package__ + * __path__ + * __loader__ + + """ + pass + + @abc.abstractmethod + def test_lacking_parent(self): + """A loader should not be dependent on it's parent package being + imported.""" + pass + + @abc.abstractmethod + def test_state_after_failure(self): + """If a module is already in sys.modules and a reload fails + (e.g. a SyntaxError), the module should be in the state it was before + the reload began.""" + pass + + @abc.abstractmethod + def test_unloadable(self): + """Test ImportError is raised when the loader is asked to load a module + it can't.""" + pass diff --git a/stdlib/test/test_importlib/builtin/__init__.py b/stdlib/test/test_importlib/builtin/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/builtin/__main__.py b/stdlib/test/test_importlib/builtin/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/builtin/test_finder.py b/stdlib/test/test_importlib/builtin/test_finder.py new file mode 100644 index 000000000..1fb1d2f9e --- /dev/null +++ b/stdlib/test/test_importlib/builtin/test_finder.py @@ -0,0 +1,46 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import sys +import unittest + + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class FindSpecTests(abc.FinderTests): + + """Test find_spec() for built-in modules.""" + + def test_module(self): + # Common case. + with util.uncache(util.BUILTINS.good_name): + found = self.machinery.BuiltinImporter.find_spec(util.BUILTINS.good_name) + self.assertTrue(found) + self.assertEqual(found.origin, 'built-in') + + # Built-in modules cannot be a package. + test_package = None + + # Built-in modules cannot be in a package. + test_module_in_package = None + + # Built-in modules cannot be a package. + test_package_in_package = None + + # Built-in modules cannot be a package. + test_package_over_module = None + + def test_failure(self): + name = 'importlib' + assert name not in sys.builtin_module_names + spec = self.machinery.BuiltinImporter.find_spec(name) + self.assertIsNone(spec) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/builtin/test_loader.py b/stdlib/test/test_importlib/builtin/test_loader.py new file mode 100644 index 000000000..7e9d1b196 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/test_loader.py @@ -0,0 +1,110 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import sys +import types +import unittest +import warnings + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class LoaderTests(abc.LoaderTests): + + """Test load_module() for built-in modules.""" + + def setUp(self): + self.verification = {'__name__': 'errno', '__package__': '', + '__loader__': self.machinery.BuiltinImporter} + + def verify(self, module): + """Verify that the module matches against what it should have.""" + self.assertIsInstance(module, types.ModuleType) + for attr, value in self.verification.items(): + self.assertEqual(getattr(module, attr), value) + self.assertIn(module.__name__, sys.modules) + + def load_module(self, name): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.machinery.BuiltinImporter.load_module(name) + + def test_module(self): + # Common case. + with util.uncache(util.BUILTINS.good_name): + module = self.load_module(util.BUILTINS.good_name) + self.verify(module) + + # Built-in modules cannot be a package. + test_package = test_lacking_parent = None + + # No way to force an import failure. + test_state_after_failure = None + + def test_module_reuse(self): + # Test that the same module is used in a reload. + with util.uncache(util.BUILTINS.good_name): + module1 = self.load_module(util.BUILTINS.good_name) + module2 = self.load_module(util.BUILTINS.good_name) + self.assertIs(module1, module2) + + def test_unloadable(self): + name = 'dssdsdfff' + assert name not in sys.builtin_module_names + with self.assertRaises(ImportError) as cm: + self.load_module(name) + self.assertEqual(cm.exception.name, name) + + def test_already_imported(self): + # Using the name of a module already imported but not a built-in should + # still fail. + module_name = 'builtin_reload_test' + assert module_name not in sys.builtin_module_names + with util.uncache(module_name): + module = types.ModuleType(module_name) + sys.modules[module_name] = module + with self.assertRaises(ImportError) as cm: + self.load_module(module_name) + self.assertEqual(cm.exception.name, module_name) + + +(Frozen_LoaderTests, + Source_LoaderTests + ) = util.test_both(LoaderTests, machinery=machinery) + + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class InspectLoaderTests: + + """Tests for InspectLoader methods for BuiltinImporter.""" + + def test_get_code(self): + # There is no code object. + result = self.machinery.BuiltinImporter.get_code(util.BUILTINS.good_name) + self.assertIsNone(result) + + def test_get_source(self): + # There is no source. + result = self.machinery.BuiltinImporter.get_source(util.BUILTINS.good_name) + self.assertIsNone(result) + + def test_is_package(self): + # Cannot be a package. + result = self.machinery.BuiltinImporter.is_package(util.BUILTINS.good_name) + self.assertFalse(result) + + @unittest.skipIf(util.BUILTINS.bad_name is None, 'all modules are built in') + def test_not_builtin(self): + # Modules not built-in should raise ImportError. + for meth_name in ('get_code', 'get_source', 'is_package'): + method = getattr(self.machinery.BuiltinImporter, meth_name) + with self.assertRaises(ImportError) as cm: + method(util.BUILTINS.bad_name) + + +(Frozen_InspectLoaderTests, + Source_InspectLoaderTests + ) = util.test_both(InspectLoaderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/__init__.py b/stdlib/test/test_importlib/extension/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/extension/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/extension/__main__.py b/stdlib/test/test_importlib/extension/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/extension/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py b/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py new file mode 100644 index 000000000..8ffd18d22 --- /dev/null +++ b/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py @@ -0,0 +1,44 @@ +import types +import unittest +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +from test.test_importlib.extension.test_loader import MultiPhaseExtensionModuleTests + + +class NonModuleExtensionTests: + setUp = MultiPhaseExtensionModuleTests.setUp + load_module_by_name = MultiPhaseExtensionModuleTests.load_module_by_name + + def _test_nonmodule(self): + # Test returning a non-module object from create works. + name = self.name + '_nonmodule' + mod = self.load_module_by_name(name) + self.assertNotEqual(type(mod), type(unittest)) + self.assertEqual(mod.three, 3) + + # issue 27782 + def test_nonmodule_with_methods(self): + # Test creating a non-module object with methods defined. + name = self.name + '_nonmodule_with_methods' + mod = self.load_module_by_name(name) + self.assertNotEqual(type(mod), type(unittest)) + self.assertEqual(mod.three, 3) + self.assertEqual(mod.bar(10, 1), 9) + + def test_null_slots(self): + # Test that NULL slots aren't a problem. + name = self.name + '_null_slots' + module = self.load_module_by_name(name) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, name) + + +(Frozen_NonModuleExtensionTests, + Source_NonModuleExtensionTests + ) = util.test_both(NonModuleExtensionTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_case_sensitivity.py b/stdlib/test/test_importlib/extension/test_case_sensitivity.py new file mode 100644 index 000000000..518371916 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_case_sensitivity.py @@ -0,0 +1,48 @@ +from test.support import os_helper +import unittest +import sys +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + + +@unittest.skipIf(util.EXTENSIONS is None or util.EXTENSIONS.filename is None, + 'dynamic loading not supported or test module not available') +@util.case_insensitive_tests +class ExtensionModuleCaseSensitivityTest(util.CASEOKTestBase): + + def find_spec(self): + good_name = util.EXTENSIONS.name + bad_name = good_name.upper() + assert good_name != bad_name + finder = self.machinery.FileFinder(util.EXTENSIONS.path, + (self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES)) + return finder.find_spec(bad_name) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_case_sensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.unset('PYTHONCASEOK') + self.caseok_env_changed(should_exist=False) + spec = self.find_spec() + self.assertIsNone(spec) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_case_insensitivity(self): + with os_helper.EnvironmentVarGuard() as env: + env.set('PYTHONCASEOK', '1') + self.caseok_env_changed(should_exist=True) + spec = self.find_spec() + self.assertTrue(spec) + + +(Frozen_ExtensionCaseSensitivity, + Source_ExtensionCaseSensitivity + ) = util.test_both(ExtensionModuleCaseSensitivityTest, importlib=importlib, + machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_finder.py b/stdlib/test/test_importlib/extension/test_finder.py new file mode 100644 index 000000000..cdc8884d6 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_finder.py @@ -0,0 +1,69 @@ +from test.support import is_apple_mobile +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import unittest +import sys + + +class FinderTests(abc.FinderTests): + + """Test the finder for extension modules.""" + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + if util.EXTENSIONS.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{util.EXTENSIONS.name} is a builtin module" + ) + + def find_spec(self, fullname): + if is_apple_mobile: + # Apple mobile platforms require a specialist loader that uses + # .fwork files as placeholders for the true `.so` files. + loaders = [ + ( + self.machinery.AppleFrameworkLoader, + [ + ext.replace(".so", ".fwork") + for ext in self.machinery.EXTENSION_SUFFIXES + ] + ) + ] + else: + loaders = [ + ( + self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES + ) + ] + + importer = self.machinery.FileFinder(util.EXTENSIONS.path, *loaders) + + return importer.find_spec(fullname) + + def test_module(self): + self.assertTrue(self.find_spec(util.EXTENSIONS.name)) + + # No extension module as an __init__ available for testing. + test_package = test_package_in_package = None + + # No extension module in a package available for testing. + test_module_in_package = None + + # Extension modules cannot be an __init__ for a package. + test_package_over_module = None + + def test_failure(self): + self.assertIsNone(self.find_spec('asdfjkl;')) + + +(Frozen_FinderTests, + Source_FinderTests + ) = util.test_both(FinderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_loader.py b/stdlib/test/test_importlib/extension/test_loader.py new file mode 100644 index 000000000..0dd21e079 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_loader.py @@ -0,0 +1,392 @@ +from test.support import is_apple_mobile +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import os.path +import sys +import types +import unittest +import warnings +import importlib.util +import importlib +from test import support +from test.support import MISSING_C_DOCSTRINGS, script_helper + + +class LoaderTests: + + """Test ExtensionFileLoader.""" + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + if util.EXTENSIONS.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{util.EXTENSIONS.name} is a builtin module" + ) + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.loader = self.LoaderClass(util.EXTENSIONS.name, util.EXTENSIONS.file_path) + + def load_module(self, fullname): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(fullname) + + def test_equality(self): + other = self.LoaderClass(util.EXTENSIONS.name, util.EXTENSIONS.file_path) + self.assertEqual(self.loader, other) + + def test_inequality(self): + other = self.LoaderClass('_' + util.EXTENSIONS.name, util.EXTENSIONS.file_path) + self.assertNotEqual(self.loader, other) + + def test_load_module_API(self): + # Test the default argument for load_module(). + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + self.loader.load_module() + self.loader.load_module(None) + with self.assertRaises(ImportError): + self.load_module('XXX') + + def test_module(self): + with util.uncache(util.EXTENSIONS.name): + module = self.load_module(util.EXTENSIONS.name) + for attr, value in [('__name__', util.EXTENSIONS.name), + ('__file__', util.EXTENSIONS.file_path), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + self.assertIn(util.EXTENSIONS.name, sys.modules) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # No easy way to trigger a failure after a successful import. + test_state_after_failure = None + + def test_unloadable(self): + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module(name) + self.assertEqual(cm.exception.name, name) + + def test_module_reuse(self): + with util.uncache(util.EXTENSIONS.name): + module1 = self.load_module(util.EXTENSIONS.name) + module2 = self.load_module(util.EXTENSIONS.name) + self.assertIs(module1, module2) + + def test_is_package(self): + self.assertFalse(self.loader.is_package(util.EXTENSIONS.name)) + for suffix in self.machinery.EXTENSION_SUFFIXES: + path = os.path.join('some', 'path', 'pkg', '__init__' + suffix) + loader = self.LoaderClass('pkg', path) + self.assertTrue(loader.is_package('pkg')) + + +(Frozen_LoaderTests, + Source_LoaderTests + ) = util.test_both(LoaderTests, machinery=machinery) + + +class SinglePhaseExtensionModuleTests(abc.LoaderTests): + # Test loading extension modules without multi-phase initialization. + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.name = '_testsinglephase' + if self.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{self.name} is a builtin module" + ) + finder = self.machinery.FileFinder(None) + self.spec = importlib.util.find_spec(self.name) + assert self.spec + + self.loader = self.LoaderClass(self.name, self.spec.origin) + + def load_module(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(self.name) + + def load_module_by_name(self, fullname): + # Load a module from the test extension by name. + origin = self.spec.origin + loader = self.LoaderClass(fullname, origin) + spec = importlib.util.spec_from_loader(fullname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + def test_module(self): + # Test loading an extension module. + with util.uncache(self.name): + module = self.load_module() + for attr, value in [('__name__', self.name), + ('__file__', self.spec.origin), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + with self.assertRaises(AttributeError): + module.__path__ + self.assertIs(module, sys.modules[self.name]) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # No easy way to trigger a failure after a successful import. + test_state_after_failure = None + + def test_unloadable(self): + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_unloadable_nonascii(self): + # Test behavior with nonexistent module with non-ASCII name. + name = 'fo\xf3' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + # It may make sense to add the equivalent to + # the following MultiPhaseExtensionModuleTests tests: + # + # * test_nonmodule + # * test_nonmodule_with_methods + # * test_bad_modules + # * test_nonascii + + +(Frozen_SinglePhaseExtensionModuleTests, + Source_SinglePhaseExtensionModuleTests + ) = util.test_both(SinglePhaseExtensionModuleTests, machinery=machinery) + + +class MultiPhaseExtensionModuleTests(abc.LoaderTests): + # Test loading extension modules with multi-phase initialization (PEP 489). + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.name = '_testmultiphase' + if self.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{self.name} is a builtin module" + ) + finder = self.machinery.FileFinder(None) + self.spec = importlib.util.find_spec(self.name) + assert self.spec + self.loader = self.LoaderClass(self.name, self.spec.origin) + + def load_module(self): + # Load the module from the test extension. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(self.name) + + def load_module_by_name(self, fullname): + # Load a module from the test extension by name. + origin = self.spec.origin + loader = self.LoaderClass(fullname, origin) + spec = importlib.util.spec_from_loader(fullname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # Handling failure on reload is the up to the module. + test_state_after_failure = None + + def test_module(self): + # Test loading an extension module. + with util.uncache(self.name): + module = self.load_module() + for attr, value in [('__name__', self.name), + ('__file__', self.spec.origin), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + with self.assertRaises(AttributeError): + module.__path__ + self.assertIs(module, sys.modules[self.name]) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + def test_functionality(self): + # Test basic functionality of stuff defined in an extension module. + with util.uncache(self.name): + module = self.load_module() + self.assertIsInstance(module, types.ModuleType) + ex = module.Example() + self.assertEqual(ex.demo('abcd'), 'abcd') + self.assertEqual(ex.demo(), None) + with self.assertRaises(AttributeError): + ex.abc + ex.abc = 0 + self.assertEqual(ex.abc, 0) + self.assertEqual(module.foo(9, 9), 18) + self.assertIsInstance(module.Str(), str) + self.assertEqual(module.Str(1) + '23', '123') + with self.assertRaises(module.error): + raise module.error() + self.assertEqual(module.int_const, 1969) + self.assertEqual(module.str_const, 'something different') + + def test_reload(self): + # Test that reload didn't re-set the module's attributes. + with util.uncache(self.name): + module = self.load_module() + ex_class = module.Example + importlib.reload(module) + self.assertIs(ex_class, module.Example) + + def test_try_registration(self): + # Assert that the PyState_{Find,Add,Remove}Module C API doesn't work. + with util.uncache(self.name): + module = self.load_module() + with self.subTest('PyState_FindModule'): + self.assertEqual(module.call_state_registration_func(0), None) + with self.subTest('PyState_AddModule'): + with self.assertRaises(SystemError): + module.call_state_registration_func(1) + with self.subTest('PyState_RemoveModule'): + with self.assertRaises(SystemError): + module.call_state_registration_func(2) + + def test_load_submodule(self): + # Test loading a simulated submodule. + module = self.load_module_by_name('pkg.' + self.name) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, 'pkg.' + self.name) + self.assertEqual(module.str_const, 'something different') + + def test_load_short_name(self): + # Test loading module with a one-character name. + module = self.load_module_by_name('x') + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, 'x') + self.assertEqual(module.str_const, 'something different') + self.assertNotIn('x', sys.modules) + + def test_load_twice(self): + # Test that 2 loads result in 2 module objects. + module1 = self.load_module_by_name(self.name) + module2 = self.load_module_by_name(self.name) + self.assertIsNot(module1, module2) + + def test_unloadable(self): + # Test nonexistent module. + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_unloadable_nonascii(self): + # Test behavior with nonexistent module with non-ASCII name. + name = 'fo\xf3' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_bad_modules(self): + # Test SystemError is raised for misbehaving extensions. + for name_base in [ + 'bad_slot_large', + 'bad_slot_negative', + 'create_int_with_state', + 'negative_size', + 'export_null', + 'export_uninitialized', + 'export_raise', + 'export_unreported_exception', + 'create_null', + 'create_raise', + 'create_unreported_exception', + 'nonmodule_with_exec_slots', + 'exec_err', + 'exec_raise', + 'exec_unreported_exception', + 'multiple_create_slots', + 'multiple_multiple_interpreters_slots', + ]: + with self.subTest(name_base): + name = self.name + '_' + name_base + with self.assertRaises(SystemError) as cm: + self.load_module_by_name(name) + + # If there is an unreported exception, it should be chained + # with the `SystemError`. + if "unreported_exception" in name_base: + self.assertIsNotNone(cm.exception.__cause__) + + def test_nonascii(self): + # Test that modules with non-ASCII names can be loaded. + # punycode behaves slightly differently in some-ASCII and no-ASCII + # cases, so test both. + cases = [ + (self.name + '_zkou\u0161ka_na\u010dten\xed', 'Czech'), + ('\uff3f\u30a4\u30f3\u30dd\u30fc\u30c8\u30c6\u30b9\u30c8', + 'Japanese'), + ] + for name, lang in cases: + with self.subTest(name): + module = self.load_module_by_name(name) + self.assertEqual(module.__name__, name) + if not MISSING_C_DOCSTRINGS: + self.assertEqual(module.__doc__, "Module named in %s" % lang) + + +(Frozen_MultiPhaseExtensionModuleTests, + Source_MultiPhaseExtensionModuleTests + ) = util.test_both(MultiPhaseExtensionModuleTests, machinery=machinery) + + +class NonModuleExtensionTests(unittest.TestCase): + def test_nonmodule_cases(self): + # The test cases in this file cause the GIL to be enabled permanently + # in free-threaded builds, so they are run in a subprocess to isolate + # this effect. + script = support.findfile("test_importlib/extension/_test_nonmodule_cases.py") + script_helper.run_test_script(script) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_path_hook.py b/stdlib/test/test_importlib/extension/test_path_hook.py new file mode 100644 index 000000000..941dcd543 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_path_hook.py @@ -0,0 +1,33 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import unittest + + +@unittest.skipIf(util.EXTENSIONS is None or util.EXTENSIONS.filename is None, + 'dynamic loading not supported or test module not available') +class PathHookTests: + + """Test the path hook for extension modules.""" + # XXX Should it only succeed for pre-existing directories? + # XXX Should it only work for directories containing an extension module? + + def hook(self, entry): + return self.machinery.FileFinder.path_hook( + (self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES))(entry) + + def test_success(self): + # Path hook should handle a directory where a known extension module + # exists. + self.assertHasAttr(self.hook(util.EXTENSIONS.path), 'find_spec') + + +(Frozen_PathHooksTests, + Source_PathHooksTests + ) = util.test_both(PathHookTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/frozen/__init__.py b/stdlib/test/test_importlib/frozen/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/frozen/__main__.py b/stdlib/test/test_importlib/frozen/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/frozen/test_finder.py b/stdlib/test/test_importlib/frozen/test_finder.py new file mode 100644 index 000000000..971cc28b6 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/test_finder.py @@ -0,0 +1,183 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import os.path +import unittest + +from test.support import import_helper, REPO_ROOT, STDLIB_DIR + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' + + +class FindSpecTests(abc.FinderTests): + + """Test finding frozen modules.""" + + def find(self, name, **kwargs): + finder = self.machinery.FrozenImporter + with import_helper.frozen_modules(): + return finder.find_spec(name, **kwargs) + + def check_basic(self, spec, name, ispkg=False): + self.assertEqual(spec.name, name) + self.assertIs(spec.loader, self.machinery.FrozenImporter) + self.assertEqual(spec.origin, 'frozen') + self.assertFalse(spec.has_location) + if ispkg: + self.assertIsNotNone(spec.submodule_search_locations) + else: + self.assertIsNone(spec.submodule_search_locations) + self.assertIsNotNone(spec.loader_state) + + def check_loader_state(self, spec, origname=None, filename=None): + if not filename: + if not origname: + origname = spec.name + filename = resolve_stdlib_file(origname) + + actual = dict(vars(spec.loader_state)) + + # Check the rest of spec.loader_state. + expected = dict( + origname=origname, + filename=filename if origname else None, + ) + self.assertDictEqual(actual, expected) + + def check_search_locations(self, spec): + """This is only called when testing packages.""" + missing = object() + filename = getattr(spec.loader_state, 'filename', missing) + origname = getattr(spec.loader_state, 'origname', None) + if not origname or filename is missing: + # We deal with this in check_loader_state(). + return + if not filename: + expected = [] + elif origname != spec.name and not origname.startswith('<'): + expected = [] + else: + expected = [os.path.dirname(filename)] + self.assertListEqual(spec.submodule_search_locations, expected) + + def test_module(self): + modules = [ + '__hello__', + '__phello__.spam', + '__phello__.ham.eggs', + ] + for name in modules: + with self.subTest(f'{name} -> {name}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec) + modules = { + '__hello_alias__': '__hello__', + '_frozen_importlib': 'importlib._bootstrap', + } + for name, origname in modules.items(): + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname) + modules = [ + '__phello__.__init__', + '__phello__.ham.__init__', + ] + for name in modules: + origname = '<' + name.rpartition('.')[0] + filename = resolve_stdlib_file(name) + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname, filename) + modules = { + '__hello_only__': ('Tools', 'freeze', 'flag.py'), + } + for name, path in modules.items(): + origname = None + filename = os.path.join(REPO_ROOT, *path) + with self.subTest(f'{name} -> {filename}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname, filename) + + def test_package(self): + packages = [ + '__phello__', + '__phello__.ham', + ] + for name in packages: + filename = resolve_stdlib_file(name, ispkg=True) + with self.subTest(f'{name} -> {name}'): + spec = self.find(name) + self.check_basic(spec, name, ispkg=True) + self.check_loader_state(spec, name, filename) + self.check_search_locations(spec) + packages = { + '__phello_alias__': '__hello__', + } + for name, origname in packages.items(): + filename = resolve_stdlib_file(origname, ispkg=False) + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name, ispkg=True) + self.check_loader_state(spec, origname, filename) + self.check_search_locations(spec) + + # These are covered by test_module() and test_package(). + test_module_in_package = None + test_package_in_package = None + + # No easy way to test. + test_package_over_module = None + + def test_path_ignored(self): + for name in ('__hello__', '__phello__', '__phello__.spam'): + actual = self.find(name) + for path in (None, object(), '', 'eggs', [], [''], ['eggs']): + with self.subTest((name, path)): + spec = self.find(name, path=path) + self.assertEqual(spec, actual) + + def test_target_ignored(self): + imported = ('__hello__', '__phello__') + with import_helper.CleanImport(*imported, usefrozen=True): + import __hello__ as match + import __phello__ as nonmatch + name = '__hello__' + actual = self.find(name) + for target in (None, match, nonmatch, object(), 'not-a-module-object'): + with self.subTest(target): + spec = self.find(name, target=target) + self.assertEqual(spec, actual) + + def test_failure(self): + spec = self.find('') + self.assertIsNone(spec) + + def test_not_using_frozen(self): + finder = self.machinery.FrozenImporter + with import_helper.frozen_modules(enabled=False): + # both frozen and not frozen + spec1 = finder.find_spec('__hello__') + # only frozen + spec2 = finder.find_spec('__hello_only__') + self.assertIsNone(spec1) + self.assertIsNone(spec2) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/frozen/test_loader.py b/stdlib/test/test_importlib/frozen/test_loader.py new file mode 100644 index 000000000..c808bb732 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/test_loader.py @@ -0,0 +1,172 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +from test.support import captured_stdout, import_helper, STDLIB_DIR +import contextlib +import os.path +import types +import unittest +import warnings + + +@contextlib.contextmanager +def deprecated(): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + yield + + +@contextlib.contextmanager +def fresh(name, *, oldapi=False): + with util.uncache(name): + with import_helper.frozen_modules(): + if oldapi: + with deprecated(): + yield + else: + yield + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' + + +class ExecModuleTests(abc.LoaderTests): + + def exec_module(self, name, origname=None): + with import_helper.frozen_modules(): + is_package = self.machinery.FrozenImporter.is_package(name) + spec = self.machinery.ModuleSpec( + name, + self.machinery.FrozenImporter, + origin='frozen', + is_package=is_package, + loader_state=types.SimpleNamespace( + origname=origname or name, + filename=resolve_stdlib_file(origname or name, is_package), + ), + ) + module = types.ModuleType(name) + module.__spec__ = spec + assert not hasattr(module, 'initialized') + + with fresh(name): + self.machinery.FrozenImporter.exec_module(module) + with captured_stdout() as stdout: + module.main() + + self.assertTrue(module.initialized) + self.assertHasAttr(module, '__spec__') + self.assertEqual(module.__spec__.origin, 'frozen') + return module, stdout.getvalue() + + def test_module(self): + name = '__hello__' + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + self.assertEqual(output, 'Hello world!\n') + self.assertHasAttr(module, '__spec__') + self.assertEqual(module.__spec__.loader_state.origname, name) + + def test_package(self): + name = '__phello__' + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + attr_value = getattr(module, attr) + self.assertEqual(attr_value, value, + 'for {name}.{attr}, {given!r} != {expected!r}'.format( + name=name, attr=attr, given=attr_value, + expected=value)) + self.assertEqual(output, 'Hello world!\n') + self.assertEqual(module.__spec__.loader_state.origname, name) + + def test_lacking_parent(self): + name = '__phello__.spam' + with util.uncache('__phello__'): + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + attr_value = getattr(module, attr) + self.assertEqual(attr_value, value, + 'for {name}.{attr}, {given} != {expected!r}'.format( + name=name, attr=attr, given=attr_value, + expected=value)) + self.assertEqual(output, 'Hello world!\n') + + def test_module_repr_indirect_through_spec(self): + name = '__hello__' + module, output = self.exec_module(name) + self.assertEqual(repr(module), + "") + + # No way to trigger an error in a frozen module. + test_state_after_failure = None + + def test_unloadable(self): + with import_helper.frozen_modules(): + assert self.machinery.FrozenImporter.find_spec('_not_real') is None + with self.assertRaises(ImportError) as cm: + self.exec_module('_not_real') + self.assertEqual(cm.exception.name, '_not_real') + + +(Frozen_ExecModuleTests, + Source_ExecModuleTests + ) = util.test_both(ExecModuleTests, machinery=machinery) + + +class InspectLoaderTests: + + """Tests for the InspectLoader methods for FrozenImporter.""" + + def test_get_code(self): + # Make sure that the code object is good. + name = '__hello__' + with import_helper.frozen_modules(): + code = self.machinery.FrozenImporter.get_code(name) + mod = types.ModuleType(name) + exec(code, mod.__dict__) + with captured_stdout() as stdout: + mod.main() + self.assertHasAttr(mod, 'initialized') + self.assertEqual(stdout.getvalue(), 'Hello world!\n') + + def test_get_source(self): + # Should always return None. + with import_helper.frozen_modules(): + result = self.machinery.FrozenImporter.get_source('__hello__') + self.assertIsNone(result) + + def test_is_package(self): + # Should be able to tell what is a package. + test_for = (('__hello__', False), ('__phello__', True), + ('__phello__.spam', False)) + for name, is_package in test_for: + with import_helper.frozen_modules(): + result = self.machinery.FrozenImporter.is_package(name) + self.assertEqual(bool(result), is_package) + + def test_failure(self): + # Raise ImportError for modules that are not frozen. + for meth_name in ('get_code', 'get_source', 'is_package'): + method = getattr(self.machinery.FrozenImporter, meth_name) + with self.assertRaises(ImportError) as cm: + with import_helper.frozen_modules(): + method('importlib') + self.assertEqual(cm.exception.name, 'importlib') + +(Frozen_ILTests, + Source_ILTests + ) = util.test_both(InspectLoaderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/__init__.py b/stdlib/test/test_importlib/import_/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/import_/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/import_/__main__.py b/stdlib/test/test_importlib/import_/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/import_/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/import_/test___loader__.py b/stdlib/test/test_importlib/import_/test___loader__.py new file mode 100644 index 000000000..858b37eff --- /dev/null +++ b/stdlib/test/test_importlib/import_/test___loader__.py @@ -0,0 +1,34 @@ +from importlib import machinery +import unittest + +from test.test_importlib import util + + +class SpecLoaderMock: + + def find_spec(self, fullname, path=None, target=None): + return machinery.ModuleSpec(fullname, self) + + def create_module(self, spec): + return None + + def exec_module(self, module): + pass + + +class SpecLoaderAttributeTests: + + def test___loader__(self): + loader = SpecLoaderMock() + with util.uncache('blah'), util.import_state(meta_path=[loader]): + module = self.__import__('blah') + self.assertEqual(loader, module.__loader__) + + +(Frozen_SpecTests, + Source_SpecTests + ) = util.test_both(SpecLoaderAttributeTests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test___package__.py b/stdlib/test/test_importlib/import_/test___package__.py new file mode 100644 index 000000000..7130c99a6 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test___package__.py @@ -0,0 +1,152 @@ +"""PEP 366 ("Main module explicit relative imports") specifies the +semantics for the __package__ attribute on modules. This attribute is +used, when available, to detect which package a module belongs to (instead +of using the typical __path__/__name__ test). + +""" +import unittest +import warnings +from test.test_importlib import util + + +class Using__package__: + + """Use of __package__ supersedes the use of __name__/__path__ to calculate + what package a module belongs to. The basic algorithm is [__package__]:: + + def resolve_name(name, package, level): + level -= 1 + base = package.rsplit('.', level)[0] + return '{0}.{1}'.format(base, name) + + But since there is no guarantee that __package__ has been set (or not been + set to None [None]), there has to be a way to calculate the attribute's value + [__name__]:: + + def calc_package(caller_name, has___path__): + if has__path__: + return caller_name + else: + return caller_name.rsplit('.', 1)[0] + + Then the normal algorithm for relative name imports can proceed as if + __package__ had been set. + + """ + + def import_module(self, globals_): + with self.mock_modules('pkg.__init__', 'pkg.fake') as importer: + with util.import_state(meta_path=[importer]): + self.__import__('pkg.fake') + module = self.__import__('', + globals=globals_, + fromlist=['attr'], level=2) + return module + + def test_using___package__(self): + # [__package__] + module = self.import_module({'__package__': 'pkg.fake'}) + self.assertEqual(module.__name__, 'pkg') + + def test_using___name__(self): + # [__name__] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + module = self.import_module({'__name__': 'pkg.fake', + '__path__': []}) + self.assertEqual(module.__name__, 'pkg') + + def test_warn_when_using___name__(self): + with self.assertWarns(ImportWarning): + self.import_module({'__name__': 'pkg.fake', '__path__': []}) + + def test_None_as___package__(self): + # [None] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + module = self.import_module({ + '__name__': 'pkg.fake', '__path__': [], '__package__': None }) + self.assertEqual(module.__name__, 'pkg') + + def test_spec_fallback(self): + # If __package__ isn't defined, fall back on __spec__.parent. + module = self.import_module({'__spec__': FakeSpec('pkg.fake')}) + self.assertEqual(module.__name__, 'pkg') + + def test_warn_when_package_and_spec_disagree(self): + # Raise a DeprecationWarning if __package__ != __spec__.parent. + with self.assertWarns(DeprecationWarning): + self.import_module({'__package__': 'pkg.fake', + '__spec__': FakeSpec('pkg.fakefake')}) + + def test_bad__package__(self): + globals = {'__package__': ''} + with self.assertRaises(ModuleNotFoundError): + self.__import__('', globals, {}, ['relimport'], 1) + + def test_bunk__package__(self): + globals = {'__package__': 42} + with self.assertRaises(TypeError): + self.__import__('', globals, {}, ['relimport'], 1) + + +class FakeSpec: + def __init__(self, parent): + self.parent = parent + + +class Using__package__PEP451(Using__package__): + mock_modules = util.mock_spec + + +(Frozen_UsingPackagePEP451, + Source_UsingPackagePEP451 + ) = util.test_both(Using__package__PEP451, __import__=util.__import__) + + +class Setting__package__: + + """Because __package__ is a new feature, it is not always set by a loader. + Import will set it as needed to help with the transition to relying on + __package__. + + For a top-level module, __package__ is set to None [top-level]. For a + package __name__ is used for __package__ [package]. For submodules the + value is __name__.rsplit('.', 1)[0] [submodule]. + + """ + + __import__ = util.__import__['Source'] + + # [top-level] + def test_top_level(self): + with self.mock_modules('top_level') as mock: + with util.import_state(meta_path=[mock]): + del mock['top_level'].__package__ + module = self.__import__('top_level') + self.assertEqual(module.__package__, '') + + # [package] + def test_package(self): + with self.mock_modules('pkg.__init__') as mock: + with util.import_state(meta_path=[mock]): + del mock['pkg'].__package__ + module = self.__import__('pkg') + self.assertEqual(module.__package__, 'pkg') + + # [submodule] + def test_submodule(self): + with self.mock_modules('pkg.__init__', 'pkg.mod') as mock: + with util.import_state(meta_path=[mock]): + del mock['pkg.mod'].__package__ + pkg = self.__import__('pkg.mod') + module = getattr(pkg, 'mod') + self.assertEqual(module.__package__, 'pkg') + + +class Setting__package__PEP451(Setting__package__, unittest.TestCase): + mock_modules = util.mock_spec + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_api.py b/stdlib/test/test_importlib/import_/test_api.py new file mode 100644 index 000000000..d6ad590b3 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_api.py @@ -0,0 +1,145 @@ +from test.test_importlib import util + +from importlib import machinery +import sys +import types +import unittest +import warnings + +PKG_NAME = 'fine' +SUBMOD_NAME = 'fine.bogus' + + +class BadSpecFinderLoader: + @classmethod + def find_spec(cls, fullname, path=None, target=None): + if fullname == SUBMOD_NAME: + spec = machinery.ModuleSpec(fullname, cls) + return spec + + @staticmethod + def create_module(spec): + return None + + @staticmethod + def exec_module(module): + if module.__name__ == SUBMOD_NAME: + raise ImportError('I cannot be loaded!') + + +class BadLoaderFinder: + @classmethod + def load_module(cls, fullname): + if fullname == SUBMOD_NAME: + raise ImportError('I cannot be loaded!') + + +class APITest: + + """Test API-specific details for __import__ (e.g. raising the right + exception when passing in an int for the module name).""" + + def test_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + util.import_importlib('some module that does not exist') + + def test_name_requires_rparition(self): + # Raise TypeError if a non-string is passed in for the module name. + with self.assertRaises(TypeError): + self.__import__(42) + + def test_negative_level(self): + # Raise ValueError when a negative level is specified. + # PEP 328 did away with sys.module None entries and the ambiguity of + # absolute/relative imports. + with self.assertRaises(ValueError): + self.__import__('os', globals(), level=-1) + + def test_nonexistent_fromlist_entry(self): + # If something in fromlist doesn't exist, that's okay. + # issue15715 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = ['XXX'] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME): + sys.modules[PKG_NAME] = mod + self.__import__(PKG_NAME, fromlist=['not here']) + + def test_fromlist_load_error_propagates(self): + # If something in fromlist triggers an exception not related to not + # existing, let that exception propagate. + # issue15316 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = ['XXX'] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME): + sys.modules[PKG_NAME] = mod + with self.assertRaises(ImportError): + self.__import__(PKG_NAME, + fromlist=[SUBMOD_NAME.rpartition('.')[-1]]) + + def test_blocked_fromlist(self): + # If fromlist entry is None, let a ModuleNotFoundError propagate. + # issue31642 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = [] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME, SUBMOD_NAME): + sys.modules[PKG_NAME] = mod + sys.modules[SUBMOD_NAME] = None + with self.assertRaises(ModuleNotFoundError) as cm: + self.__import__(PKG_NAME, + fromlist=[SUBMOD_NAME.rpartition('.')[-1]]) + self.assertEqual(cm.exception.name, SUBMOD_NAME) + + +class OldAPITests(APITest): + bad_finder_loader = BadLoaderFinder + + def test_raises_ModuleNotFoundError(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_raises_ModuleNotFoundError() + + def test_name_requires_rparition(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_name_requires_rparition() + + def test_negative_level(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_negative_level() + + def test_nonexistent_fromlist_entry(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_nonexistent_fromlist_entry() + + def test_fromlist_load_error_propagates(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_fromlist_load_error_propagates + + def test_blocked_fromlist(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_blocked_fromlist() + + +(Frozen_OldAPITests, + Source_OldAPITests + ) = util.test_both(OldAPITests, __import__=util.__import__) + + +class SpecAPITests(APITest): + bad_finder_loader = BadSpecFinderLoader + + +(Frozen_SpecAPITests, + Source_SpecAPITests + ) = util.test_both(SpecAPITests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_caching.py b/stdlib/test/test_importlib/import_/test_caching.py new file mode 100644 index 000000000..718e7d041 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_caching.py @@ -0,0 +1,97 @@ +"""Test that sys.modules is used properly by import.""" +from test.test_importlib import util +import sys +from types import MethodType +import unittest +import warnings + + +class UseCache: + + """When it comes to sys.modules, import prefers it over anything else. + + Once a name has been resolved, sys.modules is checked to see if it contains + the module desired. If so, then it is returned [use cache]. If it is not + found, then the proper steps are taken to perform the import, but + sys.modules is still used to return the imported module (e.g., not what a + loader returns) [from cache on return]. This also applies to imports of + things contained within a package and thus get assigned as an attribute + [from cache to attribute] or pulled in thanks to a fromlist import + [from cache for fromlist]. But if sys.modules contains None then + ImportError is raised [None in cache]. + + """ + + def test_using_cache(self): + # [use cache] + module_to_use = "some module found!" + with util.uncache('some_module'): + sys.modules['some_module'] = module_to_use + module = self.__import__('some_module') + self.assertEqual(id(module_to_use), id(module)) + + def test_None_in_cache(self): + #[None in cache] + name = 'using_None' + with util.uncache(name): + sys.modules[name] = None + with self.assertRaises(ImportError) as cm: + self.__import__(name) + self.assertEqual(cm.exception.name, name) + + +(Frozen_UseCache, + Source_UseCache + ) = util.test_both(UseCache, __import__=util.__import__) + + +class ImportlibUseCache(UseCache, unittest.TestCase): + + # Pertinent only to PEP 302; exec_module() doesn't return a module. + + __import__ = util.__import__['Source'] + + def create_mock(self, *names, return_=None): + mock = util.mock_spec(*names) + original_spec = mock.find_spec + def find_spec(self, fullname, path, target=None): + return original_spec(fullname) + mock.find_spec = MethodType(find_spec, mock) + return mock + + # __import__ inconsistent between loaders and built-in import when it comes + # to when to use the module in sys.modules and when not to. + def test_using_cache_after_loader(self): + # [from cache on return] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with self.create_mock('module') as mock: + with util.import_state(meta_path=[mock]): + module = self.__import__('module') + self.assertEqual(id(module), id(sys.modules['module'])) + + # See test_using_cache_after_loader() for reasoning. + def test_using_cache_for_assigning_to_attribute(self): + # [from cache to attribute] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with self.create_mock('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module') + self.assertHasAttr(module, 'module') + self.assertEqual(id(module.module), + id(sys.modules['pkg.module'])) + + # See test_using_cache_after_loader() for reasoning. + def test_using_cache_for_fromlist(self): + # [from cache for fromlist] + with self.create_mock('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['module']) + self.assertHasAttr(module, 'module') + self.assertEqual(id(module.module), + id(sys.modules['pkg.module'])) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_fromlist.py b/stdlib/test/test_importlib/import_/test_fromlist.py new file mode 100644 index 000000000..feccc7be0 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_fromlist.py @@ -0,0 +1,175 @@ +"""Test that the semantics relating to the 'fromlist' argument are correct.""" +from test.test_importlib import util +import warnings +import unittest + + +class ReturnValue: + + """The use of fromlist influences what import returns. + + If direct ``import ...`` statement is used, the root module or package is + returned [import return]. But if fromlist is set, then the specified module + is actually returned (whether it is a relative import or not) + [from return]. + + """ + + def test_return_from_import(self): + # [import return] + with util.mock_spec('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module') + self.assertEqual(module.__name__, 'pkg') + + def test_return_from_from_import(self): + # [from return] + with util.mock_spec('pkg.__init__', 'pkg.module')as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module', fromlist=['attr']) + self.assertEqual(module.__name__, 'pkg.module') + + +(Frozen_ReturnValue, + Source_ReturnValue + ) = util.test_both(ReturnValue, __import__=util.__import__) + + +class HandlingFromlist: + + """Using fromlist triggers different actions based on what is being asked + of it. + + If fromlist specifies an object on a module, nothing special happens + [object case]. This is even true if the object does not exist [bad object]. + + If a package is being imported, then what is listed in fromlist may be + treated as a module to be imported [module]. And this extends to what is + contained in __all__ when '*' is imported [using *]. And '*' does not need + to be the only name in the fromlist [using * with others]. + + """ + + def test_object(self): + # [object case] + with util.mock_spec('module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('module', fromlist=['attr']) + self.assertEqual(module.__name__, 'module') + + def test_nonexistent_object(self): + # [bad object] + with util.mock_spec('module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('module', fromlist=['non_existent']) + self.assertEqual(module.__name__, 'module') + self.assertNotHasAttr(module, 'non_existent') + + def test_module_from_package(self): + # [module] + with util.mock_spec('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['module']) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.__name__, 'pkg.module') + + def test_nonexistent_from_package(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['non_existent']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, 'non_existent') + + def test_module_from_package_triggers_ModuleNotFoundError(self): + # If a submodule causes an ModuleNotFoundError because it tries + # to import a module which doesn't exist, that should let the + # ModuleNotFoundError propagate. + def module_code(): + import i_do_not_exist + with util.mock_spec('pkg.__init__', 'pkg.mod', + module_code={'pkg.mod': module_code}) as importer: + with util.import_state(meta_path=[importer]): + with self.assertRaises(ModuleNotFoundError) as exc: + self.__import__('pkg', fromlist=['mod']) + self.assertEqual('i_do_not_exist', exc.exception.name) + + def test_empty_string(self): + with util.mock_spec('pkg.__init__', 'pkg.mod') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.mod', fromlist=['']) + self.assertEqual(module.__name__, 'pkg.mod') + + def basic_star_test(self, fromlist=['*']): + # [using *] + with util.mock_spec('pkg.__init__', 'pkg.module') as mock: + with util.import_state(meta_path=[mock]): + mock['pkg'].__all__ = ['module'] + module = self.__import__('pkg', fromlist=fromlist) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.__name__, 'pkg.module') + + def test_using_star(self): + # [using *] + self.basic_star_test() + + def test_fromlist_as_tuple(self): + self.basic_star_test(('*',)) + + def test_star_with_others(self): + # [using * with others] + context = util.mock_spec('pkg.__init__', 'pkg.module1', 'pkg.module2') + with context as mock: + with util.import_state(meta_path=[mock]): + mock['pkg'].__all__ = ['module1'] + module = self.__import__('pkg', fromlist=['module2', '*']) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module1') + self.assertHasAttr(module, 'module2') + self.assertEqual(module.module1.__name__, 'pkg.module1') + self.assertEqual(module.module2.__name__, 'pkg.module2') + + def test_nonexistent_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + importer['pkg'].__all__ = ['non_existent'] + module = self.__import__('pkg', fromlist=['*']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, 'non_existent') + + def test_star_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + importer['pkg'].__all__ = ['*'] + module = self.__import__('pkg', fromlist=['*']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, '*') + + def test_invalid_type(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]), \ + warnings.catch_warnings(): + warnings.simplefilter('error', BytesWarning) + with self.assertRaisesRegex(TypeError, r'\bfrom\b'): + self.__import__('pkg', fromlist=[b'attr']) + with self.assertRaisesRegex(TypeError, r'\bfrom\b'): + self.__import__('pkg', fromlist=iter([b'attr'])) + + def test_invalid_type_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]), \ + warnings.catch_warnings(): + warnings.simplefilter('error', BytesWarning) + importer['pkg'].__all__ = [b'attr'] + with self.assertRaisesRegex(TypeError, r'\bpkg\.__all__\b'): + self.__import__('pkg', fromlist=['*']) + + +(Frozen_FromList, + Source_FromList + ) = util.test_both(HandlingFromlist, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_helpers.py b/stdlib/test/test_importlib/import_/test_helpers.py new file mode 100644 index 000000000..550f88d1d --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_helpers.py @@ -0,0 +1,184 @@ +"""Tests for helper functions used by import.c .""" + +from importlib import _bootstrap_external, machinery +import os.path +from types import ModuleType, SimpleNamespace +import unittest +import warnings + +from .. import util + + +class FixUpModuleTests: + + def test_no_loader_but_spec(self): + loader = object() + name = "hello" + path = "hello.py" + spec = machinery.ModuleSpec(name, loader) + ns = {"__spec__": spec} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__spec__": spec, "__loader__": loader, "__file__": path, + "__cached__": None} + self.assertEqual(ns, expected) + + def test_no_loader_no_spec_but_sourceless(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path, path) + + expected = {"__file__": path, "__cached__": path} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertEqual(spec.cached, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourcelessFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + def test_no_loader_no_spec_but_source(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__file__": path, "__cached__": None} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourceFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + +FrozenFixUpModuleTests, SourceFixUpModuleTests = util.test_both(FixUpModuleTests) + + +class TestBlessMyLoader(unittest.TestCase): + # GH#86298 is part of the migration away from module attributes and toward + # __spec__ attributes. There are several cases to test here. This will + # have to change in Python 3.14 when we actually remove/ignore __loader__ + # in favor of requiring __spec__.loader. + + def test_gh86298_no_loader_and_no_spec(self): + bar = ModuleType('bar') + del bar.__loader__ + del bar.__spec__ + # 2022-10-06(warsaw): For backward compatibility with the + # implementation in _warnings.c, this can't raise an + # AttributeError. See _bless_my_loader() in _bootstrap_external.py + # If working with a module: + ## self.assertRaises( + ## AttributeError, _bootstrap_external._bless_my_loader, + ## bar.__dict__) + self.assertIsNone(_bootstrap_external._bless_my_loader(bar.__dict__)) + + def test_gh86298_loader_is_none_and_no_spec(self): + bar = ModuleType('bar') + bar.__loader__ = None + del bar.__spec__ + # 2022-10-06(warsaw): For backward compatibility with the + # implementation in _warnings.c, this can't raise an + # AttributeError. See _bless_my_loader() in _bootstrap_external.py + # If working with a module: + ## self.assertRaises( + ## AttributeError, _bootstrap_external._bless_my_loader, + ## bar.__dict__) + self.assertIsNone(_bootstrap_external._bless_my_loader(bar.__dict__)) + + def test_gh86298_no_loader_and_spec_is_none(self): + bar = ModuleType('bar') + del bar.__loader__ + bar.__spec__ = None + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_is_none_and_spec_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = None + bar.__spec__ = None + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_is_none_and_spec_loader_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = None + bar.__spec__ = SimpleNamespace(loader=None) + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_spec(self): + bar = ModuleType('bar') + bar.__loader__ = object() + del bar.__spec__ + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_spec_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = None + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_spec_loader(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = SimpleNamespace() + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_and_spec_loader_disagree(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = SimpleNamespace(loader=object()) + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_loader_and_no_spec_loader(self): + bar = ModuleType('bar') + del bar.__loader__ + bar.__spec__ = SimpleNamespace() + self.assertRaises( + AttributeError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_loader_with_spec_loader_okay(self): + bar = ModuleType('bar') + del bar.__loader__ + loader = object() + bar.__spec__ = SimpleNamespace(loader=loader) + self.assertEqual( + _bootstrap_external._bless_my_loader(bar.__dict__), + loader) + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_meta_path.py b/stdlib/test/test_importlib/import_/test_meta_path.py new file mode 100644 index 000000000..4c00f6068 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_meta_path.py @@ -0,0 +1,127 @@ +from test.test_importlib import util +import importlib._bootstrap +import sys +from types import MethodType +import unittest +import warnings + + +class CallingOrder: + + """Calls to the importers on sys.meta_path happen in order that they are + specified in the sequence, starting with the first importer + [first called], and then continuing on down until one is found that doesn't + return None [continuing].""" + + + def test_first_called(self): + # [first called] + mod = 'top_level' + with util.mock_spec(mod) as first, util.mock_spec(mod) as second: + with util.import_state(meta_path=[first, second]): + self.assertIs(self.__import__(mod), first.modules[mod]) + + def test_continuing(self): + # [continuing] + mod_name = 'for_real' + with util.mock_spec('nonexistent') as first, \ + util.mock_spec(mod_name) as second: + first.find_spec = lambda self, fullname, path=None, parent=None: None + with util.import_state(meta_path=[first, second]): + self.assertIs(self.__import__(mod_name), second.modules[mod_name]) + + def test_empty(self): + # Raise an ImportWarning if sys.meta_path is empty. + module_name = 'nothing' + try: + del sys.modules[module_name] + except KeyError: + pass + with util.import_state(meta_path=[]): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + self.assertIsNone(importlib._bootstrap._find_spec('nothing', + None)) + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, ImportWarning) + + +(Frozen_CallingOrder, + Source_CallingOrder + ) = util.test_both(CallingOrder, __import__=util.__import__) + + +class CallSignature: + + """If there is no __path__ entry on the parent module, then 'path' is None + [no path]. Otherwise, the value for __path__ is passed in for the 'path' + argument [path set].""" + + def log_finder(self, importer): + fxn = getattr(importer, self.finder_name) + log = [] + def wrapper(self, *args, **kwargs): + log.append([args, kwargs]) + return fxn(*args, **kwargs) + return log, wrapper + + def test_no_path(self): + # [no path] + mod_name = 'top_level' + assert '.' not in mod_name + with self.mock_modules(mod_name) as importer: + log, wrapped_call = self.log_finder(importer) + setattr(importer, self.finder_name, MethodType(wrapped_call, importer)) + with util.import_state(meta_path=[importer]): + self.__import__(mod_name) + assert len(log) == 1 + args = log[0][0] + # Assuming all arguments are positional. + self.assertEqual(args[0], mod_name) + self.assertIsNone(args[1]) + + def test_with_path(self): + # [path set] + pkg_name = 'pkg' + mod_name = pkg_name + '.module' + path = [42] + assert '.' in mod_name + with self.mock_modules(pkg_name+'.__init__', mod_name) as importer: + importer.modules[pkg_name].__path__ = path + log, wrapped_call = self.log_finder(importer) + setattr(importer, self.finder_name, MethodType(wrapped_call, importer)) + with util.import_state(meta_path=[importer]): + self.__import__(mod_name) + assert len(log) == 2 + args = log[1][0] + kwargs = log[1][1] + # Assuming all arguments are positional. + self.assertFalse(kwargs) + self.assertEqual(args[0], mod_name) + self.assertIs(args[1], path) + +class CallSignoreSuppressImportWarning(CallSignature): + + def test_no_path(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_no_path() + + def test_with_path(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_no_path() + + +class CallSignaturePEP451(CallSignature): + mock_modules = util.mock_spec + finder_name = 'find_spec' + + +(Frozen_CallSignaturePEP451, + Source_CallSignaturePEP451 + ) = util.test_both(CallSignaturePEP451, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_packages.py b/stdlib/test/test_importlib/import_/test_packages.py new file mode 100644 index 000000000..0c29d6083 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_packages.py @@ -0,0 +1,110 @@ +from test.test_importlib import util +import sys +import unittest +from test.support import import_helper + + +class ParentModuleTests: + + """Importing a submodule should import the parent modules.""" + + def test_import_parent(self): + with util.mock_spec('pkg.__init__', 'pkg.module') as mock: + with util.import_state(meta_path=[mock]): + module = self.__import__('pkg.module') + self.assertIn('pkg', sys.modules) + + def test_bad_parent(self): + with util.mock_spec('pkg.module') as mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises(ImportError) as cm: + self.__import__('pkg.module') + self.assertEqual(cm.exception.name, 'pkg') + + def test_raising_parent_after_importing_child(self): + def __init__(): + import pkg.module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.module', + module_code={'pkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises(ZeroDivisionError): + self.__import__('pkg') + self.assertNotIn('pkg', sys.modules) + self.assertIn('pkg.module', sys.modules) + with self.assertRaises(ZeroDivisionError): + self.__import__('pkg.module') + self.assertNotIn('pkg', sys.modules) + self.assertIn('pkg.module', sys.modules) + + def test_raising_parent_after_relative_importing_child(self): + def __init__(): + from . import module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.module', + module_code={'pkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises((ZeroDivisionError, ImportError)): + # This raises ImportError on the "from . import module" + # line, not sure why. + self.__import__('pkg') + self.assertNotIn('pkg', sys.modules) + with self.assertRaises((ZeroDivisionError, ImportError)): + self.__import__('pkg.module') + self.assertNotIn('pkg', sys.modules) + # XXX False + #self.assertIn('pkg.module', sys.modules) + + def test_raising_parent_after_double_relative_importing_child(self): + def __init__(): + from ..subpkg import module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.subpkg.__init__', + 'pkg.subpkg.module', + module_code={'pkg.subpkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises((ZeroDivisionError, ImportError)): + # This raises ImportError on the "from ..subpkg import module" + # line, not sure why. + self.__import__('pkg.subpkg') + self.assertNotIn('pkg.subpkg', sys.modules) + with self.assertRaises((ZeroDivisionError, ImportError)): + self.__import__('pkg.subpkg.module') + self.assertNotIn('pkg.subpkg', sys.modules) + # XXX False + #self.assertIn('pkg.subpkg.module', sys.modules) + + def test_module_not_package(self): + # Try to import a submodule from a non-package should raise ImportError. + assert not hasattr(sys, '__path__') + with self.assertRaises(ImportError) as cm: + self.__import__('sys.no_submodules_here') + self.assertEqual(cm.exception.name, 'sys.no_submodules_here') + + def test_module_not_package_but_side_effects(self): + # If a module injects something into sys.modules as a side-effect, then + # pick up on that fact. + name = 'mod' + subname = name + '.b' + def module_injection(): + sys.modules[subname] = 'total bunk' + mock_spec = util.mock_spec('mod', + module_code={'mod': module_injection}) + with mock_spec as mock: + with util.import_state(meta_path=[mock]): + try: + submodule = self.__import__(subname) + finally: + import_helper.unload(subname) + + +(Frozen_ParentTests, + Source_ParentTests + ) = util.test_both(ParentModuleTests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_path.py b/stdlib/test/test_importlib/import_/test_path.py new file mode 100644 index 000000000..79e0bdca9 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_path.py @@ -0,0 +1,269 @@ +from test.support import os_helper +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + +import os +import sys +import tempfile +from types import ModuleType +import unittest +import warnings +import zipimport + + +class FinderTests: + + """Tests for PathFinder.""" + + find = None + check_found = None + + def test_failure(self): + # Test None returned upon not finding a suitable loader. + module = '' + with util.import_state(): + self.assertIsNone(self.find(module)) + + def test_sys_path(self): + # Test that sys.path is used when 'path' is None. + # Implicitly tests that sys.path_importer_cache is used. + module = '' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}, + path=[path]): + found = self.find(module) + self.check_found(found, importer) + + def test_path(self): + # Test that 'path' is used when set. + # Implicitly tests that sys.path_importer_cache is used. + module = '' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}): + found = self.find(module, [path]) + self.check_found(found, importer) + + def test_empty_list(self): + # An empty list should not count as asking for sys.path. + module = 'module' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}, + path=[path]): + self.assertIsNone(self.find('module', [])) + + def test_path_hooks(self): + # Test that sys.path_hooks is used. + # Test that sys.path_importer_cache is set. + module = '' + path = '' + importer = util.mock_spec(module) + hook = util.mock_path_hook(path, importer=importer) + with util.import_state(path_hooks=[hook]): + found = self.find(module, [path]) + self.check_found(found, importer) + self.assertIn(path, sys.path_importer_cache) + self.assertIs(sys.path_importer_cache[path], importer) + + def test_empty_path_hooks(self): + # Test that if sys.path_hooks is empty a warning is raised, + # sys.path_importer_cache gets None set, and PathFinder returns None. + path_entry = 'bogus_path' + with util.import_state(path_importer_cache={}, path_hooks=[], + path=[path_entry]): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', ImportWarning) + warnings.simplefilter('ignore', DeprecationWarning) + self.assertIsNone(self.find('os')) + self.assertIsNone(sys.path_importer_cache[path_entry]) + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, ImportWarning) + + def test_path_importer_cache_empty_string(self): + # The empty string should create a finder using the cwd. + path = '' + module = '' + importer = util.mock_spec(module) + hook = util.mock_path_hook(os.getcwd(), importer=importer) + with util.import_state(path=[path], path_hooks=[hook]): + found = self.find(module) + self.check_found(found, importer) + self.assertIn(os.getcwd(), sys.path_importer_cache) + + def test_None_on_sys_path(self): + # Putting None in sys.path[0] caused an import regression from Python + # 3.2: http://bugs.python.org/issue16514 + new_path = sys.path[:] + new_path.insert(0, None) + new_path_importer_cache = sys.path_importer_cache.copy() + new_path_importer_cache.pop(None, None) + new_path_hooks = [zipimport.zipimporter, + self.machinery.FileFinder.path_hook( + *self.importlib._bootstrap_external._get_supported_file_loaders())] + missing = object() + email = sys.modules.pop('email', missing) + try: + with util.import_state(meta_path=sys.meta_path[:], + path=new_path, + path_importer_cache=new_path_importer_cache, + path_hooks=new_path_hooks): + module = self.importlib.import_module('email') + self.assertIsInstance(module, ModuleType) + finally: + if email is not missing: + sys.modules['email'] = email + + def test_finder_with_find_spec(self): + class TestFinder: + spec = None + def find_spec(self, fullname, target=None): + return self.spec + path = 'testing path' + with util.import_state(path_importer_cache={path: TestFinder()}): + self.assertIsNone( + self.machinery.PathFinder.find_spec('whatever', [path])) + success_finder = TestFinder() + success_finder.spec = self.machinery.ModuleSpec('whatever', __loader__) + with util.import_state(path_importer_cache={path: success_finder}): + got = self.machinery.PathFinder.find_spec('whatever', [path]) + self.assertEqual(got, success_finder.spec) + + def test_deleted_cwd(self): + # Issue #22834 + old_dir = os.getcwd() + self.addCleanup(os.chdir, old_dir) + new_dir = tempfile.mkdtemp() + try: + os.chdir(new_dir) + try: + os.rmdir(new_dir) + except OSError: + # EINVAL on Solaris, EBUSY on AIX, ENOTEMPTY on Windows + self.skipTest("platform does not allow " + "the deletion of the cwd") + except: + os.chdir(old_dir) + os.rmdir(new_dir) + raise + + with util.import_state(path=['']): + # Do not want FileNotFoundError raised. + self.assertIsNone(self.machinery.PathFinder.find_spec('whatever')) + + @os_helper.skip_unless_working_chmod + def test_permission_error_cwd(self): + # gh-115911: Test that an unreadable CWD does not break imports, in + # particular during early stages of interpreter startup. + + def noop_hook(*args): + raise ImportError + + with ( + os_helper.temp_dir() as new_dir, + os_helper.save_mode(new_dir), + os_helper.change_cwd(new_dir), + util.import_state(path=[''], path_hooks=[noop_hook]), + ): + # chmod() is done here (inside the 'with' block) because the order + # of teardown operations cannot be the reverse of setup order. See + # https://github.com/python/cpython/pull/116131#discussion_r1739649390 + try: + os.chmod(new_dir, 0o000) + except OSError: + self.skipTest("platform does not allow " + "changing mode of the cwd") + + # Do not want PermissionError raised. + self.assertIsNone(self.machinery.PathFinder.find_spec('whatever')) + + def test_invalidate_caches_finders(self): + # Finders with an invalidate_caches() method have it called. + class FakeFinder: + def __init__(self): + self.called = False + + def invalidate_caches(self): + self.called = True + + key = os.path.abspath('finder_to_invalidate') + cache = {'leave_alone': object(), key: FakeFinder()} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertTrue(cache[key].called) + + def test_invalidate_caches_clear_out_None(self): + # Clear out None in sys.path_importer_cache() when invalidating caches. + cache = {'clear_out': None} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertEqual(len(cache), 0) + + def test_invalidate_caches_clear_out_relative_path(self): + class FakeFinder: + def invalidate_caches(self): + pass + + cache = {'relative_path': FakeFinder()} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertEqual(cache, {}) + + +class FindModuleTests(FinderTests): + def find(self, *args, **kwargs): + spec = self.machinery.PathFinder.find_spec(*args, **kwargs) + return None if spec is None else spec.loader + + def check_found(self, found, importer): + self.assertIs(found, importer) + + +(Frozen_FindModuleTests, + Source_FindModuleTests +) = util.test_both(FindModuleTests, importlib=importlib, machinery=machinery) + + +class FindSpecTests(FinderTests): + def find(self, *args, **kwargs): + return self.machinery.PathFinder.find_spec(*args, **kwargs) + def check_found(self, found, importer): + self.assertIs(found.loader, importer) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, importlib=importlib, machinery=machinery) + + +class PathEntryFinderTests: + + def test_finder_with_failing_find_spec(self): + class Finder: + path_location = 'test_finder_with_find_spec' + def __init__(self, path): + if path != self.path_location: + raise ImportError + + @staticmethod + def find_spec(fullname, target=None): + return None + + + with util.import_state(path=[Finder.path_location]+sys.path[:], + path_hooks=[Finder]): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + self.machinery.PathFinder.find_spec('importlib') + + +(Frozen_PEFTests, + Source_PEFTests + ) = util.test_both(PathEntryFinderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_relative_imports.py b/stdlib/test/test_importlib/import_/test_relative_imports.py new file mode 100644 index 000000000..1549cbe96 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_relative_imports.py @@ -0,0 +1,248 @@ +"""Test relative imports (PEP 328).""" +from test.test_importlib import util +import unittest +import warnings + + +class RelativeImports: + + """PEP 328 introduced relative imports. This allows for imports to occur + from within a package without having to specify the actual package name. + + A simple example is to import another module within the same package + [module from module]:: + + # From pkg.mod1 with pkg.mod2 being a module. + from . import mod2 + + This also works for getting an attribute from a module that is specified + in a relative fashion [attr from module]:: + + # From pkg.mod1. + from .mod2 import attr + + But this is in no way restricted to working between modules; it works + from [package to module],:: + + # From pkg, importing pkg.module which is a module. + from . import module + + [module to package],:: + + # Pull attr from pkg, called from pkg.module which is a module. + from . import attr + + and [package to package]:: + + # From pkg.subpkg1 (both pkg.subpkg[1,2] are packages). + from .. import subpkg2 + + The number of dots used is in no way restricted [deep import]:: + + # Import pkg.attr from pkg.pkg1.pkg2.pkg3.pkg4.pkg5. + from ...... import attr + + To prevent someone from accessing code that is outside of a package, one + cannot reach the location containing the root package itself:: + + # From pkg.__init__ [too high from package] + from .. import top_level + + # From pkg.module [too high from module] + from .. import top_level + + Relative imports are the only type of import that allow for an empty + module name for an import [empty name]. + + """ + + def relative_import_test(self, create, globals_, callback): + """Abstract out boilerplace for setting up for an import test.""" + uncache_names = [] + for name in create: + if not name.endswith('.__init__'): + uncache_names.append(name) + else: + uncache_names.append(name[:-len('.__init__')]) + with util.mock_spec(*create) as importer: + with util.import_state(meta_path=[importer]): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + for global_ in globals_: + with util.uncache(*uncache_names): + callback(global_) + + + def test_module_from_module(self): + # [module from module] + create = 'pkg.__init__', 'pkg.mod2' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.mod1'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['mod2'], level=1) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'mod2') + self.assertEqual(module.mod2.attr, 'pkg.mod2') + self.relative_import_test(create, globals_, callback) + + def test_attr_from_module(self): + # [attr from module] + create = 'pkg.__init__', 'pkg.mod2' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.mod1'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('mod2', global_, fromlist=['attr'], + level=1) + self.assertEqual(module.__name__, 'pkg.mod2') + self.assertEqual(module.attr, 'pkg.mod2') + self.relative_import_test(create, globals_, callback) + + def test_package_to_module(self): + # [package to module] + create = 'pkg.__init__', 'pkg.module' + globals_ = ({'__package__': 'pkg'}, + {'__name__': 'pkg', '__path__': ['blah']}) + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['module'], + level=1) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.attr, 'pkg.module') + self.relative_import_test(create, globals_, callback) + + def test_module_to_package(self): + # [module to package] + create = 'pkg.__init__', 'pkg.module' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.module'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['attr'], level=1) + self.assertEqual(module.__name__, 'pkg') + self.relative_import_test(create, globals_, callback) + + def test_package_to_package(self): + # [package to package] + create = ('pkg.__init__', 'pkg.subpkg1.__init__', + 'pkg.subpkg2.__init__') + globals_ = ({'__package__': 'pkg.subpkg1'}, + {'__name__': 'pkg.subpkg1', '__path__': ['blah']}) + def callback(global_): + module = self.__import__('', global_, fromlist=['subpkg2'], + level=2) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'subpkg2') + self.assertEqual(module.subpkg2.attr, 'pkg.subpkg2.__init__') + self.relative_import_test(create, globals_, callback) + + def test_deep_import(self): + # [deep import] + create = ['pkg.__init__'] + for count in range(1,6): + create.append('{0}.pkg{1}.__init__'.format( + create[-1][:-len('.__init__')], count)) + globals_ = ({'__package__': 'pkg.pkg1.pkg2.pkg3.pkg4.pkg5'}, + {'__name__': 'pkg.pkg1.pkg2.pkg3.pkg4.pkg5', + '__path__': ['blah']}) + def callback(global_): + self.__import__(globals_[0]['__package__']) + module = self.__import__('', global_, fromlist=['attr'], level=6) + self.assertEqual(module.__name__, 'pkg') + self.relative_import_test(create, globals_, callback) + + def test_too_high_from_package(self): + # [too high from package] + create = ['top_level', 'pkg.__init__'] + globals_ = ({'__package__': 'pkg'}, + {'__name__': 'pkg', '__path__': ['blah']}) + def callback(global_): + self.__import__('pkg') + with self.assertRaises(ImportError): + self.__import__('', global_, fromlist=['top_level'], + level=2) + self.relative_import_test(create, globals_, callback) + + def test_too_high_from_module(self): + # [too high from module] + create = ['top_level', 'pkg.__init__', 'pkg.module'] + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.module'} + def callback(global_): + self.__import__('pkg') + with self.assertRaises(ImportError): + self.__import__('', global_, fromlist=['top_level'], + level=2) + self.relative_import_test(create, globals_, callback) + + def test_empty_name_w_level_0(self): + # [empty name] + with self.assertRaises(ValueError): + self.__import__('') + + def test_import_from_different_package(self): + # Test importing from a different package than the caller. + # in pkg.subpkg1.mod + # from ..subpkg2 import mod + create = ['__runpy_pkg__.__init__', + '__runpy_pkg__.__runpy_pkg__.__init__', + '__runpy_pkg__.uncle.__init__', + '__runpy_pkg__.uncle.cousin.__init__', + '__runpy_pkg__.uncle.cousin.nephew'] + globals_ = {'__package__': '__runpy_pkg__.__runpy_pkg__'} + def callback(global_): + self.__import__('__runpy_pkg__.__runpy_pkg__') + module = self.__import__('uncle.cousin', globals_, {}, + fromlist=['nephew'], + level=2) + self.assertEqual(module.__name__, '__runpy_pkg__.uncle.cousin') + self.relative_import_test(create, globals_, callback) + + def test_import_relative_import_no_fromlist(self): + # Import a relative module w/ no fromlist. + create = ['crash.__init__', 'crash.mod'] + globals_ = [{'__package__': 'crash', '__name__': 'crash'}] + def callback(global_): + self.__import__('crash') + mod = self.__import__('mod', global_, {}, [], 1) + self.assertEqual(mod.__name__, 'crash.mod') + self.relative_import_test(create, globals_, callback) + + def test_relative_import_no_globals(self): + # No globals for a relative import is an error. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with self.assertRaises(KeyError): + self.__import__('sys', level=1) + + def test_relative_import_no_package(self): + with self.assertRaises(ImportError): + self.__import__('a', {'__package__': '', '__spec__': None}, + level=1) + + def test_relative_import_no_package_exists_absolute(self): + with self.assertRaises(ImportError): + self.__import__('sys', {'__package__': '', '__spec__': None}, + level=1) + + def test_malicious_relative_import(self): + # https://github.com/python/cpython/issues/134100 + # Test to make sure UAF bug with error msg doesn't come back to life + import sys + loooong = "".ljust(0x23000, "b") + name = f"a.{loooong}.c" + + with util.uncache(name): + sys.modules[name] = {} + with self.assertRaisesRegex( + KeyError, + r"'a\.b+' not in sys\.modules as expected" + ): + __import__(f"{loooong}.c", {"__package__": "a"}, level=1) + + +(Frozen_RelativeImports, + Source_RelativeImports + ) = util.test_both(RelativeImports, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/metadata/__init__.py b/stdlib/test/test_importlib/metadata/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/metadata/_context.py b/stdlib/test/test_importlib/metadata/_context.py new file mode 100644 index 000000000..8a53eb55d --- /dev/null +++ b/stdlib/test/test_importlib/metadata/_context.py @@ -0,0 +1,13 @@ +import contextlib + + +# from jaraco.context 4.3 +class suppress(contextlib.suppress, contextlib.ContextDecorator): + """ + A version of contextlib.suppress with decorator support. + + >>> @suppress(KeyError) + ... def key_error(): + ... {}[''] + >>> key_error() + """ diff --git a/stdlib/test/test_importlib/metadata/_path.py b/stdlib/test/test_importlib/metadata/_path.py new file mode 100644 index 000000000..b3cfb9cd5 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/_path.py @@ -0,0 +1,115 @@ +# from jaraco.path 3.7 + +import functools +import pathlib +from typing import Dict, Protocol, Union +from typing import runtime_checkable + + +class Symlink(str): + """ + A string indicating the target of a symlink. + """ + + +FilesSpec = Dict[str, Union[str, bytes, Symlink, 'FilesSpec']] # type: ignore + + +@runtime_checkable +class TreeMaker(Protocol): + def __truediv__(self, *args, **kwargs): ... # pragma: no cover + + def mkdir(self, **kwargs): ... # pragma: no cover + + def write_text(self, content, **kwargs): ... # pragma: no cover + + def write_bytes(self, content): ... # pragma: no cover + + def symlink_to(self, target): ... # pragma: no cover + + +def _ensure_tree_maker(obj: Union[str, TreeMaker]) -> TreeMaker: + return obj if isinstance(obj, TreeMaker) else pathlib.Path(obj) # type: ignore + + +def build( + spec: FilesSpec, + prefix: Union[str, TreeMaker] = pathlib.Path(), # type: ignore +): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... "bar.py": Symlink("baz.py"), + ... }, + ... "bing": Symlink("foo"), + ... } + >>> target = getfixture('tmp_path') + >>> build(spec, target) + >>> target.joinpath('foo/baz.py').read_text(encoding='utf-8') + '# Some code' + >>> target.joinpath('bing/bar.py').read_text(encoding='utf-8') + '# Some code' + """ + for name, contents in spec.items(): + create(contents, _ensure_tree_maker(prefix) / name) + + +@functools.singledispatch +def create(content: Union[str, bytes, FilesSpec], path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content, encoding='utf-8') + + +@create.register +def _(content: Symlink, path): + path.symlink_to(content) + + +class Recording: + """ + A TreeMaker object that records everything that would be written. + + >>> r = Recording() + >>> build({'foo': {'foo1.txt': 'yes'}, 'bar.txt': 'abc'}, r) + >>> r.record + ['foo/foo1.txt', 'bar.txt'] + """ + + def __init__(self, loc=pathlib.PurePosixPath(), record=None): + self.loc = loc + self.record = record if record is not None else [] + + def __truediv__(self, other): + return Recording(self.loc / other, self.record) + + def write_text(self, content, **kwargs): + self.record.append(str(self.loc)) + + write_bytes = write_text + + def mkdir(self, **kwargs): + return + + def symlink_to(self, target): + pass diff --git a/stdlib/test/test_importlib/metadata/data/__init__.py b/stdlib/test/test_importlib/metadata/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl b/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl new file mode 100644 index 000000000..641ab07f7 Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl differ diff --git a/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg b/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg new file mode 100644 index 000000000..cdb298a19 Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg differ diff --git a/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl b/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl new file mode 100644 index 000000000..5ca93657f Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl differ diff --git a/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py b/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py new file mode 100644 index 000000000..ba73b7433 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py @@ -0,0 +1,2 @@ +def main(): + return 'example' diff --git a/stdlib/test/test_importlib/metadata/data/sources/example/setup.py b/stdlib/test/test_importlib/metadata/data/sources/example/setup.py new file mode 100644 index 000000000..479488a03 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup + +setup( + name='example', + version='21.12', + license='Apache Software License', + packages=['example'], + entry_points={ + 'console_scripts': ['example = example:main', 'Example=example:main'], + }, +) diff --git a/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py b/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py new file mode 100644 index 000000000..de645c2e8 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py @@ -0,0 +1,2 @@ +def main(): + return "example" diff --git a/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml b/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml new file mode 100644 index 000000000..011f4751f --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] +build-backend = 'trampolim' +requires = ['trampolim'] + +[project] +name = 'example2' +version = '1.0.0' + +[project.scripts] +example = 'example2:main' diff --git a/stdlib/test/test_importlib/metadata/fixtures.py b/stdlib/test/test_importlib/metadata/fixtures.py new file mode 100644 index 000000000..826b1b325 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/fixtures.py @@ -0,0 +1,395 @@ +import sys +import copy +import json +import shutil +import pathlib +import textwrap +import functools +import contextlib + +from test.support import import_helper +from test.support import os_helper +from test.support import requires_zlib + +from . import _path +from ._path import FilesSpec + + +try: + from importlib import resources # type: ignore + + getattr(resources, 'files') + getattr(resources, 'as_file') +except (ImportError, AttributeError): + import importlib_resources as resources # type: ignore + + +@contextlib.contextmanager +def tmp_path(): + """ + Like os_helper.temp_dir, but yields a pathlib.Path. + """ + with os_helper.temp_dir() as path: + yield pathlib.Path(path) + + +@contextlib.contextmanager +def install_finder(finder): + sys.meta_path.append(finder) + try: + yield + finally: + sys.meta_path.remove(finder) + + +class Fixtures: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + +class SiteDir(Fixtures): + def setUp(self): + super().setUp() + self.site_dir = self.fixtures.enter_context(tmp_path()) + + +class OnSysPath(Fixtures): + @staticmethod + @contextlib.contextmanager + def add_sys_path(dir): + sys.path[:0] = [str(dir)] + try: + yield + finally: + sys.path.remove(str(dir)) + + def setUp(self): + super().setUp() + self.fixtures.enter_context(self.add_sys_path(self.site_dir)) + self.fixtures.enter_context(import_helper.isolated_modules()) + + +class SiteBuilder(SiteDir): + def setUp(self): + super().setUp() + for cls in self.__class__.mro(): + with contextlib.suppress(AttributeError): + build_files(cls.files, prefix=self.site_dir) + + +class DistInfoPkg(OnSysPath, SiteBuilder): + files: FilesSpec = { + "distinfo_pkg-1.0.0.dist-info": { + "METADATA": """ + Name: distinfo-pkg + Author: Steven Ma + Version: 1.0.0 + Requires-Dist: wheel >= 1.0 + Requires-Dist: pytest; extra == 'test' + Keywords: sample package + + Once upon a time + There was a distinfo pkg + """, + "RECORD": "mod.py,sha256=abc,20\n", + "entry_points.txt": """ + [entries] + main = mod:main + ns:sub = mod:main + """, + }, + "mod.py": """ + def main(): + print("hello world") + """, + } + + def make_uppercase(self): + """ + Rewrite metadata with everything uppercase. + """ + shutil.rmtree(self.site_dir / "distinfo_pkg-1.0.0.dist-info") + files = copy.deepcopy(DistInfoPkg.files) + info = files["distinfo_pkg-1.0.0.dist-info"] + info["METADATA"] = info["METADATA"].upper() + build_files(files, self.site_dir) + + +class DistInfoPkgEditable(DistInfoPkg): + """ + Package with a PEP 660 direct_url.json. + """ + + some_hash = '524127ce937f7cb65665130c695abd18ca386f60bb29687efb976faa1596fdcc' + files: FilesSpec = { + 'distinfo_pkg-1.0.0.dist-info': { + 'direct_url.json': json.dumps({ + "archive_info": { + "hash": f"sha256={some_hash}", + "hashes": {"sha256": f"{some_hash}"}, + }, + "url": "file:///path/to/distinfo_pkg-1.0.0.editable-py3-none-any.whl", + }) + }, + } + + +class DistInfoPkgWithDot(OnSysPath, SiteBuilder): + files: FilesSpec = { + "pkg_dot-1.0.0.dist-info": { + "METADATA": """ + Name: pkg.dot + Version: 1.0.0 + """, + }, + } + + +class DistInfoPkgWithDotLegacy(OnSysPath, SiteBuilder): + files: FilesSpec = { + "pkg.dot-1.0.0.dist-info": { + "METADATA": """ + Name: pkg.dot + Version: 1.0.0 + """, + }, + "pkg.lot.egg-info": { + "METADATA": """ + Name: pkg.lot + Version: 1.0.0 + """, + }, + } + + +class DistInfoPkgOffPath(SiteBuilder): + files = DistInfoPkg.files + + +class EggInfoPkg(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egginfo_pkg.egg-info": { + "PKG-INFO": """ + Name: egginfo-pkg + Author: Steven Ma + License: Unknown + Version: 1.0.0 + Classifier: Intended Audience :: Developers + Classifier: Topic :: Software Development :: Libraries + Keywords: sample package + Description: Once upon a time + There was an egginfo package + """, + "SOURCES.txt": """ + mod.py + egginfo_pkg.egg-info/top_level.txt + """, + "entry_points.txt": """ + [entries] + main = mod:main + """, + "requires.txt": """ + wheel >= 1.0; python_version >= "2.7" + [test] + pytest + """, + "top_level.txt": "mod\n", + }, + "mod.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledExternalDataFiles(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module.json + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../../../etc/jupyter/jupyter_notebook_config.d/relative.json + /etc/jupyter/jupyter_notebook_config.d/absolute.json + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_no_modules_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_no_modules-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + setup.py + egg_with_no_modules_pkg.egg-info/PKG-INFO + egg_with_no_modules_pkg.egg-info/SOURCES.txt + egg_with_no_modules_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + PKG-INFO + SOURCES.txt + top_level.txt + """, + # top_level.txt correctly reflects that no modules are installed + "top_level.txt": b"\n", + }, + } + + +class EggInfoPkgSourcesFallback(OnSysPath, SiteBuilder): + files: FilesSpec = { + "sources_fallback_pkg.egg-info": { + "PKG-INFO": "Name: sources_fallback-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + sources_fallback.py + setup.py + sources_fallback_pkg.egg-info/PKG-INFO + sources_fallback_pkg.egg-info/SOURCES.txt + """, + # missing installed-files.txt (i.e. not installed by pip) and + # missing top_level.txt (to trigger fallback to SOURCES.txt) + }, + "sources_fallback.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoFile(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egginfo_file.egg-info": """ + Metadata-Version: 1.0 + Name: egginfo_file + Version: 0.1 + Summary: An example package + Home-page: www.example.com + Author: Eric Haffa-Vee + Author-email: eric@example.coms + License: UNKNOWN + Description: UNKNOWN + Platform: UNKNOWN + """, + } + + +# dedent all text strings before writing +orig = _path.create.registry[str] +_path.create.register(str, lambda content, path: orig(DALS(content), path)) + + +build_files = _path.build + + +def build_record(file_defs): + return ''.join(f'{name},,\n' for name in record_names(file_defs)) + + +def record_names(file_defs): + recording = _path.Recording() + _path.build(file_defs, recording) + return recording.record + + +class FileBuilder: + def unicode_filename(self): + return os_helper.FS_NONASCII or self.skip( + "File system does not support non-ascii." + ) + + +def DALS(str): + "Dedent and left-strip" + return textwrap.dedent(str).lstrip() + + +@requires_zlib() +class ZipFixtures: + root = 'test.test_importlib.metadata.data' + + def _fixture_on_path(self, filename): + pkg_file = resources.files(self.root).joinpath(filename) + file = self.resources.enter_context(resources.as_file(pkg_file)) + assert file.name.startswith('example'), file.name + sys.path.insert(0, str(file)) + self.resources.callback(sys.path.pop, 0) + + def setUp(self): + # Add self.zip_name to the front of sys.path. + self.resources = contextlib.ExitStack() + self.addCleanup(self.resources.close) + + +def parameterize(*args_set): + """Run test method with a series of parameters.""" + + def wrapper(func): + @functools.wraps(func) + def _inner(self): + for args in args_set: + with self.subTest(**args): + func(self, **args) + + return _inner + + return wrapper diff --git a/stdlib/test/test_importlib/metadata/stubs.py b/stdlib/test/test_importlib/metadata/stubs.py new file mode 100644 index 000000000..e5b011c39 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/stubs.py @@ -0,0 +1,10 @@ +import unittest + + +class fake_filesystem_unittest: + """ + Stubbed version of the pyfakefs module + """ + class TestCase(unittest.TestCase): + def setUpPyfakefs(self): + self.skipTest("pyfakefs not available") diff --git a/stdlib/test/test_importlib/metadata/test_api.py b/stdlib/test/test_importlib/metadata/test_api.py new file mode 100644 index 000000000..2256e0c50 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_api.py @@ -0,0 +1,323 @@ +import re +import textwrap +import unittest +import warnings +import importlib +import contextlib + +from . import fixtures +from importlib.metadata import ( + Distribution, + PackageNotFoundError, + distribution, + entry_points, + files, + metadata, + requires, + version, +) + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class APITests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgPipInstalledExternalDataFiles, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + fixtures.DistInfoPkgWithDot, + fixtures.EggInfoFile, + unittest.TestCase, +): + version_pattern = r'\d+\.\d+(\.\d)?' + + def test_retrieves_version_of_self(self): + pkg_version = version('egginfo-pkg') + assert isinstance(pkg_version, str) + assert re.match(self.version_pattern, pkg_version) + + def test_retrieves_version_of_distinfo_pkg(self): + pkg_version = version('distinfo-pkg') + assert isinstance(pkg_version, str) + assert re.match(self.version_pattern, pkg_version) + + def test_for_name_does_not_exist(self): + with self.assertRaises(PackageNotFoundError): + distribution('does-not-exist') + + def test_name_normalization(self): + names = 'pkg.dot', 'pkg_dot', 'pkg-dot', 'pkg..dot', 'Pkg.Dot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.dot' + + def test_prefix_not_matched(self): + prefixes = 'p', 'pkg', 'pkg.' + for prefix in prefixes: + with self.subTest(prefix): + with self.assertRaises(PackageNotFoundError): + distribution(prefix) + + def test_for_top_level(self): + tests = [ + ('egginfo-pkg', 'mod'), + ('egg_with_no_modules-pkg', ''), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + self.assertEqual( + distribution(pkg_name).read_text('top_level.txt').strip(), + expect_content, + ) + + def test_read_text(self): + tests = [ + ('egginfo-pkg', 'mod\n'), + ('egg_with_no_modules-pkg', '\n'), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + top_level = [ + path for path in files(pkg_name) if path.name == 'top_level.txt' + ][0] + self.assertEqual(top_level.read_text(), expect_content) + + def test_entry_points(self): + eps = entry_points() + assert 'entries' in eps.groups + entries = eps.select(group='entries') + assert 'main' in entries.names + ep = entries['main'] + self.assertEqual(ep.value, 'mod:main') + self.assertEqual(ep.extras, []) + + def test_entry_points_distribution(self): + entries = entry_points(group='entries') + for entry in ("main", "ns:sub"): + ep = entries[entry] + self.assertIn(ep.dist.name, ('distinfo-pkg', 'egginfo-pkg')) + self.assertEqual(ep.dist.version, "1.0.0") + + def test_entry_points_unique_packages_normalized(self): + """ + Entry points should only be exposed for the first package + on sys.path with a given name (even when normalized). + """ + alt_site_dir = self.fixtures.enter_context(fixtures.tmp_path()) + self.fixtures.enter_context(self.add_sys_path(alt_site_dir)) + alt_pkg = { + "DistInfo_pkg-1.1.0.dist-info": { + "METADATA": """ + Name: distinfo-pkg + Version: 1.1.0 + """, + "entry_points.txt": """ + [entries] + main = mod:altmain + """, + }, + } + fixtures.build_files(alt_pkg, alt_site_dir) + entries = entry_points(group='entries') + assert not any( + ep.dist.name == 'distinfo-pkg' and ep.dist.version == '1.0.0' + for ep in entries + ) + # ns:sub doesn't exist in alt_pkg + assert 'ns:sub' not in entries.names + + def test_entry_points_missing_name(self): + with self.assertRaises(KeyError): + entry_points(group='entries')['missing'] + + def test_entry_points_missing_group(self): + assert entry_points(group='missing') == () + + def test_entry_points_allows_no_attributes(self): + ep = entry_points().select(group='entries', name='main') + with self.assertRaises(AttributeError): + ep.foo = 4 + + def test_metadata_for_this_package(self): + md = metadata('egginfo-pkg') + assert md['author'] == 'Steven Ma' + assert md['LICENSE'] == 'Unknown' + assert md['Name'] == 'egginfo-pkg' + classifiers = md.get_all('Classifier') + assert 'Topic :: Software Development :: Libraries' in classifiers + + def test_missing_key_legacy(self): + """ + Requesting a missing key will still return None, but warn. + """ + md = metadata('distinfo-pkg') + with suppress_known_deprecation(): + assert md['does-not-exist'] is None + + def test_get_key(self): + """ + Getting a key gets the key. + """ + md = metadata('egginfo-pkg') + assert md.get('Name') == 'egginfo-pkg' + + def test_get_missing_key(self): + """ + Requesting a missing key will return None. + """ + md = metadata('distinfo-pkg') + assert md.get('does-not-exist') is None + + @staticmethod + def _test_files(files): + root = files[0].root + for file in files: + assert file.root == root + assert not file.hash or file.hash.value + assert not file.hash or file.hash.mode == 'sha256' + assert not file.size or file.size >= 0 + assert file.locate().exists() + assert isinstance(file.read_binary(), bytes) + if file.name.endswith('.py'): + file.read_text() + + def test_file_hash_repr(self): + util = [p for p in files('distinfo-pkg') if p.name == 'mod.py'][0] + self.assertRegex(repr(util.hash), '') + + def test_files_dist_info(self): + self._test_files(files('distinfo-pkg')) + + def test_files_egg_info(self): + self._test_files(files('egginfo-pkg')) + self._test_files(files('egg_with_module-pkg')) + self._test_files(files('egg_with_no_modules-pkg')) + self._test_files(files('sources_fallback-pkg')) + + def test_version_egg_info_file(self): + self.assertEqual(version('egginfo-file'), '0.1') + + def test_requires_egg_info_file(self): + requirements = requires('egginfo-file') + self.assertIsNone(requirements) + + def test_requires_egg_info(self): + deps = requires('egginfo-pkg') + assert len(deps) == 2 + assert any(dep == 'wheel >= 1.0; python_version >= "2.7"' for dep in deps) + + def test_requires_egg_info_empty(self): + fixtures.build_files( + { + 'requires.txt': '', + }, + self.site_dir.joinpath('egginfo_pkg.egg-info'), + ) + deps = requires('egginfo-pkg') + assert deps == [] + + def test_requires_dist_info(self): + deps = requires('distinfo-pkg') + assert len(deps) == 2 + assert all(deps) + assert 'wheel >= 1.0' in deps + assert "pytest; extra == 'test'" in deps + + def test_more_complex_deps_requires_text(self): + requires = textwrap.dedent( + """ + dep1 + dep2 + + [:python_version < "3"] + dep3 + + [extra1] + dep4 + dep6@ git+https://example.com/python/dep.git@v1.0.0 + + [extra2:python_version < "3"] + dep5 + """ + ) + deps = sorted(Distribution._deps_from_requires_text(requires)) + expected = [ + 'dep1', + 'dep2', + 'dep3; python_version < "3"', + 'dep4; extra == "extra1"', + 'dep5; (python_version < "3") and extra == "extra2"', + 'dep6@ git+https://example.com/python/dep.git@v1.0.0 ; extra == "extra1"', + ] + # It's important that the environment marker expression be + # wrapped in parentheses to avoid the following 'and' binding more + # tightly than some other part of the environment expression. + + assert deps == expected + + def test_as_json(self): + md = metadata('distinfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['requires_dist']) == 2 + + def test_as_json_egg_info(self): + md = metadata('egginfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['classifier']) == 2 + + def test_as_json_odd_case(self): + self.make_uppercase() + md = metadata('distinfo-pkg').json + assert 'name' in md + assert len(md['requires_dist']) == 2 + assert md['keywords'] == ['SAMPLE', 'PACKAGE'] + + +class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase): + def test_name_normalization(self): + names = 'pkg.dot', 'pkg_dot', 'pkg-dot', 'pkg..dot', 'Pkg.Dot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.dot' + + def test_name_normalization_versionless_egg_info(self): + names = 'pkg.lot', 'pkg_lot', 'pkg-lot', 'pkg..lot', 'Pkg.Lot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.lot' + + +class OffSysPathTests(fixtures.DistInfoPkgOffPath, unittest.TestCase): + def test_find_distributions_specified_path(self): + dists = Distribution.discover(path=[str(self.site_dir)]) + assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) + + def test_distribution_at_pathlib(self): + """Demonstrate how to load metadata direct from a directory.""" + dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info' + dist = Distribution.at(dist_info_path) + assert dist.version == '1.0.0' + + def test_distribution_at_str(self): + dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info' + dist = Distribution.at(str(dist_info_path)) + assert dist.version == '1.0.0' + + +class InvalidateCache(unittest.TestCase): + def test_invalidate_cache(self): + # No externally observable behavior, but ensures test coverage... + importlib.invalidate_caches() diff --git a/stdlib/test/test_importlib/metadata/test_main.py b/stdlib/test/test_importlib/metadata/test_main.py new file mode 100644 index 000000000..e4218076f --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_main.py @@ -0,0 +1,468 @@ +import re +import pickle +import unittest +import warnings +import importlib +import importlib.metadata +import contextlib +from test.support import os_helper + +try: + import pyfakefs.fake_filesystem_unittest as ffs +except ImportError: + from .stubs import fake_filesystem_unittest as ffs + +from . import fixtures +from ._context import suppress +from ._path import Symlink +from importlib.metadata import ( + Distribution, + EntryPoint, + PackageNotFoundError, + _unique, + distributions, + entry_points, + metadata, + packages_distributions, + version, +) + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class BasicTests(fixtures.DistInfoPkg, unittest.TestCase): + version_pattern = r'\d+\.\d+(\.\d)?' + + def test_retrieves_version_of_self(self): + dist = Distribution.from_name('distinfo-pkg') + assert isinstance(dist.version, str) + assert re.match(self.version_pattern, dist.version) + + def test_for_name_does_not_exist(self): + with self.assertRaises(PackageNotFoundError): + Distribution.from_name('does-not-exist') + + def test_package_not_found_mentions_metadata(self): + """ + When a package is not found, that could indicate that the + package is not installed or that it is installed without + metadata. Ensure the exception mentions metadata to help + guide users toward the cause. See #124. + """ + with self.assertRaises(PackageNotFoundError) as ctx: + Distribution.from_name('does-not-exist') + + assert "metadata" in str(ctx.exception) + + # expected to fail until ABC is enforced + @suppress(AssertionError) + @suppress_known_deprecation() + def test_abc_enforced(self): + with self.assertRaises(TypeError): + type('DistributionSubclass', (Distribution,), {})() + + @fixtures.parameterize( + dict(name=None), + dict(name=''), + ) + def test_invalid_inputs_to_from_name(self, name): + with self.assertRaises(Exception): + Distribution.from_name(name) + + +class ImportTests(fixtures.DistInfoPkg, unittest.TestCase): + def test_import_nonexistent_module(self): + # Ensure that the MetadataPathFinder does not crash an import of a + # non-existent module. + with self.assertRaises(ImportError): + importlib.import_module('does_not_exist') + + def test_resolve(self): + ep = entry_points(group='entries')['main'] + self.assertEqual(ep.load().__name__, "main") + + def test_entrypoint_with_colon_in_name(self): + ep = entry_points(group='entries')['ns:sub'] + self.assertEqual(ep.value, 'mod:main') + + def test_resolve_without_attr(self): + ep = EntryPoint( + name='ep', + value='importlib.metadata', + group='grp', + ) + assert ep.load() is importlib.metadata + + +class NameNormalizationTests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + @staticmethod + def make_pkg(name): + """ + Create minimal metadata for a dist-info package with + the indicated name on the file system. + """ + return { + f'{name}.dist-info': { + 'METADATA': 'VERSION: 1.0\n', + }, + } + + def test_dashes_in_dist_name_found_as_underscores(self): + """ + For a package with a dash in the name, the dist-info metadata + uses underscores in the name. Ensure the metadata loads. + """ + fixtures.build_files(self.make_pkg('my_pkg'), self.site_dir) + assert version('my-pkg') == '1.0' + + def test_dist_name_found_as_any_case(self): + """ + Ensure the metadata loads when queried with any case. + """ + pkg_name = 'CherryPy' + fixtures.build_files(self.make_pkg(pkg_name), self.site_dir) + assert version(pkg_name) == '1.0' + assert version(pkg_name.lower()) == '1.0' + assert version(pkg_name.upper()) == '1.0' + + def test_unique_distributions(self): + """ + Two distributions varying only by non-normalized name on + the file system should resolve as the same. + """ + fixtures.build_files(self.make_pkg('abc'), self.site_dir) + before = list(_unique(distributions())) + + alt_site_dir = self.fixtures.enter_context(fixtures.tmp_path()) + self.fixtures.enter_context(self.add_sys_path(alt_site_dir)) + fixtures.build_files(self.make_pkg('ABC'), alt_site_dir) + after = list(_unique(distributions())) + + assert len(after) == len(before) + + +class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + @staticmethod + def pkg_with_non_ascii_description(site_dir): + """ + Create minimal metadata for a package with non-ASCII in + the description. + """ + contents = { + 'portend.dist-info': { + 'METADATA': 'Description: pôrˈtend', + }, + } + fixtures.build_files(contents, site_dir) + return 'portend' + + @staticmethod + def pkg_with_non_ascii_description_egg_info(site_dir): + """ + Create minimal metadata for an egg-info package with + non-ASCII in the description. + """ + contents = { + 'portend.dist-info': { + 'METADATA': """ + Name: portend + + pôrˈtend""", + }, + } + fixtures.build_files(contents, site_dir) + return 'portend' + + def test_metadata_loads(self): + pkg_name = self.pkg_with_non_ascii_description(self.site_dir) + meta = metadata(pkg_name) + assert meta['Description'] == 'pôrˈtend' + + def test_metadata_loads_egg_info(self): + pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir) + meta = metadata(pkg_name) + assert meta['Description'] == 'pôrˈtend' + + +class DiscoveryTests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + unittest.TestCase, +): + def test_package_discovery(self): + dists = list(distributions()) + assert all(isinstance(dist, Distribution) for dist in dists) + assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) + + def test_invalid_usage(self): + with self.assertRaises(ValueError): + list(distributions(context='something', name='else')) + + def test_interleaved_discovery(self): + """ + Ensure interleaved searches are safe. + + When the search is cached, it is possible for searches to be + interleaved, so make sure those use-cases are safe. + + Ref #293 + """ + dists = distributions() + next(dists) + version('egginfo-pkg') + next(dists) + + +class DirectoryTest(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + def test_egg_info(self): + # make an `EGG-INFO` directory that's unrelated + self.site_dir.joinpath('EGG-INFO').mkdir() + # used to crash with `IsADirectoryError` + with self.assertRaises(PackageNotFoundError): + version('unknown-package') + + def test_egg(self): + egg = self.site_dir.joinpath('foo-3.6.egg') + egg.mkdir() + with self.add_sys_path(egg): + with self.assertRaises(PackageNotFoundError): + version('foo') + + +class MissingSysPath(fixtures.OnSysPath, unittest.TestCase): + site_dir = '/does-not-exist' + + def test_discovery(self): + """ + Discovering distributions should succeed even if + there is an invalid path on sys.path. + """ + importlib.metadata.distributions() + + +class InaccessibleSysPath(fixtures.OnSysPath, ffs.TestCase): + site_dir = '/access-denied' + + def setUp(self): + super().setUp() + self.setUpPyfakefs() + self.fs.create_dir(self.site_dir, perm_bits=000) + + def test_discovery(self): + """ + Discovering distributions should succeed even if + there is an invalid path on sys.path. + """ + list(importlib.metadata.distributions()) + + +class TestEntryPoints(unittest.TestCase): + def __init__(self, *args): + super().__init__(*args) + self.ep = importlib.metadata.EntryPoint( + name='name', value='value', group='group' + ) + + def test_entry_point_pickleable(self): + revived = pickle.loads(pickle.dumps(self.ep)) + assert revived == self.ep + + def test_positional_args(self): + """ + Capture legacy (namedtuple) construction, discouraged. + """ + EntryPoint('name', 'value', 'group') + + def test_immutable(self): + """EntryPoints should be immutable""" + with self.assertRaises(AttributeError): + self.ep.name = 'badactor' + + def test_repr(self): + assert 'EntryPoint' in repr(self.ep) + assert 'name=' in repr(self.ep) + assert "'name'" in repr(self.ep) + + def test_hashable(self): + """EntryPoints should be hashable""" + hash(self.ep) + + def test_module(self): + assert self.ep.module == 'value' + + def test_attr(self): + assert self.ep.attr is None + + def test_sortable(self): + """ + EntryPoint objects are sortable, but result is undefined. + """ + sorted([ + EntryPoint(name='b', value='val', group='group'), + EntryPoint(name='a', value='val', group='group'), + ]) + + +class FileSystem( + fixtures.OnSysPath, fixtures.SiteDir, fixtures.FileBuilder, unittest.TestCase +): + def test_unicode_dir_on_sys_path(self): + """ + Ensure a Unicode subdirectory of a directory on sys.path + does not crash. + """ + fixtures.build_files( + {self.unicode_filename(): {}}, + prefix=self.site_dir, + ) + list(distributions()) + + +class PackagesDistributionsPrebuiltTest(fixtures.ZipFixtures, unittest.TestCase): + def test_packages_distributions_example(self): + self._fixture_on_path('example-21.12-py3-none-any.whl') + assert packages_distributions()['example'] == ['example'] + + def test_packages_distributions_example2(self): + """ + Test packages_distributions on a wheel built + by trampolim. + """ + self._fixture_on_path('example2-1.0.0-py3-none-any.whl') + assert packages_distributions()['example2'] == ['example2'] + + +class PackagesDistributionsTest( + fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase +): + def test_packages_distributions_neither_toplevel_nor_files(self): + """ + Test a package built without 'top-level.txt' or a file list. + """ + fixtures.build_files( + { + 'trim_example-1.0.0.dist-info': { + 'METADATA': """ + Name: trim_example + Version: 1.0.0 + """, + } + }, + prefix=self.site_dir, + ) + packages_distributions() + + def test_packages_distributions_all_module_types(self): + """ + Test top-level modules detected on a package without 'top-level.txt'. + """ + suffixes = importlib.machinery.all_suffixes() + metadata = dict( + METADATA=""" + Name: all_distributions + Version: 1.0.0 + """, + ) + files = { + 'all_distributions-1.0.0.dist-info': metadata, + } + for i, suffix in enumerate(suffixes): + files.update({ + f'importable-name {i}{suffix}': '', + f'in_namespace_{i}': { + f'mod{suffix}': '', + }, + f'in_package_{i}': { + '__init__.py': '', + f'mod{suffix}': '', + }, + }) + metadata.update(RECORD=fixtures.build_record(files)) + fixtures.build_files(files, prefix=self.site_dir) + + distributions = packages_distributions() + + for i in range(len(suffixes)): + assert distributions[f'importable-name {i}'] == ['all_distributions'] + assert distributions[f'in_namespace_{i}'] == ['all_distributions'] + assert distributions[f'in_package_{i}'] == ['all_distributions'] + + assert not any(name.endswith('.dist-info') for name in distributions) + + @os_helper.skip_unless_symlink + def test_packages_distributions_symlinked_top_level(self) -> None: + """ + Distribution is resolvable from a simple top-level symlink in RECORD. + See #452. + """ + + files: fixtures.FilesSpec = { + "symlinked_pkg-1.0.0.dist-info": { + "METADATA": """ + Name: symlinked-pkg + Version: 1.0.0 + """, + "RECORD": "symlinked,,\n", + }, + ".symlink.target": {}, + "symlinked": Symlink(".symlink.target"), + } + + fixtures.build_files(files, self.site_dir) + assert packages_distributions()['symlinked'] == ['symlinked-pkg'] + + +class PackagesDistributionsEggTest( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + unittest.TestCase, +): + def test_packages_distributions_on_eggs(self): + """ + Test old-style egg packages with a variation of 'top_level.txt', + 'SOURCES.txt', and 'installed-files.txt', available. + """ + distributions = packages_distributions() + + def import_names_from_package(package_name): + return { + import_name + for import_name, package_names in distributions.items() + if package_name in package_names + } + + # egginfo-pkg declares one import ('mod') via top_level.txt + assert import_names_from_package('egginfo-pkg') == {'mod'} + + # egg_with_module-pkg has one import ('egg_with_module') inferred from + # installed-files.txt (top_level.txt is missing) + assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'} + + # egg_with_no_modules-pkg should not be associated with any import names + # (top_level.txt is empty, and installed-files.txt has no .py files) + assert import_names_from_package('egg_with_no_modules-pkg') == set() + + # sources_fallback-pkg has one import ('sources_fallback') inferred from + # SOURCES.txt (top_level.txt and installed-files.txt is missing) + assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'} + + +class EditableDistributionTest(fixtures.DistInfoPkgEditable, unittest.TestCase): + def test_origin(self): + dist = Distribution.from_name('distinfo-pkg') + assert dist.origin.url.endswith('.whl') + assert dist.origin.archive_info.hashes.sha256 diff --git a/stdlib/test/test_importlib/metadata/test_zip.py b/stdlib/test/test_importlib/metadata/test_zip.py new file mode 100644 index 000000000..276f6288c --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_zip.py @@ -0,0 +1,62 @@ +import sys +import unittest + +from . import fixtures +from importlib.metadata import ( + PackageNotFoundError, + distribution, + distributions, + entry_points, + files, + version, +) + + +class TestZip(fixtures.ZipFixtures, unittest.TestCase): + def setUp(self): + super().setUp() + self._fixture_on_path('example-21.12-py3-none-any.whl') + + def test_zip_version(self): + self.assertEqual(version('example'), '21.12') + + def test_zip_version_does_not_match(self): + with self.assertRaises(PackageNotFoundError): + version('definitely-not-installed') + + def test_zip_entry_points(self): + scripts = entry_points(group='console_scripts') + entry_point = scripts['example'] + self.assertEqual(entry_point.value, 'example:main') + entry_point = scripts['Example'] + self.assertEqual(entry_point.value, 'example:main') + + def test_missing_metadata(self): + self.assertIsNone(distribution('example').read_text('does not exist')) + + def test_case_insensitive(self): + self.assertEqual(version('Example'), '21.12') + + def test_files(self): + for file in files('example'): + path = str(file.dist.locate_file(file)) + assert '.whl/' in path, path + + def test_one_distribution(self): + dists = list(distributions(path=sys.path[:1])) + assert len(dists) == 1 + + +class TestEgg(TestZip): + def setUp(self): + super().setUp() + self._fixture_on_path('example-21.12-py3.6.egg') + + def test_files(self): + for file in files('example'): + path = str(file.dist.locate_file(file)) + assert '.egg/' in path, path + + def test_normalized_name(self): + dist = distribution('example') + assert dist._normalized_name == 'example' diff --git a/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py new file mode 100644 index 000000000..3080f6f8f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py @@ -0,0 +1 @@ +attr = 'both_portions foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py new file mode 100644 index 000000000..4131d3d4b --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py @@ -0,0 +1 @@ +attr = 'both_portions foo two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip b/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip new file mode 100644 index 000000000..836a9106b Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip differ diff --git a/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py new file mode 100644 index 000000000..43cbedbbd --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py @@ -0,0 +1 @@ +attr = 'in module' diff --git a/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test/empty b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test/empty new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip b/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip new file mode 100644 index 000000000..8d22406f2 Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip differ diff --git a/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py new file mode 100644 index 000000000..d8f5c831f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py @@ -0,0 +1 @@ +attr = 'portion1 foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py new file mode 100644 index 000000000..d8f5c831f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py @@ -0,0 +1 @@ +attr = 'portion1 foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py b/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py new file mode 100644 index 000000000..d092e1e99 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py @@ -0,0 +1 @@ +attr = 'portion2 foo two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py b/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py new file mode 100644 index 000000000..2776fcdfd --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py @@ -0,0 +1 @@ +attr = 'parent child one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py b/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py new file mode 100644 index 000000000..8b037bcb0 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py @@ -0,0 +1 @@ +attr = 'parent child two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py b/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py new file mode 100644 index 000000000..f8abfe1c1 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py @@ -0,0 +1 @@ +attr = 'parent child three' diff --git a/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip b/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip new file mode 100644 index 000000000..3b866c914 Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip differ diff --git a/stdlib/test/test_importlib/partial/cfimport.py b/stdlib/test/test_importlib/partial/cfimport.py new file mode 100644 index 000000000..c92d2fe1d --- /dev/null +++ b/stdlib/test/test_importlib/partial/cfimport.py @@ -0,0 +1,38 @@ +import os +import sys +import threading +import traceback + + +NLOOPS = 50 +NTHREADS = 30 + + +def t1(): + try: + from concurrent.futures import ThreadPoolExecutor + except Exception: + traceback.print_exc() + os._exit(1) + +def t2(): + try: + from concurrent.futures.thread import ThreadPoolExecutor + except Exception: + traceback.print_exc() + os._exit(1) + +def main(): + for j in range(NLOOPS): + threads = [] + for i in range(NTHREADS): + threads.append(threading.Thread(target=t2 if i % 1 else t1)) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + sys.modules.pop('concurrent.futures', None) + sys.modules.pop('concurrent.futures.thread', None) + +if __name__ == "__main__": + main() diff --git a/stdlib/test/test_importlib/partial/pool_in_threads.py b/stdlib/test/test_importlib/partial/pool_in_threads.py new file mode 100644 index 000000000..faa7867b8 --- /dev/null +++ b/stdlib/test/test_importlib/partial/pool_in_threads.py @@ -0,0 +1,27 @@ +import multiprocessing +import os +import threading +import traceback + + +def t(): + try: + with multiprocessing.Pool(1): + pass + except Exception: + traceback.print_exc() + os._exit(1) + + +def main(): + threads = [] + for i in range(20): + threads.append(threading.Thread(target=t)) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + +if __name__ == "__main__": + main() diff --git a/stdlib/test/test_importlib/resources/__init__.py b/stdlib/test/test_importlib/resources/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/resources/_path.py b/stdlib/test/test_importlib/resources/_path.py new file mode 100644 index 000000000..b144628cb --- /dev/null +++ b/stdlib/test/test_importlib/resources/_path.py @@ -0,0 +1,94 @@ +import pathlib +import functools + +from typing import Dict, Union +from typing import runtime_checkable +from typing import Protocol + + +#### +# from jaraco.path 3.7.1 + + +class Symlink(str): + """ + A string indicating the target of a symlink. + """ + + +FilesSpec = Dict[str, Union[str, bytes, Symlink, 'FilesSpec']] + + +@runtime_checkable +class TreeMaker(Protocol): + def __truediv__(self, *args, **kwargs): ... # pragma: no cover + + def mkdir(self, **kwargs): ... # pragma: no cover + + def write_text(self, content, **kwargs): ... # pragma: no cover + + def write_bytes(self, content): ... # pragma: no cover + + def symlink_to(self, target): ... # pragma: no cover + + +def _ensure_tree_maker(obj: Union[str, TreeMaker]) -> TreeMaker: + return obj if isinstance(obj, TreeMaker) else pathlib.Path(obj) # type: ignore[return-value] + + +def build( + spec: FilesSpec, + prefix: Union[str, TreeMaker] = pathlib.Path(), # type: ignore[assignment] +): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... "bar.py": Symlink("baz.py"), + ... }, + ... "bing": Symlink("foo"), + ... } + >>> target = getfixture('tmp_path') + >>> build(spec, target) + >>> target.joinpath('foo/baz.py').read_text(encoding='utf-8') + '# Some code' + >>> target.joinpath('bing/bar.py').read_text(encoding='utf-8') + '# Some code' + """ + for name, contents in spec.items(): + create(contents, _ensure_tree_maker(prefix) / name) + + +@functools.singledispatch +def create(content: Union[str, bytes, FilesSpec], path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore[arg-type] + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content, encoding='utf-8') + + +@create.register +def _(content: Symlink, path): + path.symlink_to(content) + + +# end from jaraco.path +#### diff --git a/stdlib/test/test_importlib/resources/test_compatibilty_files.py b/stdlib/test/test_importlib/resources/test_compatibilty_files.py new file mode 100644 index 000000000..bcf608d9e --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_compatibilty_files.py @@ -0,0 +1,104 @@ +import io +import unittest + +from importlib import resources + +from importlib.resources._adapters import ( + CompatibilityFiles, + wrap_spec, +) + +from . import util + + +class CompatibilityFilesTests(unittest.TestCase): + @property + def package(self): + bytes_data = io.BytesIO(b'Hello, world!') + return util.create_package( + file=bytes_data, + path='some_path', + contents=('a', 'b', 'c'), + ) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_iter(self): + self.assertEqual( + sorted(path.name for path in self.files.iterdir()), + ['a', 'b', 'c'], + ) + + def test_child_path_iter(self): + self.assertEqual(list((self.files / 'a').iterdir()), []) + + def test_orphan_path_iter(self): + self.assertEqual(list((self.files / 'a' / 'a').iterdir()), []) + self.assertEqual(list((self.files / 'a' / 'a' / 'a').iterdir()), []) + + def test_spec_path_is(self): + self.assertFalse(self.files.is_file()) + self.assertFalse(self.files.is_dir()) + + def test_child_path_is(self): + self.assertTrue((self.files / 'a').is_file()) + self.assertFalse((self.files / 'a').is_dir()) + + def test_orphan_path_is(self): + self.assertFalse((self.files / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a').is_dir()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_dir()) + + def test_spec_path_name(self): + self.assertEqual(self.files.name, 'testingpackage') + + def test_child_path_name(self): + self.assertEqual((self.files / 'a').name, 'a') + + def test_orphan_path_name(self): + self.assertEqual((self.files / 'a' / 'b').name, 'b') + self.assertEqual((self.files / 'a' / 'b' / 'c').name, 'c') + + def test_spec_path_open(self): + self.assertEqual(self.files.read_bytes(), b'Hello, world!') + self.assertEqual(self.files.read_text(encoding='utf-8'), 'Hello, world!') + + def test_child_path_open(self): + self.assertEqual((self.files / 'a').read_bytes(), b'Hello, world!') + self.assertEqual( + (self.files / 'a').read_text(encoding='utf-8'), 'Hello, world!' + ) + + def test_orphan_path_open(self): + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b').read_bytes() + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b' / 'c').read_bytes() + + def test_open_invalid_mode(self): + with self.assertRaises(ValueError): + self.files.open('0') + + def test_orphan_path_invalid(self): + with self.assertRaises(ValueError): + CompatibilityFiles.OrphanPath() + + def test_wrap_spec(self): + spec = wrap_spec(self.package) + self.assertIsInstance(spec.loader.get_resource_reader(None), CompatibilityFiles) + + +class CompatibilityFilesNoReaderTests(unittest.TestCase): + @property + def package(self): + return util.create_package_from_loader(None) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_joinpath(self): + self.assertIsInstance(self.files / 'a', CompatibilityFiles.OrphanPath) diff --git a/stdlib/test/test_importlib/resources/test_contents.py b/stdlib/test/test_importlib/resources/test_contents.py new file mode 100644 index 000000000..4e4e0e9c3 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_contents.py @@ -0,0 +1,38 @@ +import unittest +from importlib import resources + +from . import util + + +class ContentsTests: + expected = { + '__init__.py', + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } + + def test_contents(self): + contents = {path.name for path in resources.files(self.data).iterdir()} + assert self.expected <= contents + + +class ContentsDiskTests(ContentsTests, util.DiskSetup, unittest.TestCase): + pass + + +class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): + pass + + +class ContentsNamespaceTests(ContentsTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + expected = { + # no __init__ because of namespace design + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } diff --git a/stdlib/test/test_importlib/resources/test_custom.py b/stdlib/test/test_importlib/resources/test_custom.py new file mode 100644 index 000000000..640f90fc0 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_custom.py @@ -0,0 +1,48 @@ +import unittest +import contextlib +import pathlib + +from test.support import os_helper + +from importlib import resources +from importlib.resources import abc +from importlib.resources.abc import TraversableResources, ResourceReader +from . import util + + +class SimpleLoader: + """ + A simple loader that only implements a resource reader. + """ + + def __init__(self, reader: ResourceReader): + self.reader = reader + + def get_resource_reader(self, package): + return self.reader + + +class MagicResources(TraversableResources): + """ + Magically returns the resources at path. + """ + + def __init__(self, path: pathlib.Path): + self.path = path + + def files(self): + return self.path + + +class CustomTraversableResourcesTests(unittest.TestCase): + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + def test_custom_loader(self): + temp_dir = pathlib.Path(self.fixtures.enter_context(os_helper.temp_dir())) + loader = SimpleLoader(MagicResources(temp_dir)) + pkg = util.create_package_from_loader(loader) + files = resources.files(pkg) + assert isinstance(files, abc.Traversable) + assert list(files.iterdir()) == [] diff --git a/stdlib/test/test_importlib/resources/test_files.py b/stdlib/test/test_importlib/resources/test_files.py new file mode 100644 index 000000000..3ce44999f --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_files.py @@ -0,0 +1,191 @@ +import pathlib +import py_compile +import textwrap +import unittest +import warnings +import importlib +import contextlib + +from importlib import resources +from importlib.resources.abc import Traversable +from . import util +from test.support import os_helper, import_helper + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') + assert actual == 'Hello, UTF-8 world!\n' + + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + def test_joinpath_with_multiple_args(self): + files = resources.files(self.data) + binfile = files.joinpath('subdirectory', 'binary.file') + self.assertTrue(binfile.is_file()) + + def test_old_parameter(self): + """ + Files used to take a 'package' parameter. Make sure anyone + passing by name is still supported. + """ + with suppress_known_deprecation(): + resources.files(package=self.data) + + +class OpenDiskTests(FilesTests, util.DiskSetup, unittest.TestCase): + pass + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceTests(FilesTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_non_paths_in_dunder_path(self): + """ + Non-path items in a namespace package's ``__path__`` are ignored. + + As reported in python/importlib_resources#311, some tools + like Setuptools, when creating editable packages, will inject + non-paths into a namespace package's ``__path__``, a + sentinel like + ``__editable__.sample_namespace-1.0.finder.__path_hook__`` + to cause the ``PathEntryFinder`` to be called when searching + for packages. In that case, resources should still be loadable. + """ + import namespacedata01 + + namespacedata01.__path__.append( + '__editable__.sample_namespace-1.0.finder.__path_hook__' + ) + + resources.files(namespacedata01) + + +class OpenNamespaceZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + +class DirectSpec: + """ + Override behavior of ModuleSetup to write a full spec directly. + """ + + MODULE = 'unused' + + def load_fixture(self, name): + self.tree_on_path(self.spec) + + +class ModulesFiles: + spec = { + 'mod.py': '', + 'res.txt': 'resources are the best', + } + + def test_module_resources(self): + """ + A module can have resources found adjacent to the module. + """ + import mod # type: ignore[import-not-found] + + actual = resources.files(mod).joinpath('res.txt').read_text(encoding='utf-8') + assert actual == self.spec['res.txt'] + + +class ModuleFilesDiskTests(DirectSpec, util.DiskSetup, ModulesFiles, unittest.TestCase): + pass + + +class ModuleFilesZipTests(DirectSpec, util.ZipSetup, ModulesFiles, unittest.TestCase): + pass + + +class ImplicitContextFiles: + set_val = textwrap.dedent( + f""" + import {resources.__name__} as res + val = res.files().joinpath('res.txt').read_text(encoding='utf-8') + """ + ) + spec = { + 'somepkg': { + '__init__.py': set_val, + 'submod.py': set_val, + 'res.txt': 'resources are the best', + }, + 'frozenpkg': { + '__init__.py': set_val.replace(resources.__name__, 'c_resources'), + 'res.txt': 'resources are the best', + }, + } + + def test_implicit_files_package(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + assert importlib.import_module('somepkg').val == 'resources are the best' + + def test_implicit_files_submodule(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + assert importlib.import_module('somepkg.submod').val == 'resources are the best' + + def _compile_importlib(self): + """ + Make a compiled-only copy of the importlib resources package. + + Currently only code is copied, as importlib resources doesn't itself + have any resources. + """ + bin_site = self.fixtures.enter_context(os_helper.temp_dir()) + c_resources = pathlib.Path(bin_site, 'c_resources') + sources = pathlib.Path(resources.__file__).parent + + for source_path in sources.glob('**/*.py'): + c_path = c_resources.joinpath(source_path.relative_to(sources)).with_suffix('.pyc') + py_compile.compile(source_path, c_path) + self.fixtures.enter_context(import_helper.DirsOnSysPath(bin_site)) + + def test_implicit_files_with_compiled_importlib(self): + """ + Caller detection works for compiled-only resources module. + + python/cpython#123085 + """ + self._compile_importlib() + assert importlib.import_module('frozenpkg').val == 'resources are the best' + + +class ImplicitContextFilesDiskTests( + DirectSpec, util.DiskSetup, ImplicitContextFiles, unittest.TestCase +): + pass + + +class ImplicitContextFilesZipTests( + DirectSpec, util.ZipSetup, ImplicitContextFiles, unittest.TestCase +): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_functional.py b/stdlib/test/test_importlib/resources/test_functional.py new file mode 100644 index 000000000..e8d25fa4d --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_functional.py @@ -0,0 +1,249 @@ +import unittest +import os +import importlib + +from test.support import warnings_helper + +from importlib import resources + +from . import util + +# Since the functional API forwards to Traversable, we only test +# filesystem resources here -- not zip files, namespace packages etc. +# We do test for two kinds of Anchor, though. + + +class StringAnchorMixin: + anchor01 = 'data01' + anchor02 = 'data02' + + +class ModuleAnchorMixin: + @property + def anchor01(self): + return importlib.import_module('data01') + + @property + def anchor02(self): + return importlib.import_module('data02') + + +class FunctionalAPIBase(util.DiskSetup): + def setUp(self): + super().setUp() + self.load_fixture('data02') + + def _gen_resourcetxt_path_parts(self): + """Yield various names of a text file in anchor02, each in a subTest""" + for path_parts in ( + ('subdirectory', 'subsubdir', 'resource.txt'), + ('subdirectory/subsubdir/resource.txt',), + ('subdirectory/subsubdir', 'resource.txt'), + ): + with self.subTest(path_parts=path_parts): + yield path_parts + + def test_read_text(self): + self.assertEqual( + resources.read_text(self.anchor01, 'utf-8.file'), + 'Hello, UTF-8 world!\n', + ) + self.assertEqual( + resources.read_text( + self.anchor02, + 'subdirectory', + 'subsubdir', + 'resource.txt', + encoding='utf-8', + ), + 'a resource', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_text( + self.anchor02, + *path_parts, + encoding='utf-8', + ), + 'a resource', + ) + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.read_text(self.anchor01) + with self.assertRaises(OSError): + resources.read_text(self.anchor01, 'no-such-file') + with self.assertRaises(UnicodeDecodeError): + resources.read_text(self.anchor01, 'utf-16.file') + self.assertEqual( + resources.read_text( + self.anchor01, + 'binary.file', + encoding='latin1', + ), + '\x00\x01\x02\x03', + ) + self.assertEndsWith( # ignore the BOM + resources.read_text( + self.anchor01, + 'utf-16.file', + errors='backslashreplace', + ), + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( + errors='backslashreplace', + ), + ) + + def test_read_binary(self): + self.assertEqual( + resources.read_binary(self.anchor01, 'utf-8.file'), + b'Hello, UTF-8 world!\n', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_binary(self.anchor02, *path_parts), + b'a resource', + ) + + def test_open_text(self): + with resources.open_text(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_text( + self.anchor02, + *path_parts, + encoding='utf-8', + ) as f: + self.assertEqual(f.read(), 'a resource') + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.open_text(self.anchor01) + with self.assertRaises(OSError): + resources.open_text(self.anchor01, 'no-such-file') + with resources.open_text(self.anchor01, 'utf-16.file') as f: + with self.assertRaises(UnicodeDecodeError): + f.read() + with resources.open_text( + self.anchor01, + 'binary.file', + encoding='latin1', + ) as f: + self.assertEqual(f.read(), '\x00\x01\x02\x03') + with resources.open_text( + self.anchor01, + 'utf-16.file', + errors='backslashreplace', + ) as f: + self.assertEndsWith( # ignore the BOM + f.read(), + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( + errors='backslashreplace', + ), + ) + + def test_open_binary(self): + with resources.open_binary(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), b'Hello, UTF-8 world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_binary( + self.anchor02, + *path_parts, + ) as f: + self.assertEqual(f.read(), b'a resource') + + def test_path(self): + with resources.path(self.anchor01, 'utf-8.file') as path: + with open(str(path), encoding='utf-8') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + with resources.path(self.anchor01) as path: + with open(os.path.join(path, 'utf-8.file'), encoding='utf-8') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + + def test_is_resource(self): + is_resource = resources.is_resource + self.assertTrue(is_resource(self.anchor01, 'utf-8.file')) + self.assertFalse(is_resource(self.anchor01, 'no_such_file')) + self.assertFalse(is_resource(self.anchor01)) + self.assertFalse(is_resource(self.anchor01, 'subdirectory')) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertTrue(is_resource(self.anchor02, *path_parts)) + + def test_contents(self): + with warnings_helper.check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01) + self.assertGreaterEqual( + set(c), + {'utf-8.file', 'utf-16.file', 'binary.file', 'subdirectory'}, + ) + with self.assertRaises(OSError), warnings_helper.check_warnings(( + ".*contents.*", + DeprecationWarning, + )): + list(resources.contents(self.anchor01, 'utf-8.file')) + + for path_parts in self._gen_resourcetxt_path_parts(): + with self.assertRaises(OSError), warnings_helper.check_warnings(( + ".*contents.*", + DeprecationWarning, + )): + list(resources.contents(self.anchor01, *path_parts)) + with warnings_helper.check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01, 'subdirectory') + self.assertGreaterEqual( + set(c), + {'binary.file'}, + ) + + @warnings_helper.ignore_warnings(category=DeprecationWarning) + def test_common_errors(self): + for func in ( + resources.read_text, + resources.read_binary, + resources.open_text, + resources.open_binary, + resources.path, + resources.is_resource, + resources.contents, + ): + with self.subTest(func=func): + # Rejecting None anchor + with self.assertRaises(TypeError): + func(None) + # Rejecting invalid anchor type + with self.assertRaises((TypeError, AttributeError)): + func(1234) + # Unknown module + with self.assertRaises(ModuleNotFoundError): + func('$missing module$') + + def test_text_errors(self): + for func in ( + resources.read_text, + resources.open_text, + ): + with self.subTest(func=func): + # Multiple path arguments need explicit encoding argument. + with self.assertRaises(TypeError): + func( + self.anchor02, + 'subdirectory', + 'subsubdir', + 'resource.txt', + ) + + +class FunctionalAPITest_StringAnchor( + StringAnchorMixin, + FunctionalAPIBase, + unittest.TestCase, +): + pass + + +class FunctionalAPITest_ModuleAnchor( + ModuleAnchorMixin, + FunctionalAPIBase, + unittest.TestCase, +): + pass diff --git a/stdlib/test/test_importlib/resources/test_open.py b/stdlib/test/test_importlib/resources/test_open.py new file mode 100644 index 000000000..8c00378ad --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_open.py @@ -0,0 +1,84 @@ +import unittest + +from importlib import resources +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open('rb'): + pass + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open(encoding='utf-8'): + pass + + +class OpenTests: + def test_open_binary(self): + target = resources.files(self.data) / 'binary.file' + with target.open('rb') as fp: + result = fp.read() + self.assertEqual(result, bytes(range(4))) + + def test_open_text_default_encoding(self): + target = resources.files(self.data) / 'utf-8.file' + with target.open(encoding='utf-8') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_open_text_given_encoding(self): + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-16', errors='strict') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_open_text_with_errors(self): + """ + Raises UnicodeError without the 'errors' argument. + """ + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-8', errors='strict') as fp: + self.assertRaises(UnicodeError, fp.read) + with target.open(encoding='utf-8', errors='ignore') as fp: + result = fp.read() + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + def test_open_binary_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + with self.assertRaises(FileNotFoundError): + target.open('rb') + + def test_open_text_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + with self.assertRaises(FileNotFoundError): + target.open(encoding='utf-8') + + +class OpenDiskTests(OpenTests, util.DiskSetup, unittest.TestCase): + pass + + +class OpenDiskNamespaceTests(OpenTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_path.py b/stdlib/test/test_importlib/resources/test_path.py new file mode 100644 index 000000000..903911f57 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_path.py @@ -0,0 +1,60 @@ +import io +import pathlib +import unittest + +from importlib import resources +from . import util + + +class CommonTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + with resources.as_file(resources.files(package).joinpath(path)): + pass + + +class PathTests: + def test_reading(self): + """ + Path should be readable and a pathlib.Path instance. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + self.assertIsInstance(path, pathlib.Path) + self.assertEndsWith(path.name, "utf-8.file") + self.assertEqual('Hello, UTF-8 world!\n', path.read_text(encoding='utf-8')) + + +class PathDiskTests(PathTests, util.DiskSetup, unittest.TestCase): + def test_natural_path(self): + # Guarantee the internal implementation detail that + # file-system-backed resources do not get the tempdir + # treatment. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + assert 'data' in str(path) + + +class PathMemoryTests(PathTests, unittest.TestCase): + def setUp(self): + file = io.BytesIO(b'Hello, UTF-8 world!\n') + self.addCleanup(file.close) + self.data = util.create_package( + file=file, path=FileNotFoundError("package exists only in memory") + ) + self.data.__spec__.origin = None + self.data.__spec__.has_location = False + + +class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase): + def test_remove_in_context_manager(self): + """ + It is not an error if the file that was temporarily stashed on the + file system is removed inside the `with` stanza. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + path.unlink() + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_read.py b/stdlib/test/test_importlib/resources/test_read.py new file mode 100644 index 000000000..59c237d96 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_read.py @@ -0,0 +1,93 @@ +import unittest + +from importlib import import_module, resources + +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_bytes() + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_text(encoding='utf-8') + + +class ReadTests: + def test_read_bytes(self): + result = resources.files(self.data).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(4))) + + def test_read_text_default_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-8.file') + .read_text(encoding='utf-8') + ) + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_read_text_given_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-16.file') + .read_text(encoding='utf-16') + ) + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_read_text_with_errors(self): + """ + Raises UnicodeError without the 'errors' argument. + """ + target = resources.files(self.data) / 'utf-16.file' + self.assertRaises(UnicodeError, target.read_text, encoding='utf-8') + result = target.read_text(encoding='utf-8', errors='ignore') + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + +class ReadDiskTests(ReadTests, util.DiskSetup, unittest.TestCase): + pass + + +class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + def test_read_submodule_resource(self): + submodule = import_module('data01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(4, 8))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('data01.subdirectory').joinpath('binary.file').read_bytes() + ) + self.assertEqual(result, bytes(range(4, 8))) + + +class ReadNamespaceTests(ReadTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +class ReadNamespaceZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_read_submodule_resource(self): + submodule = import_module('namespacedata01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(12, 16))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('namespacedata01.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, bytes(range(12, 16))) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_reader.py b/stdlib/test/test_importlib/resources/test_reader.py new file mode 100644 index 000000000..ed5693ab4 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_reader.py @@ -0,0 +1,137 @@ +import os.path +import pathlib +import unittest + +from importlib import import_module +from importlib.readers import MultiplexedPath, NamespaceReader + +from . import util + + +class MultiplexedPathTest(util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def setUp(self): + super().setUp() + self.folder = pathlib.Path(self.data.__path__[0]) + self.data01 = pathlib.Path(self.load_fixture('data01').__file__).parent + self.data02 = pathlib.Path(self.load_fixture('data02').__file__).parent + + def test_init_no_paths(self): + with self.assertRaises(FileNotFoundError): + MultiplexedPath() + + def test_init_file(self): + with self.assertRaises(NotADirectoryError): + MultiplexedPath(self.folder / 'binary.file') + + def test_iterdir(self): + contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} + try: + contents.remove('__pycache__') + except (KeyError, ValueError): + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-16.file', 'utf-8.file'} + ) + + def test_iterdir_duplicate(self): + contents = { + path.name for path in MultiplexedPath(self.folder, self.data01).iterdir() + } + for remove in ('__pycache__', '__init__.pyc'): + try: + contents.remove(remove) + except (KeyError, ValueError): + pass + self.assertEqual( + contents, + {'__init__.py', 'binary.file', 'subdirectory', 'utf-16.file', 'utf-8.file'}, + ) + + def test_is_dir(self): + self.assertEqual(MultiplexedPath(self.folder).is_dir(), True) + + def test_is_file(self): + self.assertEqual(MultiplexedPath(self.folder).is_file(), False) + + def test_open_file(self): + path = MultiplexedPath(self.folder) + with self.assertRaises(FileNotFoundError): + path.read_bytes() + with self.assertRaises(FileNotFoundError): + path.read_text() + with self.assertRaises(FileNotFoundError): + path.open() + + def test_join_path(self): + prefix = str(self.folder.parent) + path = MultiplexedPath(self.folder, self.data01) + self.assertEqual( + str(path.joinpath('binary.file'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'binary.file'), + ) + sub = path.joinpath('subdirectory') + assert isinstance(sub, MultiplexedPath) + assert 'namespacedata01' in str(sub) + assert 'data01' in str(sub) + self.assertEqual( + str(path.joinpath('imaginary'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'imaginary'), + ) + self.assertEqual(path.joinpath(), path) + + def test_join_path_compound(self): + path = MultiplexedPath(self.folder) + assert not path.joinpath('imaginary/foo.py').exists() + + def test_join_path_common_subdir(self): + prefix = str(self.data02.parent) + path = MultiplexedPath(self.data01, self.data02) + self.assertIsInstance(path.joinpath('subdirectory'), MultiplexedPath) + self.assertEqual( + str(path.joinpath('subdirectory', 'subsubdir'))[len(prefix) + 1 :], + os.path.join('data02', 'subdirectory', 'subsubdir'), + ) + + def test_repr(self): + self.assertEqual( + repr(MultiplexedPath(self.folder)), + f"MultiplexedPath('{self.folder}')", + ) + + def test_name(self): + self.assertEqual( + MultiplexedPath(self.folder).name, + os.path.basename(self.folder), + ) + + +class NamespaceReaderTest(util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_init_error(self): + with self.assertRaises(ValueError): + NamespaceReader(['path1', 'path2']) + + def test_resource_path(self): + namespacedata01 = import_module('namespacedata01') + reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + + root = self.data.__path__[0] + self.assertEqual( + reader.resource_path('binary.file'), os.path.join(root, 'binary.file') + ) + self.assertEqual( + reader.resource_path('imaginary'), os.path.join(root, 'imaginary') + ) + + def test_files(self): + reader = NamespaceReader(self.data.__spec__.submodule_search_locations) + root = self.data.__path__[0] + self.assertIsInstance(reader.files(), MultiplexedPath) + self.assertEqual(repr(reader.files()), f"MultiplexedPath('{root}')") + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_resource.py b/stdlib/test/test_importlib/resources/test_resource.py new file mode 100644 index 000000000..fcede14b8 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_resource.py @@ -0,0 +1,236 @@ +import unittest + +from . import util +from importlib import resources, import_module + + +class ResourceTests: + # Subclasses are expected to set the `data` attribute. + + def test_is_file_exists(self): + target = resources.files(self.data) / 'binary.file' + self.assertTrue(target.is_file()) + + def test_is_file_missing(self): + target = resources.files(self.data) / 'not-a-file' + self.assertFalse(target.is_file()) + + def test_is_dir(self): + target = resources.files(self.data) / 'subdirectory' + self.assertFalse(target.is_file()) + self.assertTrue(target.is_dir()) + + +class ResourceDiskTests(ResourceTests, util.DiskSetup, unittest.TestCase): + pass + + +class ResourceZipTests(ResourceTests, util.ZipSetup, unittest.TestCase): + pass + + +def names(traversable): + return {item.name for item in traversable.iterdir()} + + +class ResourceLoaderTests(util.DiskSetup, unittest.TestCase): + def test_resource_contents(self): + package = util.create_package( + file=self.data, path=self.data.__file__, contents=['A', 'B', 'C'] + ) + self.assertEqual(names(resources.files(package)), {'A', 'B', 'C'}) + + def test_is_file(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertTrue(resources.files(package).joinpath('B').is_file()) + + def test_is_dir(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertTrue(resources.files(package).joinpath('D').is_dir()) + + def test_resource_missing(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertFalse(resources.files(package).joinpath('Z').is_file()) + + +class ResourceCornerCaseTests(util.DiskSetup, unittest.TestCase): + def test_package_has_no_reader_fallback(self): + """ + Test odd ball packages which: + # 1. Do not have a ResourceReader as a loader + # 2. Are not on the file system + # 3. Are not in a zip file + """ + module = util.create_package( + file=self.data, path=self.data.__file__, contents=['A', 'B', 'C'] + ) + # Give the module a dummy loader. + module.__loader__ = object() + # Give the module a dummy origin. + module.__file__ = '/path/which/shall/not/be/named' + module.__spec__.loader = module.__loader__ + module.__spec__.origin = module.__file__ + self.assertFalse(resources.files(module).joinpath('A').is_file()) + + +class ResourceFromZipsTest01(util.ZipSetup, unittest.TestCase): + def test_is_submodule_resource(self): + submodule = import_module('data01.subdirectory') + self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file()) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('data01.subdirectory').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + submodule = import_module('data01.subdirectory') + self.assertEqual( + names(resources.files(submodule)), {'__init__.py', 'binary.file'} + ) + + def test_submodule_contents_by_name(self): + self.assertEqual( + names(resources.files('data01.subdirectory')), + {'__init__.py', 'binary.file'}, + ) + + def test_as_file_directory(self): + with resources.as_file(resources.files('data01')) as data: + assert data.name == 'data01' + assert data.is_dir() + assert data.joinpath('subdirectory').is_dir() + assert len(list(data.iterdir())) + assert not data.parent.exists() + + +class ResourceFromZipsTest02(util.ZipSetup, unittest.TestCase): + MODULE = 'data02' + + def test_unrelated_contents(self): + """ + Test thata zip with two unrelated subpackages return + distinct resources. Ref python/importlib_resources#44. + """ + self.assertEqual( + names(resources.files('data02.one')), + {'__init__.py', 'resource1.txt'}, + ) + self.assertEqual( + names(resources.files('data02.two')), + {'__init__.py', 'resource2.txt'}, + ) + + +class DeletingZipsTest(util.ZipSetup, unittest.TestCase): + """Having accessed resources in a zip file should not keep an open + reference to the zip. + """ + + def test_iterdir_does_not_keep_open(self): + [item.name for item in resources.files('data01').iterdir()] + + def test_is_file_does_not_keep_open(self): + resources.files('data01').joinpath('binary.file').is_file() + + def test_is_file_failure_does_not_keep_open(self): + resources.files('data01').joinpath('not-present').is_file() + + @unittest.skip("Desired but not supported.") + def test_as_file_does_not_keep_open(self): # pragma: no cover + resources.as_file(resources.files('data01') / 'binary.file') + + def test_entered_path_does_not_keep_open(self): + """ + Mimic what certifi does on import to make its bundle + available for the process duration. + """ + resources.as_file(resources.files('data01') / 'binary.file').__enter__() + + def test_read_binary_does_not_keep_open(self): + resources.files('data01').joinpath('binary.file').read_bytes() + + def test_read_text_does_not_keep_open(self): + resources.files('data01').joinpath('utf-8.file').read_text(encoding='utf-8') + + +class ResourceFromNamespaceTests: + def test_is_submodule_resource(self): + self.assertTrue( + resources.files(import_module('namespacedata01')) + .joinpath('binary.file') + .is_file() + ) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('namespacedata01').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + contents = names(resources.files(import_module('namespacedata01'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_contents_by_name(self): + contents = names(resources.files('namespacedata01')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_sub_contents(self): + contents = names(resources.files(import_module('namespacedata01.subdirectory'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + def test_submodule_sub_contents_by_name(self): + contents = names(resources.files('namespacedata01.subdirectory')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + +class ResourceFromNamespaceDiskTests( + util.DiskSetup, + ResourceFromNamespaceTests, + unittest.TestCase, +): + MODULE = 'namespacedata01' + + +class ResourceFromNamespaceZipTests( + util.ZipSetup, + ResourceFromNamespaceTests, + unittest.TestCase, +): + MODULE = 'namespacedata01' + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/util.py b/stdlib/test/test_importlib/resources/util.py new file mode 100644 index 000000000..e2d995f59 --- /dev/null +++ b/stdlib/test/test_importlib/resources/util.py @@ -0,0 +1,206 @@ +import abc +import importlib +import io +import sys +import types +import pathlib +import contextlib + +from importlib.resources.abc import ResourceReader +from test.support import import_helper, os_helper +from . import zip as zip_ +from . import _path + + +from importlib.machinery import ModuleSpec + + +class Reader(ResourceReader): + def __init__(self, **kwargs): + vars(self).update(kwargs) + + def get_resource_reader(self, package): + return self + + def open_resource(self, path): + self._path = path + if isinstance(self.file, Exception): + raise self.file + return self.file + + def resource_path(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + return self.path + + def is_resource(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + + def part(entry): + return entry.split('/') + + return any( + len(parts) == 1 and parts[0] == path_ for parts in map(part, self._contents) + ) + + def contents(self): + if isinstance(self.path, Exception): + raise self.path + yield from self._contents + + +def create_package_from_loader(loader, is_package=True): + name = 'testingpackage' + module = types.ModuleType(name) + spec = ModuleSpec(name, loader, origin='does-not-exist', is_package=is_package) + module.__spec__ = spec + module.__loader__ = loader + return module + + +def create_package(file=None, path=None, is_package=True, contents=()): + return create_package_from_loader( + Reader(file=file, path=path, _contents=contents), + is_package, + ) + + +class CommonTestsBase(metaclass=abc.ABCMeta): + """ + Tests shared by test_open, test_path, and test_read. + """ + + @abc.abstractmethod + def execute(self, package, path): + """ + Call the pertinent legacy API function (e.g. open_text, path) + on package and path. + """ + + def test_package_name(self): + """ + Passing in the package name should succeed. + """ + self.execute(self.data.__name__, 'utf-8.file') + + def test_package_object(self): + """ + Passing in the package itself should succeed. + """ + self.execute(self.data, 'utf-8.file') + + def test_string_path(self): + """ + Passing in a string for the path should succeed. + """ + path = 'utf-8.file' + self.execute(self.data, path) + + def test_pathlib_path(self): + """ + Passing in a pathlib.PurePath object for the path should succeed. + """ + path = pathlib.PurePath('utf-8.file') + self.execute(self.data, path) + + def test_importing_module_as_side_effect(self): + """ + The anchor package can already be imported. + """ + del sys.modules[self.data.__name__] + self.execute(self.data.__name__, 'utf-8.file') + + def test_missing_path(self): + """ + Attempting to open or read or request the path for a + non-existent path should succeed if open_resource + can return a viable data stream. + """ + bytes_data = io.BytesIO(b'Hello, world!') + package = create_package(file=bytes_data, path=FileNotFoundError()) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_extant_path(self): + # Attempting to open or read or request the path when the + # path does exist should still succeed. Does not assert + # anything about the result. + bytes_data = io.BytesIO(b'Hello, world!') + # any path that exists + path = __file__ + package = create_package(file=bytes_data, path=path) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_useless_loader(self): + package = create_package(file=FileNotFoundError(), path=FileNotFoundError()) + with self.assertRaises(FileNotFoundError): + self.execute(package, 'utf-8.file') + + +fixtures = dict( + data01={ + '__init__.py': '', + 'binary.file': bytes(range(4)), + 'utf-16.file': '\ufeffHello, UTF-16 world!\n'.encode('utf-16-le'), + 'utf-8.file': 'Hello, UTF-8 world!\n'.encode('utf-8'), + 'subdirectory': { + '__init__.py': '', + 'binary.file': bytes(range(4, 8)), + }, + }, + data02={ + '__init__.py': '', + 'one': {'__init__.py': '', 'resource1.txt': 'one resource'}, + 'two': {'__init__.py': '', 'resource2.txt': 'two resource'}, + 'subdirectory': {'subsubdir': {'resource.txt': 'a resource'}}, + }, + namespacedata01={ + 'binary.file': bytes(range(4)), + 'utf-16.file': '\ufeffHello, UTF-16 world!\n'.encode('utf-16-le'), + 'utf-8.file': 'Hello, UTF-8 world!\n'.encode('utf-8'), + 'subdirectory': { + 'binary.file': bytes(range(12, 16)), + }, + }, +) + + +class ModuleSetup: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + self.fixtures.enter_context(import_helper.isolated_modules()) + self.data = self.load_fixture(self.MODULE) + + def load_fixture(self, module): + self.tree_on_path({module: fixtures[module]}) + return importlib.import_module(module) + + +class ZipSetup(ModuleSetup): + MODULE = 'data01' + + def tree_on_path(self, spec): + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + modules = pathlib.Path(temp_dir) / 'zipped modules.zip' + self.fixtures.enter_context( + import_helper.DirsOnSysPath(str(zip_.make_zip_file(spec, modules))) + ) + + +class DiskSetup(ModuleSetup): + MODULE = 'data01' + + def tree_on_path(self, spec): + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + _path.build(spec, pathlib.Path(temp_dir)) + self.fixtures.enter_context(import_helper.DirsOnSysPath(temp_dir)) + + +class CommonTests(DiskSetup, CommonTestsBase): + pass diff --git a/stdlib/test/test_importlib/resources/zip.py b/stdlib/test/test_importlib/resources/zip.py new file mode 100755 index 000000000..fc453f020 --- /dev/null +++ b/stdlib/test/test_importlib/resources/zip.py @@ -0,0 +1,24 @@ +""" +Generate zip test data files. +""" + +import zipfile + + +def make_zip_file(tree, dst): + """ + Zip the files in tree into a new zipfile at dst. + """ + with zipfile.ZipFile(dst, 'w') as zf: + for name, contents in walk(tree): + zf.writestr(name, contents) + zipfile._path.CompleteDirs.inject(zf) + return dst + + +def walk(tree, prefix=''): + for name, contents in tree.items(): + if isinstance(contents, dict): + yield from walk(contents, prefix=f'{prefix}{name}/') + else: + yield f'{prefix}{name}', contents diff --git a/stdlib/test/test_importlib/source/__init__.py b/stdlib/test/test_importlib/source/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/source/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/source/__main__.py b/stdlib/test/test_importlib/source/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/source/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/source/test_case_sensitivity.py b/stdlib/test/test_importlib/source/test_case_sensitivity.py new file mode 100644 index 000000000..e52829e62 --- /dev/null +++ b/stdlib/test/test_importlib/source/test_case_sensitivity.py @@ -0,0 +1,78 @@ +"""Test case-sensitivity (PEP 235).""" +import sys + +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + +import os +from test.support import os_helper +import unittest + + +@util.case_insensitive_tests +class CaseSensitivityTest(util.CASEOKTestBase): + + """PEP 235 dictates that on case-preserving, case-insensitive file systems + that imports are case-sensitive unless the PYTHONCASEOK environment + variable is set.""" + + name = 'MoDuLe' + assert name != name.lower() + + def finder(self, path): + return self.machinery.FileFinder(path, + (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES), + (self.machinery.SourcelessFileLoader, + self.machinery.BYTECODE_SUFFIXES)) + + def sensitivity_test(self): + """Look for a module with matching and non-matching sensitivity.""" + sensitive_pkg = 'sensitive.{0}'.format(self.name) + insensitive_pkg = 'insensitive.{0}'.format(self.name.lower()) + context = util.create_modules(insensitive_pkg, sensitive_pkg) + with context as mapping: + sensitive_path = os.path.join(mapping['.root'], 'sensitive') + insensitive_path = os.path.join(mapping['.root'], 'insensitive') + sensitive_finder = self.finder(sensitive_path) + insensitive_finder = self.finder(insensitive_path) + return self.find(sensitive_finder), self.find(insensitive_finder) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_sensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.unset('PYTHONCASEOK') + self.caseok_env_changed(should_exist=False) + sensitive, insensitive = self.sensitivity_test() + self.assertIsNotNone(sensitive) + self.assertIn(self.name, sensitive.get_filename(self.name)) + self.assertIsNone(insensitive) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_insensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.set('PYTHONCASEOK', '1') + self.caseok_env_changed(should_exist=True) + sensitive, insensitive = self.sensitivity_test() + self.assertIsNotNone(sensitive) + self.assertIn(self.name, sensitive.get_filename(self.name)) + self.assertIsNotNone(insensitive) + self.assertIn(self.name, insensitive.get_filename(self.name)) + + +class CaseSensitivityTestPEP451(CaseSensitivityTest): + def find(self, finder): + found = finder.find_spec(self.name) + return found.loader if found is not None else found + + +(Frozen_CaseSensitivityTestPEP451, + Source_CaseSensitivityTestPEP451 + ) = util.test_both(CaseSensitivityTestPEP451, importlib=importlib, + machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_file_loader.py b/stdlib/test/test_importlib/source/test_file_loader.py new file mode 100644 index 000000000..f35adec1a --- /dev/null +++ b/stdlib/test/test_importlib/source/test_file_loader.py @@ -0,0 +1,795 @@ +from test.test_importlib import abc, util + +importlib = util.import_importlib('importlib') +importlib_abc = util.import_importlib('importlib.abc') +machinery = util.import_importlib('importlib.machinery') +importlib_util = util.import_importlib('importlib.util') + +import errno +import marshal +import os +import py_compile +import shutil +import stat +import sys +import types +import unittest +import warnings + +from test.support.import_helper import make_legacy_pyc, unload + +from test.test_py_compile import without_source_date_epoch +from test.test_py_compile import SourceDateEpochTestMeta + + +class SimpleTest(abc.LoaderTests): + + """Should have no issue importing a source module [basic]. And if there is + a syntax error, it should raise a SyntaxError [syntax error]. + + """ + + def setUp(self): + self.name = 'spam' + self.filepath = os.path.join('ham', self.name + '.py') + self.loader = self.machinery.SourceFileLoader(self.name, self.filepath) + + def test_load_module_API(self): + class Tester(self.abc.FileLoader): + def get_source(self, _): return 'attr = 42' + def is_package(self, _): return False + + loader = Tester('blah', 'blah.py') + self.addCleanup(unload, 'blah') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module() # Should not raise an exception. + + def test_get_filename_API(self): + # If fullname is not set then assume self.path is desired. + class Tester(self.abc.FileLoader): + def get_code(self, _): pass + def get_source(self, _): pass + def is_package(self, _): pass + + path = 'some_path' + name = 'some_name' + loader = Tester(name, path) + self.assertEqual(path, loader.get_filename(name)) + self.assertEqual(path, loader.get_filename()) + self.assertEqual(path, loader.get_filename(None)) + with self.assertRaises(ImportError): + loader.get_filename(name + 'XXX') + + def test_equality(self): + other = self.machinery.SourceFileLoader(self.name, self.filepath) + self.assertEqual(self.loader, other) + + def test_inequality(self): + other = self.machinery.SourceFileLoader('_' + self.name, self.filepath) + self.assertNotEqual(self.loader, other) + + # [basic] + def test_module(self): + with util.create_modules('_temp') as mapping: + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + self.assertIn('_temp', sys.modules) + check = {'__name__': '_temp', '__file__': mapping['_temp'], + '__package__': ''} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + def test_package(self): + with util.create_modules('_pkg.__init__') as mapping: + loader = self.machinery.SourceFileLoader('_pkg', + mapping['_pkg.__init__']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_pkg') + self.assertIn('_pkg', sys.modules) + check = {'__name__': '_pkg', '__file__': mapping['_pkg.__init__'], + '__path__': [os.path.dirname(mapping['_pkg.__init__'])], + '__package__': '_pkg'} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + + def test_lacking_parent(self): + with util.create_modules('_pkg.__init__', '_pkg.mod')as mapping: + loader = self.machinery.SourceFileLoader('_pkg.mod', + mapping['_pkg.mod']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_pkg.mod') + self.assertIn('_pkg.mod', sys.modules) + check = {'__name__': '_pkg.mod', '__file__': mapping['_pkg.mod'], + '__package__': '_pkg'} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + def fake_mtime(self, fxn): + """Fake mtime to always be higher than expected.""" + return lambda name: fxn(name) + 1 + + def test_module_reuse(self): + with util.create_modules('_temp') as mapping: + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + module_id = id(module) + module_dict_id = id(module.__dict__) + with open(mapping['_temp'], 'w', encoding='utf-8') as file: + file.write("testing_var = 42\n") + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + self.assertIn('testing_var', module.__dict__, + "'testing_var' not in " + "{0}".format(list(module.__dict__.keys()))) + self.assertEqual(module, sys.modules['_temp']) + self.assertEqual(id(module), module_id) + self.assertEqual(id(module.__dict__), module_dict_id) + + def test_state_after_failure(self): + # A failed reload should leave the original module intact. + attributes = ('__file__', '__path__', '__package__') + value = '' + name = '_temp' + with util.create_modules(name) as mapping: + orig_module = types.ModuleType(name) + for attr in attributes: + setattr(orig_module, attr, value) + with open(mapping[name], 'w', encoding='utf-8') as file: + file.write('+++ bad syntax +++') + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with self.assertRaises(SyntaxError): + loader.exec_module(orig_module) + for attr in attributes: + self.assertEqual(getattr(orig_module, attr), value) + with self.assertRaises(SyntaxError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module(name) + for attr in attributes: + self.assertEqual(getattr(orig_module, attr), value) + + # [syntax error] + def test_bad_syntax(self): + with util.create_modules('_temp') as mapping: + with open(mapping['_temp'], 'w', encoding='utf-8') as file: + file.write('=') + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with self.assertRaises(SyntaxError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module('_temp') + self.assertNotIn('_temp', sys.modules) + + def test_file_from_empty_string_dir(self): + # Loading a module found from an empty string entry on sys.path should + # not only work, but keep all attributes relative. + file_path = '_temp.py' + with open(file_path, 'w', encoding='utf-8') as file: + file.write("# test file for importlib") + try: + with util.uncache('_temp'): + loader = self.machinery.SourceFileLoader('_temp', file_path) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + mod = loader.load_module('_temp') + self.assertEqual(file_path, mod.__file__) + self.assertEqual(self.util.cache_from_source(file_path), + mod.__cached__) + finally: + os.unlink(file_path) + pycache = os.path.dirname(self.util.cache_from_source(file_path)) + if os.path.exists(pycache): + shutil.rmtree(pycache) + + @util.writes_bytecode_files + def test_timestamp_overflow(self): + # When a modification timestamp is larger than 2**32, it should be + # truncated rather than raise an OverflowError. + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + compiled = self.util.cache_from_source(source) + with open(source, 'w', encoding='utf-8') as f: + f.write("x = 5") + try: + os.utime(source, (2 ** 33 - 5, 2 ** 33 - 5)) + except OverflowError: + self.skipTest("cannot set modification time to large integer") + except OSError as e: + if e.errno != getattr(errno, 'EOVERFLOW', None): + raise + self.skipTest("cannot set modification time to large integer ({})".format(e)) + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + # PEP 451 + module = types.ModuleType('_temp') + module.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(module) + self.assertEqual(module.x, 5) + self.assertTrue(os.path.exists(compiled)) + os.unlink(compiled) + # PEP 302 + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + mod = loader.load_module('_temp') + # Sanity checks. + self.assertEqual(mod.__cached__, compiled) + self.assertEqual(mod.x, 5) + # The pyc file was created. + self.assertTrue(os.path.exists(compiled)) + + def test_unloadable(self): + loader = self.machinery.SourceFileLoader('good name', {}) + module = types.ModuleType('bad name') + module.__spec__ = self.machinery.ModuleSpec('bad name', loader) + with self.assertRaises(ImportError): + loader.exec_module(module) + with self.assertRaises(ImportError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module('bad name') + + @util.writes_bytecode_files + def test_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overridden_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'never'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + + @util.writes_bytecode_files + def test_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "old"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overridden_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'always'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + + +(Frozen_SimpleTest, + Source_SimpleTest + ) = util.test_both(SimpleTest, importlib=importlib, machinery=machinery, + abc=importlib_abc, util=importlib_util) + + +class SourceDateEpochTestMeta(SourceDateEpochTestMeta, + type(Source_SimpleTest)): + pass + + +class SourceDateEpoch_SimpleTest(Source_SimpleTest, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): + pass + + +class BadBytecodeTest: + + def import_(self, file, module_name): + raise NotImplementedError + + def manipulate_bytecode(self, + name, mapping, manipulator, *, + del_source=False, + invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): + """Manipulate the bytecode of a module by passing it into a callable + that returns what to use as the new bytecode.""" + try: + del sys.modules['_temp'] + except KeyError: + pass + py_compile.compile(mapping[name], invalidation_mode=invalidation_mode) + if not del_source: + bytecode_path = self.util.cache_from_source(mapping[name]) + else: + os.unlink(mapping[name]) + bytecode_path = make_legacy_pyc(mapping[name]) + if manipulator: + with open(bytecode_path, 'rb') as file: + bc = file.read() + new_bc = manipulator(bc) + with open(bytecode_path, 'wb') as file: + if new_bc is not None: + file.write(new_bc) + return bytecode_path + + def _test_empty_file(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: b'', + del_source=del_source) + test('_temp', mapping, bc_path) + + @util.writes_bytecode_files + def _test_partial_magic(self, test, *, del_source=False): + # When their are less than 4 bytes to a .pyc, regenerate it if + # possible, else raise ImportError. + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:3], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_magic_only(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:4], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_flags(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:7], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_hash(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + test('_temp', mapping, bc_path) + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + test('_temp', mapping, bc_path) + + def _test_partial_timestamp(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:11], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_size(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:15], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_no_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16], + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bc_path + with self.assertRaises(EOFError): + self.import_(file_path, '_temp') + + def _test_non_code_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bytecode_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16] + marshal.dumps(b'abcd'), + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bytecode_path + with self.assertRaises(ImportError) as cm: + self.import_(file_path, '_temp') + self.assertEqual(cm.exception.name, '_temp') + self.assertEqual(cm.exception.path, bytecode_path) + + def _test_bad_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bytecode_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16] + b'', + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bytecode_path + with self.assertRaises(EOFError): + self.import_(file_path, '_temp') + + def _test_bad_magic(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: b'\x00\x00\x00\x00' + bc[4:]) + test('_temp', mapping, bc_path) + + +class BadBytecodeTestPEP451(BadBytecodeTest): + + def import_(self, file, module_name): + loader = self.loader(module_name, file) + module = types.ModuleType(module_name) + module.__spec__ = self.util.spec_from_loader(module_name, loader) + loader.exec_module(module) + + +class BadBytecodeTestPEP302(BadBytecodeTest): + + def import_(self, file, module_name): + loader = self.loader(module_name, file) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module(module_name) + self.assertIn(module_name, sys.modules) + + +class SourceLoaderBadBytecodeTest: + + @classmethod + def setUpClass(cls): + cls.loader = cls.machinery.SourceFileLoader + + @util.writes_bytecode_files + def test_empty_file(self): + # When a .pyc is empty, regenerate it if possible, else raise + # ImportError. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_empty_file(test) + + def test_partial_magic(self): + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_magic(test) + + @util.writes_bytecode_files + def test_magic_only(self): + # When there is only the magic number, regenerate the .pyc if possible, + # else raise EOFError. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_magic_only(test) + + @util.writes_bytecode_files + def test_bad_magic(self): + # When the magic number is different, the bytecode should be + # regenerated. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as bytecode_file: + self.assertEqual(bytecode_file.read(4), + self.util.MAGIC_NUMBER) + + self._test_bad_magic(test) + + @util.writes_bytecode_files + def test_partial_timestamp(self): + # When the timestamp is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_timestamp(test) + + @util.writes_bytecode_files + def test_partial_flags(self): + # When the flags is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_flags(test) + + @util.writes_bytecode_files + def test_partial_hash(self): + # When the hash is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_hash(test) + + @util.writes_bytecode_files + def test_partial_size(self): + # When the size is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_size(test) + + @util.writes_bytecode_files + def test_no_marshal(self): + # When there is only the magic number and timestamp, raise EOFError. + self._test_no_marshal() + + @util.writes_bytecode_files + def test_non_code_marshal(self): + self._test_non_code_marshal() + # XXX ImportError when sourceless + + # [bad marshal] + @util.writes_bytecode_files + def test_bad_marshal(self): + # Bad marshal data should raise a ValueError. + self._test_bad_marshal() + + # [bad timestamp] + @util.writes_bytecode_files + @without_source_date_epoch + def test_old_timestamp(self): + # When the timestamp is older than the source, bytecode should be + # regenerated. + zeros = b'\x00\x00\x00\x00' + with util.create_modules('_temp') as mapping: + py_compile.compile(mapping['_temp']) + bytecode_path = self.util.cache_from_source(mapping['_temp']) + with open(bytecode_path, 'r+b') as bytecode_file: + bytecode_file.seek(8) + bytecode_file.write(zeros) + self.import_(mapping['_temp'], '_temp') + source_mtime = os.path.getmtime(mapping['_temp']) + source_timestamp = self.importlib._pack_uint32(source_mtime) + with open(bytecode_path, 'rb') as bytecode_file: + bytecode_file.seek(8) + self.assertEqual(bytecode_file.read(4), source_timestamp) + + # [bytecode read-only] + @util.writes_bytecode_files + def test_read_only_bytecode(self): + # When bytecode is read-only but should be rewritten, fail silently. + with util.create_modules('_temp') as mapping: + # Create bytecode that will need to be re-created. + py_compile.compile(mapping['_temp']) + bytecode_path = self.util.cache_from_source(mapping['_temp']) + with open(bytecode_path, 'r+b') as bytecode_file: + bytecode_file.seek(0) + bytecode_file.write(b'\x00\x00\x00\x00') + # Make the bytecode read-only. + os.chmod(bytecode_path, + stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + try: + # Should not raise OSError! + self.import_(mapping['_temp'], '_temp') + finally: + # Make writable for eventual clean-up. + os.chmod(bytecode_path, stat.S_IWUSR) + + +class SourceLoaderBadBytecodeTestPEP451( + SourceLoaderBadBytecodeTest, BadBytecodeTestPEP451): + pass + + +(Frozen_SourceBadBytecodePEP451, + Source_SourceBadBytecodePEP451 + ) = util.test_both(SourceLoaderBadBytecodeTestPEP451, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourceLoaderBadBytecodeTestPEP302( + SourceLoaderBadBytecodeTest, BadBytecodeTestPEP302): + pass + + +(Frozen_SourceBadBytecodePEP302, + Source_SourceBadBytecodePEP302 + ) = util.test_both(SourceLoaderBadBytecodeTestPEP302, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourcelessLoaderBadBytecodeTest: + + @classmethod + def setUpClass(cls): + cls.loader = cls.machinery.SourcelessFileLoader + + def test_empty_file(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + + self._test_empty_file(test, del_source=True) + + def test_partial_magic(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + self._test_partial_magic(test, del_source=True) + + def test_magic_only(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_magic_only(test, del_source=True) + + def test_bad_magic(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + + self._test_bad_magic(test, del_source=True) + + def test_partial_timestamp(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_timestamp(test, del_source=True) + + def test_partial_flags(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_flags(test, del_source=True) + + def test_partial_hash(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_hash(test, del_source=True) + + def test_partial_size(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_size(test, del_source=True) + + def test_no_marshal(self): + self._test_no_marshal(del_source=True) + + def test_non_code_marshal(self): + self._test_non_code_marshal(del_source=True) + + +class SourcelessLoaderBadBytecodeTestPEP451(SourcelessLoaderBadBytecodeTest, + BadBytecodeTestPEP451): + pass + + +(Frozen_SourcelessBadBytecodePEP451, + Source_SourcelessBadBytecodePEP451 + ) = util.test_both(SourcelessLoaderBadBytecodeTestPEP451, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourcelessLoaderBadBytecodeTestPEP302(SourcelessLoaderBadBytecodeTest, + BadBytecodeTestPEP302): + pass + + +(Frozen_SourcelessBadBytecodePEP302, + Source_SourcelessBadBytecodePEP302 + ) = util.test_both(SourcelessLoaderBadBytecodeTestPEP302, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_finder.py b/stdlib/test/test_importlib/source/test_finder.py new file mode 100644 index 000000000..4de736a6b --- /dev/null +++ b/stdlib/test/test_importlib/source/test_finder.py @@ -0,0 +1,212 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import errno +import os +import py_compile +import stat +import sys +import tempfile +from test.support.import_helper import make_legacy_pyc +import unittest + + +class FinderTests(abc.FinderTests): + + """For a top-level module, it should just be found directly in the + directory being searched. This is true for a directory with source + [top-level source], bytecode [top-level bc], or both [top-level both]. + There is also the possibility that it is a package [top-level package], in + which case there will be a directory with the module name and an + __init__.py file. If there is a directory without an __init__.py an + ImportWarning is returned [empty dir]. + + For sub-modules and sub-packages, the same happens as above but only use + the tail end of the name [sub module] [sub package] [sub empty]. + + When there is a conflict between a package and module having the same name + in the same directory, the package wins out [package over module]. This is + so that imports of modules within the package can occur rather than trigger + an import error. + + When there is a package and module with the same name, always pick the + package over the module [package over module]. This is so that imports from + the package have the possibility of succeeding. + + """ + + def get_finder(self, root): + loader_details = [(self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES), + (self.machinery.SourcelessFileLoader, + self.machinery.BYTECODE_SUFFIXES)] + return self.machinery.FileFinder(root, *loader_details) + + def import_(self, root, module): + finder = self.get_finder(root) + return self._find(finder, module, loader_only=True) + + def run_test(self, test, create=None, *, compile_=None, unlink=None): + """Test the finding of 'test' with the creation of modules listed in + 'create'. + + Any names listed in 'compile_' are byte-compiled. Modules + listed in 'unlink' have their source files deleted. + + """ + if create is None: + create = {test} + with util.create_modules(*create) as mapping: + if compile_: + for name in compile_: + py_compile.compile(mapping[name]) + if unlink: + for name in unlink: + os.unlink(mapping[name]) + try: + make_legacy_pyc(mapping[name]) + except OSError as error: + # Some tests do not set compile_=True so the source + # module will not get compiled and there will be no + # PEP 3147 pyc file to rename. + if error.errno != errno.ENOENT: + raise + loader = self.import_(mapping['.root'], test) + self.assertHasAttr(loader, 'load_module') + return loader + + def test_module(self): + # [top-level source] + self.run_test('top_level') + # [top-level bc] + self.run_test('top_level', compile_={'top_level'}, + unlink={'top_level'}) + # [top-level both] + self.run_test('top_level', compile_={'top_level'}) + + # [top-level package] + def test_package(self): + # Source. + self.run_test('pkg', {'pkg.__init__'}) + # Bytecode. + self.run_test('pkg', {'pkg.__init__'}, compile_={'pkg.__init__'}, + unlink={'pkg.__init__'}) + # Both. + self.run_test('pkg', {'pkg.__init__'}, compile_={'pkg.__init__'}) + + # [sub module] + def test_module_in_package(self): + with util.create_modules('pkg.__init__', 'pkg.sub') as mapping: + pkg_dir = os.path.dirname(mapping['pkg.__init__']) + loader = self.import_(pkg_dir, 'pkg.sub') + self.assertHasAttr(loader, 'load_module') + + # [sub package] + def test_package_in_package(self): + context = util.create_modules('pkg.__init__', 'pkg.sub.__init__') + with context as mapping: + pkg_dir = os.path.dirname(mapping['pkg.__init__']) + loader = self.import_(pkg_dir, 'pkg.sub') + self.assertHasAttr(loader, 'load_module') + + # [package over modules] + def test_package_over_module(self): + name = '_temp' + loader = self.run_test(name, {'{0}.__init__'.format(name), name}) + self.assertIn('__init__', loader.get_filename(name)) + + def test_failure(self): + with util.create_modules('blah') as mapping: + nothing = self.import_(mapping['.root'], 'sdfsadsadf') + self.assertEqual(nothing, self.NOT_FOUND) + + def test_empty_string_for_dir(self): + # The empty string from sys.path means to search in the cwd. + finder = self.machinery.FileFinder('', (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + with open('mod.py', 'w', encoding='utf-8') as file: + file.write("# test file for importlib") + try: + loader = self._find(finder, 'mod', loader_only=True) + self.assertHasAttr(loader, 'load_module') + finally: + os.unlink('mod.py') + + def test_invalidate_caches(self): + # invalidate_caches() should reset the mtime. + finder = self.machinery.FileFinder('', (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + finder._path_mtime = 42 + finder.invalidate_caches() + self.assertEqual(finder._path_mtime, -1) + + # Regression test for http://bugs.python.org/issue14846 + def test_dir_removal_handling(self): + mod = 'mod' + with util.create_modules(mod) as mapping: + finder = self.get_finder(mapping['.root']) + found = self._find(finder, 'mod', loader_only=True) + self.assertIsNotNone(found) + found = self._find(finder, 'mod', loader_only=True) + self.assertEqual(found, self.NOT_FOUND) + + @unittest.skipUnless(sys.platform != 'win32', + 'os.chmod() does not support the needed arguments under Windows') + def test_no_read_directory(self): + # Issue #16730 + tempdir = tempfile.TemporaryDirectory() + self.enterContext(tempdir) + # Since we muck with the permissions, we want to set them back to + # their original values to make sure the directory can be properly + # cleaned up. + original_mode = os.stat(tempdir.name).st_mode + self.addCleanup(os.chmod, tempdir.name, original_mode) + os.chmod(tempdir.name, stat.S_IWUSR | stat.S_IXUSR) + finder = self.get_finder(tempdir.name) + found = self._find(finder, 'doesnotexist') + self.assertEqual(found, self.NOT_FOUND) + + def test_ignore_file(self): + # If a directory got changed to a file from underneath us, then don't + # worry about looking for submodules. + with tempfile.NamedTemporaryFile() as file_obj: + finder = self.get_finder(file_obj.name) + found = self._find(finder, 'doesnotexist') + self.assertEqual(found, self.NOT_FOUND) + + +class FinderTestsPEP451(FinderTests): + + NOT_FOUND = None + + def _find(self, finder, name, loader_only=False): + spec = finder.find_spec(name) + return spec.loader if spec is not None else spec + + +(Frozen_FinderTestsPEP451, + Source_FinderTestsPEP451 + ) = util.test_both(FinderTestsPEP451, machinery=machinery) + + +class FinderTestsPEP420(FinderTests): + + NOT_FOUND = (None, []) + + def _find(self, finder, name, loader_only=False): + spec = finder.find_spec(name) + if spec is None: + return self.NOT_FOUND + if loader_only: + return spec.loader + return spec.loader, spec.submodule_search_locations + + +(Frozen_FinderTestsPEP420, + Source_FinderTestsPEP420 + ) = util.test_both(FinderTestsPEP420, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_path_hook.py b/stdlib/test/test_importlib/source/test_path_hook.py new file mode 100644 index 000000000..6e1c23e6a --- /dev/null +++ b/stdlib/test/test_importlib/source/test_path_hook.py @@ -0,0 +1,32 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import unittest + + +class PathHookTest: + + """Test the path hook for source.""" + + def path_hook(self): + return self.machinery.FileFinder.path_hook((self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + + def test_success(self): + with util.create_modules('dummy') as mapping: + self.assertHasAttr(self.path_hook()(mapping['.root']), + 'find_spec') + + def test_empty_string(self): + # The empty string represents the cwd. + self.assertHasAttr(self.path_hook()(''), 'find_spec') + + +(Frozen_PathHookTest, + Source_PathHooktest + ) = util.test_both(PathHookTest, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_source_encoding.py b/stdlib/test/test_importlib/source/test_source_encoding.py new file mode 100644 index 000000000..c09c9aa12 --- /dev/null +++ b/stdlib/test/test_importlib/source/test_source_encoding.py @@ -0,0 +1,175 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import codecs +import importlib.util +import re +import types +# Because sys.path gets essentially blanked, need to have unicodedata already +# imported for the parser to use. +import unicodedata +import unittest +import warnings + + +CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) + + +class EncodingTest: + + """PEP 3120 makes UTF-8 the default encoding for source code + [default encoding]. + + PEP 263 specifies how that can change on a per-file basis. Either the first + or second line can contain the encoding line [encoding first line] + [encoding second line]. If the file has the BOM marker it is considered UTF-8 + implicitly [BOM]. If any encoding is specified it must be UTF-8, else it is + an error [BOM and utf-8][BOM conflict]. + + """ + + variable = '\u00fc' + character = '\u00c9' + source_line = "{0} = '{1}'\n".format(variable, character) + module_name = '_temp' + + def run_test(self, source): + with util.create_modules(self.module_name) as mapping: + with open(mapping[self.module_name], 'wb') as file: + file.write(source) + loader = self.machinery.SourceFileLoader(self.module_name, + mapping[self.module_name]) + return self.load(loader) + + def create_source(self, encoding): + encoding_line = "# coding={0}".format(encoding) + assert CODING_RE.match(encoding_line) + source_lines = [encoding_line.encode('utf-8')] + source_lines.append(self.source_line.encode(encoding)) + return b'\n'.join(source_lines) + + def test_non_obvious_encoding(self): + # Make sure that an encoding that has never been a standard one for + # Python works. + encoding_line = "# coding=koi8-r" + assert CODING_RE.match(encoding_line) + source = "{0}\na=42\n".format(encoding_line).encode("koi8-r") + self.run_test(source) + + # [default encoding] + def test_default_encoding(self): + self.run_test(self.source_line.encode('utf-8')) + + # [encoding first line] + def test_encoding_on_first_line(self): + encoding = 'Latin-1' + source = self.create_source(encoding) + self.run_test(source) + + # [encoding second line] + def test_encoding_on_second_line(self): + source = b"#/usr/bin/python\n" + self.create_source('Latin-1') + self.run_test(source) + + # [BOM] + def test_bom(self): + self.run_test(codecs.BOM_UTF8 + self.source_line.encode('utf-8')) + + # [BOM and utf-8] + def test_bom_and_utf_8(self): + source = codecs.BOM_UTF8 + self.create_source('utf-8') + self.run_test(source) + + # [BOM conflict] + def test_bom_conflict(self): + source = codecs.BOM_UTF8 + self.create_source('latin-1') + with self.assertRaises(SyntaxError): + self.run_test(source) + + +class EncodingTestPEP451(EncodingTest): + + def load(self, loader): + module = types.ModuleType(self.module_name) + module.__spec__ = importlib.util.spec_from_loader(self.module_name, loader) + loader.exec_module(module) + return module + + +(Frozen_EncodingTestPEP451, + Source_EncodingTestPEP451 + ) = util.test_both(EncodingTestPEP451, machinery=machinery) + + +class EncodingTestPEP302(EncodingTest): + + def load(self, loader): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return loader.load_module(self.module_name) + + +(Frozen_EncodingTestPEP302, + Source_EncodingTestPEP302 + ) = util.test_both(EncodingTestPEP302, machinery=machinery) + + +class LineEndingTest: + + r"""Source written with the three types of line endings (\n, \r\n, \r) + need to be readable [cr][crlf][lf].""" + + def run_test(self, line_ending): + module_name = '_temp' + source_lines = [b"a = 42", b"b = -13", b''] + source = line_ending.join(source_lines) + with util.create_modules(module_name) as mapping: + with open(mapping[module_name], 'wb') as file: + file.write(source) + loader = self.machinery.SourceFileLoader(module_name, + mapping[module_name]) + return self.load(loader, module_name) + + # [cr] + def test_cr(self): + self.run_test(b'\r') + + # [crlf] + def test_crlf(self): + self.run_test(b'\r\n') + + # [lf] + def test_lf(self): + self.run_test(b'\n') + + +class LineEndingTestPEP451(LineEndingTest): + + def load(self, loader, module_name): + module = types.ModuleType(module_name) + module.__spec__ = importlib.util.spec_from_loader(module_name, loader) + loader.exec_module(module) + return module + + +(Frozen_LineEndingTestPEP451, + Source_LineEndingTestPEP451 + ) = util.test_both(LineEndingTestPEP451, machinery=machinery) + + +class LineEndingTestPEP302(LineEndingTest): + + def load(self, loader, module_name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return loader.load_module(module_name) + + +(Frozen_LineEndingTestPEP302, + Source_LineEndingTestPEP302 + ) = util.test_both(LineEndingTestPEP302, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_abc.py b/stdlib/test/test_importlib/test_abc.py new file mode 100644 index 000000000..dd943210f --- /dev/null +++ b/stdlib/test/test_importlib/test_abc.py @@ -0,0 +1,943 @@ +import io +import marshal +import os +import sys +from test.support import import_helper +import types +import unittest +from unittest import mock +import warnings + +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') +abc = test_util.import_importlib('importlib.abc') +machinery = test_util.import_importlib('importlib.machinery') +util = test_util.import_importlib('importlib.util') + + +##### Inheritance ############################################################## +class InheritanceTests: + + """Test that the specified class is a subclass/superclass of the expected + classes.""" + + subclasses = [] + superclasses = [] + + def setUp(self): + self.superclasses = [getattr(self.abc, class_name) + for class_name in self.superclass_names] + if hasattr(self, 'subclass_names'): + # Because test.support.import_fresh_module() creates a new + # importlib._bootstrap per module, inheritance checks fail when + # checking across module boundaries (i.e. the _bootstrap in abc is + # not the same as the one in machinery). That means stealing one of + # the modules from the other to make sure the same instance is used. + machinery = self.abc.machinery + self.subclasses = [getattr(machinery, class_name) + for class_name in self.subclass_names] + assert self.subclasses or self.superclasses, self.__class__ + self.__test = getattr(self.abc, self._NAME) + + def test_subclasses(self): + # Test that the expected subclasses inherit. + for subclass in self.subclasses: + self.assertIsSubclass(subclass, self.__test) + + def test_superclasses(self): + # Test that the class inherits from the expected superclasses. + for superclass in self.superclasses: + self.assertIsSubclass(self.__test, superclass) + + +class MetaPathFinder(InheritanceTests): + superclass_names = [] + subclass_names = ['BuiltinImporter', 'FrozenImporter', 'PathFinder', + 'WindowsRegistryFinder'] + + +(Frozen_MetaPathFinderInheritanceTests, + Source_MetaPathFinderInheritanceTests + ) = test_util.test_both(MetaPathFinder, abc=abc) + + +class PathEntryFinder(InheritanceTests): + superclass_names = [] + subclass_names = ['FileFinder'] + + +(Frozen_PathEntryFinderInheritanceTests, + Source_PathEntryFinderInheritanceTests + ) = test_util.test_both(PathEntryFinder, abc=abc) + + +class ResourceLoader(InheritanceTests): + superclass_names = ['Loader'] + + +(Frozen_ResourceLoaderInheritanceTests, + Source_ResourceLoaderInheritanceTests + ) = test_util.test_both(ResourceLoader, abc=abc) + + +class InspectLoader(InheritanceTests): + superclass_names = ['Loader'] + subclass_names = ['BuiltinImporter', 'FrozenImporter', 'ExtensionFileLoader'] + + +(Frozen_InspectLoaderInheritanceTests, + Source_InspectLoaderInheritanceTests + ) = test_util.test_both(InspectLoader, abc=abc) + + +class ExecutionLoader(InheritanceTests): + superclass_names = ['InspectLoader'] + subclass_names = ['ExtensionFileLoader'] + + +(Frozen_ExecutionLoaderInheritanceTests, + Source_ExecutionLoaderInheritanceTests + ) = test_util.test_both(ExecutionLoader, abc=abc) + + +class FileLoader(InheritanceTests): + superclass_names = ['ResourceLoader', 'ExecutionLoader'] + subclass_names = ['SourceFileLoader', 'SourcelessFileLoader'] + + +(Frozen_FileLoaderInheritanceTests, + Source_FileLoaderInheritanceTests + ) = test_util.test_both(FileLoader, abc=abc) + + +class SourceLoader(InheritanceTests): + superclass_names = ['ResourceLoader', 'ExecutionLoader'] + subclass_names = ['SourceFileLoader'] + + +(Frozen_SourceLoaderInheritanceTests, + Source_SourceLoaderInheritanceTests + ) = test_util.test_both(SourceLoader, abc=abc) + + +##### Default return values #################################################### + +def make_abc_subclasses(base_class, name=None, inst=False, **kwargs): + if name is None: + name = base_class.__name__ + base = {kind: getattr(splitabc, name) + for kind, splitabc in abc.items()} + return {cls._KIND: cls() if inst else cls + for cls in test_util.split_frozen(base_class, base, **kwargs)} + + +class ABCTestHarness: + + @property + def ins(self): + # Lazily set ins on the class. + cls = self.SPLIT[self._KIND] + ins = cls() + self.__class__.ins = ins + return ins + + +class MetaPathFinder: + + pass + + +class MetaPathFinderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(MetaPathFinder) + + def test_invalidate_caches(self): + # Calling the method is a no-op. + self.ins.invalidate_caches() + + +(Frozen_MPFDefaultTests, + Source_MPFDefaultTests + ) = test_util.test_both(MetaPathFinderDefaultsTests) + + +class PathEntryFinder: + + pass + + +class PathEntryFinderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(PathEntryFinder) + + def test_invalidate_caches(self): + # Should be a no-op. + self.ins.invalidate_caches() + + +(Frozen_PEFDefaultTests, + Source_PEFDefaultTests + ) = test_util.test_both(PathEntryFinderDefaultsTests) + + +class Loader: + + pass + + +class LoaderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(Loader) + + def test_create_module(self): + spec = 'a spec' + self.assertIsNone(self.ins.create_module(spec)) + + def test_load_module(self): + with self.assertRaises(ImportError): + self.ins.load_module('something') + + def test_module_repr(self): + mod = types.ModuleType('blah') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + original_repr = repr(mod) + mod.__loader__ = self.ins + # Should still return a proper repr. + self.assertTrue(repr(mod)) + + +(Frozen_LDefaultTests, + SourceLDefaultTests + ) = test_util.test_both(LoaderDefaultsTests) + + +class ResourceLoader(Loader): + + def get_data(self, path): + return super().get_data(path) + + +class ResourceLoaderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(ResourceLoader) + + def test_get_data(self): + with self.assertRaises(IOError): + self.ins.get_data('/some/path') + + +(Frozen_RLDefaultTests, + Source_RLDefaultTests + ) = test_util.test_both(ResourceLoaderDefaultsTests) + + +class InspectLoader(Loader): + + def is_package(self, fullname): + return super().is_package(fullname) + + def get_source(self, fullname): + return super().get_source(fullname) + + +SPLIT_IL = make_abc_subclasses(InspectLoader) + + +class InspectLoaderDefaultsTests(ABCTestHarness): + + SPLIT = SPLIT_IL + + def test_is_package(self): + with self.assertRaises(ImportError): + self.ins.is_package('blah') + + def test_get_source(self): + with self.assertRaises(ImportError): + self.ins.get_source('blah') + + +(Frozen_ILDefaultTests, + Source_ILDefaultTests + ) = test_util.test_both(InspectLoaderDefaultsTests) + + +class ExecutionLoader(InspectLoader): + + def get_filename(self, fullname): + return super().get_filename(fullname) + + +SPLIT_EL = make_abc_subclasses(ExecutionLoader) + + +class ExecutionLoaderDefaultsTests(ABCTestHarness): + + SPLIT = SPLIT_EL + + def test_get_filename(self): + with self.assertRaises(ImportError): + self.ins.get_filename('blah') + + +(Frozen_ELDefaultTests, + Source_ELDefaultsTests + ) = test_util.test_both(InspectLoaderDefaultsTests) + + +class ResourceReader: + + def open_resource(self, *args, **kwargs): + return super().open_resource(*args, **kwargs) + + def resource_path(self, *args, **kwargs): + return super().resource_path(*args, **kwargs) + + def is_resource(self, *args, **kwargs): + return super().is_resource(*args, **kwargs) + + def contents(self, *args, **kwargs): + return super().contents(*args, **kwargs) + + +##### MetaPathFinder concrete methods ########################################## +class MetaPathFinderFindModuleTests: + + @classmethod + def finder(cls, spec): + class MetaPathSpecFinder(cls.abc.MetaPathFinder): + + def find_spec(self, fullname, path, target=None): + self.called_for = fullname, path + return spec + + return MetaPathSpecFinder() + + def test_find_spec_with_explicit_target(self): + loader = object() + spec = self.util.spec_from_loader('blah', loader) + finder = self.finder(spec) + found = finder.find_spec('blah', 'blah', None) + self.assertEqual(found, spec) + + def test_no_spec(self): + finder = self.finder(None) + path = ['a', 'b', 'c'] + name = 'blah' + found = finder.find_spec(name, path, None) + self.assertIsNone(found) + self.assertEqual(name, finder.called_for[0]) + self.assertEqual(path, finder.called_for[1]) + + def test_spec(self): + loader = object() + spec = self.util.spec_from_loader('blah', loader) + finder = self.finder(spec) + found = finder.find_spec('blah', None) + self.assertIs(found, spec) + + +(Frozen_MPFFindModuleTests, + Source_MPFFindModuleTests + ) = test_util.test_both(MetaPathFinderFindModuleTests, abc=abc, util=util) + + +##### Loader concrete methods ################################################## +class LoaderLoadModuleTests: + + def loader(self): + class SpecLoader(self.abc.Loader): + found = None + def exec_module(self, module): + self.found = module + + def is_package(self, fullname): + """Force some non-default module state to be set.""" + return True + + return SpecLoader() + + def test_fresh(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + loader = self.loader() + name = 'blah' + with test_util.uncache(name): + loader.load_module(name) + module = loader.found + self.assertIs(sys.modules[name], module) + self.assertEqual(loader, module.__loader__) + self.assertEqual(loader, module.__spec__.loader) + self.assertEqual(name, module.__name__) + self.assertEqual(name, module.__spec__.name) + self.assertIsNotNone(module.__path__) + self.assertIsNotNone(module.__path__, + module.__spec__.submodule_search_locations) + + def test_reload(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + name = 'blah' + loader = self.loader() + module = types.ModuleType(name) + module.__spec__ = self.util.spec_from_loader(name, loader) + module.__loader__ = loader + with test_util.uncache(name): + sys.modules[name] = module + loader.load_module(name) + found = loader.found + self.assertIs(found, sys.modules[name]) + self.assertIs(module, sys.modules[name]) + + +(Frozen_LoaderLoadModuleTests, + Source_LoaderLoadModuleTests + ) = test_util.test_both(LoaderLoadModuleTests, abc=abc, util=util) + + +##### InspectLoader concrete methods ########################################### +class InspectLoaderSourceToCodeTests: + + def source_to_module(self, data, path=None): + """Help with source_to_code() tests.""" + module = types.ModuleType('blah') + loader = self.InspectLoaderSubclass() + if path is None: + code = loader.source_to_code(data) + else: + code = loader.source_to_code(data, path) + exec(code, module.__dict__) + return module + + def test_source_to_code_source(self): + # Since compile() can handle strings, so should source_to_code(). + source = 'attr = 42' + module = self.source_to_module(source) + self.assertHasAttr(module, 'attr') + self.assertEqual(module.attr, 42) + + def test_source_to_code_bytes(self): + # Since compile() can handle bytes, so should source_to_code(). + source = b'attr = 42' + module = self.source_to_module(source) + self.assertHasAttr(module, 'attr') + self.assertEqual(module.attr, 42) + + def test_source_to_code_path(self): + # Specifying a path should set it for the code object. + path = 'path/to/somewhere' + loader = self.InspectLoaderSubclass() + code = loader.source_to_code('', path) + self.assertEqual(code.co_filename, path) + + def test_source_to_code_no_path(self): + # Not setting a path should still work and be set to since that + # is a pre-existing practice as a default to compile(). + loader = self.InspectLoaderSubclass() + code = loader.source_to_code('') + self.assertEqual(code.co_filename, '') + + +(Frozen_ILSourceToCodeTests, + Source_ILSourceToCodeTests + ) = test_util.test_both(InspectLoaderSourceToCodeTests, + InspectLoaderSubclass=SPLIT_IL) + + +class InspectLoaderGetCodeTests: + + def test_get_code(self): + # Test success. + module = types.ModuleType('blah') + with mock.patch.object(self.InspectLoaderSubclass, 'get_source') as mocked: + mocked.return_value = 'attr = 42' + loader = self.InspectLoaderSubclass() + code = loader.get_code('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + def test_get_code_source_is_None(self): + # If get_source() is None then this should be None. + with mock.patch.object(self.InspectLoaderSubclass, 'get_source') as mocked: + mocked.return_value = None + loader = self.InspectLoaderSubclass() + code = loader.get_code('blah') + self.assertIsNone(code) + + def test_get_code_source_not_found(self): + # If there is no source then there is no code object. + loader = self.InspectLoaderSubclass() + with self.assertRaises(ImportError): + loader.get_code('blah') + + +(Frozen_ILGetCodeTests, + Source_ILGetCodeTests + ) = test_util.test_both(InspectLoaderGetCodeTests, + InspectLoaderSubclass=SPLIT_IL) + + +class InspectLoaderLoadModuleTests: + + """Test InspectLoader.load_module().""" + + module_name = 'blah' + + def setUp(self): + import_helper.unload(self.module_name) + self.addCleanup(import_helper.unload, self.module_name) + + def load(self, loader): + spec = self.util.spec_from_loader(self.module_name, loader) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return self.init._bootstrap._load_unlocked(spec) + + def mock_get_code(self): + return mock.patch.object(self.InspectLoaderSubclass, 'get_code') + + def test_get_code_ImportError(self): + # If get_code() raises ImportError, it should propagate. + with self.mock_get_code() as mocked_get_code: + mocked_get_code.side_effect = ImportError + with self.assertRaises(ImportError): + loader = self.InspectLoaderSubclass() + self.load(loader) + + def test_get_code_None(self): + # If get_code() returns None, raise ImportError. + with self.mock_get_code() as mocked_get_code: + mocked_get_code.return_value = None + with self.assertRaises(ImportError): + loader = self.InspectLoaderSubclass() + self.load(loader) + + def test_module_returned(self): + # The loaded module should be returned. + code = compile('attr = 42', '', 'exec') + with self.mock_get_code() as mocked_get_code: + mocked_get_code.return_value = code + loader = self.InspectLoaderSubclass() + module = self.load(loader) + self.assertEqual(module, sys.modules[self.module_name]) + + +(Frozen_ILLoadModuleTests, + Source_ILLoadModuleTests + ) = test_util.test_both(InspectLoaderLoadModuleTests, + InspectLoaderSubclass=SPLIT_IL, + init=init, + util=util) + + +##### ExecutionLoader concrete methods ######################################### +class ExecutionLoaderGetCodeTests: + + def mock_methods(self, *, get_source=False, get_filename=False): + source_mock_context, filename_mock_context = None, None + if get_source: + source_mock_context = mock.patch.object(self.ExecutionLoaderSubclass, + 'get_source') + if get_filename: + filename_mock_context = mock.patch.object(self.ExecutionLoaderSubclass, + 'get_filename') + return source_mock_context, filename_mock_context + + def test_get_code(self): + path = 'blah.py' + source_mock_context, filename_mock_context = self.mock_methods( + get_source=True, get_filename=True) + with source_mock_context as source_mock, filename_mock_context as name_mock: + source_mock.return_value = 'attr = 42' + name_mock.return_value = path + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertEqual(code.co_filename, path) + module = types.ModuleType('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + def test_get_code_source_is_None(self): + # If get_source() is None then this should be None. + source_mock_context, _ = self.mock_methods(get_source=True) + with source_mock_context as mocked: + mocked.return_value = None + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertIsNone(code) + + def test_get_code_source_not_found(self): + # If there is no source then there is no code object. + loader = self.ExecutionLoaderSubclass() + with self.assertRaises(ImportError): + loader.get_code('blah') + + def test_get_code_no_path(self): + # If get_filename() raises ImportError then simply skip setting the path + # on the code object. + source_mock_context, filename_mock_context = self.mock_methods( + get_source=True, get_filename=True) + with source_mock_context as source_mock, filename_mock_context as name_mock: + source_mock.return_value = 'attr = 42' + name_mock.side_effect = ImportError + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertEqual(code.co_filename, '') + module = types.ModuleType('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + +(Frozen_ELGetCodeTests, + Source_ELGetCodeTests + ) = test_util.test_both(ExecutionLoaderGetCodeTests, + ExecutionLoaderSubclass=SPLIT_EL) + + +##### SourceLoader concrete methods ############################################ +class SourceOnlyLoader: + + # Globals that should be defined for all modules. + source = (b"_ = '::'.join([__name__, __file__, __cached__, __package__, " + b"repr(__loader__)])") + + def __init__(self, path): + self.path = path + + def get_data(self, path): + if path != self.path: + raise IOError + return self.source + + def get_filename(self, fullname): + return self.path + + +SPLIT_SOL = make_abc_subclasses(SourceOnlyLoader, 'SourceLoader') + + +class SourceLoader(SourceOnlyLoader): + + source_mtime = 1 + + def __init__(self, path, magic=None): + super().__init__(path) + self.bytecode_path = self.util.cache_from_source(self.path) + self.source_size = len(self.source) + if magic is None: + magic = self.util.MAGIC_NUMBER + data = bytearray(magic) + data.extend(self.init._pack_uint32(0)) + data.extend(self.init._pack_uint32(self.source_mtime)) + data.extend(self.init._pack_uint32(self.source_size)) + code_object = compile(self.source, self.path, 'exec', + dont_inherit=True) + data.extend(marshal.dumps(code_object)) + self.bytecode = bytes(data) + self.written = {} + + def get_data(self, path): + if path == self.path: + return super().get_data(path) + elif path == self.bytecode_path: + return self.bytecode + else: + raise OSError + + def path_stats(self, path): + if path != self.path: + raise IOError + return {'mtime': self.source_mtime, 'size': self.source_size} + + def set_data(self, path, data): + self.written[path] = bytes(data) + return path == self.bytecode_path + + +SPLIT_SL = make_abc_subclasses(SourceLoader, util=util, init=init) + + +class SourceLoaderTestHarness: + + def setUp(self, *, is_package=True, **kwargs): + self.package = 'pkg' + if is_package: + self.path = os.path.join(self.package, '__init__.py') + self.name = self.package + else: + module_name = 'mod' + self.path = os.path.join(self.package, '.'.join(['mod', 'py'])) + self.name = '.'.join([self.package, module_name]) + self.cached = self.util.cache_from_source(self.path) + self.loader = self.loader_mock(self.path, **kwargs) + + def verify_module(self, module): + self.assertEqual(module.__name__, self.name) + self.assertEqual(module.__file__, self.path) + self.assertEqual(module.__cached__, self.cached) + self.assertEqual(module.__package__, self.package) + self.assertEqual(module.__loader__, self.loader) + values = module._.split('::') + self.assertEqual(values[0], self.name) + self.assertEqual(values[1], self.path) + self.assertEqual(values[2], self.cached) + self.assertEqual(values[3], self.package) + self.assertEqual(values[4], repr(self.loader)) + + def verify_code(self, code_object): + module = types.ModuleType(self.name) + module.__file__ = self.path + module.__cached__ = self.cached + module.__package__ = self.package + module.__loader__ = self.loader + module.__path__ = [] + exec(code_object, module.__dict__) + self.verify_module(module) + + +class SourceOnlyLoaderTests(SourceLoaderTestHarness): + """Test importlib.abc.SourceLoader for source-only loading.""" + + def test_get_source(self): + # Verify the source code is returned as a string. + # If an OSError is raised by get_data then raise ImportError. + expected_source = self.loader.source.decode('utf-8') + self.assertEqual(self.loader.get_source(self.name), expected_source) + def raise_OSError(path): + raise OSError + self.loader.get_data = raise_OSError + with self.assertRaises(ImportError) as cm: + self.loader.get_source(self.name) + self.assertEqual(cm.exception.name, self.name) + + def test_is_package(self): + # Properly detect when loading a package. + self.setUp(is_package=False) + self.assertFalse(self.loader.is_package(self.name)) + self.setUp(is_package=True) + self.assertTrue(self.loader.is_package(self.name)) + self.assertFalse(self.loader.is_package(self.name + '.__init__')) + + def test_get_code(self): + # Verify the code object is created. + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + def test_source_to_code(self): + # Verify the compiled code object. + code = self.loader.source_to_code(self.loader.source, self.path) + self.verify_code(code) + + def test_load_module(self): + # Loading a module should set __name__, __loader__, __package__, + # __path__ (for packages), __file__, and __cached__. + # The module should also be put into sys.modules. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with test_util.uncache(self.name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = self.loader.load_module(self.name) + self.verify_module(module) + self.assertEqual(module.__path__, [os.path.dirname(self.path)]) + self.assertIn(self.name, sys.modules) + + def test_package_settings(self): + # __package__ needs to be set, while __path__ is set on if the module + # is a package. + # Testing the values for a package are covered by test_load_module. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + self.setUp(is_package=False) + with test_util.uncache(self.name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = self.loader.load_module(self.name) + self.verify_module(module) + self.assertNotHasAttr(module, '__path__') + + def test_get_source_encoding(self): + # Source is considered encoded in UTF-8 by default unless otherwise + # specified by an encoding line. + source = "_ = 'ü'" + self.loader.source = source.encode('utf-8') + returned_source = self.loader.get_source(self.name) + self.assertEqual(returned_source, source) + source = "# coding: latin-1\n_ = ü" + self.loader.source = source.encode('latin-1') + returned_source = self.loader.get_source(self.name) + self.assertEqual(returned_source, source) + + +(Frozen_SourceOnlyLoaderTests, + Source_SourceOnlyLoaderTests + ) = test_util.test_both(SourceOnlyLoaderTests, util=util, + loader_mock=SPLIT_SOL) + + +@unittest.skipIf(sys.dont_write_bytecode, "sys.dont_write_bytecode is true") +class SourceLoaderBytecodeTests(SourceLoaderTestHarness): + + """Test importlib.abc.SourceLoader's use of bytecode. + + Source-only testing handled by SourceOnlyLoaderTests. + + """ + + def verify_code(self, code_object, *, bytecode_written=False): + super().verify_code(code_object) + if bytecode_written: + self.assertIn(self.cached, self.loader.written) + data = bytearray(self.util.MAGIC_NUMBER) + data.extend(self.init._pack_uint32(0)) + data.extend(self.init._pack_uint32(self.loader.source_mtime)) + data.extend(self.init._pack_uint32(self.loader.source_size)) + # Make sure there's > 1 reference to code_object so that the + # marshaled representation below matches the cached representation + l = [code_object] + data.extend(marshal.dumps(code_object)) + self.assertEqual(self.loader.written[self.cached], bytes(data)) + + def test_code_with_everything(self): + # When everything should work. + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + def test_no_bytecode(self): + # If no bytecode exists then move on to the source. + self.loader.bytecode_path = "" + # Sanity check + with self.assertRaises(OSError): + bytecode_path = self.util.cache_from_source(self.path) + self.loader.get_data(bytecode_path) + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + + def test_code_bad_timestamp(self): + # Bytecode is only used when the timestamp matches the source EXACTLY. + for source_mtime in (0, 2): + assert source_mtime != self.loader.source_mtime + original = self.loader.source_mtime + self.loader.source_mtime = source_mtime + # If bytecode is used then EOFError would be raised by marshal. + self.loader.bytecode = self.loader.bytecode[8:] + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + self.loader.source_mtime = original + + def test_code_bad_magic(self): + # Skip over bytecode with a bad magic number. + self.setUp(magic=b'0000') + # If bytecode is used then EOFError would be raised by marshal. + self.loader.bytecode = self.loader.bytecode[8:] + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + + def test_dont_write_bytecode(self): + # Bytecode is not written if sys.dont_write_bytecode is true. + # Can assume it is false already thanks to the skipIf class decorator. + try: + sys.dont_write_bytecode = True + self.loader.bytecode_path = "" + code_object = self.loader.get_code(self.name) + self.assertNotIn(self.cached, self.loader.written) + finally: + sys.dont_write_bytecode = False + + def test_no_set_data(self): + # If set_data is not defined, one can still read bytecode. + self.setUp(magic=b'0000') + original_set_data = self.loader.__class__.mro()[1].set_data + try: + del self.loader.__class__.mro()[1].set_data + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + finally: + self.loader.__class__.mro()[1].set_data = original_set_data + + def test_set_data_raises_exceptions(self): + # Raising NotImplementedError or OSError is okay for set_data. + def raise_exception(exc): + def closure(*args, **kwargs): + raise exc + return closure + + self.setUp(magic=b'0000') + self.loader.set_data = raise_exception(NotImplementedError) + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + +(Frozen_SLBytecodeTests, + SourceSLBytecodeTests + ) = test_util.test_both(SourceLoaderBytecodeTests, init=init, util=util, + loader_mock=SPLIT_SL) + + +class SourceLoaderGetSourceTests: + + """Tests for importlib.abc.SourceLoader.get_source().""" + + def test_default_encoding(self): + # Should have no problems with UTF-8 text. + name = 'mod' + mock = self.SourceOnlyLoaderMock('mod.file') + source = 'x = "ü"' + mock.source = source.encode('utf-8') + returned_source = mock.get_source(name) + self.assertEqual(returned_source, source) + + def test_decoded_source(self): + # Decoding should work. + name = 'mod' + mock = self.SourceOnlyLoaderMock("mod.file") + source = "# coding: Latin-1\nx='ü'" + assert source.encode('latin-1') != source.encode('utf-8') + mock.source = source.encode('latin-1') + returned_source = mock.get_source(name) + self.assertEqual(returned_source, source) + + def test_universal_newlines(self): + # PEP 302 says universal newlines should be used. + name = 'mod' + mock = self.SourceOnlyLoaderMock('mod.file') + source = "x = 42\r\ny = -13\r\n" + mock.source = source.encode('utf-8') + expect = io.IncrementalNewlineDecoder(None, True).decode(source) + self.assertEqual(mock.get_source(name), expect) + + +(Frozen_SourceOnlyLoaderGetSourceTests, + Source_SourceOnlyLoaderGetSourceTests + ) = test_util.test_both(SourceLoaderGetSourceTests, + SourceOnlyLoaderMock=SPLIT_SOL) + + +class SourceLoaderDeprecationWarningsTests(unittest.TestCase): + """Tests SourceLoader deprecation warnings.""" + + def test_deprecated_path_mtime(self): + from importlib.abc import SourceLoader + class DummySourceLoader(SourceLoader): + def get_data(self, path): + return b'' + + def get_filename(self, fullname): + return 'foo.py' + + def path_stats(self, path): + return {'mtime': 1} + + loader = DummySourceLoader() + + with self.assertWarnsRegex( + DeprecationWarning, + r"SourceLoader\.path_mtime is deprecated in favour of " + r"SourceLoader\.path_stats\(\)\." + ): + loader.path_mtime('foo.py') + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_api.py b/stdlib/test/test_importlib/test_api.py new file mode 100644 index 000000000..1bc531a2f --- /dev/null +++ b/stdlib/test/test_importlib/test_api.py @@ -0,0 +1,508 @@ +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') +util = test_util.import_importlib('importlib.util') +machinery = test_util.import_importlib('importlib.machinery') + +import os.path +import sys +from test import support +from test.support import import_helper +from test.support import os_helper +import traceback +import types +import unittest + + +class ImportModuleTests: + + """Test importlib.import_module.""" + + def test_module_import(self): + # Test importing a top-level module. + with test_util.mock_spec('top_level') as mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module('top_level') + self.assertEqual(module.__name__, 'top_level') + + def test_absolute_package_import(self): + # Test importing a module from a package with an absolute name. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + name = '{0}.mod'.format(pkg_name) + with test_util.mock_spec(pkg_long_name, name) as mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module(name) + self.assertEqual(module.__name__, name) + + def test_shallow_relative_package_import(self): + # Test importing a module from a package through a relative import. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + module_name = 'mod' + absolute_name = '{0}.{1}'.format(pkg_name, module_name) + relative_name = '.{0}'.format(module_name) + with test_util.mock_spec(pkg_long_name, absolute_name) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module(pkg_name) + module = self.init.import_module(relative_name, pkg_name) + self.assertEqual(module.__name__, absolute_name) + + def test_deep_relative_package_import(self): + modules = ['a.__init__', 'a.b.__init__', 'a.c'] + with test_util.mock_spec(*modules) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module('a') + self.init.import_module('a.b') + module = self.init.import_module('..c', 'a.b') + self.assertEqual(module.__name__, 'a.c') + + def test_absolute_import_with_package(self): + # Test importing a module from a package with an absolute name with + # the 'package' argument given. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + name = '{0}.mod'.format(pkg_name) + with test_util.mock_spec(pkg_long_name, name) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module(pkg_name) + module = self.init.import_module(name, pkg_name) + self.assertEqual(module.__name__, name) + + def test_relative_import_wo_package(self): + # Relative imports cannot happen without the 'package' argument being + # set. + with self.assertRaises(TypeError): + self.init.import_module('.support') + + + def test_loaded_once(self): + # Issue #13591: Modules should only be loaded once when + # initializing the parent package attempts to import the + # module currently being imported. + b_load_count = 0 + def load_a(): + self.init.import_module('a.b') + def load_b(): + nonlocal b_load_count + b_load_count += 1 + code = {'a': load_a, 'a.b': load_b} + modules = ['a.__init__', 'a.b'] + with test_util.mock_spec(*modules, module_code=code) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module('a.b') + self.assertEqual(b_load_count, 1) + + +(Frozen_ImportModuleTests, + Source_ImportModuleTests + ) = test_util.test_both( + ImportModuleTests, init=init, util=util, machinery=machinery) + + +class FindLoaderTests: + + FakeMetaFinder = None + + def test_sys_modules(self): + # If a module with __spec__.loader is in sys.modules, then return it. + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + loader = 'a loader!' + module.__spec__ = self.machinery.ModuleSpec(name, loader) + sys.modules[name] = module + spec = self.util.find_spec(name) + self.assertIsNotNone(spec) + self.assertEqual(spec.loader, loader) + + def test_sys_modules_loader_is_None(self): + # If sys.modules[name].__spec__.loader is None, raise ValueError. + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + module.__loader__ = None + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_sys_modules_loader_is_not_set(self): + # Should raise ValueError + # Issue #17099 + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + try: + del module.__spec__.loader + except AttributeError: + pass + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_success(self): + # Return the loader found on sys.meta_path. + name = 'some_mod' + with test_util.uncache(name): + with test_util.import_state(meta_path=[self.FakeMetaFinder]): + spec = self.util.find_spec(name) + self.assertEqual((name, (name, None)), (spec.name, spec.loader)) + + def test_success_path(self): + # Searching on a path should work. + name = 'some_mod' + path = 'path to some place' + with test_util.uncache(name): + with test_util.import_state(meta_path=[self.FakeMetaFinder]): + spec = self.util.find_spec(name, path) + self.assertEqual(name, spec.name) + + def test_nothing(self): + # None is returned upon failure to find a loader. + self.assertIsNone(self.util.find_spec('nevergoingtofindthismodule')) + + +class FindLoaderPEP451Tests(FindLoaderTests): + + class FakeMetaFinder: + @staticmethod + def find_spec(name, path=None, target=None): + return machinery['Source'].ModuleSpec(name, (name, path)) + + +(Frozen_FindLoaderPEP451Tests, + Source_FindLoaderPEP451Tests + ) = test_util.test_both( + FindLoaderPEP451Tests, init=init, util=util, machinery=machinery) + + +class ReloadTests: + + def test_reload_modules(self): + for mod in ('tokenize', 'time', 'marshal'): + with self.subTest(module=mod): + with import_helper.CleanImport(mod): + module = self.init.import_module(mod) + self.init.reload(module) + + def test_module_replaced(self): + def code(): + import sys + module = type(sys)('top_level') + module.spam = 3 + sys.modules['top_level'] = module + mock = test_util.mock_spec('top_level', + module_code={'top_level': code}) + with mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module('top_level') + reloaded = self.init.reload(module) + actual = sys.modules['top_level'] + self.assertEqual(actual.spam, 3) + self.assertEqual(reloaded.spam, 3) + + def test_reload_missing_loader(self): + with import_helper.CleanImport('types'): + import types + loader = types.__loader__ + del types.__loader__ + reloaded = self.init.reload(types) + + self.assertIs(reloaded, types) + self.assertIs(sys.modules['types'], types) + self.assertEqual(reloaded.__loader__.path, loader.path) + + def test_reload_loader_replaced(self): + with import_helper.CleanImport('types'): + import types + types.__loader__ = None + self.init.invalidate_caches() + reloaded = self.init.reload(types) + + self.assertIsNot(reloaded.__loader__, None) + self.assertIs(reloaded, types) + self.assertIs(sys.modules['types'], types) + + def test_reload_location_changed(self): + name = 'spam' + with os_helper.temp_cwd(None) as cwd: + with test_util.uncache('spam'): + with import_helper.DirsOnSysPath(cwd): + # Start as a plain module. + self.init.invalidate_caches() + path = os.path.join(cwd, name + '.py') + cached = self.util.cache_from_source(path) + expected = {'__name__': name, + '__package__': '', + '__file__': path, + '__cached__': cached, + '__doc__': None, + } + os_helper.create_empty_file(path) + module = self.init.import_module(name) + ns = vars(module).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertEqual(loader.path, path) + self.assertEqual(ns, expected) + + # Change to a package. + self.init.invalidate_caches() + init_path = os.path.join(cwd, name, '__init__.py') + cached = self.util.cache_from_source(init_path) + expected = {'__name__': name, + '__package__': name, + '__file__': init_path, + '__cached__': cached, + '__path__': [os.path.dirname(init_path)], + '__doc__': None, + } + os.mkdir(name) + os.rename(path, init_path) + reloaded = self.init.reload(module) + ns = vars(reloaded).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertIs(reloaded, module) + self.assertEqual(loader.path, init_path) + self.maxDiff = None + self.assertEqual(ns, expected) + + def test_reload_namespace_changed(self): + name = 'spam' + with os_helper.temp_cwd(None) as cwd: + with test_util.uncache('spam'): + with test_util.import_state(path=[cwd]): + self.init._bootstrap_external._install(self.init._bootstrap) + # Start as a namespace package. + self.init.invalidate_caches() + bad_path = os.path.join(cwd, name, '__init.py') + cached = self.util.cache_from_source(bad_path) + expected = {'__name__': name, + '__package__': name, + '__doc__': None, + '__file__': None, + } + os.mkdir(name) + with open(bad_path, 'w', encoding='utf-8') as init_file: + init_file.write('eggs = None') + module = self.init.import_module(name) + ns = vars(module).copy() + loader = ns.pop('__loader__') + path = ns.pop('__path__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertIsNotNone(spec.loader) + self.assertIsNotNone(loader) + self.assertEqual(spec.loader, loader) + self.assertEqual(set(path), + set([os.path.dirname(bad_path)])) + with self.assertRaises(AttributeError): + # a NamespaceLoader + loader.path + self.assertEqual(ns, expected) + + # Change to a regular package. + self.init.invalidate_caches() + init_path = os.path.join(cwd, name, '__init__.py') + cached = self.util.cache_from_source(init_path) + expected = {'__name__': name, + '__package__': name, + '__file__': init_path, + '__cached__': cached, + '__path__': [os.path.dirname(init_path)], + '__doc__': None, + 'eggs': None, + } + os.rename(bad_path, init_path) + reloaded = self.init.reload(module) + ns = vars(reloaded).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertIs(reloaded, module) + self.assertEqual(loader.path, init_path) + self.assertEqual(ns, expected) + + def test_reload_submodule(self): + # See #19851. + name = 'spam' + subname = 'ham' + with test_util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = test_util.submodule(name, subname, pkg_dir) + ham = self.init.import_module(fullname) + reloaded = self.init.reload(ham) + self.assertIs(reloaded, ham) + + def test_module_missing_spec(self): + #Test that reload() throws ModuleNotFounderror when reloading + # a module whose missing a spec. (bpo-29851) + name = 'spam' + with test_util.uncache(name): + module = sys.modules[name] = types.ModuleType(name) + # Sanity check by attempting an import. + module = self.init.import_module(name) + self.assertIsNone(module.__spec__) + with self.assertRaises(ModuleNotFoundError): + self.init.reload(module) + + def test_reload_traceback_with_non_str(self): + # gh-125519 + with support.captured_stdout() as stdout: + try: + self.init.reload("typing") + except TypeError as exc: + traceback.print_exception(exc, file=stdout) + else: + self.fail("Expected TypeError to be raised") + printed_traceback = stdout.getvalue() + self.assertIn("TypeError", printed_traceback) + self.assertNotIn("AttributeError", printed_traceback) + self.assertNotIn("module.__spec__.name", printed_traceback) + + +(Frozen_ReloadTests, + Source_ReloadTests + ) = test_util.test_both( + ReloadTests, init=init, util=util, machinery=machinery) + + +class InvalidateCacheTests: + + def test_method_called(self): + # If defined the method should be called. + class InvalidatingNullFinder: + def __init__(self, *ignored): + self.called = False + def invalidate_caches(self): + self.called = True + + key = os.path.abspath('gobledeegook') + meta_ins = InvalidatingNullFinder() + path_ins = InvalidatingNullFinder() + sys.meta_path.insert(0, meta_ins) + self.addCleanup(lambda: sys.path_importer_cache.__delitem__(key)) + sys.path_importer_cache[key] = path_ins + self.addCleanup(lambda: sys.meta_path.remove(meta_ins)) + self.init.invalidate_caches() + self.assertTrue(meta_ins.called) + self.assertTrue(path_ins.called) + + def test_method_lacking(self): + # There should be no issues if the method is not defined. + key = 'gobbledeegook' + sys.path_importer_cache[key] = None + self.addCleanup(lambda: sys.path_importer_cache.pop(key, None)) + self.init.invalidate_caches() # Shouldn't trigger an exception. + + +(Frozen_InvalidateCacheTests, + Source_InvalidateCacheTests + ) = test_util.test_both( + InvalidateCacheTests, init=init, util=util, machinery=machinery) + + +class FrozenImportlibTests(unittest.TestCase): + + def test_no_frozen_importlib(self): + # Should be able to import w/o _frozen_importlib being defined. + # Can't do an isinstance() check since separate copies of importlib + # may have been used for import, so just check the name is not for the + # frozen loader. + source_init = init['Source'] + self.assertNotEqual(source_init.__loader__.__class__.__name__, + 'FrozenImporter') + + +class StartupTests: + + def test_everyone_has___loader__(self): + # Issue #17098: all modules should have __loader__ defined. + for name, module in sys.modules.items(): + if isinstance(module, types.ModuleType): + with self.subTest(name=name): + self.assertHasAttr(module, '__loader__') + if self.machinery.BuiltinImporter.find_spec(name): + self.assertIsNot(module.__loader__, None) + elif self.machinery.FrozenImporter.find_spec(name): + self.assertIsNot(module.__loader__, None) + + def test_everyone_has___spec__(self): + for name, module in sys.modules.items(): + if isinstance(module, types.ModuleType): + with self.subTest(name=name): + self.assertHasAttr(module, '__spec__') + if self.machinery.BuiltinImporter.find_spec(name): + self.assertIsNot(module.__spec__, None) + elif self.machinery.FrozenImporter.find_spec(name): + self.assertIsNot(module.__spec__, None) + + +(Frozen_StartupTests, + Source_StartupTests + ) = test_util.test_both(StartupTests, machinery=machinery) + + +class TestModuleAll(unittest.TestCase): + def test_machinery(self): + extra = ( + # from importlib._bootstrap and importlib._bootstrap_external + 'AppleFrameworkLoader', + 'BYTECODE_SUFFIXES', + 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', + 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', + 'FileFinder', + 'FrozenImporter', + 'ModuleSpec', + 'NamespaceLoader', + 'OPTIMIZED_BYTECODE_SUFFIXES', + 'PathFinder', + 'SOURCE_SUFFIXES', + 'SourceFileLoader', + 'SourcelessFileLoader', + 'WindowsRegistryFinder', + ) + support.check__all__(self, machinery['Source'], extra=extra) + + def test_util(self): + extra = ( + # from importlib.abc, importlib._bootstrap + # and importlib._bootstrap_external + 'Loader', + 'MAGIC_NUMBER', + 'cache_from_source', + 'decode_source', + 'module_from_spec', + 'source_from_cache', + 'spec_from_file_location', + 'spec_from_loader', + ) + support.check__all__(self, util['Source'], extra=extra) + + +class TestDeprecations(unittest.TestCase): + def test_machinery_deprecated_attributes(self): + from importlib import machinery + attributes = ( + 'DEBUG_BYTECODE_SUFFIXES', + 'OPTIMIZED_BYTECODE_SUFFIXES', + ) + for attr in attributes: + with self.subTest(attr=attr): + with self.assertWarns(DeprecationWarning): + getattr(machinery, attr) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_lazy.py b/stdlib/test/test_importlib/test_lazy.py new file mode 100644 index 000000000..e48fad889 --- /dev/null +++ b/stdlib/test/test_importlib/test_lazy.py @@ -0,0 +1,229 @@ +import importlib +from importlib import abc +from importlib import util +import sys +import time +import threading +import types +import unittest + +from test.support import threading_helper +from test.test_importlib import util as test_util + + +class CollectInit: + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def exec_module(self, module): + return self + + +class LazyLoaderFactoryTests(unittest.TestCase): + + def test_init(self): + factory = util.LazyLoader.factory(CollectInit) + # E.g. what importlib.machinery.FileFinder instantiates loaders with + # plus keyword arguments. + lazy_loader = factory('module name', 'module path', kw='kw') + loader = lazy_loader.loader + self.assertEqual(('module name', 'module path'), loader.args) + self.assertEqual({'kw': 'kw'}, loader.kwargs) + + def test_validation(self): + # No exec_module(), no lazy loading. + with self.assertRaises(TypeError): + util.LazyLoader.factory(object) + + +class TestingImporter(abc.MetaPathFinder, abc.Loader): + + module_name = 'lazy_loader_test' + mutated_name = 'changed' + loaded = None + load_count = 0 + source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name) + + def find_spec(self, name, path, target=None): + if name != self.module_name: + return None + return util.spec_from_loader(name, util.LazyLoader(self)) + + def exec_module(self, module): + time.sleep(0.01) # Simulate a slow load. + exec(self.source_code, module.__dict__) + self.loaded = module + self.load_count += 1 + + +class LazyLoaderTests(unittest.TestCase): + + def test_init(self): + with self.assertRaises(TypeError): + # Classes that don't define exec_module() trigger TypeError. + util.LazyLoader(object) + + def new_module(self, source_code=None, loader=None): + if loader is None: + loader = TestingImporter() + if source_code is not None: + loader.source_code = source_code + spec = util.spec_from_loader(TestingImporter.module_name, + util.LazyLoader(loader)) + module = spec.loader.create_module(spec) + if module is None: + module = types.ModuleType(TestingImporter.module_name) + module.__spec__ = spec + module.__loader__ = spec.loader + spec.loader.exec_module(module) + # Module is now lazy. + self.assertIsNone(loader.loaded) + return module + + def test_e2e(self): + # End-to-end test to verify the load is in fact lazy. + importer = TestingImporter() + assert importer.loaded is None + with test_util.uncache(importer.module_name): + with test_util.import_state(meta_path=[importer]): + module = importlib.import_module(importer.module_name) + self.assertIsNone(importer.loaded) + # Trigger load. + self.assertEqual(module.__loader__, importer) + self.assertIsNotNone(importer.loaded) + self.assertEqual(module, importer.loaded) + + def test_attr_unchanged(self): + # An attribute only mutated as a side-effect of import should not be + # changed needlessly. + module = self.new_module() + self.assertEqual(TestingImporter.mutated_name, module.__name__) + + def test_new_attr(self): + # A new attribute should persist. + module = self.new_module() + module.new_attr = 42 + self.assertEqual(42, module.new_attr) + + def test_mutated_preexisting_attr(self): + # Changing an attribute that already existed on the module -- + # e.g. __name__ -- should persist. + module = self.new_module() + module.__name__ = 'bogus' + self.assertEqual('bogus', module.__name__) + + def test_mutated_attr(self): + # Changing an attribute that comes into existence after an import + # should persist. + module = self.new_module() + module.attr = 6 + self.assertEqual(6, module.attr) + + def test_delete_eventual_attr(self): + # Deleting an attribute should stay deleted. + module = self.new_module() + del module.attr + self.assertNotHasAttr(module, 'attr') + + def test_delete_preexisting_attr(self): + module = self.new_module() + del module.__name__ + self.assertNotHasAttr(module, '__name__') + + def test_module_substitution_error(self): + with test_util.uncache(TestingImporter.module_name): + fresh_module = types.ModuleType(TestingImporter.module_name) + sys.modules[TestingImporter.module_name] = fresh_module + module = self.new_module() + with self.assertRaisesRegex(ValueError, "substituted"): + module.__name__ + + def test_module_already_in_sys(self): + with test_util.uncache(TestingImporter.module_name): + module = self.new_module() + sys.modules[TestingImporter.module_name] = module + # Force the load; just care that no exception is raised. + module.__name__ + + @threading_helper.requires_working_threading() + def test_module_load_race(self): + with test_util.uncache(TestingImporter.module_name): + loader = TestingImporter() + module = self.new_module(loader=loader) + self.assertEqual(loader.load_count, 0) + + class RaisingThread(threading.Thread): + exc = None + def run(self): + try: + super().run() + except Exception as exc: + self.exc = exc + + def access_module(): + return module.attr + + threads = [] + for _ in range(2): + threads.append(thread := RaisingThread(target=access_module)) + thread.start() + + # Races could cause errors + for thread in threads: + thread.join() + self.assertIsNone(thread.exc) + + # Or multiple load attempts + self.assertEqual(loader.load_count, 1) + + def test_lazy_self_referential_modules(self): + # Directory modules with submodules that reference the parent can attempt to access + # the parent module during a load. Verify that this common pattern works with lazy loading. + # json is a good example in the stdlib. + json_modules = [name for name in sys.modules if name.startswith('json')] + with test_util.uncache(*json_modules): + # Standard lazy loading, unwrapped + spec = util.find_spec('json') + loader = util.LazyLoader(spec.loader) + spec.loader = loader + module = util.module_from_spec(spec) + sys.modules['json'] = module + loader.exec_module(module) + + # Trigger load with attribute lookup, ensure expected behavior + test_load = module.loads('{}') + self.assertEqual(test_load, {}) + + def test_lazy_module_type_override(self): + # Verify that lazy loading works with a module that modifies + # its __class__ to be a custom type. + + # Example module from PEP 726 + module = self.new_module(source_code="""\ +import sys +from types import ModuleType + +CONSTANT = 3.14 + +class ImmutableModule(ModuleType): + def __setattr__(self, name, value): + raise AttributeError('Read-only attribute!') + + def __delattr__(self, name): + raise AttributeError('Read-only attribute!') + +sys.modules[__name__].__class__ = ImmutableModule +""") + sys.modules[TestingImporter.module_name] = module + self.assertIsInstance(module, util._LazyModule) + self.assertEqual(module.CONSTANT, 3.14) + with self.assertRaises(AttributeError): + module.CONSTANT = 2.71 + with self.assertRaises(AttributeError): + del module.CONSTANT + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_locks.py b/stdlib/test/test_importlib/test_locks.py new file mode 100644 index 000000000..655e5881a --- /dev/null +++ b/stdlib/test/test_importlib/test_locks.py @@ -0,0 +1,161 @@ +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') + +import sys +import threading +import unittest +import weakref + +from test import support +from test.support import threading_helper +from test import lock_tests + + +threading_helper.requires_working_threading(module=True) + + +class ModuleLockAsRLockTests: + locktype = classmethod(lambda cls: cls.LockType("some_lock")) + + # _is_owned() unsupported + test__is_owned = None + # acquire(blocking=False) unsupported + test_try_acquire = None + test_try_acquire_contended = None + # `with` unsupported + test_with = None + # acquire(timeout=...) unsupported + test_timeout = None + # _release_save() unsupported + test_release_save_unacquired = None + # _recursion_count() unsupported + test_recursion_count = None + # lock status in repr unsupported + test_repr = None + test_locked_repr = None + test_repr_count = None + + def tearDown(self): + for splitinit in init.values(): + splitinit._bootstrap._blocking_on.clear() + + +LOCK_TYPES = {kind: splitinit._bootstrap._ModuleLock + for kind, splitinit in init.items()} + +(Frozen_ModuleLockAsRLockTests, + Source_ModuleLockAsRLockTests + ) = test_util.test_both(ModuleLockAsRLockTests, lock_tests.RLockTests, + LockType=LOCK_TYPES) + + +class DeadlockAvoidanceTests: + + def setUp(self): + try: + self.old_switchinterval = sys.getswitchinterval() + support.setswitchinterval(0.000001) + except AttributeError: + self.old_switchinterval = None + + def tearDown(self): + if self.old_switchinterval is not None: + sys.setswitchinterval(self.old_switchinterval) + + def run_deadlock_avoidance_test(self, create_deadlock): + NLOCKS = 10 + locks = [self.LockType(str(i)) for i in range(NLOCKS)] + pairs = [(locks[i], locks[(i+1)%NLOCKS]) for i in range(NLOCKS)] + if create_deadlock: + NTHREADS = NLOCKS + else: + NTHREADS = NLOCKS - 1 + barrier = threading.Barrier(NTHREADS) + results = [] + + def _acquire(lock): + """Try to acquire the lock. Return True on success, + False on deadlock.""" + try: + lock.acquire() + except self.DeadlockError: + return False + else: + return True + + def f(): + a, b = pairs.pop() + ra = _acquire(a) + barrier.wait() + rb = _acquire(b) + results.append((ra, rb)) + if rb: + b.release() + if ra: + a.release() + with lock_tests.Bunch(f, NTHREADS): + pass + self.assertEqual(len(results), NTHREADS) + return results + + def test_deadlock(self): + results = self.run_deadlock_avoidance_test(True) + # At least one of the threads detected a potential deadlock on its + # second acquire() call. It may be several of them, because the + # deadlock avoidance mechanism is conservative. + nb_deadlocks = results.count((True, False)) + self.assertGreaterEqual(nb_deadlocks, 1) + self.assertEqual(results.count((True, True)), len(results) - nb_deadlocks) + + def test_no_deadlock(self): + results = self.run_deadlock_avoidance_test(False) + self.assertEqual(results.count((True, False)), 0) + self.assertEqual(results.count((True, True)), len(results)) + + +DEADLOCK_ERRORS = {kind: splitinit._bootstrap._DeadlockError + for kind, splitinit in init.items()} + +(Frozen_DeadlockAvoidanceTests, + Source_DeadlockAvoidanceTests + ) = test_util.test_both(DeadlockAvoidanceTests, + LockType=LOCK_TYPES, + DeadlockError=DEADLOCK_ERRORS) + + +class LifetimeTests: + + @property + def bootstrap(self): + return self.init._bootstrap + + def test_lock_lifetime(self): + name = "xyzzy" + self.assertNotIn(name, self.bootstrap._module_locks) + lock = self.bootstrap._get_module_lock(name) + self.assertIn(name, self.bootstrap._module_locks) + wr = weakref.ref(lock) + del lock + support.gc_collect() + self.assertNotIn(name, self.bootstrap._module_locks) + self.assertIsNone(wr()) + + def test_all_locks(self): + support.gc_collect() + self.assertEqual(0, len(self.bootstrap._module_locks), + self.bootstrap._module_locks) + + +(Frozen_LifetimeTests, + Source_LifetimeTests + ) = test_util.test_both(LifetimeTests, init=init) + + +def setUpModule(): + thread_info = threading_helper.threading_setup() + unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_namespace_pkgs.py b/stdlib/test/test_importlib/test_namespace_pkgs.py new file mode 100644 index 000000000..6ca0978f9 --- /dev/null +++ b/stdlib/test/test_importlib/test_namespace_pkgs.py @@ -0,0 +1,379 @@ +import contextlib +import importlib +import importlib.abc +import importlib.machinery +import os +import sys +import tempfile +import unittest + +from test.test_importlib import util + +# needed tests: +# +# need to test when nested, so that the top-level path isn't sys.path +# need to test dynamic path detection, both at top-level and nested +# with dynamic path, check when a loader is returned on path reload (that is, +# trying to switch from a namespace package to a regular package) + + +@contextlib.contextmanager +def sys_modules_context(): + """ + Make sure sys.modules is the same object and has the same content + when exiting the context as when entering. + + Similar to importlib.test.util.uncache, but doesn't require explicit + names. + """ + sys_modules_saved = sys.modules + sys_modules_copy = sys.modules.copy() + try: + yield + finally: + sys.modules = sys_modules_saved + sys.modules.clear() + sys.modules.update(sys_modules_copy) + + +@contextlib.contextmanager +def namespace_tree_context(**kwargs): + """ + Save import state and sys.modules cache and restore it on exit. + Typical usage: + + >>> with namespace_tree_context(path=['/tmp/xxyy/portion1', + ... '/tmp/xxyy/portion2']): + ... pass + """ + # use default meta_path and path_hooks unless specified otherwise + kwargs.setdefault('meta_path', sys.meta_path) + kwargs.setdefault('path_hooks', sys.path_hooks) + import_context = util.import_state(**kwargs) + with import_context, sys_modules_context(): + yield + +class NamespacePackageTest(unittest.TestCase): + """ + Subclasses should define self.root and self.paths (under that root) + to be added to sys.path. + """ + root = os.path.join(os.path.dirname(__file__), 'namespace_pkgs') + + def setUp(self): + self.resolved_paths = [ + os.path.join(self.root, path) for path in self.paths + ] + self.enterContext(namespace_tree_context(path=self.resolved_paths)) + + +class SingleNamespacePackage(NamespacePackageTest): + paths = ['portion1'] + + def test_simple_package(self): + import foo.one + self.assertEqual(foo.one.attr, 'portion1 foo one') + + def test_cant_import_other(self): + with self.assertRaises(ImportError): + import foo.two + + def test_simple_repr(self): + import foo.one + self.assertStartsWith(repr(foo), "' + + def __getattr__(self, name): + if name == 'get_filename' and self.path is not None: + return self._get_filename + if name == 'is_package': + return self._is_package + raise AttributeError(name) + + def _get_filename(self, name): + return self.path + + def _is_package(self, name): + return self.package + + def create_module(self, spec): + return None + + +class NewLoader(TestLoader): + + EGGS = 1 + + def exec_module(self, module): + module.eggs = self.EGGS + + +class ModuleSpecTests: + + def setUp(self): + self.name = 'spam' + self.path = 'spam.py' + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.spec = self.machinery.ModuleSpec(self.name, self.loader) + self.loc_spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + self.loc_spec._set_fileattr = True + + def test_default(self): + spec = self.machinery.ModuleSpec(self.name, self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_no_loader(self): + spec = self.machinery.ModuleSpec(self.name, None) + + self.assertEqual(spec.name, self.name) + self.assertIs(spec.loader, None) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_is_package_false(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_is_package_true(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_has_location_setter(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + origin='somewhere') + self.assertFalse(spec.has_location) + spec.has_location = True + self.assertTrue(spec.has_location) + + def test_equality(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + origin=None, + submodule_search_locations=None, + has_location=False, + cached=None, + ) + + self.assertTrue(self.spec == other) + + def test_equality_location(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + origin=self.path, + submodule_search_locations=None, + has_location=True, + cached=self.cached, + ) + + self.assertEqual(self.loc_spec, other) + + def test_inequality(self): + other = type(sys.implementation)(name='ham', + loader=self.loader, + origin=None, + submodule_search_locations=None, + has_location=False, + cached=None, + ) + + self.assertNotEqual(self.spec, other) + + def test_inequality_incomplete(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + ) + + self.assertNotEqual(self.spec, other) + + def test_package(self): + spec = self.machinery.ModuleSpec('spam.eggs', self.loader) + + self.assertEqual(spec.parent, 'spam') + + def test_package_is_package(self): + spec = self.machinery.ModuleSpec('spam.eggs', self.loader, + is_package=True) + + self.assertEqual(spec.parent, 'spam.eggs') + + # cached + + def test_cached_set(self): + before = self.spec.cached + self.spec.cached = 'there' + after = self.spec.cached + + self.assertIs(before, None) + self.assertEqual(after, 'there') + + def test_cached_no_origin(self): + spec = self.machinery.ModuleSpec(self.name, self.loader) + + self.assertIs(spec.cached, None) + + def test_cached_with_origin_not_location(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + + self.assertIs(spec.cached, None) + + def test_cached_source(self): + expected = self.util.cache_from_source(self.path) + + self.assertEqual(self.loc_spec.cached, expected) + + def test_cached_source_unknown_suffix(self): + self.loc_spec.origin = 'spam.spamspamspam' + + self.assertIs(self.loc_spec.cached, None) + + def test_cached_source_missing_cache_tag(self): + original = sys.implementation.cache_tag + sys.implementation.cache_tag = None + try: + cached = self.loc_spec.cached + finally: + sys.implementation.cache_tag = original + + self.assertIs(cached, None) + + def test_cached_sourceless(self): + self.loc_spec.origin = 'spam.pyc' + + self.assertEqual(self.loc_spec.cached, 'spam.pyc') + + +(Frozen_ModuleSpecTests, + Source_ModuleSpecTests + ) = test_util.test_both(ModuleSpecTests, util=util, machinery=machinery) + + +class ModuleSpecMethodsTests: + + @property + def bootstrap(self): + return self.init._bootstrap + + def setUp(self): + self.name = 'spam' + self.path = 'spam.py' + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.spec = self.machinery.ModuleSpec(self.name, self.loader) + self.loc_spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + self.loc_spec._set_fileattr = True + + # exec() + + def test_exec(self): + self.spec.loader = NewLoader() + module = self.util.module_from_spec(self.spec) + sys.modules[self.name] = module + self.assertNotHasAttr(module, 'eggs') + self.bootstrap._exec(self.spec, module) + + self.assertEqual(module.eggs, 1) + + # load() + + def test_load(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + installed = sys.modules[self.spec.name] + + self.assertEqual(loaded.eggs, 1) + self.assertIs(loaded, installed) + + def test_load_replaced(self): + replacement = object() + class ReplacingLoader(TestLoader): + def exec_module(self, module): + sys.modules[module.__name__] = replacement + self.spec.loader = ReplacingLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + installed = sys.modules[self.spec.name] + + self.assertIs(loaded, replacement) + self.assertIs(installed, replacement) + + def test_load_failed(self): + class FailedLoader(TestLoader): + def exec_module(self, module): + raise RuntimeError + self.spec.loader = FailedLoader() + with CleanImport(self.spec.name): + with self.assertRaises(RuntimeError): + loaded = self.bootstrap._load(self.spec) + self.assertNotIn(self.spec.name, sys.modules) + + def test_load_failed_removed(self): + class FailedLoader(TestLoader): + def exec_module(self, module): + del sys.modules[module.__name__] + raise RuntimeError + self.spec.loader = FailedLoader() + with CleanImport(self.spec.name): + with self.assertRaises(RuntimeError): + loaded = self.bootstrap._load(self.spec) + self.assertNotIn(self.spec.name, sys.modules) + + def test_load_legacy_attributes_immutable(self): + module = object() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + class ImmutableLoader(TestLoader): + def load_module(self, name): + sys.modules[name] = module + return module + self.spec.loader = ImmutableLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + + self.assertIs(sys.modules[self.spec.name], module) + + # reload() + + def test_reload(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + reloaded = self.bootstrap._exec(self.spec, loaded) + installed = sys.modules[self.spec.name] + + self.assertEqual(loaded.eggs, 1) + self.assertIs(reloaded, loaded) + self.assertIs(installed, loaded) + + def test_reload_modified(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.eggs = 2 + reloaded = self.bootstrap._exec(self.spec, loaded) + + self.assertEqual(loaded.eggs, 1) + self.assertIs(reloaded, loaded) + + def test_reload_extra_attributes(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.available = False + reloaded = self.bootstrap._exec(self.spec, loaded) + + self.assertFalse(loaded.available) + self.assertIs(reloaded, loaded) + + def test_reload_init_module_attrs(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.__name__ = 'ham' + del loaded.__loader__ + del loaded.__package__ + del loaded.__spec__ + self.bootstrap._exec(self.spec, loaded) + + self.assertEqual(loaded.__name__, self.spec.name) + self.assertIs(loaded.__loader__, self.spec.loader) + self.assertEqual(loaded.__package__, self.spec.parent) + self.assertIs(loaded.__spec__, self.spec) + self.assertNotHasAttr(loaded, '__path__') + self.assertNotHasAttr(loaded, '__file__') + self.assertNotHasAttr(loaded, '__cached__') + + +(Frozen_ModuleSpecMethodsTests, + Source_ModuleSpecMethodsTests + ) = test_util.test_both(ModuleSpecMethodsTests, init=init, util=util, + machinery=machinery) + + +class FactoryTests: + + def setUp(self): + self.name = 'spam' + self.path = os.path.abspath('spam.py') + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.fileloader = TestLoader(self.path) + self.pkgloader = TestLoader(self.path, True) + + # spec_from_loader() + + def test_spec_from_loader_default(self): + spec = self.util.spec_from_loader(self.name, self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_default_with_bad_is_package(self): + class Loader: + def is_package(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_origin(self): + origin = 'somewhere over the rainbow' + spec = self.util.spec_from_loader(self.name, self.loader, + origin=origin) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, origin) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_false(self): + spec = self.util.spec_from_loader(self.name, self.loader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_true(self): + spec = self.util.spec_from_loader(self.name, self.loader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_origin_and_is_package(self): + origin = 'where the streets have no name' + spec = self.util.spec_from_loader(self.name, self.loader, + origin=origin, is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, origin) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_with_loader_false(self): + loader = TestLoader(is_package=False) + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_with_loader_true(self): + loader = TestLoader(is_package=True) + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_default_with_file_loader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_loader_is_package_false_with_fileloader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_loader_is_package_true_with_fileloader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + # spec_from_file_location() + + def test_spec_from_file_location_default(self): + spec = self.util.spec_from_file_location(self.name, self.path) + + self.assertEqual(spec.name, self.name) + # Need to use a circuitous route to get at importlib.machinery to make + # sure the same class object is used in the isinstance() check as + # would have been used to create the loader. + SourceFileLoader = self.util.spec_from_file_location.__globals__['SourceFileLoader'] + self.assertIsInstance(spec.loader, SourceFileLoader) + self.assertEqual(spec.loader.name, self.name) + self.assertEqual(spec.loader.path, self.path) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_path_like_arg(self): + spec = self.util.spec_from_file_location(self.name, + pathlib.PurePath(self.path)) + self.assertEqual(spec.origin, self.path) + + def test_spec_from_file_location_default_without_location(self): + spec = self.util.spec_from_file_location(self.name) + + self.assertIs(spec, None) + + def test_spec_from_file_location_default_bad_suffix(self): + spec = self.util.spec_from_file_location(self.name, 'spam.eggs') + + self.assertIs(spec, None) + + def test_spec_from_file_location_loader_no_location(self): + spec = self.util.spec_from_file_location(self.name, + loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_loader_no_location_no_get_filename(self): + spec = self.util.spec_from_file_location(self.name, + loader=self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertEqual(spec.origin, '') + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_loader_no_location_bad_get_filename(self): + class Loader: + def get_filename(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_file_location(self.name, loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, '') + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_none(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=None) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_empty(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=[]) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_not_empty(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=['eggs']) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, ['eggs']) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.pkgloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.pkgloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_not_package(self): + class Loader: + def is_package(self, name): + return False + loader = Loader() + spec = self.util.spec_from_file_location(self.name, self.path, + loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_no_is_package(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_bad_is_package(self): + class Loader: + def is_package(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_file_location(self.name, self.path, + loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_relative_path(self): + spec = self.util.spec_from_file_location(self.name, + os.path.basename(self.path), loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + +(Frozen_FactoryTests, + Source_FactoryTests + ) = test_util.test_both(FactoryTests, util=util, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_threaded_import.py b/stdlib/test/test_importlib/test_threaded_import.py new file mode 100644 index 000000000..8b793ebf2 --- /dev/null +++ b/stdlib/test/test_importlib/test_threaded_import.py @@ -0,0 +1,340 @@ +# This is a variant of the very old (early 90's) file +# Demo/threads/bug.py. It simply provokes a number of threads into +# trying to import the same module "at the same time". +# There are no pleasant failure modes -- most likely is that Python +# complains several times about module random having no attribute +# randrange, and then Python hangs. + +import _imp as imp +import os +import importlib +import sys +import time +import shutil +import threading +import unittest +from test import support +from test.support import verbose +from test.support.import_helper import forget, mock_register_at_fork +from test.support.os_helper import (TESTFN, unlink, rmtree) +from test.support import script_helper, threading_helper + +threading_helper.requires_working_threading(module=True) + +def task(N, done, done_tasks, errors): + try: + # We don't use modulefinder but still import it in order to stress + # importing of different modules from several threads. + if len(done_tasks) % 2: + import modulefinder + import random + else: + import random + import modulefinder + # This will fail if random is not completely initialized + x = random.randrange(1, 3) + except Exception as e: + errors.append(e.with_traceback(None)) + finally: + done_tasks.append(threading.get_ident()) + finished = len(done_tasks) == N + if finished: + done.set() + +# Create a circular import structure: A -> C -> B -> D -> A +# NOTE: `time` is already loaded and therefore doesn't threaten to deadlock. + +circular_imports_modules = { + 'A': """if 1: + import time + time.sleep(%(delay)s) + x = 'a' + import C + """, + 'B': """if 1: + import time + time.sleep(%(delay)s) + x = 'b' + import D + """, + 'C': """import B""", + 'D': """import A""", +} + +class Finder: + """A dummy finder to detect concurrent access to its find_spec() + method.""" + + def __init__(self): + self.numcalls = 0 + self.x = 0 + self.lock = threading.Lock() + + def find_spec(self, name, path=None, target=None): + # Simulate some thread-unsafe behaviour. If calls to find_spec() + # are properly serialized, `x` will end up the same as `numcalls`. + # Otherwise not. + assert imp.lock_held() + with self.lock: + self.numcalls += 1 + x = self.x + time.sleep(0.01) + self.x = x + 1 + +class FlushingFinder: + """A dummy finder which flushes sys.path_importer_cache when it gets + called.""" + + def find_spec(self, name, path=None, target=None): + sys.path_importer_cache.clear() + + +class ThreadedImportTests(unittest.TestCase): + + def setUp(self): + self.old_random = sys.modules.pop('random', None) + + def tearDown(self): + # If the `random` module was already initialized, we restore the + # old module at the end so that pickling tests don't fail. + # See http://bugs.python.org/issue3657#msg110461 + if self.old_random is not None: + sys.modules['random'] = self.old_random + + @mock_register_at_fork + def check_parallel_module_init(self, mock_os): + if imp.lock_held(): + # This triggers on, e.g., from test import autotest. + raise unittest.SkipTest("can't run when import lock is held") + + done = threading.Event() + for N in (20, 50) * 3: + if verbose: + print("Trying", N, "threads ...", end=' ') + # Make sure that random and modulefinder get reimported freshly + for modname in ['random', 'modulefinder']: + try: + del sys.modules[modname] + except KeyError: + pass + errors = [] + done_tasks = [] + done.clear() + t0 = time.monotonic() + with threading_helper.start_threads( + threading.Thread(target=task, args=(N, done, done_tasks, errors,)) + for i in range(N)): + pass + completed = done.wait(10 * 60) + dt = time.monotonic() - t0 + if verbose: + print("%.1f ms" % (dt*1e3), flush=True, end=" ") + dbg_info = 'done: %s/%s' % (len(done_tasks), N) + self.assertFalse(errors, dbg_info) + self.assertTrue(completed, dbg_info) + if verbose: + print("OK.") + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_module_init(self, size): + self.check_parallel_module_init() + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_meta_path(self, size): + finder = Finder() + sys.meta_path.insert(0, finder) + try: + self.check_parallel_module_init() + self.assertGreater(finder.numcalls, 0) + self.assertEqual(finder.x, finder.numcalls) + finally: + sys.meta_path.remove(finder) + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_path_hooks(self, size): + # Here the Finder instance is only used to check concurrent calls + # to path_hook(). + finder = Finder() + # In order for our path hook to be called at each import, we need + # to flush the path_importer_cache, which we do by registering a + # dedicated meta_path entry. + flushing_finder = FlushingFinder() + def path_hook(path): + finder.find_spec('') + raise ImportError + sys.path_hooks.insert(0, path_hook) + sys.meta_path.append(flushing_finder) + try: + # Flush the cache a first time + flushing_finder.find_spec('') + numtests = self.check_parallel_module_init() + self.assertGreater(finder.numcalls, 0) + self.assertEqual(finder.x, finder.numcalls) + finally: + sys.meta_path.remove(flushing_finder) + sys.path_hooks.remove(path_hook) + + def test_import_hangers(self): + # In case this test is run again, make sure the helper module + # gets loaded from scratch again. + try: + del sys.modules['test.test_importlib.threaded_import_hangers'] + except KeyError: + pass + import test.test_importlib.threaded_import_hangers + self.assertFalse(test.test_importlib.threaded_import_hangers.errors) + + def test_circular_imports(self): + # The goal of this test is to exercise implementations of the import + # lock which use a per-module lock, rather than a global lock. + # In these implementations, there is a possible deadlock with + # circular imports, for example: + # - thread 1 imports A (grabbing the lock for A) which imports B + # - thread 2 imports B (grabbing the lock for B) which imports A + # Such implementations should be able to detect such situations and + # resolve them one way or the other, without freezing. + # NOTE: our test constructs a slightly less trivial import cycle, + # in order to better stress the deadlock avoidance mechanism. + delay = 0.5 + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + for name, contents in circular_imports_modules.items(): + contents = contents % {'delay': delay} + with open(os.path.join(TESTFN, name + ".py"), "wb") as f: + f.write(contents.encode('utf-8')) + self.addCleanup(forget, name) + + importlib.invalidate_caches() + results = [] + def import_ab(): + import A + results.append(getattr(A, 'x', None)) + def import_ba(): + import B + results.append(getattr(B, 'x', None)) + t1 = threading.Thread(target=import_ab) + t2 = threading.Thread(target=import_ba) + t1.start() + t2.start() + t1.join() + t2.join() + self.assertEqual(set(results), {'a', 'b'}) + + @mock_register_at_fork + def test_side_effect_import(self, mock_os): + code = """if 1: + import threading + def target(): + import random + t = threading.Thread(target=target) + t.start() + t.join() + t = None""" + sys.path.insert(0, os.curdir) + self.addCleanup(sys.path.remove, os.curdir) + filename = TESTFN + ".py" + with open(filename, "wb") as f: + f.write(code.encode('utf-8')) + self.addCleanup(unlink, filename) + self.addCleanup(forget, TESTFN) + self.addCleanup(rmtree, '__pycache__') + importlib.invalidate_caches() + with threading_helper.wait_threads_exit(): + __import__(TESTFN) + del sys.modules[TESTFN] + + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_concurrent_futures_circular_import(self, size): + # Regression test for bpo-43515 + fn = os.path.join(os.path.dirname(__file__), + 'partial', 'cfimport.py') + script_helper.assert_python_ok(fn) + + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_multiprocessing_pool_circular_import(self, size): + # Regression test for bpo-41567 + fn = os.path.join(os.path.dirname(__file__), + 'partial', 'pool_in_threads.py') + script_helper.assert_python_ok(fn) + + def test_import_failure_race_condition(self): + # Regression test for race condition where a thread could receive + # a partially-initialized module when another thread's import fails. + # The race occurs when: + # 1. Thread 1 starts importing, adds module to sys.modules + # 2. Thread 2 sees the module in sys.modules + # 3. Thread 1's import fails, removes module from sys.modules + # 4. Thread 2 should NOT return the stale module reference + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + + # Create a module that partially initializes then fails + modname = 'failing_import_module' + with open(os.path.join(TESTFN, modname + '.py'), 'w') as f: + f.write(''' +import time +PARTIAL_ATTR = 'initialized' +time.sleep(0.05) # Widen race window +raise RuntimeError("Intentional import failure") +''') + self.addCleanup(forget, modname) + importlib.invalidate_caches() + + errors = [] + results = [] + + def do_import(delay=0): + time.sleep(delay) + try: + mod = __import__(modname) + # If we got a module, verify it's in sys.modules + if modname not in sys.modules: + errors.append( + f"Got module {mod!r} but {modname!r} not in sys.modules" + ) + elif sys.modules[modname] is not mod: + errors.append( + f"Got different module than sys.modules[{modname!r}]" + ) + else: + results.append(('success', mod)) + except RuntimeError: + results.append(('RuntimeError',)) + except Exception as e: + errors.append(f"Unexpected exception: {e}") + + # Run multiple iterations to increase chance of hitting the race + for _ in range(10): + errors.clear() + results.clear() + if modname in sys.modules: + del sys.modules[modname] + + t1 = threading.Thread(target=do_import, args=(0,)) + t2 = threading.Thread(target=do_import, args=(0.01,)) + t1.start() + t2.start() + t1.join() + t2.join() + + # Neither thread should have errors about stale modules + self.assertEqual(errors, [], f"Race condition detected: {errors}") + + +def setUpModule(): + thread_info = threading_helper.threading_setup() + unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) + try: + old_switchinterval = sys.getswitchinterval() + unittest.addModuleCleanup(sys.setswitchinterval, old_switchinterval) + support.setswitchinterval(1e-5) + except AttributeError: + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/test/test_importlib/test_util.py b/stdlib/test/test_importlib/test_util.py new file mode 100644 index 000000000..8c14b9627 --- /dev/null +++ b/stdlib/test/test_importlib/test_util.py @@ -0,0 +1,861 @@ +from test.test_importlib import util + +abc = util.import_importlib('importlib.abc') +init = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') +importlib_util = util.import_importlib('importlib.util') + +import importlib.util +from importlib import _bootstrap_external +import os +import pathlib +import string +import sys +from test import support +from test.support import os_helper +import textwrap +import types +import unittest +import unittest.mock +import warnings + +try: + import _testsinglephase +except ImportError: + _testsinglephase = None +try: + import _testmultiphase +except ImportError: + _testmultiphase = None +try: + import _interpreters +except ModuleNotFoundError: + _interpreters = None + + +class DecodeSourceBytesTests: + + source = "string ='ü'" + + def test_ut8_default(self): + source_bytes = self.source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), self.source) + + def test_specified_encoding(self): + source = '# coding=latin-1\n' + self.source + source_bytes = source.encode('latin-1') + assert source_bytes != source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), source) + + def test_universal_newlines(self): + source = '\r\n'.join([self.source, self.source]) + source_bytes = source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), + '\n'.join([self.source, self.source])) + + +(Frozen_DecodeSourceBytesTests, + Source_DecodeSourceBytesTests + ) = util.test_both(DecodeSourceBytesTests, util=importlib_util) + + +class ModuleFromSpecTests: + + def test_no_create_module(self): + class Loader: + def exec_module(self, module): + pass + spec = self.machinery.ModuleSpec('test', Loader()) + with self.assertRaises(ImportError): + module = self.util.module_from_spec(spec) + + def test_create_module_returns_None(self): + class Loader(self.abc.Loader): + def create_module(self, spec): + return None + spec = self.machinery.ModuleSpec('test', Loader()) + module = self.util.module_from_spec(spec) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, spec.name) + + def test_create_module(self): + name = 'already set' + class CustomModule(types.ModuleType): + pass + class Loader(self.abc.Loader): + def create_module(self, spec): + module = CustomModule(spec.name) + module.__name__ = name + return module + spec = self.machinery.ModuleSpec('test', Loader()) + module = self.util.module_from_spec(spec) + self.assertIsInstance(module, CustomModule) + self.assertEqual(module.__name__, name) + + def test___name__(self): + spec = self.machinery.ModuleSpec('test', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__name__, spec.name) + + def test___spec__(self): + spec = self.machinery.ModuleSpec('test', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__spec__, spec) + + def test___loader__(self): + loader = object() + spec = self.machinery.ModuleSpec('test', loader) + module = self.util.module_from_spec(spec) + self.assertIs(module.__loader__, loader) + + def test___package__(self): + spec = self.machinery.ModuleSpec('test.pkg', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__package__, spec.parent) + + def test___path__(self): + spec = self.machinery.ModuleSpec('test', object(), is_package=True) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__path__, spec.submodule_search_locations) + + def test___file__(self): + spec = self.machinery.ModuleSpec('test', object(), origin='some/path') + spec.has_location = True + module = self.util.module_from_spec(spec) + self.assertEqual(module.__file__, spec.origin) + + def test___cached__(self): + spec = self.machinery.ModuleSpec('test', object()) + spec.cached = 'some/path' + spec.has_location = True + module = self.util.module_from_spec(spec) + self.assertEqual(module.__cached__, spec.cached) + +(Frozen_ModuleFromSpecTests, + Source_ModuleFromSpecTests +) = util.test_both(ModuleFromSpecTests, abc=abc, machinery=machinery, + util=importlib_util) + + +class ResolveNameTests: + + """Tests importlib.util.resolve_name().""" + + def test_absolute(self): + # bacon + self.assertEqual('bacon', self.util.resolve_name('bacon', None)) + + def test_absolute_within_package(self): + # bacon in spam + self.assertEqual('bacon', self.util.resolve_name('bacon', 'spam')) + + def test_no_package(self): + # .bacon in '' + with self.assertRaises(ImportError): + self.util.resolve_name('.bacon', '') + + def test_in_package(self): + # .bacon in spam + self.assertEqual('spam.eggs.bacon', + self.util.resolve_name('.bacon', 'spam.eggs')) + + def test_other_package(self): + # ..bacon in spam.bacon + self.assertEqual('spam.bacon', + self.util.resolve_name('..bacon', 'spam.eggs')) + + def test_escape(self): + # ..bacon in spam + with self.assertRaises(ImportError): + self.util.resolve_name('..bacon', 'spam') + + +(Frozen_ResolveNameTests, + Source_ResolveNameTests + ) = util.test_both(ResolveNameTests, util=importlib_util) + + +class FindSpecTests: + + class FakeMetaFinder: + @staticmethod + def find_spec(name, path=None, target=None): return name, path, target + + def test_sys_modules(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + loader = 'a loader!' + spec = self.machinery.ModuleSpec(name, loader) + module.__loader__ = loader + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_without___loader__(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + del module.__loader__ + loader = 'a loader!' + spec = self.machinery.ModuleSpec(name, loader) + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_spec_is_None(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + module.__spec__ = None + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_sys_modules_loader_is_None(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + spec = self.machinery.ModuleSpec(name, None) + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_spec_is_not_set(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + try: + del module.__spec__ + except AttributeError: + pass + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_success(self): + name = 'some_mod' + with util.uncache(name): + with util.import_state(meta_path=[self.FakeMetaFinder]): + self.assertEqual((name, None, None), + self.util.find_spec(name)) + + def test_nothing(self): + # None is returned upon failure to find a loader. + self.assertIsNone(self.util.find_spec('nevergoingtofindthismodule')) + + def test_find_submodule(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + spec = self.util.find_spec(fullname) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_submodule_parent_already_imported(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + self.init.import_module(name) + fullname, _ = util.submodule(name, subname, pkg_dir) + spec = self.util.find_spec(fullname) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_relative_module(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + relname = '.' + subname + spec = self.util.find_spec(relname, name) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_relative_module_missing_package(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + relname = '.' + subname + with self.assertRaises(ImportError): + self.util.find_spec(relname) + self.assertNotIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + + def test_find_submodule_in_module(self): + # ModuleNotFoundError raised when a module is specified as + # a parent instead of a package. + with self.assertRaises(ModuleNotFoundError): + self.util.find_spec('module.name') + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, init=init, util=importlib_util, + machinery=machinery) + + +class MagicNumberTests: + + def test_length(self): + # Should be 4 bytes. + self.assertEqual(len(self.util.MAGIC_NUMBER), 4) + + def test_incorporates_rn(self): + # The magic number uses \r\n to come out wrong when splitting on lines. + self.assertEndsWith(self.util.MAGIC_NUMBER, b'\r\n') + + +(Frozen_MagicNumberTests, + Source_MagicNumberTests + ) = util.test_both(MagicNumberTests, util=importlib_util) + + +class PEP3147Tests: + + """Tests of PEP 3147-related functions: cache_from_source and source_from_cache.""" + + tag = sys.implementation.cache_tag + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag not be None') + def test_cache_from_source(self): + # Given the path to a .py file, return the path to its PEP 3147 + # defined .pyc file (i.e. under __pycache__). + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + expect = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_no_cache_tag(self): + # No cache tag means NotImplementedError. + with support.swap_attr(sys.implementation, 'cache_tag', None): + with self.assertRaises(NotImplementedError): + self.util.cache_from_source('whatever.py') + + def test_cache_from_source_no_dot(self): + # Directory with a dot, filename without dot. + path = os.path.join('foo.bar', 'file') + expect = os.path.join('foo.bar', '__pycache__', + 'file{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_debug_override(self): + # Given the path to a .py file, return the path to its PEP 3147/PEP 488 + # defined .pyc file (i.e. under __pycache__). + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertEqual(self.util.cache_from_source(path, False), + self.util.cache_from_source(path, optimization=1)) + self.assertEqual(self.util.cache_from_source(path, True), + self.util.cache_from_source(path, optimization='')) + with warnings.catch_warnings(): + warnings.simplefilter('error') + with self.assertRaises(DeprecationWarning): + self.util.cache_from_source(path, False) + with self.assertRaises(DeprecationWarning): + self.util.cache_from_source(path, True) + + def test_cache_from_source_cwd(self): + path = 'foo.py' + expect = os.path.join('__pycache__', 'foo.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_override(self): + # When debug_override is not None, it can be any true-ish or false-ish + # value. + path = os.path.join('foo', 'bar', 'baz.py') + # However if the bool-ishness can't be determined, the exception + # propagates. + class Bearish: + def __bool__(self): raise RuntimeError + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertEqual(self.util.cache_from_source(path, []), + self.util.cache_from_source(path, optimization=1)) + self.assertEqual(self.util.cache_from_source(path, [17]), + self.util.cache_from_source(path, optimization='')) + with self.assertRaises(RuntimeError): + self.util.cache_from_source('/foo/bar/baz.py', Bearish()) + + + def test_cache_from_source_optimization_empty_string(self): + # Setting 'optimization' to '' leads to no optimization tag (PEP 488). + path = 'foo.py' + expect = os.path.join('__pycache__', 'foo.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_optimization_None(self): + # Setting 'optimization' to None uses the interpreter's optimization. + # (PEP 488) + path = 'foo.py' + optimization_level = sys.flags.optimize + almost_expect = os.path.join('__pycache__', 'foo.{}'.format(self.tag)) + if optimization_level == 0: + expect = almost_expect + '.pyc' + elif optimization_level <= 2: + expect = almost_expect + '.opt-{}.pyc'.format(optimization_level) + else: + msg = '{!r} is a non-standard optimization level'.format(optimization_level) + self.skipTest(msg) + self.assertEqual(self.util.cache_from_source(path, optimization=None), + expect) + + def test_cache_from_source_optimization_set(self): + # The 'optimization' parameter accepts anything that has a string repr + # that passes str.alnum(). + path = 'foo.py' + valid_characters = string.ascii_letters + string.digits + almost_expect = os.path.join('__pycache__', 'foo.{}'.format(self.tag)) + got = self.util.cache_from_source(path, optimization=valid_characters) + # Test all valid characters are accepted. + self.assertEqual(got, + almost_expect + '.opt-{}.pyc'.format(valid_characters)) + # str() should be called on argument. + self.assertEqual(self.util.cache_from_source(path, optimization=42), + almost_expect + '.opt-42.pyc') + # Invalid characters raise ValueError. + with self.assertRaises(ValueError): + self.util.cache_from_source(path, optimization='path/is/bad') + + def test_cache_from_source_debug_override_optimization_both_set(self): + # Can only set one of the optimization-related parameters. + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + with self.assertRaises(TypeError): + self.util.cache_from_source('foo.py', False, optimization='') + + @unittest.skipUnless(os.sep == '\\' and os.altsep == '/', + 'test meaningful only where os.altsep is defined') + def test_sep_altsep_and_sep_cache_from_source(self): + # Windows path and PEP 3147 where sep is right of altsep. + self.assertEqual( + self.util.cache_from_source('\\foo\\bar\\baz/qux.py', optimization=''), + '\\foo\\bar\\baz\\__pycache__\\qux.{}.pyc'.format(self.tag)) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag not be None') + def test_cache_from_source_path_like_arg(self): + path = pathlib.PurePath('foo', 'bar', 'baz', 'qux.py') + expect = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache(self): + # Given the path to a PEP 3147 defined .pyc file, return the path to + # its source. This tests the good path. + path = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + self.assertEqual(self.util.source_from_cache(path), expect) + + def test_source_from_cache_no_cache_tag(self): + # If sys.implementation.cache_tag is None, raise NotImplementedError. + path = os.path.join('blah', '__pycache__', 'whatever.pyc') + with support.swap_attr(sys.implementation, 'cache_tag', None): + with self.assertRaises(NotImplementedError): + self.util.source_from_cache(path) + + def test_source_from_cache_bad_path(self): + # When the path to a pyc file is not in PEP 3147 format, a ValueError + # is raised. + self.assertRaises( + ValueError, self.util.source_from_cache, '/foo/bar/bazqux.pyc') + + def test_source_from_cache_no_slash(self): + # No slashes at all in path -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, 'foo.cpython-32.pyc') + + def test_source_from_cache_too_few_dots(self): + # Too few dots in final path component -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, '__pycache__/foo.pyc') + + def test_source_from_cache_too_many_dots(self): + with self.assertRaises(ValueError): + self.util.source_from_cache( + '__pycache__/foo.cpython-32.opt-1.foo.pyc') + + def test_source_from_cache_not_opt(self): + # Non-`opt-` path component -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, + '__pycache__/foo.cpython-32.foo.pyc') + + def test_source_from_cache_no__pycache__(self): + # Another problem with the path -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, + '/foo/bar/foo.cpython-32.foo.pyc') + + def test_source_from_cache_optimized_bytecode(self): + # Optimized bytecode is not an issue. + path = os.path.join('__pycache__', 'foo.{}.opt-1.pyc'.format(self.tag)) + self.assertEqual(self.util.source_from_cache(path), 'foo.py') + + def test_source_from_cache_missing_optimization(self): + # An empty optimization level is a no-no. + path = os.path.join('__pycache__', 'foo.{}.opt-.pyc'.format(self.tag)) + with self.assertRaises(ValueError): + self.util.source_from_cache(path) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_path_like_arg(self): + path = pathlib.PurePath('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + self.assertEqual(self.util.source_from_cache(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_respects_pycache_prefix(self): + # If pycache_prefix is set, cache_from_source will return a bytecode + # path inside that directory (in a subdirectory mirroring the .py file's + # path) rather than in a __pycache__ dir next to the py file. + pycache_prefixes = [ + os.path.join(os.path.sep, 'tmp', 'bytecode'), + os.path.join(os.path.sep, 'tmp', '\u2603'), # non-ASCII in path! + os.path.join(os.path.sep, 'tmp', 'trailing-slash') + os.path.sep, + ] + drive = '' + if os.name == 'nt': + drive = 'C:' + pycache_prefixes = [ + f'{drive}{prefix}' for prefix in pycache_prefixes] + pycache_prefixes += [r'\\?\C:\foo', r'\\localhost\c$\bar'] + for pycache_prefix in pycache_prefixes: + with self.subTest(path=pycache_prefix): + path = drive + os.path.join( + os.path.sep, 'foo', 'bar', 'baz', 'qux.py') + expect = os.path.join( + pycache_prefix, 'foo', 'bar', 'baz', + 'qux.{}.pyc'.format(self.tag)) + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual( + self.util.cache_from_source(path, optimization=''), + expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_respects_pycache_prefix_relative(self): + # If the .py path we are given is relative, we will resolve to an + # absolute path before prefixing with pycache_prefix, to avoid any + # possible ambiguity. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + root = os.path.splitdrive(os.getcwd())[0] + os.path.sep + expect = os.path.join( + pycache_prefix, + os.path.relpath(os.getcwd(), root), + 'foo', 'bar', 'baz', f'qux.{self.tag}.pyc') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual( + self.util.cache_from_source(path, optimization=''), + os.path.normpath(expect)) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_in_root_with_pycache_prefix(self): + # Regression test for gh-82916 + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = 'qux.py' + expect = os.path.join(os.path.sep, 'tmp', 'bytecode', + f'qux.{self.tag}.pyc') + with util.temporary_pycache_prefix(pycache_prefix): + with os_helper.change_cwd('/'): + self.assertEqual(self.util.cache_from_source(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_inside_pycache_prefix(self): + # If pycache_prefix is set and the cache path we get is inside it, + # we return an absolute path to the py file based on the remainder of + # the path within pycache_prefix. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join(pycache_prefix, 'foo', 'bar', 'baz', + f'qux.{self.tag}.pyc') + expect = os.path.join(os.path.sep, 'foo', 'bar', 'baz', 'qux.py') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual(self.util.source_from_cache(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_outside_pycache_prefix(self): + # If pycache_prefix is set but the cache path we get is not inside + # it, just ignore it and handle the cache path according to the default + # behavior. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join('foo', 'bar', 'baz', '__pycache__', + f'qux.{self.tag}.pyc') + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual(self.util.source_from_cache(path), expect) + + +(Frozen_PEP3147Tests, + Source_PEP3147Tests + ) = util.test_both(PEP3147Tests, util=importlib_util) + + +class MagicNumberTests(unittest.TestCase): + """ + Test release compatibility issues relating to importlib + """ + @unittest.skipUnless( + sys.version_info.releaselevel in ('candidate', 'final'), + 'only applies to candidate or final python release levels' + ) + def test_magic_number(self): + # Each python minor release should generally have a MAGIC_NUMBER + # that does not change once the release reaches candidate status. + + # Once a release reaches candidate status, the value of the constant + # EXPECTED_MAGIC_NUMBER in this test should be changed. + # This test will then check that the actual MAGIC_NUMBER matches + # the expected value for the release. + + # In exceptional cases, it may be required to change the MAGIC_NUMBER + # for a maintenance release. In this case the change should be + # discussed in python-dev. If a change is required, community + # stakeholders such as OS package maintainers must be notified + # in advance. Such exceptional releases will then require an + # adjustment to this test case. + EXPECTED_MAGIC_NUMBER = 3627 + actual = int.from_bytes(importlib.util.MAGIC_NUMBER[:2], 'little') + + msg = ( + "To avoid breaking backwards compatibility with cached bytecode " + "files that can't be automatically regenerated by the current " + "user, candidate and final releases require the current " + "importlib.util.MAGIC_NUMBER to match the expected " + "magic number in this test. Set the expected " + "magic number in this test to the current MAGIC_NUMBER to " + "continue with the release.\n\n" + "Changing the MAGIC_NUMBER for a maintenance release " + "requires discussion in python-dev and notification of " + "community stakeholders." + ) + self.assertEqual(EXPECTED_MAGIC_NUMBER, actual, msg) + + +@unittest.skipIf(_interpreters is None, 'subinterpreters required') +class IncompatibleExtensionModuleRestrictionsTests(unittest.TestCase): + + def run_with_own_gil(self, script): + interpid = _interpreters.create('isolated') + def ensure_destroyed(): + try: + _interpreters.destroy(interpid) + except _interpreters.InterpreterNotFoundError: + pass + self.addCleanup(ensure_destroyed) + excsnap = _interpreters.exec(interpid, script) + if excsnap is not None: + if excsnap.type.__name__ == 'ImportError': + raise ImportError(excsnap.msg) + + def run_with_shared_gil(self, script): + interpid = _interpreters.create('legacy') + def ensure_destroyed(): + try: + _interpreters.destroy(interpid) + except _interpreters.InterpreterNotFoundError: + pass + self.addCleanup(ensure_destroyed) + excsnap = _interpreters.exec(interpid, script) + if excsnap is not None: + if excsnap.type.__name__ == 'ImportError': + raise ImportError(excsnap.msg) + + @unittest.skipIf(_testsinglephase is None, "test requires _testsinglephase module") + # gh-117649: single-phase init modules are not currently supported in + # subinterpreters in the free-threaded build + @support.expected_failure_if_gil_disabled() + def test_single_phase_init_module(self): + script = textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + import _testsinglephase + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = textwrap.dedent(f''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + import _testsinglephase + ''') + with self.subTest('check enabled, shared GIL'): + with self.assertRaises(ImportError): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + with self.assertRaises(ImportError): + self.run_with_own_gil(script) + + @unittest.skipIf(_testmultiphase is None, "test requires _testmultiphase module") + @support.requires_gil_enabled("gh-117649: not supported in free-threaded build") + def test_incomplete_multi_phase_init_module(self): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if support.is_apple_mobile: + loader = "AppleFrameworkLoader" + else: + loader = "ExtensionFileLoader" + + prescript = textwrap.dedent(f''' + from importlib.util import spec_from_loader, module_from_spec + from importlib.machinery import {loader} + + name = '_test_shared_gil_only' + filename = {_testmultiphase.__file__!r} + loader = {loader}(name, filename) + spec = spec_from_loader(name, loader) + + ''') + + script = prescript + textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + module = module_from_spec(spec) + loader.exec_module(module) + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = prescript + textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + module = module_from_spec(spec) + loader.exec_module(module) + ''') + with self.subTest('check enabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + with self.assertRaises(ImportError): + self.run_with_own_gil(script) + + @unittest.skipIf(_testmultiphase is None, "test requires _testmultiphase module") + def test_complete_multi_phase_init_module(self): + script = textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + import _testmultiphase + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = textwrap.dedent(f''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + import _testmultiphase + ''') + with self.subTest('check enabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + +class PatchAtomicWrites: + def __init__(self, truncate_at_length, never_complete=False): + self.truncate_at_length = truncate_at_length + self.never_complete = never_complete + self.seen_write = False + self._children = [] + + def __enter__(self): + import _pyio + + oldwrite = os.write + + # Emulate an os.write that only writes partial data. + def write(fd, data): + if self.seen_write and self.never_complete: + return None + self.seen_write = True + return oldwrite(fd, data[:self.truncate_at_length]) + + # Need to patch _io to be _pyio, so that io.FileIO is affected by the + # os.write patch. + self.children = [ + support.swap_attr(_bootstrap_external, '_io', _pyio), + support.swap_attr(os, 'write', write) + ] + for child in self.children: + child.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + for child in self.children: + child.__exit__(exc_type, exc_val, exc_tb) + + +class MiscTests(unittest.TestCase): + + def test_atomic_write_retries_incomplete_writes(self): + truncate_at_length = 100 + length = truncate_at_length * 2 + + with PatchAtomicWrites(truncate_at_length=truncate_at_length) as cm: + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * length + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) + + self.assertEqual(os.stat(support.os_helper.TESTFN).st_size, length) + os.unlink(support.os_helper.TESTFN) + + def test_atomic_write_errors_if_unable_to_complete(self): + truncate_at_length = 100 + + with ( + PatchAtomicWrites( + truncate_at_length=truncate_at_length, never_complete=True, + ) as cm, + self.assertRaises(OSError) + ): + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * (truncate_at_length * 2) + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) + + with self.assertRaises(OSError): + os.stat(support.os_helper.TESTFN) # Check that the file did not get written. + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_windows.py b/stdlib/test/test_importlib/test_windows.py new file mode 100644 index 000000000..bef4fb46f --- /dev/null +++ b/stdlib/test/test_importlib/test_windows.py @@ -0,0 +1,210 @@ +from test.test_importlib import util as test_util +machinery = test_util.import_importlib('importlib.machinery') + +import os +import re +import sys +import unittest +from test import support +from test.support import import_helper +from contextlib import contextmanager +from test.test_importlib.util import temp_module + +import_helper.import_module('winreg', required_on=['win']) +from winreg import ( + CreateKey, HKEY_CURRENT_USER, + SetValue, REG_SZ, KEY_ALL_ACCESS, + EnumKey, CloseKey, DeleteKey, OpenKey +) + +def get_platform(): + # Port of distutils.util.get_platform(). + TARGET_TO_PLAT = { + 'x86' : 'win32', + 'x64' : 'win-amd64', + 'arm' : 'win-arm32', + } + if ('VSCMD_ARG_TGT_ARCH' in os.environ and + os.environ['VSCMD_ARG_TGT_ARCH'] in TARGET_TO_PLAT): + return TARGET_TO_PLAT[os.environ['VSCMD_ARG_TGT_ARCH']] + elif 'amd64' in sys.version.lower(): + return 'win-amd64' + elif '(arm)' in sys.version.lower(): + return 'win-arm32' + elif '(arm64)' in sys.version.lower(): + return 'win-arm64' + else: + return sys.platform + +def delete_registry_tree(root, subkey): + try: + hkey = OpenKey(root, subkey, access=KEY_ALL_ACCESS) + except OSError: + # subkey does not exist + return + while True: + try: + subsubkey = EnumKey(hkey, 0) + except OSError: + # no more subkeys + break + delete_registry_tree(hkey, subsubkey) + CloseKey(hkey) + DeleteKey(root, subkey) + +@contextmanager +def setup_module(machinery, name, path=None): + if machinery.WindowsRegistryFinder.DEBUG_BUILD: + root = machinery.WindowsRegistryFinder.REGISTRY_KEY_DEBUG + else: + root = machinery.WindowsRegistryFinder.REGISTRY_KEY + key = root.format(fullname=name, + sys_version='%d.%d' % sys.version_info[:2]) + base_key = "Software\\Python\\PythonCore\\{}.{}".format( + sys.version_info.major, sys.version_info.minor) + assert key.casefold().startswith(base_key.casefold()), ( + "expected key '{}' to start with '{}'".format(key, base_key)) + try: + with temp_module(name, "a = 1") as location: + try: + OpenKey(HKEY_CURRENT_USER, base_key) + if machinery.WindowsRegistryFinder.DEBUG_BUILD: + delete_key = os.path.dirname(key) + else: + delete_key = key + except OSError: + delete_key = base_key + subkey = CreateKey(HKEY_CURRENT_USER, key) + if path is None: + path = location + ".py" + SetValue(subkey, "", REG_SZ, path) + yield + finally: + if delete_key: + delete_registry_tree(HKEY_CURRENT_USER, delete_key) + + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsRegistryFinderTests: + # The module name is process-specific, allowing for + # simultaneous runs of the same test on a single machine. + test_module = "spamham{}".format(os.getpid()) + + def test_find_spec_missing(self): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec('spam') + self.assertIsNone(spec) + + def test_module_found(self): + with setup_module(self.machinery, self.test_module): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec(self.test_module) + self.assertIsNotNone(spec) + + def test_module_not_found(self): + with setup_module(self.machinery, self.test_module, path="."): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec(self.test_module) + self.assertIsNone(spec) + + def test_raises_deprecation_warning(self): + # WindowsRegistryFinder is not meant to be instantiated, so the + # deprecation warning is raised in the 'find_spec' method instead. + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + self.machinery.WindowsRegistryFinder.find_spec('spam') + +(Frozen_WindowsRegistryFinderTests, + Source_WindowsRegistryFinderTests + ) = test_util.test_both(WindowsRegistryFinderTests, machinery=machinery) + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsExtensionSuffixTests: + def test_tagged_suffix(self): + suffixes = self.machinery.EXTENSION_SUFFIXES + abi_flags = "t" if support.Py_GIL_DISABLED else "" + ver = sys.version_info + platform = re.sub('[^a-zA-Z0-9]', '_', get_platform()) + expected_tag = f".cp{ver.major}{ver.minor}{abi_flags}-{platform}.pyd" + try: + untagged_i = suffixes.index(".pyd") + except ValueError: + untagged_i = suffixes.index("_d.pyd") + expected_tag = "_d" + expected_tag + + self.assertIn(expected_tag, suffixes) + + # Ensure the tags are in the correct order. + tagged_i = suffixes.index(expected_tag) + self.assertLess(tagged_i, untagged_i) + +(Frozen_WindowsExtensionSuffixTests, + Source_WindowsExtensionSuffixTests + ) = test_util.test_both(WindowsExtensionSuffixTests, machinery=machinery) + + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsBootstrapPathTests(unittest.TestCase): + def check_join(self, expected, *inputs): + from importlib._bootstrap_external import _path_join + actual = _path_join(*inputs) + if expected.casefold() == actual.casefold(): + return + self.assertEqual(expected, actual) + + def test_path_join(self): + self.check_join(r"C:\A\B", "C:\\", "A", "B") + self.check_join(r"C:\A\B", "D:\\", "D", "C:\\", "A", "B") + self.check_join(r"C:\A\B", "C:\\", "A", "C:B") + self.check_join(r"C:\A\B", "C:\\", "A\\B") + self.check_join(r"C:\A\B", r"C:\A\B") + + self.check_join("D:A", r"D:", "A") + self.check_join("D:A", r"C:\B\C", "D:", "A") + self.check_join("D:A", r"C:\B\C", r"D:A") + + self.check_join(r"A\B\C", "A", "B", "C") + self.check_join(r"A\B\C", "A", r"B\C") + self.check_join(r"A\B/C", "A", "B/C") + self.check_join(r"A\B\C", "A/", "B\\", "C") + + # Dots are not normalised by this function + self.check_join(r"A\../C", "A", "../C") + self.check_join(r"A.\.\B", "A.", ".", "B") + + self.check_join(r"\\Server\Share\A\B\C", r"\\Server\Share", "A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server\Share", "D", r"\A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server2\Share2", "D", + r"\\Server\Share", "A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server", r"\Share", "A", "B", "C") + self.check_join(r"\\Server\Share", r"\\Server\Share") + self.check_join(r"\\Server\Share\\", r"\\Server\Share\\") + + # Handle edge cases with empty segments + self.check_join("C:\\A", "C:/A", "") + self.check_join("C:\\", "C:/", "") + self.check_join("C:", "C:", "") + self.check_join("//Server/Share\\", "//Server/Share/", "") + self.check_join("//Server/Share\\", "//Server/Share", "") + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/threaded_import_hangers.py b/stdlib/test/test_importlib/threaded_import_hangers.py new file mode 100644 index 000000000..5484e60a0 --- /dev/null +++ b/stdlib/test/test_importlib/threaded_import_hangers.py @@ -0,0 +1,45 @@ +# This is a helper module for test_threaded_import. The test imports this +# module, and this module tries to run various Python library functions in +# their own thread, as a side effect of being imported. If the spawned +# thread doesn't complete in TIMEOUT seconds, an "appeared to hang" message +# is appended to the module-global `errors` list. That list remains empty +# if (and only if) all functions tested complete. + +TIMEOUT = 10 + +import threading + +import tempfile +import os.path + +errors = [] + +# This class merely runs a function in its own thread T. The thread importing +# this module holds the import lock, so if the function called by T tries +# to do its own imports it will block waiting for this module's import +# to complete. +class Worker(threading.Thread): + def __init__(self, function, args): + threading.Thread.__init__(self) + self.function = function + self.args = args + + def run(self): + self.function(*self.args) + +for name, func, args in [ + # Bug 147376: TemporaryFile hung on Windows, starting in Python 2.4. + ("tempfile.TemporaryFile", lambda: tempfile.TemporaryFile().close(), ()), + + # The real cause for bug 147376: ntpath.abspath() caused the hang. + ("os.path.abspath", os.path.abspath, ('.',)), + ]: + + try: + t = Worker(func, args) + t.start() + t.join(TIMEOUT) + if t.is_alive(): + errors.append("%s appeared to hang" % name) + finally: + del t diff --git a/stdlib/test/test_importlib/util.py b/stdlib/test/test_importlib/util.py new file mode 100644 index 000000000..bd64b03b7 --- /dev/null +++ b/stdlib/test/test_importlib/util.py @@ -0,0 +1,402 @@ +import builtins +import contextlib +import errno +import functools +from importlib import machinery, util, invalidate_caches +import marshal +import os +import os.path +from test import support +from test.support import import_helper +from test.support import is_apple_mobile +from test.support import os_helper +import unittest +import sys +import tempfile +import types + +import_helper.import_module("_testmultiphase") + + +BUILTINS = types.SimpleNamespace() +BUILTINS.good_name = None +BUILTINS.bad_name = None +if 'errno' in sys.builtin_module_names: + BUILTINS.good_name = 'errno' +if 'importlib' not in sys.builtin_module_names: + BUILTINS.bad_name = 'importlib' + +if support.is_wasi: + # dlopen() is a shim for WASI as of WASI SDK which fails by default. + # We don't provide an implementation, so tests will fail. + # But we also don't want to turn off dynamic loading for those that provide + # a working implementation. + def _extension_details(): + global EXTENSIONS + EXTENSIONS = None +else: + EXTENSIONS = types.SimpleNamespace() + EXTENSIONS.path = None + EXTENSIONS.ext = None + EXTENSIONS.filename = None + EXTENSIONS.file_path = None + EXTENSIONS.name = '_testsinglephase' + + def _extension_details(): + global EXTENSIONS + for path in sys.path: + for ext in machinery.EXTENSION_SUFFIXES: + # Apple mobile platforms mechanically load .so files, + # but the findable files are labelled .fwork + if is_apple_mobile: + ext = ext.replace(".so", ".fwork") + + filename = EXTENSIONS.name + ext + file_path = os.path.join(path, filename) + if os.path.exists(file_path): + EXTENSIONS.path = path + EXTENSIONS.ext = ext + EXTENSIONS.filename = filename + EXTENSIONS.file_path = file_path + return + +_extension_details() + + +def import_importlib(module_name): + """Import a module from importlib both w/ and w/o _frozen_importlib.""" + fresh = ('importlib',) if '.' in module_name else () + frozen = import_helper.import_fresh_module(module_name) + source = import_helper.import_fresh_module(module_name, fresh=fresh, + blocked=('_frozen_importlib', '_frozen_importlib_external')) + return {'Frozen': frozen, 'Source': source} + + +def specialize_class(cls, kind, base=None, **kwargs): + # XXX Support passing in submodule names--load (and cache) them? + # That would clean up the test modules a bit more. + if base is None: + base = unittest.TestCase + elif not isinstance(base, type): + base = base[kind] + name = '{}_{}'.format(kind, cls.__name__) + bases = (cls, base) + specialized = types.new_class(name, bases) + specialized.__module__ = cls.__module__ + specialized._NAME = cls.__name__ + specialized._KIND = kind + for attr, values in kwargs.items(): + value = values[kind] + setattr(specialized, attr, value) + return specialized + + +def split_frozen(cls, base=None, **kwargs): + frozen = specialize_class(cls, 'Frozen', base, **kwargs) + source = specialize_class(cls, 'Source', base, **kwargs) + return frozen, source + + +def test_both(test_class, base=None, **kwargs): + return split_frozen(test_class, base, **kwargs) + + +CASE_INSENSITIVE_FS = True +# Windows is the only OS that is *always* case-insensitive +# (OS X *can* be case-sensitive). +if sys.platform not in ('win32', 'cygwin'): + changed_name = __file__.upper() + if changed_name == __file__: + changed_name = __file__.lower() + if not os.path.exists(changed_name): + CASE_INSENSITIVE_FS = False + +source_importlib = import_importlib('importlib')['Source'] +__import__ = {'Frozen': staticmethod(builtins.__import__), + 'Source': staticmethod(source_importlib.__import__)} + + +def case_insensitive_tests(test): + """Class decorator that nullifies tests requiring a case-insensitive + file system.""" + return unittest.skipIf(not CASE_INSENSITIVE_FS, + "requires a case-insensitive filesystem")(test) + + +def submodule(parent, name, pkg_dir, content=''): + path = os.path.join(pkg_dir, name + '.py') + with open(path, 'w', encoding='utf-8') as subfile: + subfile.write(content) + return '{}.{}'.format(parent, name), path + + +def get_code_from_pyc(pyc_path): + """Reads a pyc file and returns the unmarshalled code object within. + + No header validation is performed. + """ + with open(pyc_path, 'rb') as pyc_f: + pyc_f.seek(16) + return marshal.load(pyc_f) + + +@contextlib.contextmanager +def uncache(*names): + """Uncache a module from sys.modules. + + A basic sanity check is performed to prevent uncaching modules that either + cannot/shouldn't be uncached. + + """ + for name in names: + if name in ('sys', 'marshal'): + raise ValueError("cannot uncache {}".format(name)) + try: + del sys.modules[name] + except KeyError: + pass + try: + yield + finally: + for name in names: + try: + del sys.modules[name] + except KeyError: + pass + + +@contextlib.contextmanager +def temp_module(name, content='', *, pkg=False): + conflicts = [n for n in sys.modules if n.partition('.')[0] == name] + with os_helper.temp_cwd(None) as cwd: + with uncache(name, *conflicts): + with import_helper.DirsOnSysPath(cwd): + invalidate_caches() + + location = os.path.join(cwd, name) + if pkg: + modpath = os.path.join(location, '__init__.py') + os.mkdir(name) + else: + modpath = location + '.py' + if content is None: + # Make sure the module file gets created. + content = '' + if content is not None: + # not a namespace package + with open(modpath, 'w', encoding='utf-8') as modfile: + modfile.write(content) + yield location + + +@contextlib.contextmanager +def import_state(**kwargs): + """Context manager to manage the various importers and stored state in the + sys module. + + The 'modules' attribute is not supported as the interpreter state stores a + pointer to the dict that the interpreter uses internally; + reassigning to sys.modules does not have the desired effect. + + """ + originals = {} + try: + for attr, default in (('meta_path', []), ('path', []), + ('path_hooks', []), + ('path_importer_cache', {})): + originals[attr] = getattr(sys, attr) + if attr in kwargs: + new_value = kwargs[attr] + del kwargs[attr] + else: + new_value = default + setattr(sys, attr, new_value) + if len(kwargs): + raise ValueError('unrecognized arguments: {}'.format(kwargs)) + yield + finally: + for attr, value in originals.items(): + setattr(sys, attr, value) + + +class _ImporterMock: + + """Base class to help with creating importer mocks.""" + + def __init__(self, *names, module_code={}): + self.modules = {} + self.module_code = {} + for name in names: + if not name.endswith('.__init__'): + import_name = name + else: + import_name = name[:-len('.__init__')] + if '.' not in name: + package = None + elif import_name == name: + package = name.rsplit('.', 1)[0] + else: + package = import_name + module = types.ModuleType(import_name) + module.__loader__ = self + module.__file__ = '' + module.__package__ = package + module.attr = name + if import_name != name: + module.__path__ = [''] + self.modules[import_name] = module + if import_name in module_code: + self.module_code[import_name] = module_code[import_name] + + def __getitem__(self, name): + return self.modules[name] + + def __enter__(self): + self._uncache = uncache(*self.modules.keys()) + self._uncache.__enter__() + return self + + def __exit__(self, *exc_info): + self._uncache.__exit__(None, None, None) + + +class mock_spec(_ImporterMock): + + """Importer mock using PEP 451 APIs.""" + + def find_spec(self, fullname, path=None, parent=None): + try: + module = self.modules[fullname] + except KeyError: + return None + spec = util.spec_from_file_location( + fullname, module.__file__, loader=self, + submodule_search_locations=getattr(module, '__path__', None)) + return spec + + def create_module(self, spec): + if spec.name not in self.modules: + raise ImportError + return self.modules[spec.name] + + def exec_module(self, module): + try: + self.module_code[module.__spec__.name]() + except KeyError: + pass + + +def writes_bytecode_files(fxn): + """Decorator to protect sys.dont_write_bytecode from mutation and to skip + tests that require it to be set to False.""" + if sys.dont_write_bytecode: + return unittest.skip("relies on writing bytecode")(fxn) + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + original = sys.dont_write_bytecode + sys.dont_write_bytecode = False + try: + to_return = fxn(*args, **kwargs) + finally: + sys.dont_write_bytecode = original + return to_return + return wrapper + + +def ensure_bytecode_path(bytecode_path): + """Ensure that the __pycache__ directory for PEP 3147 pyc file exists. + + :param bytecode_path: File system path to PEP 3147 pyc file. + """ + try: + os.mkdir(os.path.dirname(bytecode_path)) + except OSError as error: + if error.errno != errno.EEXIST: + raise + + +@contextlib.contextmanager +def temporary_pycache_prefix(prefix): + """Adjust and restore sys.pycache_prefix.""" + _orig_prefix = sys.pycache_prefix + sys.pycache_prefix = prefix + try: + yield + finally: + sys.pycache_prefix = _orig_prefix + + +@contextlib.contextmanager +def create_modules(*names): + """Temporarily create each named module with an attribute (named 'attr') + that contains the name passed into the context manager that caused the + creation of the module. + + All files are created in a temporary directory returned by + tempfile.mkdtemp(). This directory is inserted at the beginning of + sys.path. When the context manager exits all created files (source and + bytecode) are explicitly deleted. + + No magic is performed when creating packages! This means that if you create + a module within a package you must also create the package's __init__ as + well. + + """ + source = 'attr = {0!r}' + created_paths = [] + mapping = {} + state_manager = None + uncache_manager = None + try: + temp_dir = tempfile.mkdtemp() + mapping['.root'] = temp_dir + import_names = set() + for name in names: + if not name.endswith('__init__'): + import_name = name + else: + import_name = name[:-len('.__init__')] + import_names.add(import_name) + if import_name in sys.modules: + del sys.modules[import_name] + name_parts = name.split('.') + file_path = temp_dir + for directory in name_parts[:-1]: + file_path = os.path.join(file_path, directory) + if not os.path.exists(file_path): + os.mkdir(file_path) + created_paths.append(file_path) + file_path = os.path.join(file_path, name_parts[-1] + '.py') + with open(file_path, 'w', encoding='utf-8') as file: + file.write(source.format(name)) + created_paths.append(file_path) + mapping[name] = file_path + uncache_manager = uncache(*import_names) + uncache_manager.__enter__() + state_manager = import_state(path=[temp_dir]) + state_manager.__enter__() + yield mapping + finally: + if state_manager is not None: + state_manager.__exit__(None, None, None) + if uncache_manager is not None: + uncache_manager.__exit__(None, None, None) + os_helper.rmtree(temp_dir) + + +def mock_path_hook(*entries, importer): + """A mock sys.path_hooks entry.""" + def hook(entry): + if entry not in entries: + raise ImportError + return importer + return hook + + +class CASEOKTestBase: + + def caseok_env_changed(self, *, should_exist): + possibilities = b'PYTHONCASEOK', 'PYTHONCASEOK' + if any(x in self.importlib._bootstrap_external._os.environ + for x in possibilities) != should_exist: + self.skipTest('os.environ changes not reflected in _os.environ') diff --git a/stdlib/test/test_module/__init__.py b/stdlib/test/test_module/__init__.py new file mode 100644 index 000000000..22132b01c --- /dev/null +++ b/stdlib/test/test_module/__init__.py @@ -0,0 +1,402 @@ +# Test the module type +import importlib.machinery +import unittest +import weakref +from test.support import gc_collect +from test.support import import_helper +from test.support.script_helper import assert_python_ok + +import sys +ModuleType = type(sys) + + +class FullLoader: + pass + + +class BareLoader: + pass + + +class ModuleTests(unittest.TestCase): + def test_uninitialized(self): + # An uninitialized module has no __dict__ or __name__, + # and __doc__ is None + foo = ModuleType.__new__(ModuleType) + self.assertTrue(isinstance(foo.__dict__, dict)) + self.assertEqual(dir(foo), []) + try: + s = foo.__name__ + self.fail("__name__ = %s" % repr(s)) + except AttributeError: + pass + self.assertEqual(foo.__doc__, ModuleType.__doc__ or '') + + def test_uninitialized_missing_getattr(self): + # Issue 8297 + # test the text in the AttributeError of an uninitialized module + foo = ModuleType.__new__(ModuleType) + self.assertRaisesRegex( + AttributeError, "module has no attribute 'not_here'", + getattr, foo, "not_here") + + def test_missing_getattr(self): + # Issue 8297 + # test the text in the AttributeError + foo = ModuleType("foo") + self.assertRaisesRegex( + AttributeError, "module 'foo' has no attribute 'not_here'", + getattr, foo, "not_here") + + def test_no_docstring(self): + # Regularly initialized module, no docstring + foo = ModuleType("foo") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, None) + self.assertIs(foo.__loader__, None) + self.assertIs(foo.__package__, None) + self.assertIs(foo.__spec__, None) + self.assertEqual(foo.__dict__, {"__name__": "foo", "__doc__": None, + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_ascii_docstring(self): + # ASCII docstring + foo = ModuleType("foo", "foodoc") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc") + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc", + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_unicode_docstring(self): + # Unicode docstring + foo = ModuleType("foo", "foodoc\u1234") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc\u1234") + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc\u1234", + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_reinit(self): + # Reinitialization should not replace the __dict__ + foo = ModuleType("foo", "foodoc\u1234") + foo.bar = 42 + d = foo.__dict__ + foo.__init__("foo", "foodoc") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc") + self.assertEqual(foo.bar, 42) + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc", "bar": 42, + "__loader__": None, "__package__": None, "__spec__": None}) + self.assertTrue(foo.__dict__ is d) + + def test_dont_clear_dict(self): + # See issue 7140. + def f(): + foo = ModuleType("foo") + foo.bar = 4 + return foo + gc_collect() + self.assertEqual(f().__dict__["bar"], 4) + + def test_clear_dict_in_ref_cycle(self): + destroyed = [] + m = ModuleType("foo") + m.destroyed = destroyed + s = """class A: + def __init__(self, l): + self.l = l + def __del__(self): + self.l.append(1) +a = A(destroyed)""" + exec(s, m.__dict__) + del m + gc_collect() + self.assertEqual(destroyed, [1]) + + def test_weakref(self): + m = ModuleType("foo") + wr = weakref.ref(m) + self.assertIs(wr(), m) + del m + gc_collect() + self.assertIs(wr(), None) + + def test_module_getattr(self): + import test.test_module.good_getattr as gga + from test.test_module.good_getattr import test + self.assertEqual(test, "There is test") + self.assertEqual(gga.x, 1) + self.assertEqual(gga.y, 2) + with self.assertRaisesRegex(AttributeError, + "Deprecated, use whatever instead"): + gga.yolo + self.assertEqual(gga.whatever, "There is whatever") + del sys.modules['test.test_module.good_getattr'] + + def test_module_getattr_errors(self): + import test.test_module.bad_getattr as bga + from test.test_module import bad_getattr2 + self.assertEqual(bga.x, 1) + self.assertEqual(bad_getattr2.x, 1) + with self.assertRaises(TypeError): + bga.nope + with self.assertRaises(TypeError): + bad_getattr2.nope + del sys.modules['test.test_module.bad_getattr'] + if 'test.test_module.bad_getattr2' in sys.modules: + del sys.modules['test.test_module.bad_getattr2'] + + def test_module_dir(self): + import test.test_module.good_getattr as gga + self.assertEqual(dir(gga), ['a', 'b', 'c']) + del sys.modules['test.test_module.good_getattr'] + + def test_module_dir_errors(self): + import test.test_module.bad_getattr as bga + from test.test_module import bad_getattr2 + with self.assertRaises(TypeError): + dir(bga) + with self.assertRaises(TypeError): + dir(bad_getattr2) + del sys.modules['test.test_module.bad_getattr'] + if 'test.test_module.bad_getattr2' in sys.modules: + del sys.modules['test.test_module.bad_getattr2'] + + def test_module_getattr_tricky(self): + from test.test_module import bad_getattr3 + # these lookups should not crash + with self.assertRaises(AttributeError): + bad_getattr3.one + with self.assertRaises(AttributeError): + bad_getattr3.delgetattr + if 'test.test_module.bad_getattr3' in sys.modules: + del sys.modules['test.test_module.bad_getattr3'] + + def test_module_repr_minimal(self): + # reprs when modules have no __file__, __name__, or __loader__ + m = ModuleType('foo') + del m.__name__ + self.assertEqual(repr(m), "") + + def test_module_repr_with_name(self): + m = ModuleType('foo') + self.assertEqual(repr(m), "") + + def test_module_repr_with_name_and_filename(self): + m = ModuleType('foo') + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_filename_only(self): + m = ModuleType('foo') + del m.__name__ + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_loader_as_None(self): + m = ModuleType('foo') + assert m.__loader__ is None + self.assertEqual(repr(m), "") + + def test_module_repr_with_bare_loader_but_no_name(self): + m = ModuleType('foo') + del m.__name__ + # Yes, a class not an instance. + m.__loader__ = BareLoader + loader_repr = repr(BareLoader) + self.assertEqual( + repr(m), "".format(loader_repr)) + + def test_module_repr_with_full_loader_but_no_name(self): + # m.__loader__.module_repr() will fail because the module has no + # m.__name__. This exception will get suppressed and instead the + # loader's repr will be used. + m = ModuleType('foo') + del m.__name__ + # Yes, a class not an instance. + m.__loader__ = FullLoader + loader_repr = repr(FullLoader) + self.assertEqual( + repr(m), "".format(loader_repr)) + + def test_module_repr_with_bare_loader(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = BareLoader + module_repr = repr(BareLoader) + self.assertEqual( + repr(m), "".format(module_repr)) + + def test_module_repr_with_full_loader(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = FullLoader + self.assertEqual( + repr(m), f")>") + + def test_module_repr_with_bare_loader_and_filename(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = BareLoader + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_full_loader_and_filename(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = FullLoader + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_builtin(self): + self.assertEqual(repr(sys), "") + + def test_module_repr_source(self): + r = repr(unittest) + starts_with = "") + + def test_module_repr_with_namespace_package_and_custom_loader(self): + m = ModuleType('foo') + loader = BareLoader() + spec = importlib.machinery.ModuleSpec('foo', loader) + m.__loader__ = loader + m.__spec__ = spec + expected_repr_pattern = r"\)>" + self.assertRegex(repr(m), expected_repr_pattern) + self.assertNotIn('from', repr(m)) + + def test_module_repr_with_fake_namespace_package(self): + m = ModuleType('foo') + loader = BareLoader() + loader._path = ['spam'] + spec = importlib.machinery.ModuleSpec('foo', loader) + m.__loader__ = loader + m.__spec__ = spec + expected_repr_pattern = r"\)>" + self.assertRegex(repr(m), expected_repr_pattern) + self.assertNotIn('from', repr(m)) + + def test_module_finalization_at_shutdown(self): + # Module globals and builtins should still be available during shutdown + rc, out, err = assert_python_ok("-c", "from test.test_module import final_a") + self.assertFalse(err) + lines = out.splitlines() + self.assertEqual(set(lines), { + b"x = a", + b"x = b", + b"final_a.x = a", + b"final_b.x = b", + b"len = len", + b"shutil.rmtree = rmtree"}) + + def test_descriptor_errors_propagate(self): + class Descr: + def __get__(self, o, t): + raise RuntimeError + class M(ModuleType): + melon = Descr() + self.assertRaises(RuntimeError, getattr, M("mymod"), "melon") + + def test_lazy_create_annotations(self): + # module objects lazy create their __annotations__ dict on demand. + # the annotations dict is stored in module.__dict__. + # a freshly created module shouldn't have an annotations dict yet. + foo = ModuleType("foo") + for i in range(4): + self.assertFalse("__annotations__" in foo.__dict__) + d = foo.__annotations__ + self.assertTrue("__annotations__" in foo.__dict__) + self.assertEqual(foo.__annotations__, d) + self.assertEqual(foo.__dict__['__annotations__'], d) + if i % 2: + del foo.__annotations__ + else: + del foo.__dict__['__annotations__'] + + def test_setting_annotations(self): + foo = ModuleType("foo") + for i in range(4): + self.assertFalse("__annotations__" in foo.__dict__) + d = {'a': int} + foo.__annotations__ = d + self.assertTrue("__annotations__" in foo.__dict__) + self.assertEqual(foo.__annotations__, d) + self.assertEqual(foo.__dict__['__annotations__'], d) + if i % 2: + del foo.__annotations__ + else: + del foo.__dict__['__annotations__'] + + def test_annotations_getset_raises(self): + # double delete + foo = ModuleType("foo") + foo.__annotations__ = {} + del foo.__annotations__ + with self.assertRaises(AttributeError): + del foo.__annotations__ + + def test_annotations_are_created_correctly(self): + ann_module4 = import_helper.import_fresh_module( + 'test.typinganndata.ann_module4', + ) + self.assertFalse("__annotations__" in ann_module4.__dict__) + self.assertEqual(ann_module4.__annotations__, {"a": int, "b": str}) + self.assertTrue("__annotations__" in ann_module4.__dict__) + del ann_module4.__annotations__ + self.assertFalse("__annotations__" in ann_module4.__dict__) + + + def test_repeated_attribute_pops(self): + # Repeated accesses to module attribute will be specialized + # Check that popping the attribute doesn't break it + m = ModuleType("test") + d = m.__dict__ + count = 0 + for _ in range(100): + m.attr = 1 + count += m.attr # Might be specialized + d.pop("attr") + self.assertEqual(count, 100) + + # frozen and namespace module reprs are tested in importlib. + + def test_subclass_with_slots(self): + # In 3.11alpha this crashed, as the slots weren't NULLed. + + class ModuleWithSlots(ModuleType): + __slots__ = ("a", "b") + + def __init__(self, name): + super().__init__(name) + + m = ModuleWithSlots("name") + with self.assertRaises(AttributeError): + m.a + with self.assertRaises(AttributeError): + m.b + m.a, m.b = 1, 2 + self.assertEqual(m.a, 1) + self.assertEqual(m.b, 2) + + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_module/bad_getattr.py b/stdlib/test/test_module/bad_getattr.py new file mode 100644 index 000000000..16f901b13 --- /dev/null +++ b/stdlib/test/test_module/bad_getattr.py @@ -0,0 +1,4 @@ +x = 1 + +__getattr__ = "Surprise!" +__dir__ = "Surprise again!" diff --git a/stdlib/test/test_module/bad_getattr2.py b/stdlib/test/test_module/bad_getattr2.py new file mode 100644 index 000000000..0a52a53b5 --- /dev/null +++ b/stdlib/test/test_module/bad_getattr2.py @@ -0,0 +1,7 @@ +def __getattr__(): + "Bad one" + +x = 1 + +def __dir__(bad_sig): + return [] diff --git a/stdlib/test/test_module/bad_getattr3.py b/stdlib/test/test_module/bad_getattr3.py new file mode 100644 index 000000000..0d5f9266c --- /dev/null +++ b/stdlib/test/test_module/bad_getattr3.py @@ -0,0 +1,5 @@ +def __getattr__(name): + if name != 'delgetattr': + raise AttributeError + del globals()['__getattr__'] + raise AttributeError diff --git a/stdlib/test/test_module/final_a.py b/stdlib/test/test_module/final_a.py new file mode 100644 index 000000000..a983f3111 --- /dev/null +++ b/stdlib/test/test_module/final_a.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.test_module.final_b + +x = 'a' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_b.x =", test.test_module.final_b.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/stdlib/test/test_module/final_b.py b/stdlib/test/test_module/final_b.py new file mode 100644 index 000000000..f3e8d5594 --- /dev/null +++ b/stdlib/test/test_module/final_b.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.test_module.final_a + +x = 'b' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_a.x =", test.test_module.final_a.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/stdlib/test/test_module/good_getattr.py b/stdlib/test/test_module/good_getattr.py new file mode 100644 index 000000000..7d27de626 --- /dev/null +++ b/stdlib/test/test_module/good_getattr.py @@ -0,0 +1,11 @@ +x = 1 + +def __dir__(): + return ['a', 'b', 'c'] + +def __getattr__(name): + if name == "yolo": + raise AttributeError("Deprecated, use whatever instead") + return f"There is {name}" + +y = 2 diff --git a/stdlib/test/test_py_compile.py b/stdlib/test/test_py_compile.py new file mode 100644 index 000000000..749a877d0 --- /dev/null +++ b/stdlib/test/test_py_compile.py @@ -0,0 +1,310 @@ +import functools +import importlib.util +import os +import py_compile +import shutil +import stat +import subprocess +import sys +import tempfile +import unittest + +from test import support +from test.support import os_helper, script_helper + + +def without_source_date_epoch(fxn): + """Runs function with SOURCE_DATE_EPOCH unset.""" + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + with os_helper.EnvironmentVarGuard() as env: + env.unset('SOURCE_DATE_EPOCH') + return fxn(*args, **kwargs) + return wrapper + + +def with_source_date_epoch(fxn): + """Runs function with SOURCE_DATE_EPOCH set.""" + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + with os_helper.EnvironmentVarGuard() as env: + env['SOURCE_DATE_EPOCH'] = '123456789' + return fxn(*args, **kwargs) + return wrapper + + +# Run tests with SOURCE_DATE_EPOCH set or unset explicitly. +class SourceDateEpochTestMeta(type(unittest.TestCase)): + def __new__(mcls, name, bases, dct, *, source_date_epoch): + cls = super().__new__(mcls, name, bases, dct) + + for attr in dir(cls): + if attr.startswith('test_'): + meth = getattr(cls, attr) + if source_date_epoch: + wrapper = with_source_date_epoch(meth) + else: + wrapper = without_source_date_epoch(meth) + setattr(cls, attr, wrapper) + + return cls + + +class PyCompileTestsBase: + + def setUp(self): + self.directory = tempfile.mkdtemp(dir=os.getcwd()) + self.source_path = os.path.join(self.directory, '_test.py') + self.pyc_path = self.source_path + 'c' + self.cache_path = importlib.util.cache_from_source(self.source_path) + self.cwd_drive = os.path.splitdrive(os.getcwd())[0] + # In these tests we compute relative paths. When using Windows, the + # current working directory path and the 'self.source_path' might be + # on different drives. Therefore we need to switch to the drive where + # the temporary source file lives. + drive = os.path.splitdrive(self.source_path)[0] + if drive: + os.chdir(drive) + with open(self.source_path, 'w') as file: + file.write('x = 123\n') + + def tearDown(self): + shutil.rmtree(self.directory) + if self.cwd_drive: + os.chdir(self.cwd_drive) + + def test_absolute_path(self): + py_compile.compile(self.source_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + def test_do_not_overwrite_symlinks(self): + # In the face of a cfile argument being a symlink, bail out. + # Issue #17222 + try: + os.symlink(self.pyc_path + '.actual', self.pyc_path) + except (NotImplementedError, OSError): + self.skipTest('need to be able to create a symlink for a file') + else: + assert os.path.islink(self.pyc_path) + with self.assertRaises(FileExistsError): + py_compile.compile(self.source_path, self.pyc_path) + + @unittest.skipIf(not os.path.exists(os.devnull) or os.path.isfile(os.devnull), + 'requires os.devnull and for it to be a non-regular file') + def test_do_not_overwrite_nonregular_files(self): + # In the face of a cfile argument being a non-regular file, bail out. + # Issue #17222 + with self.assertRaises(FileExistsError): + py_compile.compile(self.source_path, os.devnull) + + def test_cache_path(self): + py_compile.compile(self.source_path) + self.assertTrue(os.path.exists(self.cache_path)) + + def test_cwd(self): + with os_helper.change_cwd(self.directory): + py_compile.compile(os.path.basename(self.source_path), + os.path.basename(self.pyc_path)) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + def test_relative_path(self): + py_compile.compile(os.path.relpath(self.source_path), + os.path.relpath(self.pyc_path)) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + @os_helper.skip_if_dac_override + @unittest.skipIf(os.name == 'nt', + 'cannot control directory permissions on Windows') + @os_helper.skip_unless_working_chmod + def test_exceptions_propagate(self): + # Make sure that exceptions raised thanks to issues with writing + # bytecode. + # http://bugs.python.org/issue17244 + mode = os.stat(self.directory) + os.chmod(self.directory, stat.S_IREAD) + try: + with self.assertRaises(IOError): + py_compile.compile(self.source_path, self.pyc_path) + finally: + os.chmod(self.directory, mode.st_mode) + + def test_bad_coding(self): + bad_coding = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'bad_coding2.py') + with support.captured_stderr(): + self.assertIsNone(py_compile.compile(bad_coding, doraise=False)) + self.assertFalse(os.path.exists( + importlib.util.cache_from_source(bad_coding))) + + def test_source_date_epoch(self): + py_compile.compile(self.source_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + with open(self.pyc_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + if os.environ.get('SOURCE_DATE_EPOCH'): + expected_flags = 0b11 + else: + expected_flags = 0b00 + + self.assertEqual(flags, expected_flags) + + @unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O') + def test_double_dot_no_clobber(self): + # http://bugs.python.org/issue22966 + # py_compile foo.bar.py -> __pycache__/foo.cpython-34.pyc + weird_path = os.path.join(self.directory, 'foo.bar.py') + cache_path = importlib.util.cache_from_source(weird_path) + pyc_path = weird_path + 'c' + head, tail = os.path.split(cache_path) + penultimate_tail = os.path.basename(head) + self.assertEqual( + os.path.join(penultimate_tail, tail), + os.path.join( + '__pycache__', + 'foo.bar.{}.pyc'.format(sys.implementation.cache_tag))) + with open(weird_path, 'w') as file: + file.write('x = 123\n') + py_compile.compile(weird_path) + self.assertTrue(os.path.exists(cache_path)) + self.assertFalse(os.path.exists(pyc_path)) + + def test_optimization_path(self): + # Specifying optimized bytecode should lead to a path reflecting that. + self.assertIn('opt-2', py_compile.compile(self.source_path, optimize=2)) + + def test_invalidation_mode(self): + py_compile.compile( + self.source_path, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + with open(self.cache_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + self.assertEqual(flags, 0b11) + py_compile.compile( + self.source_path, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + with open(self.cache_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + self.assertEqual(flags, 0b1) + + def test_quiet(self): + bad_coding = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'bad_coding2.py') + with support.captured_stderr() as stderr: + self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2)) + self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2)) + self.assertEqual(stderr.getvalue(), '') + with self.assertRaises(py_compile.PyCompileError): + py_compile.compile(bad_coding, doraise=True, quiet=1) + + def test_utf7_decoded_cr_compiles(self): + with open(self.source_path, 'wb') as file: + file.write(b"#coding=U7+AA0''\n") + + pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True) + self.assertEqual(pyc_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + + +class PyCompileTestsWithSourceEpoch(PyCompileTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): + pass + + +class PyCompileTestsWithoutSourceEpoch(PyCompileTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=False): + pass + + +class PyCompileCLITestCase(unittest.TestCase): + + def setUp(self): + self.directory = tempfile.mkdtemp() + self.source_path = os.path.join(self.directory, '_test.py') + self.cache_path = importlib.util.cache_from_source(self.source_path, + optimization='' if __debug__ else 1) + with open(self.source_path, 'w') as file: + file.write('x = 123\n') + + def tearDown(self): + os_helper.rmtree(self.directory) + + @support.requires_subprocess() + def pycompilecmd(self, *args, **kwargs): + # assert_python_* helpers don't return proc object. We'll just use + # subprocess.run() instead of spawn_python() and its friends to test + # stdin support of the CLI. + opts = '-m' if __debug__ else '-Om' + if args and args[0] == '-' and 'input' in kwargs: + return subprocess.run([sys.executable, opts, 'py_compile', '-'], + input=kwargs['input'].encode(), + capture_output=True) + return script_helper.assert_python_ok(opts, 'py_compile', *args, **kwargs) + + def pycompilecmd_failure(self, *args): + return script_helper.assert_python_failure('-m', 'py_compile', *args) + + def test_stdin(self): + self.assertFalse(os.path.exists(self.cache_path)) + result = self.pycompilecmd('-', input=self.source_path) + self.assertEqual(result.returncode, 0) + self.assertEqual(result.stdout, b'') + self.assertEqual(result.stderr, b'') + self.assertTrue(os.path.exists(self.cache_path)) + + def test_with_files(self): + rc, stdout, stderr = self.pycompilecmd(self.source_path, self.source_path) + self.assertEqual(rc, 0) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + self.assertTrue(os.path.exists(self.cache_path)) + + def test_bad_syntax(self): + bad_syntax = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'badsyntax_3131.py') + rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertIn(b'SyntaxError', stderr) + + def test_bad_syntax_with_quiet(self): + bad_syntax = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'badsyntax_3131.py') + rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + + def test_file_not_exists(self): + should_not_exists = os.path.join(os.path.dirname(__file__), 'should_not_exists.py') + rc, stdout, stderr = self.pycompilecmd_failure(self.source_path, should_not_exists) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertIn(b'no such file or directory', stderr.lower()) + + def test_file_not_exists_with_quiet(self): + should_not_exists = os.path.join(os.path.dirname(__file__), 'should_not_exists.py') + rc, stdout, stderr = self.pycompilecmd_failure('-q', self.source_path, should_not_exists) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/zipapp.py b/stdlib/zipapp.py new file mode 100644 index 000000000..7a4ef96ea --- /dev/null +++ b/stdlib/zipapp.py @@ -0,0 +1,231 @@ +import contextlib +import os +import pathlib +import shutil +import stat +import sys +import zipfile + +__all__ = ['ZipAppError', 'create_archive', 'get_interpreter'] + + +# The __main__.py used if the users specifies "-m module:fn". +# Note that this will always be written as UTF-8 (module and +# function names can be non-ASCII in Python 3). +# We add a coding cookie even though UTF-8 is the default in Python 3 +# because the resulting archive may be intended to be run under Python 2. +MAIN_TEMPLATE = """\ +# -*- coding: utf-8 -*- +import {module} +{module}.{fn}() +""" + + +# The Windows launcher defaults to UTF-8 when parsing shebang lines if the +# file has no BOM. So use UTF-8 on Windows. +# On Unix, use the filesystem encoding. +if sys.platform.startswith('win'): + shebang_encoding = 'utf-8' +else: + shebang_encoding = sys.getfilesystemencoding() + + +class ZipAppError(ValueError): + pass + + +@contextlib.contextmanager +def _maybe_open(archive, mode): + if isinstance(archive, (str, os.PathLike)): + with open(archive, mode) as f: + yield f + else: + yield archive + + +def _write_file_prefix(f, interpreter): + """Write a shebang line.""" + if interpreter: + shebang = b'#!' + interpreter.encode(shebang_encoding) + b'\n' + f.write(shebang) + + +def _copy_archive(archive, new_archive, interpreter=None): + """Copy an application archive, modifying the shebang line.""" + with _maybe_open(archive, 'rb') as src: + # Skip the shebang line from the source. + # Read 2 bytes of the source and check if they are #!. + first_2 = src.read(2) + if first_2 == b'#!': + # Discard the initial 2 bytes and the rest of the shebang line. + first_2 = b'' + src.readline() + + with _maybe_open(new_archive, 'wb') as dst: + _write_file_prefix(dst, interpreter) + # If there was no shebang, "first_2" contains the first 2 bytes + # of the source file, so write them before copying the rest + # of the file. + dst.write(first_2) + shutil.copyfileobj(src, dst) + + if interpreter and isinstance(new_archive, str): + os.chmod(new_archive, os.stat(new_archive).st_mode | stat.S_IEXEC) + + +def create_archive(source, target=None, interpreter=None, main=None, + filter=None, compressed=False): + """Create an application archive from SOURCE. + + The SOURCE can be the name of a directory, or a filename or a file-like + object referring to an existing archive. + + The content of SOURCE is packed into an application archive in TARGET, + which can be a filename or a file-like object. If SOURCE is a directory, + TARGET can be omitted and will default to the name of SOURCE with .pyz + appended. + + The created application archive will have a shebang line specifying + that it should run with INTERPRETER (there will be no shebang line if + INTERPRETER is None), and a __main__.py which runs MAIN (if MAIN is + not specified, an existing __main__.py will be used). It is an error + to specify MAIN for anything other than a directory source with no + __main__.py, and it is an error to omit MAIN if the directory has no + __main__.py. + """ + # Are we copying an existing archive? + source_is_file = False + if hasattr(source, 'read') and hasattr(source, 'readline'): + source_is_file = True + else: + source = pathlib.Path(source) + if source.is_file(): + source_is_file = True + + if source_is_file: + _copy_archive(source, target, interpreter) + return + + # We are creating a new archive from a directory. + if not source.exists(): + raise ZipAppError("Source does not exist") + has_main = (source / '__main__.py').is_file() + if main and has_main: + raise ZipAppError( + "Cannot specify entry point if the source has __main__.py") + if not (main or has_main): + raise ZipAppError("Archive has no entry point") + + main_py = None + if main: + # Check that main has the right format. + mod, sep, fn = main.partition(':') + mod_ok = all(part.isidentifier() for part in mod.split('.')) + fn_ok = all(part.isidentifier() for part in fn.split('.')) + if not (sep == ':' and mod_ok and fn_ok): + raise ZipAppError("Invalid entry point: " + main) + main_py = MAIN_TEMPLATE.format(module=mod, fn=fn) + + if target is None: + target = source.with_suffix('.pyz') + elif not hasattr(target, 'write'): + target = pathlib.Path(target) + + # Create the list of files to add to the archive now, in case + # the target is being created in the source directory - we + # don't want the target being added to itself + files_to_add = {} + for path in sorted(source.rglob('*')): + relative_path = path.relative_to(source) + if filter is None or filter(relative_path): + files_to_add[path] = relative_path + + # The target cannot be in the list of files to add. If it were, we'd + # end up overwriting the source file and writing the archive into + # itself, which is an error. We therefore check for that case and + # provide a helpful message for the user. + + # Note that we only do a simple path equality check. This won't + # catch every case, but it will catch the common case where the + # source is the CWD and the target is a file in the CWD. More + # thorough checks don't provide enough value to justify the extra + # cost. + + # If target is a file-like object, it will simply fail to compare + # equal to any of the entries in files_to_add, so there's no need + # to add a special check for that. + if target in files_to_add: + raise ZipAppError( + f"The target archive {target} overwrites one of the source files.") + + with _maybe_open(target, 'wb') as fd: + _write_file_prefix(fd, interpreter) + compression = (zipfile.ZIP_DEFLATED if compressed else + zipfile.ZIP_STORED) + with zipfile.ZipFile(fd, 'w', compression=compression) as z: + for path, relative_path in files_to_add.items(): + z.write(path, relative_path.as_posix()) + if main_py: + z.writestr('__main__.py', main_py.encode('utf-8')) + + if interpreter and not hasattr(target, 'write'): + target.chmod(target.stat().st_mode | stat.S_IEXEC) + + +def get_interpreter(archive): + with _maybe_open(archive, 'rb') as f: + if f.read(2) == b'#!': + return f.readline().strip().decode(shebang_encoding) + + +def main(args=None): + """Run the zipapp command line interface. + + The ARGS parameter lets you specify the argument list directly. + Omitting ARGS (or setting it to None) works as for argparse, using + sys.argv[1:] as the argument list. + """ + import argparse + + parser = argparse.ArgumentParser(color=True) + parser.add_argument('--output', '-o', default=None, + help="The name of the output archive. " + "Required if SOURCE is an archive.") + parser.add_argument('--python', '-p', default=None, + help="The name of the Python interpreter to use " + "(default: no shebang line).") + parser.add_argument('--main', '-m', default=None, + help="The main function of the application " + "(default: use an existing __main__.py).") + parser.add_argument('--compress', '-c', action='store_true', + help="Compress files with the deflate method. " + "Files are stored uncompressed by default.") + parser.add_argument('--info', default=False, action='store_true', + help="Display the interpreter from the archive.") + parser.add_argument('source', + help="Source directory (or existing archive).") + + args = parser.parse_args(args) + + # Handle `python -m zipapp archive.pyz --info`. + if args.info: + if not os.path.isfile(args.source): + raise SystemExit("Can only get info for an archive file") + interpreter = get_interpreter(args.source) + print("Interpreter: {}".format(interpreter or "")) + sys.exit(0) + + if os.path.isfile(args.source): + if args.output is None or (os.path.exists(args.output) and + os.path.samefile(args.source, args.output)): + raise SystemExit("In-place editing of archives is not supported") + if args.main: + raise SystemExit("Cannot change the main function when copying") + + create_archive(args.source, args.output, + interpreter=args.python, main=args.main, + compressed=args.compress) + + +if __name__ == '__main__': + main() diff --git a/stdlib/zipimport.py b/stdlib/zipimport.py new file mode 100644 index 000000000..3455bbc9f --- /dev/null +++ b/stdlib/zipimport.py @@ -0,0 +1,825 @@ +"""zipimport provides support for importing Python modules from Zip archives. + +This module exports two objects: +- zipimporter: a class; its constructor takes a path to a Zip archive. +- ZipImportError: exception raised by zipimporter objects. It's a + subclass of ImportError, so it can be caught as ImportError, too. + +It is usually not needed to use the zipimport module explicitly; it is +used by the builtin import mechanism for sys.path items that are paths +to Zip archives. +""" + +# gopy resolves imports Go-side and never freezes the bootstrap modules, +# so the frozen _frozen_importlib / _frozen_importlib_external names are +# unavailable. The vendored importlib._bootstrap and +# importlib._bootstrap_external are the same source, so import them +# directly. +from importlib import _bootstrap_external +from importlib._bootstrap_external import _unpack_uint16, _unpack_uint32, _unpack_uint64 +from importlib import _bootstrap # for _verbose_message +import _imp # for check_hash_based_pycs +import _io # for open +import marshal # for loads +import sys # for modules +import time # for mktime + +__all__ = ['ZipImportError', 'zipimporter'] + + +path_sep = _bootstrap_external.path_sep +alt_path_sep = _bootstrap_external.path_separators[1:] + + +class ZipImportError(ImportError): + pass + +# _read_directory() cache +_zip_directory_cache = {} + +_module_type = type(sys) + +END_CENTRAL_DIR_SIZE = 22 +END_CENTRAL_DIR_SIZE_64 = 56 +END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20 +STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature +STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature +STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature +MAX_COMMENT_LEN = (1 << 16) - 1 +MAX_UINT32 = 0xffffffff +ZIP64_EXTRA_TAG = 0x1 + +class zipimporter(_bootstrap_external._LoaderBasics): + """zipimporter(archivepath) -> zipimporter object + + Create a new zipimporter instance. 'archivepath' must be a path to + a zipfile, or to a specific path inside a zipfile. For example, it can be + '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a + valid directory inside the archive. + + 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip + archive. + + The 'archive' attribute of zipimporter objects contains the name of the + zipfile targeted. + """ + + # Split the "subdirectory" from the Zip archive path, lookup a matching + # entry in sys.path_importer_cache, fetch the file directory from there + # if found, or else read it from the archive. + def __init__(self, path): + if not isinstance(path, str): + raise TypeError(f"expected str, not {type(path)!r}") + if not path: + raise ZipImportError('archive path is empty', path=path) + if alt_path_sep: + path = path.replace(alt_path_sep, path_sep) + + prefix = [] + while True: + try: + st = _bootstrap_external._path_stat(path) + except (OSError, ValueError): + # On Windows a ValueError is raised for too long paths. + # Back up one path element. + dirname, basename = _bootstrap_external._path_split(path) + if dirname == path: + raise ZipImportError('not a Zip file', path=path) + path = dirname + prefix.append(basename) + else: + # it exists + if (st.st_mode & 0o170000) != 0o100000: # stat.S_ISREG + # it's a not file + raise ZipImportError('not a Zip file', path=path) + break + + if path not in _zip_directory_cache: + _zip_directory_cache[path] = _read_directory(path) + self.archive = path + # a prefix directory following the ZIP file path. + self.prefix = _bootstrap_external._path_join(*prefix[::-1]) + if self.prefix: + self.prefix += path_sep + + + def find_spec(self, fullname, target=None): + """Create a ModuleSpec for the specified module. + + Returns None if the module cannot be found. + """ + module_info = _get_module_info(self, fullname) + if module_info is not None: + return _bootstrap.spec_from_loader(fullname, self, is_package=module_info) + else: + # Not a module or regular package. See if this is a directory, and + # therefore possibly a portion of a namespace package. + + # We're only interested in the last path component of fullname + # earlier components are recorded in self.prefix. + modpath = _get_module_path(self, fullname) + if _is_dir(self, modpath): + # This is possibly a portion of a namespace + # package. Return the string representing its path, + # without a trailing separator. + path = f'{self.archive}{path_sep}{modpath}' + spec = _bootstrap.ModuleSpec(name=fullname, loader=None, + is_package=True) + spec.submodule_search_locations.append(path) + return spec + else: + return None + + def get_code(self, fullname): + """get_code(fullname) -> code object. + + Return the code object for the specified module. Raise ZipImportError + if the module couldn't be imported. + """ + code, ispackage, modpath = _get_module_code(self, fullname) + return code + + + def get_data(self, pathname): + """get_data(pathname) -> string with file data. + + Return the data associated with 'pathname'. Raise OSError if + the file wasn't found. + """ + if alt_path_sep: + pathname = pathname.replace(alt_path_sep, path_sep) + + key = pathname + if pathname.startswith(self.archive + path_sep): + key = pathname[len(self.archive + path_sep):] + + try: + toc_entry = self._get_files()[key] + except KeyError: + raise OSError(0, '', key) + if toc_entry is None: + return b'' + return _get_data(self.archive, toc_entry) + + + # Return a string matching __file__ for the named module + def get_filename(self, fullname): + """get_filename(fullname) -> filename string. + + Return the filename for the specified module or raise ZipImportError + if it couldn't be imported. + """ + # Deciding the filename requires working out where the code + # would come from if the module was actually loaded + code, ispackage, modpath = _get_module_code(self, fullname) + return modpath + + + def get_source(self, fullname): + """get_source(fullname) -> source string. + + Return the source code for the specified module. Raise ZipImportError + if the module couldn't be found, return None if the archive does + contain the module, but has no source for it. + """ + mi = _get_module_info(self, fullname) + if mi is None: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) + + path = _get_module_path(self, fullname) + if mi: + fullpath = _bootstrap_external._path_join(path, '__init__.py') + else: + fullpath = f'{path}.py' + + try: + toc_entry = self._get_files()[fullpath] + except KeyError: + # we have the module, but no source + return None + return _get_data(self.archive, toc_entry).decode() + + + # Return a bool signifying whether the module is a package or not. + def is_package(self, fullname): + """is_package(fullname) -> bool. + + Return True if the module specified by fullname is a package. + Raise ZipImportError if the module couldn't be found. + """ + mi = _get_module_info(self, fullname) + if mi is None: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) + return mi + + + # Load and return the module named by 'fullname'. + def load_module(self, fullname): + """load_module(fullname) -> module. + + Load the module specified by 'fullname'. 'fullname' must be the + fully qualified (dotted) module name. It returns the imported + module, or raises ZipImportError if it could not be imported. + + Deprecated since Python 3.10. Use exec_module() instead. + """ + import warnings + warnings._deprecated("zipimport.zipimporter.load_module", + f"{warnings._DEPRECATED_MSG}; " + "use zipimport.zipimporter.exec_module() instead", + remove=(3, 15)) + code, ispackage, modpath = _get_module_code(self, fullname) + mod = sys.modules.get(fullname) + if mod is None or not isinstance(mod, _module_type): + mod = _module_type(fullname) + sys.modules[fullname] = mod + mod.__loader__ = self + + try: + if ispackage: + # add __path__ to the module *before* the code gets + # executed + path = _get_module_path(self, fullname) + fullpath = _bootstrap_external._path_join(self.archive, path) + mod.__path__ = [fullpath] + + if not hasattr(mod, '__builtins__'): + mod.__builtins__ = __builtins__ + _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath) + exec(code, mod.__dict__) + except: + del sys.modules[fullname] + raise + + try: + mod = sys.modules[fullname] + except KeyError: + raise ImportError(f'Loaded module {fullname!r} not found in sys.modules') + _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath) + return mod + + + def get_resource_reader(self, fullname): + """Return the ResourceReader for a module in a zip file.""" + from importlib.readers import ZipReader + + return ZipReader(self, fullname) + + + def _get_files(self): + """Return the files within the archive path.""" + try: + files = _zip_directory_cache[self.archive] + except KeyError: + try: + files = _zip_directory_cache[self.archive] = _read_directory(self.archive) + except ZipImportError: + files = {} + + return files + + + def invalidate_caches(self): + """Invalidates the cache of file data of the archive path.""" + _zip_directory_cache.pop(self.archive, None) + + + def __repr__(self): + return f'' + + +# _zip_searchorder defines how we search for a module in the Zip +# archive: we first search for a package __init__, then for +# non-package .pyc, and .py entries. The .pyc entries +# are swapped by initzipimport() if we run in optimized mode. Also, +# '/' is replaced by path_sep there. +_zip_searchorder = ( + (path_sep + '__init__.pyc', True, True), + (path_sep + '__init__.py', False, True), + ('.pyc', True, False), + ('.py', False, False), +) + +# Given a module name, return the potential file path in the +# archive (without extension). +def _get_module_path(self, fullname): + return self.prefix + fullname.rpartition('.')[2] + +# Does this path represent a directory? +def _is_dir(self, path): + # See if this is a "directory". If so, it's eligible to be part + # of a namespace package. We test by seeing if the name, with an + # appended path separator, exists. + dirpath = path + path_sep + # If dirpath is present in self._get_files(), we have a directory. + return dirpath in self._get_files() + +# Return some information about a module. +def _get_module_info(self, fullname): + path = _get_module_path(self, fullname) + for suffix, isbytecode, ispackage in _zip_searchorder: + fullpath = path + suffix + if fullpath in self._get_files(): + return ispackage + return None + + +# implementation + +# _read_directory(archive) -> files dict (new reference) +# +# Given a path to a Zip archive, build a dict, mapping file names +# (local to the archive, using SEP as a separator) to toc entries. +# +# A toc_entry is a tuple: +# +# (__file__, # value to use for __file__, available for all files, +# # encoded to the filesystem encoding +# compress, # compression kind; 0 for uncompressed +# data_size, # size of compressed data on disk +# file_size, # size of decompressed data +# file_offset, # offset of file header from start of archive +# time, # mod time of file (in dos format) +# date, # mod data of file (in dos format) +# crc, # crc checksum of the data +# ) +# +# Directories can be recognized by the trailing path_sep in the name, +# data_size and file_offset are 0. +def _read_directory(archive): + try: + fp = _io.open_code(archive) + except OSError: + raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive) + + with fp: + # GH-87235: On macOS all file descriptors for /dev/fd/N share the same + # file offset, reset the file offset after scanning the zipfile directory + # to not cause problems when some runs 'python3 /dev/fd/9 9= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos): + # Zip64 at "correct" offset from standard EOCD + buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64] + if len(buffer) != END_CENTRAL_DIR_SIZE_64: + raise ZipImportError( + f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte " + f"zip64 central directory, but read {len(buffer)} bytes.", + path=archive) + header_position = file_size - len(data) + pos64 + + central_directory_size = _unpack_uint64(buffer[40:48]) + central_directory_position = _unpack_uint64(buffer[48:56]) + num_entries = _unpack_uint64(buffer[24:32]) + elif pos >= 0: + buffer = data[pos:pos+END_CENTRAL_DIR_SIZE] + if len(buffer) != END_CENTRAL_DIR_SIZE: + raise ZipImportError(f"corrupt Zip file: {archive!r}", + path=archive) + + header_position = file_size - len(data) + pos + + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + central_directory_size = _unpack_uint32(buffer[12:16]) + central_directory_position = _unpack_uint32(buffer[16:20]) + num_entries = _unpack_uint16(buffer[8:10]) + + # N.b. if someday you want to prefer the standard (non-zip64) EOCD, + # you need to adjust position by 76 for arc to be 0. + else: + raise ZipImportError(f'not a Zip file: {archive!r}', + path=archive) + + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + # XXX: These are cursory checks but are not as exact or strict as they + # could be. Checking the arc-adjusted value is probably good too. + if header_position < central_directory_size: + raise ZipImportError(f'bad central directory size: {archive!r}', path=archive) + if header_position < central_directory_position: + raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive) + header_position -= central_directory_size + # On just-a-zipfile these values are the same and arc_offset is zero; if + # the file has some bytes prepended, `arc_offset` is the number of such + # bytes. This is used for pex as well as self-extracting .exe. + arc_offset = header_position - central_directory_position + if arc_offset < 0: + raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive) + + files = {} + # Start of Central Directory + count = 0 + try: + fp.seek(header_position) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + while True: + buffer = fp.read(46) + if len(buffer) < 4: + raise EOFError('EOF read where not expected') + # Start of file header + if buffer[:4] != b'PK\x01\x02': + if count != num_entries: + raise ZipImportError( + f"mismatched num_entries: {count} should be {num_entries} in {archive!r}", + path=archive, + ) + break # Bad: Central Dir File Header + if len(buffer) != 46: + raise EOFError('EOF read where not expected') + flags = _unpack_uint16(buffer[8:10]) + compress = _unpack_uint16(buffer[10:12]) + time = _unpack_uint16(buffer[12:14]) + date = _unpack_uint16(buffer[14:16]) + crc = _unpack_uint32(buffer[16:20]) + data_size = _unpack_uint32(buffer[20:24]) + file_size = _unpack_uint32(buffer[24:28]) + name_size = _unpack_uint16(buffer[28:30]) + extra_size = _unpack_uint16(buffer[30:32]) + comment_size = _unpack_uint16(buffer[32:34]) + file_offset = _unpack_uint32(buffer[42:46]) + header_size = name_size + extra_size + comment_size + + try: + name = fp.read(name_size) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + if len(name) != name_size: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + # On Windows, calling fseek to skip over the fields we don't use is + # slower than reading the data because fseek flushes stdio's + # internal buffers. See issue #8745. + try: + extra_data_len = header_size - name_size + extra_data = memoryview(fp.read(extra_data_len)) + + if len(extra_data) != extra_data_len: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + + if flags & 0x800: + # UTF-8 file names extension + name = name.decode() + else: + # Historical ZIP filename encoding + try: + name = name.decode('ascii') + except UnicodeDecodeError: + name = name.decode('latin1').translate(cp437_table) + + name = name.replace('/', path_sep) + path = _bootstrap_external._path_join(archive, name) + + # Ordering matches unpacking below. + if ( + file_size == MAX_UINT32 or + data_size == MAX_UINT32 or + file_offset == MAX_UINT32 + ): + # need to decode extra_data looking for a zip64 extra (which might not + # be present) + while extra_data: + if len(extra_data) < 4: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + tag = _unpack_uint16(extra_data[:2]) + size = _unpack_uint16(extra_data[2:4]) + if len(extra_data) < 4 + size: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + if tag == ZIP64_EXTRA_TAG: + if (len(extra_data) - 4) % 8 != 0: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + num_extra_values = (len(extra_data) - 4) // 8 + if num_extra_values > 3: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + import struct + values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q", + extra_data, offset=4)) + + # N.b. Here be dragons: the ordering of these is different than + # the header fields, and it's really easy to get it wrong since + # naturally-occurring zips that use all 3 are >4GB + if file_size == MAX_UINT32: + file_size = values.pop(0) + if data_size == MAX_UINT32: + data_size = values.pop(0) + if file_offset == MAX_UINT32: + file_offset = values.pop(0) + + break + + # For a typical zip, this bytes-slicing only happens 2-3 times, on + # small data like timestamps and filesizes. + extra_data = extra_data[4+size:] + else: + _bootstrap._verbose_message( + "zipimport: suspected zip64 but no zip64 extra for {!r}", + path, + ) + # XXX These two statements seem swapped because `central_directory_position` + # is a position within the actual file, but `file_offset` (when compared) is + # as encoded in the entry, not adjusted for this file. + # N.b. this must be after we've potentially read the zip64 extra which can + # change `file_offset`. + if file_offset > central_directory_position: + raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) + file_offset += arc_offset + + t = (path, compress, data_size, file_size, file_offset, time, date, crc) + files[name] = t + count += 1 + finally: + fp.seek(start_offset) + _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive) + + # Add implicit directories. + count = 0 + for name in list(files): + while True: + i = name.rstrip(path_sep).rfind(path_sep) + if i < 0: + break + name = name[:i + 1] + if name in files: + break + files[name] = None + count += 1 + if count: + _bootstrap._verbose_message('zipimport: added {} implicit directories in {!r}', + count, archive) + return files + +# During bootstrap, we may need to load the encodings +# package from a ZIP file. But the cp437 encoding is implemented +# in Python in the encodings package. +# +# Break out of this dependency by using the translation table for +# the cp437 encoding. +cp437_table = ( + # ASCII part, 8 rows x 16 chars + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f' + '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' + ' !"#$%&\'()*+,-./' + '0123456789:;<=>?' + '@ABCDEFGHIJKLMNO' + 'PQRSTUVWXYZ[\\]^_' + '`abcdefghijklmno' + 'pqrstuvwxyz{|}~\x7f' + # non-ASCII part, 16 rows x 8 chars + '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7' + '\xea\xeb\xe8\xef\xee\xec\xc4\xc5' + '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9' + '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192' + '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba' + '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb' + '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556' + '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510' + '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f' + '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567' + '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b' + '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580' + '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4' + '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229' + '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248' + '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0' +) + +_importing_zlib = False + +# Return the zlib.decompress function object, or NULL if zlib couldn't +# be imported. The function is cached when found, so subsequent calls +# don't import zlib again. +def _get_decompress_func(): + global _importing_zlib + if _importing_zlib: + # Someone has a zlib.py[co] in their Zip file + # let's avoid a stack overflow. + _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE') + raise ZipImportError("can't decompress data; zlib not available") + + _importing_zlib = True + try: + from zlib import decompress + except Exception: + _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE') + raise ZipImportError("can't decompress data; zlib not available") + finally: + _importing_zlib = False + + _bootstrap._verbose_message('zipimport: zlib available') + return decompress + +# Given a path to a Zip file and a toc_entry, return the (uncompressed) data. +def _get_data(archive, toc_entry): + datapath, compress, data_size, file_size, file_offset, time, date, crc = toc_entry + if data_size < 0: + raise ZipImportError('negative data size') + + with _io.open_code(archive) as fp: + # Check to make sure the local file header is correct + try: + fp.seek(file_offset) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + buffer = fp.read(30) + if len(buffer) != 30: + raise EOFError('EOF read where not expected') + + if buffer[:4] != b'PK\x03\x04': + # Bad: Local File Header + raise ZipImportError(f'bad local file header: {archive!r}', path=archive) + + name_size = _unpack_uint16(buffer[26:28]) + extra_size = _unpack_uint16(buffer[28:30]) + header_size = 30 + name_size + extra_size + file_offset += header_size # Start of file data + try: + fp.seek(file_offset) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + raw_data = fp.read(data_size) + if len(raw_data) != data_size: + raise OSError("zipimport: can't read data") + + if compress == 0: + # data is not compressed + return raw_data + + # Decompress with zlib + try: + decompress = _get_decompress_func() + except Exception: + raise ZipImportError("can't decompress data; zlib not available") + return decompress(raw_data, -15) + + +# Lenient date/time comparison function. The precision of the mtime +# in the archive is lower than the mtime stored in a .pyc: we +# must allow a difference of at most one second. +def _eq_mtime(t1, t2): + # dostime only stores even seconds, so be lenient + return abs(t1 - t2) <= 1 + + +# Given the contents of a .py[co] file, unmarshal the data +# and return the code object. Raises ImportError it the magic word doesn't +# match, or if the recorded .py[co] metadata does not match the source. +def _unmarshal_code(self, pathname, fullpath, fullname, data): + exc_details = { + 'name': fullname, + 'path': fullpath, + } + + flags = _bootstrap_external._classify_pyc(data, fullname, exc_details) + + hash_based = flags & 0b1 != 0 + if hash_based: + check_source = flags & 0b10 != 0 + if (_imp.check_hash_based_pycs != 'never' and + (check_source or _imp.check_hash_based_pycs == 'always')): + source_bytes = _get_pyc_source(self, fullpath) + if source_bytes is not None: + source_hash = _imp.source_hash( + _imp.pyc_magic_number_token, + source_bytes, + ) + + _bootstrap_external._validate_hash_pyc( + data, source_hash, fullname, exc_details) + else: + source_mtime, source_size = \ + _get_mtime_and_size_of_source(self, fullpath) + + if source_mtime: + # We don't use _bootstrap_external._validate_timestamp_pyc + # to allow for a more lenient timestamp check. + if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or + _unpack_uint32(data[12:16]) != source_size): + _bootstrap._verbose_message( + f'bytecode is stale for {fullname!r}') + return None + + code = marshal.loads(data[16:]) + if not isinstance(code, _code_type): + raise TypeError(f'compiled module {pathname!r} is not a code object') + return code + +_code_type = type(_unmarshal_code.__code__) + + +# Replace any occurrences of '\r\n?' in the input string with '\n'. +# This converts DOS and Mac line endings to Unix line endings. +def _normalize_line_endings(source): + source = source.replace(b'\r\n', b'\n') + source = source.replace(b'\r', b'\n') + return source + +# Given a string buffer containing Python source code, compile it +# and return a code object. +def _compile_source(pathname, source): + source = _normalize_line_endings(source) + return compile(source, pathname, 'exec', dont_inherit=True) + +# Convert the date/time values found in the Zip archive to a value +# that's compatible with the time stamp stored in .pyc files. +def _parse_dostime(d, t): + return time.mktime(( + (d >> 9) + 1980, # bits 9..15: year + (d >> 5) & 0xF, # bits 5..8: month + d & 0x1F, # bits 0..4: day + t >> 11, # bits 11..15: hours + (t >> 5) & 0x3F, # bits 8..10: minutes + (t & 0x1F) * 2, # bits 0..7: seconds / 2 + -1, -1, -1)) + +# Given a path to a .pyc file in the archive, return the +# modification time of the matching .py file and its size, +# or (0, 0) if no source is available. +def _get_mtime_and_size_of_source(self, path): + try: + # strip 'c' or 'o' from *.py[co] + assert path[-1:] in ('c', 'o') + path = path[:-1] + toc_entry = self._get_files()[path] + # fetch the time stamp of the .py file for comparison + # with an embedded pyc time stamp + time = toc_entry[5] + date = toc_entry[6] + uncompressed_size = toc_entry[3] + return _parse_dostime(date, time), uncompressed_size + except (KeyError, IndexError, TypeError): + return 0, 0 + + +# Given a path to a .pyc file in the archive, return the +# contents of the matching .py file, or None if no source +# is available. +def _get_pyc_source(self, path): + # strip 'c' or 'o' from *.py[co] + assert path[-1:] in ('c', 'o') + path = path[:-1] + + try: + toc_entry = self._get_files()[path] + except KeyError: + return None + else: + return _get_data(self.archive, toc_entry) + + +# Get the code object associated with the module specified by +# 'fullname'. +def _get_module_code(self, fullname): + path = _get_module_path(self, fullname) + import_error = None + for suffix, isbytecode, ispackage in _zip_searchorder: + fullpath = path + suffix + _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2) + try: + toc_entry = self._get_files()[fullpath] + except KeyError: + pass + else: + modpath = toc_entry[0] + data = _get_data(self.archive, toc_entry) + code = None + if isbytecode: + try: + code = _unmarshal_code(self, modpath, fullpath, fullname, data) + except ImportError as exc: + import_error = exc + else: + code = _compile_source(modpath, data) + if code is None: + # bad magic number or non-matching mtime + # in byte code, try next + continue + modpath = toc_entry[0] + return code, ispackage, modpath + else: + if import_error: + msg = f"module load failed: {import_error}" + raise ZipImportError(msg, name=fullname) from import_error + else: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) diff --git a/stdlibinit/registry.go b/stdlibinit/registry.go index eeac2ec23..73c67c558 100644 --- a/stdlibinit/registry.go +++ b/stdlibinit/registry.go @@ -174,6 +174,20 @@ import ( // CPython: Modules/_testcapi/vectorcall.c:1 vectorcall fixtures _ "github.com/tamnd/gopy/module/_testcapi" + // Built-in module: _testmultiphase. Registers itself via + // module/_testmultiphase/module.go init(). Reproduces the PEP 489 + // multi-phase init extension's main module so test.test_importlib.util + // imports instead of raising SkipTest. + // CPython: Modules/_testmultiphase.c:447 PyInit__testmultiphase + _ "github.com/tamnd/gopy/module/_testmultiphase" + + // Built-in module: _testsinglephase. Registers itself via + // module/_testsinglephase/module.go init(). Reproduces the legacy + // single-phase init extension and the gh-144601 raise-on-init fixture + // the SubinterpImportTests drive through ExtensionFileLoader. + // CPython: Modules/_testsinglephase.c:533 PyInit__testsinglephase + _ "github.com/tamnd/gopy/module/_testsinglephase" + // Built-in module: _json. Registers itself via // module/_json/module.go init(). Accelerates json.py with // scanstring and encode_basestring helpers. @@ -311,6 +325,13 @@ import ( // CPython: Modules/_winapi.c:3023 _winapi_exec _ "github.com/tamnd/gopy/module/_winapi" + // Built-in module: winreg. Registers itself via module/winreg/ + // module.go init(). Exposes the HKEY_*/KEY_*/REG_* constants and the + // error alias that importlib._bootstrap_external imports at module top + // level on Windows. + // CPython: PC/winreg.c:2121 exec_module + _ "github.com/tamnd/gopy/module/winreg" + // Built-in module: _hashlib. Registers itself via // module/_hashlib/module.go init(). Backs Lib/hashlib.py with the // HASH object type and openssl_* convenience constructors using diff --git a/test/cpython/MANIFEST.txt b/test/cpython/MANIFEST.txt index e5aa99179..c3e851ff7 100644 --- a/test/cpython/MANIFEST.txt +++ b/test/cpython/MANIFEST.txt @@ -173,18 +173,18 @@ test__interpchannels ready post-0.12 73/73 pass (skipped=5) test__interpreters ready post-0.12 69/69 pass (skipped=3) test_crossinterp ready post-0.12 102/102 pass (skipped=1) test_frozen ready v0.8.0 3/3 pass -test_import/ ready v0.8.0 full import system panel -test_importlib/ ready v0.8.0 importlib package — run via python -m test from CPython tree +test_import/ ready v0.8.0 118/118 pass (skipped=4); run via -m unittest test.test_import +test_importlib/ ready v0.8.0 1346/1346 pass (skipped=10); run via -m unittest test.test_importlib test_interpreters/ ready post-0.12 167/167 pass (skipped=5) test_module ready v0.7.0 package dir only (no flat test_module.py in CPython 3.14); see test_module/ -test_module/ ready v0.7.0 module-level fixtures +test_module/ ready v0.7.0 39/39 pass; run via -m unittest test.test_module test_modulefinder ready v0.8.0 17/17 pass test_pkg ready v0.8.0 8/8 pass test_pkgutil ready v0.8.0 21/21 pass; requires test_import/data/package3+4 fixtures (vendored alongside) test_pyclbr ready v0.8.0 6/6 pass test_runpy ready v0.10.1 40/40 pass test_zipapp ready v0.10.1 35/35 pass -test_zipimport ready v0.8.0 91/91 pass (skipped=2) +test_zipimport ready v0.8.0 91/91 pass (skipped=4) test_zipimport_support ready v0.8.0 4/4 pass # ---- GC / weakref -------------------------------------------------- diff --git a/test/cpython/pyclbr_input.py b/test/cpython/pyclbr_input.py new file mode 100644 index 000000000..5535edbfa --- /dev/null +++ b/test/cpython/pyclbr_input.py @@ -0,0 +1,85 @@ +"""Test cases for test_pyclbr.py""" + +def f(): pass + +class Other(object): + @classmethod + def foo(c): pass + + def om(self): pass + +class B (object): + def bm(self): pass + +class C (B): + d = 10 + + # This one is correctly considered by both test_pyclbr.py and pyclbr.py + # as a non-method of C. + foo = Other().foo + + # This causes test_pyclbr.py to fail, but only because the + # introspection-based is_method() code in the test can't + # distinguish between this and a genuine method function like m(). + # + # The pyclbr.py module gets this right as it parses the text. + om = Other.om + f = f + + def m(self): pass + + @staticmethod + def sm(self): pass + + @classmethod + def cm(self): pass + +# Check that mangling is correctly handled + +class a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class ___: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass diff --git a/test/regrtest/runner.go b/test/regrtest/runner.go index d161928f5..efc64f3fc 100644 --- a/test/regrtest/runner.go +++ b/test/regrtest/runner.go @@ -66,11 +66,14 @@ type Runner struct { // Run drives one manifest entry through the gopy binary. Skip / // missing decisions are made before exec; otherwise the binary is // called as ` /` for a file entry, or -// against the package's __main__-equivalent for a directory entry. +// as ` -m unittest test.` for a directory entry. // -// Directory entries are not yet supported (CPython's regrtest invokes -// `python -m unittest `; gopy's import side has to land -// before that works) and surface as OutcomeError. +// CPython's regrtest runs a directory test via `python -m test `, +// which loads the package through unittest discovery. gopy mirrors that +// with `-m unittest test.`: the `test` package resolves from the +// vendored stdlib. The command runs with the corpus directory as its +// working directory so the repo-root `module/` Go source tree does not +// shadow stdlib imports on sys.path[0]. func (r *Runner) Run(ctx context.Context, e Entry) Result { res := Result{Entry: e} @@ -97,17 +100,6 @@ func (r *Runner) Run(ctx context.Context, e Entry) Result { return res } - if e.IsPackage() { - trimmed := strings.TrimSuffix(e.Name, "/") - mainFile := filepath.Join(path, trimmed+".py") - if _, err := os.Stat(mainFile); err != nil { - res.Outcome = OutcomeError - res.Err = fmt.Errorf("regrtest: package %s: no entry point %s", e.Name, mainFile) - return res - } - path = mainFile - } - timeout := r.Timeout if timeout <= 0 { timeout = 60 * time.Second @@ -115,7 +107,15 @@ func (r *Runner) Run(ctx context.Context, e Entry) Result { cctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - cmd := exec.CommandContext(cctx, r.Binary, path) //nolint:gosec // Binary and path come from a vetted manifest, not external user input + // Binary, path and entry name come from a vetted manifest, not external user input. + var cmd *exec.Cmd + if e.IsPackage() { + pkg := "test." + strings.TrimSuffix(e.Name, "/") + cmd = exec.CommandContext(cctx, r.Binary, "-m", "unittest", pkg) //nolint:gosec // vetted manifest input + cmd.Dir = r.Corpus + } else { + cmd = exec.CommandContext(cctx, r.Binary, path) //nolint:gosec // vetted manifest input + } var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr diff --git a/test/regrtest/runner_test.go b/test/regrtest/runner_test.go index 8c44b0214..e2495ff06 100644 --- a/test/regrtest/runner_test.go +++ b/test/regrtest/runner_test.go @@ -115,16 +115,34 @@ func TestRunnerMissingFile(t *testing.T) { } } -func TestRunnerPackageEntryUnsupported(t *testing.T) { +// TestModulesImportsPanelPackages runs the three directory suites of +// the Modules/imports panel (test_import/, test_importlib/, +// test_module/) from the real corpus. The runner drives each through +// `gopy -m unittest test.` and the suites are expected to pass +// end-to-end. This is the regrtest entry point for spec 1731's +// package rows. +func TestModulesImportsPanelPackages(t *testing.T) { + requireSignalModule(t) + requireNonRace(t) bin := buildGopy(t) - corpus := t.TempDir() - if err := os.Mkdir(filepath.Join(corpus, "test_pkg"), 0o755); err != nil { - t.Fatalf("mkdir: %v", err) + repoRoot, err := filepath.Abs(filepath.Join("..", "..")) + if err != nil { + t.Fatalf("repo root: %v", err) } - r := &Runner{Binary: bin, Corpus: corpus, Timeout: 5 * time.Second} - res := r.Run(context.Background(), Entry{Name: "test_pkg/", Status: StatusReady}) - if res.Outcome != OutcomeError { - t.Fatalf("Outcome = %s, want error (package entries not yet wired)", res.Outcome) + corpus := filepath.Join(repoRoot, "test", "cpython") + for _, name := range []string{"test_import/", "test_module/", "test_importlib/"} { + t.Run(strings.TrimSuffix(name, "/"), func(t *testing.T) { + r := &Runner{Binary: bin, Corpus: corpus, Timeout: 300 * time.Second} + res := r.Run(context.Background(), Entry{Name: name, Status: StatusReady}) + if res.Outcome != OutcomePass { + t.Fatalf("Outcome = %s (err=%v stderr=%q)", res.Outcome, res.Err, res.Stderr) + } + combined := res.Stdout + res.Stderr + if !strings.Contains(combined, "OK") { + t.Fatalf("output missing unittest OK summary:\nstdout=%q\nstderr=%q", + res.Stdout, res.Stderr) + } + }) } } diff --git a/vm/build_class.go b/vm/build_class.go index f5a1b1754..c3cb64d75 100644 --- a/vm/build_class.go +++ b/vm/build_class.go @@ -189,7 +189,8 @@ func buildClass(args []objects.Object, kwargs map[string]objects.Object) (object } callArgs := []objects.Object{nameObj, basesTuple, ns} - result, err := objects.Call(meta, objects.NewTuple(callArgs), kwargsToDict(kwargs)) + callTuple := objects.NewTuple(callArgs) + result, err := objects.Call(meta, callTuple, kwargsToDict(kwargs)) // Verify the __class__ cell the body returned was filled with the // freshly created class. A metaclass that drops __classcell__ from the @@ -214,17 +215,26 @@ func buildClass(args []objects.Object, kwargs map[string]objects.Object) (object } } } - // Release the initial NewDict ref. NewTuple copies raw pointers without - // Incref-ing ns, so the only owner remaining after the metaclass call is - // this reference. The metaclass copied every namespace entry into the - // type's descriptor table with its own Incref, so this is the last owner - // of ns. gopy dicts carry no synchronous tp_dealloc, so dropping the - // refcount alone leaves the method functions ns holds pinned by a count - // no live container backs: the class dies, the methods never reclaim, and - // weakref(A.method) never clears. Mirror dict_dealloc and clear the - // namespace contents once ns reaches refcount zero (the precise signal - // that nothing else, e.g. a __prepare__ mapping the caller kept, still - // holds it). + // Release the temporary args tuple the metaclass call owned. NewTuple + // Incref'd each item (name, bases, ns), so dropping the tuple's last + // reference runs tupleDealloc, which decrefs ns back down. Without this + // the tuple lingers as an orphan whose refcount no live container backs; + // the cycle collector then treats it as an external root and the ns it + // points at (plus every method ns holds) never reclaims. + // + // CPython: Python/bltinmodule.c:241 builtin___build_class__ Py_DECREF(margs) + objects.Decref(callTuple) + + // Release the initial NewDict ref. After the args-tuple decref above the + // only owner remaining after the metaclass call is this reference (the + // metaclass copied every namespace entry into the type's descriptor table + // with its own Incref). gopy dicts carry no synchronous tp_dealloc, so + // dropping the refcount alone leaves the method functions ns holds pinned + // by a count no live container backs: the class dies, the methods never + // reclaim, and weakref(A.method) never clears. Mirror dict_dealloc and + // clear the namespace contents once ns reaches refcount zero (the precise + // signal that nothing else, e.g. a __prepare__ mapping the caller kept, + // still holds it). // // CPython: Python/bltinmodule.c:246 builtin___build_class__ Py_DECREF(ns) // CPython: Objects/dictobject.c:2768 dict_dealloc (PyDict_Clear on last decref) diff --git a/vm/builtins_hook.go b/vm/builtins_hook.go index 07aeebc1e..aab4d13f4 100644 --- a/vm/builtins_hook.go +++ b/vm/builtins_hook.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "os" + "strings" "github.com/tamnd/gopy/builtins" pyerrors "github.com/tamnd/gopy/errors" @@ -609,12 +610,13 @@ func currentEvaluator(code *objects.Code, globals, locals, closure objects.Objec // currentImporter is the hook builtins.__import__ delegates to. It // reuses vmExecutor so the import can run frozen / built-in module -// init code, then forwards to imp.ImportModuleLevel. fromlist is -// accepted for signature parity; the existing IMPORT_NAME arm -// likewise drops it pending fromlist-driven submodule discovery. +// init code. fromlist is the raw object the caller passed, threaded +// untouched into _handle_fromlist so a non-str entry raises the same +// TypeError CPython raises and a custom iterable is iterated the same +// way. // // CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject -func currentImporter(name, pkgname string, level int, _ []string) (objects.Object, error) { +func currentImporter(name, pkgname string, level int, fromlist objects.Object, globals objects.Object) (objects.Object, error) { ts := currentThread() if ts == nil { ts = state.NewThread() @@ -624,13 +626,72 @@ func currentImporter(name, pkgname string, level int, _ []string) (objects.Objec // // CPython: Python/import.c:1759 import_name reads interp->builtins_module. var b objects.Object - if f := frameStackFor(ts).Top(); f != nil { - b = callerBuiltins(f) + topFrame := frameStackFor(ts).Top() + if topFrame != nil { + b = callerBuiltins(topFrame) } + + // Prefer the live Python importlib, matching CPython where the builtin + // __import__ IS PyImport_ImportModuleLevelObject. That C body resolves + // the name, drives _gcd_import / _find_and_load, and performs the + // fromlist / dotted-head selection itself. importModuleLevelObject ports + // it; the manual Go driver below only runs during early bootstrap before + // _bootstrap._install has wired the frozen importer. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + // CPython: Python/import.c:3798 PyImport_ImportModuleLevelObject + // + // The globals handed in must be the dict the caller passed to + // __import__, because resolve_name / _calc___package__ derives the + // relative-import anchor from it. A frame-globals fallback would anchor + // a bare __import__('', {'__package__': 'pkg'}, level=2) against the + // caller's own package; a missing globals must reach _calc___package__ + // as None so it raises the same KeyError("'__name__' not in globals"). + // + // CPython: Python/import.c:3576 resolve_name + // CPython: Lib/importlib/_bootstrap.py:1349 _calc___package__ + callerGlobals := globals + if callerGlobals == nil { + callerGlobals = objects.None() + } + if mod, ok, derr := importModuleLevelObject(name, callerGlobals, fromlist, level); ok { + return mod, derr + } + exec := &vmExecutor{ts: ts, builtins: b} mod, err := imp.ImportModuleLevel(exec, name, pkgname, level) if err != nil { + // A missing module must surface as a ModuleNotFoundError whose + // `name` member is the dotted name being imported. runpy reads + // exc.name to decide whether to keep searching, so a generic Go + // error synthesized without the attribute breaks that contract. + // + // CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) + if errors.Is(err, imp.ErrModuleNotFound) { + exc := pyerrors.MakeModuleNotFound(name) + return nil, objects.NewRaisedError(exc, err.Error()) + } return nil, err } + // _handle_fromlist / head-of-dotted-name selection, exactly like the + // IMPORT_NAME opcode path in importName: a non-empty fromlist forces + // the named submodules and returns the deepest module, while an empty + // fromlist for a dotted import returns the top-level package. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + // CPython: Lib/importlib/_bootstrap.py:1463 _handle_fromlist + e := &evalState{ts: ts, f: topFrame} + if !isEmptyFromlist(fromlist) { + if herr := e.handleFromlist(mod, fromlist, false); herr != nil { + return nil, herr + } + return mod, nil + } + if strings.Contains(name, ".") { + top := name[:strings.IndexByte(name, '.')] + if tm, ok := imp.GetModule(top); ok { + return tm, nil + } + } return mod, nil } diff --git a/vm/builtins_hook_test.go b/vm/builtins_hook_test.go index 1e60b0096..32d66c6cc 100644 --- a/vm/builtins_hook_test.go +++ b/vm/builtins_hook_test.go @@ -82,7 +82,7 @@ func TestCurrentImporterRoutesThroughInittab(t *testing.T) { } defer imp.RemoveModule(name) - got, err := currentImporter(name, "", 0, nil) + got, err := currentImporter(name, "", 0, nil, nil) if err != nil { t.Fatalf("currentImporter: %v", err) } diff --git a/vm/copyreg_hook.go b/vm/copyreg_hook.go index ccabb6d47..21f466764 100644 --- a/vm/copyreg_hook.go +++ b/vm/copyreg_hook.go @@ -20,6 +20,52 @@ func init() { objects.BuiltinLookup = builtinLookup objects.CurrentBuiltinsHook = currentBuiltins objects.ImportModuleHook = importModuleByName + objects.ModuleReprHook = moduleReprViaImportlib +} + +// moduleReprViaImportlib renders a module's repr by calling +// importlib._bootstrap._module_repr, the same delegation CPython's C +// module_repr performs through _PyImport_ImportlibModuleRepr. Any +// failure to reach importlib falls back to the minimal Go rendering so +// repr() never raises. +// +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr +func moduleReprViaImportlib(m objects.Object) (string, error) { + bootstrap, ok := imp.GetModule("importlib._bootstrap") + if !ok || bootstrap == nil { + mod, err := importModuleByName("importlib._bootstrap") + if err != nil || mod == nil { + return objects.ModuleReprFallback(m) + } + var modOk bool + if bootstrap, modOk = mod.(*objects.Module); !modOk { + return objects.ModuleReprFallback(m) + } + } + // importlib._bootstrap caches the _bootstrap_external module in a + // module global, normally wired by _install_external_importers during + // the frozen bootstrap. gopy resolves imports Go-side and never runs + // that hook, so _module_repr_from_spec's isinstance(loader, + // NamespaceLoader) check would always miss. Wire the global the way + // _install_external_importers does so the namespace-package repr (and + // the other consumers of the cached module) behave like CPython. + // + // CPython: Lib/importlib/_bootstrap.py:1565 _install_external_importers + if cur, _ := bootstrap.Dict().GetItem(objects.NewStr("_bootstrap_external")); cur == nil || cur == objects.None() { + ext, err := importModuleByName("importlib._bootstrap_external") + if err == nil && ext != nil { + _ = bootstrap.Dict().SetItem(objects.NewStr("_bootstrap_external"), ext) + } + } + fn, err := bootstrap.Dict().GetItem(objects.NewStr("_module_repr")) + if err != nil || fn == nil { + return objects.ModuleReprFallback(m) + } + res, err := objects.Call(fn, objects.NewTuple([]objects.Object{m}), nil) + if err != nil { + return objects.ModuleReprFallback(m) + } + return objects.Str(res) } // importModuleByName imports an absolute module name, returning the @@ -33,6 +79,21 @@ func importModuleByName(name string) (objects.Object, error) { if mod, ok := imp.GetModule(name); ok && mod != nil { return mod, nil } + // PyImport_ImportModule drives the live importlib _gcd_import, which + // recurses parent packages and resolves namespace packages (directories + // with no __init__.py). The Go ImportModule driver below resolves only + // the named module against sys.path and does not import the parents, so + // it misses a deeply dotted namespace submodule. Prefer _gcd_import once + // _frozen_importlib is installed; fall back to the Go driver during early + // bootstrap, before importlib is live. + // + // CPython: Python/import.c:1450 PyImport_ImportModule (_gcd_import) + if frozen, ok := imp.GetModule("_frozen_importlib"); ok && frozen != nil { + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err == nil && gcd != nil { + return objects.Call(gcd, objects.NewTuple([]objects.Object{objects.NewStr(name)}), nil) + } + } ts := currentThread() if ts == nil { ts = state.NewThread() diff --git a/vm/dispatch.go b/vm/dispatch.go index f96604ae3..12282a97a 100644 --- a/vm/dispatch.go +++ b/vm/dispatch.go @@ -29,8 +29,6 @@ import ( // - otherwise: the loop sets InstrPtr = next and continues. // // CPython: Python/ceval.c switch over op -// -//nolint:gocognit // mirrors CPython's ceval.c per-opcode dispatch; complexity is the surface, not algorithmic branching func (e *evalState) dispatch(op compile.Opcode, oparg uint32) (next int, err error) { // CPython: Python/ceval_macros.h:63 INSTRUCTION_STATS. Bumps the // per-opcode counter + pair counter before any specializer / fast @@ -38,34 +36,19 @@ func (e *evalState) dispatch(op compile.Opcode, oparg uint32) (next int, err err // INSTRUCTION_STATS(op) just before the TARGET label). e.recordOpcode(op) // Instrumentation routing: the common case (op is not an - // INSTRUMENTED_ variant) bails on a single [256]bool load. Only - // when op is one of the 21 INSTRUMENTED_ opcodes do we route - // through the LINE handler / PEP 669 callback fire / base-rewrite - // sequence. Pre-D1, monitor.IsInstrumented was called for every - // dispatch and burned ~6% of CPU on the tight bench just on the - // non-instrumented path. + // INSTRUMENTED_ variant) bails on a single [256]bool load inside + // applyInstrumentation. Only when op is one of the 21 INSTRUMENTED_ + // opcodes do we route through the LINE handler / PEP 669 callback fire + // / base-rewrite / EXTENDED_ARG-prefix sequence. // // CPython: Python/ceval.c TARGET(INSTRUMENTED_*) labels are // reached directly via the computed-goto table, so the // non-instrumented path costs zero. Mirrored here by the // instrumentedRewrite gate. - if instrumentedRewrite[op] { - if op == compile.INSTRUMENTED_LINE { - newOp, err := e.handleInstrumentedLine() - if err != nil { - return 0, err - } - op = newOp - if !instrumentedRewrite[op] { - goto afterInstrument - } - } - if err := e.fireInstrumented(op, oparg); err != nil { - return 0, err - } - op = instrumentedToBase[op] + op, oparg, err = e.applyInstrumentation(op, oparg) + if err != nil { + return 0, err } -afterInstrument: // Specializer routing: only Quickened code carries inline-cache // counters and specialized variants; non-Quickened code (raw // compile output before specialize.Quicken) skips the entire @@ -77,7 +60,21 @@ afterInstrument: // CPython: Python/ceval.c only enters the adaptive ladder under // the per-opcode TARGET() label, never on the generic // non-quickened body. - if e.f.Code.Quickened { + // + // An instrumented instruction never enters the adaptive ladder: its + // visible bytecode byte is an INSTRUMENTED_ marker, and the + // specializer / unspecializer write the rewritten opcode straight + // into code[InstrPtr], which would clobber the marker and orphan the + // original opcode parked in the per-instruction / line side table. + // CPython avoids this by dispatching instrumented code through the + // TARGET(INSTRUMENTED_*) labels, which carry no specialization + // counter logic; specialization only fires on the bare adaptive + // target. applyInstrumentation already resolved op to the runnable + // base opcode, so the adaptive pass has nothing left to do here. + // + // CPython: Python/instrumentation.c the instrumented opcodes are not + // specialized; specialization runs only on the de-instrumented form. + if e.f.Code.Quickened && !monitor.IsInstrumented(compile.Opcode(e.f.Code.Code[e.f.InstrPtr])) { if next, ok, err := e.trySpecialized(op, oparg); ok { return next, err } @@ -88,19 +85,19 @@ afterInstrument: // fresh op and give the fast-path arm a shot before // falling back to the generic body. op = compile.Opcode(e.f.Code.Code[e.f.InstrPtr]) - // The re-read may yield INSTRUMENTED_LINE when the slot - // was overwritten by the monitoring shadow walk. Resolve - // the original opcode without re-firing the line event; - // the fire already happened above. + // The re-read may yield an instrumentation marker + // (INSTRUMENTED_LINE when the slot was overwritten by the + // monitoring shadow walk, or INSTRUMENTED_INSTRUCTION when + // opcode tracing hides the real opcode in the per-instruction + // side table). Resolve the original opcode without re-firing + // the event; the fire already happened above. // // CPython: Python/ceval.c DISPATCH_GOTO avoids this by - // jumping directly to TARGET(INSTRUMENTED_LINE) from the - // adaptive rewrite path, which then re-enters the line - // handler. Here we short-circuit to the opcode lookup. - if op == compile.INSTRUMENTED_LINE { - instr := e.f.InstrPtr / 2 - data := monitor.CoMonitoring(e.f.Code) - op = monitor.GetOriginalOpcode(data, instr) + // jumping directly to TARGET(INSTRUMENTED_*) from the + // adaptive rewrite path, which then re-enters the marker + // handler. Here we short-circuit to the resolved opcode. + if monitor.IsInstrumented(op) { + op = monitor.GetBaseCodeUnit(e.f.Code, e.f.InstrPtr/2) if op == 0 { op = compile.NOP } @@ -147,6 +144,78 @@ afterInstrument: return 0, opcodeNotImplemented(op) } +// applyInstrumentation runs the INSTRUMENTED_ routing for op (LINE handler, +// PEP 669 callback fire, base rewrite) and then resolves any EXTENDED_ARG +// prefix the rewrite exposed, returning the real opcode and accumulated arg the +// generic dispatch body should run. The non-instrumented path is a single +// [256]bool load plus the EXTENDED_ARG fast-out. +// +// CPython: Python/ceval.c TARGET(INSTRUMENTED_*) +func (e *evalState) applyInstrumentation(op compile.Opcode, oparg uint32) (compile.Opcode, uint32, error) { + if instrumentedRewrite[op] { + if op == compile.INSTRUMENTED_LINE { + newOp, err := e.handleInstrumentedLine() + if err != nil { + return 0, 0, err + } + op = newOp + if !instrumentedRewrite[op] { + return e.resolveExtendedArgPrefix(op, oparg) + } + } + // INSTRUMENTED_INSTRUCTION fires the per-instruction (opcode) + // event, then re-dispatches the opcode it hides. That opcode may + // itself be an INSTRUMENTED_ variant (a monitored site + // that also carries opcode tracing), so fall through to the + // event-fire block below. + // + // CPython: Python/bytecodes.c INSTRUMENTED_INSTRUCTION + if op == compile.INSTRUMENTED_INSTRUCTION { + newOp, err := e.handleInstrumentedInstruction() + if err != nil { + return 0, 0, err + } + op = newOp + if !instrumentedRewrite[op] { + return e.resolveExtendedArgPrefix(op, oparg) + } + } + if err := e.fireInstrumented(op, oparg); err != nil { + return 0, 0, err + } + op = instrumentedToBase[op] + } + return e.resolveExtendedArgPrefix(op, oparg) +} + +// resolveExtendedArgPrefix consumes an EXTENDED_ARG prefix that surfaced from +// the instrumented-line handler (the line started on a prefixed instruction) +// and returns the trailing real opcode and accumulated arg; for any other +// opcode it is a pass-through. EXTENDED_ARG never reaches here from the +// straight-line fetch path, which consumes the prefix run before dispatch. The +// trailing opcode may itself be instrumented (the jump or call the prefix feeds +// is a monitored site), and the instrumentation block already ran for the +// EXTENDED_ARG slot, so its event is fired and rebased here. This mirrors +// CPython dispatching from TARGET(EXTENDED_ARG) straight into TARGET(INSTRUMENTED_*). +// +// CPython: Python/ceval.c TARGET(EXTENDED_ARG) +func (e *evalState) resolveExtendedArgPrefix(op compile.Opcode, oparg uint32) (compile.Opcode, uint32, error) { + if op != compile.EXTENDED_ARG { + return op, oparg, nil + } + realOp, realArg, ok := e.fetchExtended(e.f.InstrPtr, oparg) + if !ok { + return 0, 0, opcodeNotImplemented(compile.EXTENDED_ARG) + } + if instrumentedRewrite[realOp] { + if err := e.fireInstrumented(realOp, realArg); err != nil { + return 0, 0, err + } + realOp = instrumentedToBase[realOp] + } + return realOp, realArg, nil +} + // opcodeNotImplemented wraps ErrNotImplemented with the offending op. func opcodeNotImplemented(op compile.Opcode) error { return ¬Implemented{op: op} diff --git a/vm/eval.go b/vm/eval.go index 7d58729d5..b01c9b5c5 100644 --- a/vm/eval.go +++ b/vm/eval.go @@ -17,6 +17,7 @@ import ( "github.com/tamnd/gopy/compile" "github.com/tamnd/gopy/frame" "github.com/tamnd/gopy/gil" + "github.com/tamnd/gopy/monitor" "github.com/tamnd/gopy/objects" "github.com/tamnd/gopy/stackref" "github.com/tamnd/gopy/state" @@ -408,6 +409,28 @@ func (e *evalState) run() (objects.Object, error) { if e.handleException(err) { continue } + // No handler in this frame: the exception propagates to the + // caller and this activation record is about to be torn down. + // CPython's exception_unwind clears the whole frame (every + // remaining operand-stack temporary is Py_XDECREF'd) before + // _PyEvalFrameClearAndPop hands control up. gopy otherwise + // defers that release to FrameStack.Pop -> frame.Clear, which + // runs only after chunk.Pop has snapshotted the frame for any + // live tb_frame wrapper. A traceback attached during this same + // unwind wraps this very frame, so a stale exc-info temporary + // still sitting on the operand stack would be copied (and + // Incref'd) into the snapshot, forming a traceback -> snapshot + // -> operand-stack -> traceback cycle that pins the frame's + // locals (e.g. a `with _ModuleLockManager(name)` manager) long + // after the exception itself is gone. Releasing the operand + // stack here matches CPython and leaves the snapshot to capture + // only fast locals / cells / frees, exactly what tb_frame + // exposes. + // + // CPython: Python/ceval.c exception_unwind (_PyEvalFrameClearAndPop) + if e.f.Owner != frame.OwnedByGenerator { + e.f.DropStack(e.f.StackTop) + } return nil, err } e.f.InstrPtr = next @@ -474,6 +497,24 @@ func (e *evalState) advance() int { return ip + 2 } op := compile.Opcode(code[ip]) + // Under monitoring the live byte at ip may be INSTRUMENTED_LINE (a + // marker left in place while dispatch runs the hidden opcode), + // INSTRUMENTED_INSTRUCTION (opcode tracing hides the real opcode in + // the per-instruction side table), or an INSTRUMENTED_ variant. + // All three preserve the base opcode's inline cache layout, but the + // cache table is keyed by base opcode, so a stride computed off the + // raw instrumented byte would count zero cache codeunits and land one + // codeunit short. GetBaseCodeUnit walks the line table, then the + // per-instruction table, then the de-instrument / deopt maps, so it + // recovers the true base opcode whichever marker is on top. + // + // CPython: the JUMPBY stride is the base arm's compile-time + // INLINE_CACHE_ENTRIES_, independent of the instrumented byte. + if monitor.IsInstrumented(op) { + op = monitor.GetBaseCodeUnit(e.f.Code, ip/2) + } else { + op = monitor.DeInstrument(op) + } return ip + 2 + 2*compile.CacheCount(op) } diff --git a/vm/eval_call.go b/vm/eval_call.go index 4febc5a1f..682394593 100644 --- a/vm/eval_call.go +++ b/vm/eval_call.go @@ -239,6 +239,12 @@ func init() { // // CPython: pycore_frame.h _PyThreadState_GetFrame is the same shape. objects.CurrentFrameHook = currentInterpreterFrame + // Arm opcode-level tracing when Python assigns frame.f_trace_opcodes + // (bdb / pdb). objects/ cannot reach the monitoring instrumentation + // directly, so it routes through this hook. + // + // CPython: Python/legacy_tracing.c:159 _PyEval_SetOpcodeTrace + objects.SetOpcodeTraceHook = setOpcodeTraceHook // Expose the same hook to module/sys for sys._getframe(). // // CPython: Python/sysmodule.c:1180 sys__getframe_impl @@ -257,6 +263,36 @@ func init() { } ts.SetException(nil) } + // Save/restore the thread's raised exception around a __del__ call. + // slotTpFinalize uses these to mirror slot_tp_finalize's + // PyErr_GetRaisedException / PyErr_SetRaisedException bracket so a + // finalizer fired mid-unwind cannot disturb the in-flight exception. + // + // CPython: Objects/typeobject.c:9883 slot_tp_finalize + objects.SaveRaisedExceptionHook = func() objects.Object { + ts := currentThread() + if ts == nil { + return nil + } + if exc := pyerrors.Occurred(ts); exc != nil { + ts.SetException(nil) + return exc + } + return nil + } + objects.RestoreRaisedExceptionHook = func(o objects.Object) { + ts := currentThread() + if ts == nil { + return + } + if o == nil { + ts.SetException(nil) + return + } + if exc, ok := o.(*pyerrors.Exception); ok { + ts.SetException(exc) + } + } // seqIterNext's fast path returns a Go errIndexOutOfRange without ever // installing an IndexError on the thread state, so it must not blindly // clear: an exception being raised while its repr walks a contained diff --git a/vm/eval_dispatch_handwritten.go b/vm/eval_dispatch_handwritten.go index 092e0d5cd..224564cb6 100644 --- a/vm/eval_dispatch_handwritten.go +++ b/vm/eval_dispatch_handwritten.go @@ -217,13 +217,20 @@ func deriveGroupHere(src *pyerrors.Exception, subset []*pyerrors.Exception) *pye return pyerrors.New(src.ExcType, objects.NewTuple([]objects.Object{message, leaves})) } -// CPython: Python/bytecodes.c LOAD_CONST: (-- value) reads from frame->code->co_consts[oparg]. +// LOAD_CONST pushes a NEW (owned) reference to co_consts[oparg]: the +// const stays alive in the code object, so the stack slot must own its +// own strong reference. Otherwise an opcode that decrefs its inputs +// (CALL_KW decrefing the kwnames const, BUILD_* consuming a const, +// etc.) drives the shared const's refcount to zero and frees an object +// the code object still references. Mirrors PyStackRef_FromPyObjectNew. +// +// CPython: Python/bytecodes.c LOAD_CONST (value = PyStackRef_FromPyObjectNew(GETITEM(...))) func (e *evalState) opLOAD_CONST(oparg uint32) (next int, ok bool, err error) { co := e.f.Code if int(oparg) >= len(co.Consts) { return 0, true, fmt.Errorf("vm: LOAD_CONST index %d out of range", oparg) } - e.pushObject(e.constAt(int(oparg))) + e.push(stackref.FromObjectNew(e.constAt(int(oparg)))) return e.advance(), true, nil } diff --git a/vm/eval_helpers.go b/vm/eval_helpers.go index d5a3947b7..866844072 100644 --- a/vm/eval_helpers.go +++ b/vm/eval_helpers.go @@ -275,12 +275,24 @@ func (e *evalState) importName(name, fromlist, level objects.Object) objects.Obj mod, ierr := imp.ImportModuleLevel(exec, modname, pkgname, lvl) if ierr != nil { if errors.Is(ierr, imp.ErrModuleNotFound) { - pyerrors.SetString(e.ts, pyerrors.PyExc_ModuleNotFoundError, - fmt.Sprintf("No module named %q", modname)) + pyerrors.SetModuleNotFound(e.ts, modname) } e.pendingErr = ierr return nil } + // A non-empty fromlist drives _handle_fromlist: force-import any + // submodule named in the fromlist that is not already an attribute, + // so a later IMPORT_FROM/import_all_from finds it via plain getattr. + // CPython runs this inside __import__ before returning the module. + // + // CPython: Lib/importlib/_bootstrap.py:1463 _handle_fromlist + if !isEmptyFromlist(fromlist) { + if herr := e.handleFromlist(mod, fromlist, false); herr != nil { + e.pendingErr = herr + return nil + } + } + // When fromlist is empty (`import a.b.c`) return the top-level // package; otherwise return the deepest module so IMPORT_FROM can // extract attributes. diff --git a/vm/eval_import.go b/vm/eval_import.go index b3597f3a2..0613cfce2 100644 --- a/vm/eval_import.go +++ b/vm/eval_import.go @@ -13,6 +13,7 @@ import ( "fmt" "strings" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/compile" pyerrors "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/frame" @@ -44,11 +45,11 @@ func callerBuiltins(f *frame.Frame) objects.Object { // the mapping lacks the key, and (nil, false, err) for a real failure. // // CPython: Python/ceval.c:2805 PyMapping_GetOptionalItemString(f_builtins, "__import__") -func optionalImportFunc(builtins objects.Object) (objects.Object, bool, error) { - if builtins == nil { +func optionalImportFunc(builtinsMap objects.Object) (objects.Object, bool, error) { + if builtinsMap == nil { return nil, false, nil } - return objects.MappingGetOptionalItem(builtins, objects.NewStr("__import__")) + return objects.MappingGetOptionalItem(builtinsMap, objects.NewStr("__import__")) } // isDefaultImport reports whether fn is the built-in __import__ the @@ -58,15 +59,7 @@ func optionalImportFunc(builtins objects.Object) (objects.Object, bool, error) { // // CPython: Python/ceval.c:2820 import_name (fast-path identity check) func isDefaultImport(fn objects.Object) bool { - bm, ok := imp.GetModule("builtins") - if !ok || bm == nil { - return false - } - def, err := bm.Dict().GetItem(objects.NewStr("__import__")) - if err != nil || def == nil { - return false - } - return fn == def + return builtins.DefaultImport != nil && fn == builtins.DefaultImport } // frameHasExplicitBuiltins reports whether the frame's globals carry an @@ -196,10 +189,43 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo } level := importLevel(levelObj) + // A relative import requires __package__ to be a string. CPython's + // _sanity_check raises TypeError before any resolution when level>0 + // and __package__ is set to a non-string (e.g. an object()). + // + // CPython: Lib/importlib/_bootstrap.py:1390 _sanity_check + if level > 0 { + if terr := checkPackageType(e.f.Globals); terr != nil { + return 0, true, terr + } + } pkgname := globalName(e.f.Globals) + // Route through the live Python importlib the way CPython's + // import_name calls the builtin __import__ (= + // _frozen_importlib.__import__). _bootstrap.__import__ runs + // _find_and_load / _handle_fromlist and returns the head of a dotted + // name for an empty fromlist, so the module pushed here is already + // the one CPython would push. Delegating keeps a single import path + // so a patched loader.exec_module fires and the traceback carries the + // frames. Only when the bootstrap is not yet + // installed (early startup) does the Go driver below run. + // + // CPython: Python/ceval.c:2898 import_name + if mod, ok, derr := importViaDelegate(modname, orNone(e.f.Globals), orNone(fromlistObj), level); ok { + if derr != nil { + // CPython: Python/import.c:3959 import_name trims the importlib + // machinery frames off the traceback before the calling frame is + // recorded on the way out. + removeImportlibFrames(e.ts) + return 0, true, derr + } + e.pushObject(mod) + return e.advance(), true, nil + } + exec := &vmExecutor{ts: e.ts, builtins: builtinsNS} - mod, ierr := imp.ImportModuleLevel(exec, modname, pkgname, level) + mod, ierr := imp.ImportModuleLevelObject(exec, modname, pkgname, level) if ierr != nil { // Promote Go-level ErrModuleNotFound into a typed // ModuleNotFoundError so `try: ... except ImportError:` @@ -208,13 +234,39 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo // the import-machinery contract. // // CPython: Python/import.c:1759 import_name (sets ImportError) - if errors.Is(ierr, imp.ErrModuleNotFound) { - pyerrors.SetString(e.ts, pyerrors.PyExc_ModuleNotFoundError, - fmt.Sprintf("No module named %q", modname)) + // + // A failure raised while executing the module body (the imported + // module itself ran a failing `import`, etc.) already left the + // real exception on the thread state with its own traceback, so + // re-synthesizing here would discard it and the inner frame it + // points at. Only synthesize for a genuine lookup miss. + // + // CPython: Python/import.c:1759 import_name only sets the error + // when PyImport_ImportModuleLevelObject returns NULL without one. + if errors.Is(ierr, imp.ErrBlockedNone) { + // sys.modules[name] is None: raise the halted ModuleNotFoundError + // with name set, so `except ImportError as exc: exc.name` works. + // A blocked sentinel is always an absolute name (level 0), so + // modname is already the resolved key in sys.modules. + pyerrors.SetModuleNotFoundHalted(e.ts, modname) + } else if errors.Is(ierr, imp.ErrModuleNotFound) && !errors.Is(ierr, imp.ErrModuleExecFailed) { + pyerrors.SetModuleNotFound(e.ts, modname) } return 0, true, ierr } + // A non-empty fromlist drives _handle_fromlist: force-import any + // submodule named in the fromlist that the package does not already + // expose, so the IMPORT_FROM / import_all_from that follows resolves + // it via a plain attribute read. + // + // CPython: Lib/importlib/_bootstrap.py:1409 _handle_fromlist + if !isEmptyFromlist(fromlistObj) { + if herr := e.handleFromlist(mod, fromlistObj, false); herr != nil { + return 0, true, herr + } + } + // CPython semantics: when fromlist is None/empty (plain `import // a.b.c`), push the TOP-LEVEL package so the name `a` is bound. // When fromlist is non-empty (`from a.b import c`), push the @@ -222,7 +274,7 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo // // CPython: Python/bytecodes.c IMPORT_NAME comment "return the // head of the dotted name" when fromlist is empty. - result := objects.Object(mod) + result := mod if isEmptyFromlist(fromlistObj) && strings.Contains(modname, ".") { top := strings.SplitN(modname, ".", 2)[0] if tm, ok := imp.GetModule(top); ok { @@ -279,18 +331,25 @@ func (e *evalState) importStar(from objects.Object) error { var all []objects.Object skipUnder := false - // Check for __all__. - allAttr, aerr := objects.GetAttr(from, objects.NewStr("__all__")) - if aerr == nil && allAttr != nil { + // Prefer __all__; fall back to __dict__ keys (skipping leading "_"). + // Neither read force-imports anything: _handle_fromlist already + // pulled in the fromlist's submodules during IMPORT_NAME. + allAttr, allFound, aerr := getOptionalAttr(e, from, "__all__") + if aerr != nil { + return aerr + } + if allFound { items, ierr := iterToSlice(allAttr) if ierr != nil { return ierr } all = items } else { - // Fall back to __dict__ keys, skipping names starting with "_". - dictAttr, derr := objects.GetAttr(from, objects.NewStr("__dict__")) - if derr != nil || dictAttr == nil { + dictAttr, dictFound, derr := getOptionalAttr(e, from, "__dict__") + if derr != nil { + return derr + } + if !dictFound { return fmt.Errorf("ImportError: from-import-* object has no __dict__ and no __all__") } items, ierr := iterToSlice(dictAttr) @@ -302,20 +361,21 @@ func (e *evalState) importStar(from objects.Object) error { } for _, nameObj := range all { - name, nerr := objects.Str(nameObj) - if nerr != nil { - return fmt.Errorf("TypeError: 'import *' name must be str") + name, ok := nameObj.(*objects.Unicode) + if !ok { + return importStarNonStrError(from, nameObj, skipUnder) } - if skipUnder && name != "" && name[0] == '_' { + s := name.Value() + if skipUnder && s != "" && s[0] == '_' { continue } - val, verr := objects.GetAttr(from, objects.NewStr(name)) + val, verr := objects.GetAttr(from, objects.NewStr(s)) if verr != nil { return verr } - serr := dst.SetItem(objects.NewStr(name), val) - // CPython: Python/ceval.c import_star_from — always releases the - // GetAttr new-ref after SetItem takes its own. + serr := dst.SetItem(objects.NewStr(s), val) + // CPython: Python/intrinsics.c import_all_from releases the GetAttr + // new-ref after SetItem takes its own. objects.Decref(val) if serr != nil { return serr @@ -324,6 +384,28 @@ func (e *evalState) importStar(from objects.Object) error { return nil } +// importStarNonStrError builds the TypeError import_all_from raises for a +// non-string entry in __all__ (or non-string key in __dict__). When the +// module's own __name__ is not a string, the error is about __name__ +// itself. +// +// CPython: Python/intrinsics.c:77 import_all_from (non-str name branch) +func importStarNonStrError(from, name objects.Object, skipUnder bool) error { + modNameObj, err := objects.GetAttr(from, objects.NewStr("__name__")) + if err != nil { + return err + } + mn, ok := modNameObj.(*objects.Unicode) + if !ok { + return fmt.Errorf("TypeError: module __name__ must be a string, not %s", modNameObj.Type().Name) + } + key, container := "Item", "__all__" + if skipUnder { + key, container = "Key", "__dict__" + } + return fmt.Errorf("TypeError: %s in %s.%s must be str, not %s", key, mn.Value(), container, name.Type().Name) +} + // isEmptyFromlist reports whether fromlist is None, the empty tuple, or // the empty list. This mirrors CPython's check in import_name: // "if fromlist is NULL or fromlist is empty tuple, head is returned". @@ -369,6 +451,27 @@ func importLevel(obj objects.Object) int { // module path while __package__ correctly points at the parent. // // CPython: Python/import.c:1665 import_name (read __package__ first) +// checkPackageType returns a TypeError when globals carries a __package__ +// that is set (not None) but is not a string. A relative import with such a +// package is rejected before resolution. +// +// CPython: Lib/importlib/_bootstrap.py:1390 _sanity_check ("__package__ not +// set to a string") +func checkPackageType(globals objects.Object) error { + d, ok := globals.(*objects.Dict) + if !ok { + return nil + } + v, _ := d.GetItem(objects.NewStr("__package__")) + if v == nil || objects.IsNone(v) { + return nil + } + if _, isStr := v.(*objects.Unicode); !isStr { + return fmt.Errorf("TypeError: __package__ not set to a string") + } + return nil +} + func globalName(globals objects.Object) string { if globals == nil { return "" @@ -377,20 +480,45 @@ func globalName(globals objects.Object) string { if !ok { return "" } + // __package__ takes precedence and is returned verbatim, even when it + // is the empty string: an empty package with a relative import is + // exactly the "no known parent package" case resolveAbsName rejects. + // Only a missing or None __package__ falls through to derivation. + // + // CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ if v, err := d.GetItem(objects.NewStr("__package__")); err == nil && v != nil && !objects.IsNone(v) { - if s, serr := objects.Str(v); serr == nil && s != "" { + if s, serr := objects.Str(v); serr == nil { return s } } + // __spec__.parent is the next anchor when no explicit __package__ is set. + // + // CPython: Lib/importlib/_bootstrap.py:1358 _calc___package__ (spec.parent) + if v, err := d.GetItem(objects.NewStr("__spec__")); err == nil && v != nil && !objects.IsNone(v) { + if parent, perr := objects.GetAttr(v, objects.NewStr("parent")); perr == nil && parent != nil && !objects.IsNone(parent) { + if s, serr := objects.Str(parent); serr == nil { + return s + } + } + } + // Fall back to __name__. A package (one carrying __path__) anchors at + // its own name; a plain module strips its final dotted component. For + // __main__ this yields "" so a relative import raises. + // + // CPython: Lib/importlib/_bootstrap.py:1362 _calc___package__ (rpartition) v, err := d.GetItem(objects.NewStr("__name__")) if err != nil || v == nil { return "" } - if tp := v.Type(); tp.Str != nil { - s, serr := tp.Str(v) - if serr == nil { - return s - } + s, serr := objects.Str(v) + if serr != nil { + return "" + } + if hp, herr := d.GetItem(objects.NewStr("__path__")); herr == nil && hp != nil { + return s + } + if dot := strings.LastIndex(s, "."); dot >= 0 { + return s[:dot] } return "" } @@ -438,11 +566,18 @@ func isAttributeErrorMsg(err error) bool { return strings.HasPrefix(msg, "AttributeError:") } -// evalImportFrom ports _PyEval_ImportFrom. It tries to fetch `name` as -// an attribute of `v`; on miss it consults sys.modules under -// "." using the parent's __name__. As a gopy-specific -// extension (we lack importlib's _handle_fromlist plumbing), it -// force-imports the submodule when sys.modules has not cached it yet. +// errImportFromRaised is a sentinel returned by evalImportFrom after it +// has already installed a typed ImportError on the thread state. The VM +// unwind reads the thread-state exception, so the Go error only needs to +// be non-nil to signal failure. +var errImportFromRaised = errors.New("vm: import-from error raised") + +// evalImportFrom ports _PyEval_ImportFrom. It fetches `name` as an +// attribute of `v`; on miss it falls back to reading "." +// straight out of sys.modules (the circular-import path), and when that +// also misses it raises the "cannot import name X from Y (location)" +// ImportError, reproducing the stdlib-shadowing and circular-import +// message variants. // // CPython: Python/ceval.c:3154 _PyEval_ImportFrom func evalImportFrom(e *evalState, v objects.Object, name string) (objects.Object, error) { @@ -452,33 +587,208 @@ func evalImportFrom(e *evalState, v objects.Object, name string) (objects.Object return x, nil } - // Issue #17636 fallback: read parent.__name__ and look up - // "." in sys.modules. + // Issue #17636: in case this failed because of a circular relative + // import, fall back on reading the module directly from sys.modules. modNameObj, found, err := getOptionalAttr(e, v, "__name__") if err != nil { return nil, err } - if !found { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from ", name) + // CPython requires PyUnicode_Check (str or subclass); a non-str + // __name__ is treated as missing. + var modNameStr objects.Object + if found && objects.IsSubtype(modNameObj.Type(), objects.StrType()) { + modNameStr = modNameObj + } + if modNameStr != nil { + if s, ok := modNameStr.(*objects.Unicode); ok { + full := s.Value() + "." + name + if cached, ok := imp.GetModule(full); ok { + return cached, nil + } + } + } + + return nil, e.importFromError(v, name, modNameStr) +} + +// importFromError builds and raises the ImportError for a failed +// `from v import name`, porting the error block of _PyEval_ImportFrom. +// +// CPython: Python/ceval.c:3185 _PyEval_ImportFrom (error label) +func (e *evalState) importFromError(v objects.Object, name string, modNameObj objects.Object) error { + nameRepr, _ := objects.Repr(objects.NewStr(name)) + // mod_name_or_unknown is the real __name__ object when present, else a + // fresh "" str. It is the object handed to + // PySet_Contains so an unhashable __name__ raises through. + haveModName := modNameObj != nil + modNameOrUnknownObj := modNameObj + if !haveModName { + modNameOrUnknownObj = objects.NewStr("") + } + modRepr, _ := objects.Repr(modNameOrUnknownObj) + + // modName is the value forwarded as the ImportError `name` member: the + // real module name when __name__ was a string, else unset. + modName := "" + if haveModName { + if s, ok := modNameObj.(*objects.Unicode); ok { + modName = s.Value() + } } - parentName, serr := objects.Str(modNameObj) + + spec, specFound, serr := getOptionalAttr(e, v, "__spec__") if serr != nil { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from ", name) + return serr + } + if !specFound { + msg := fmt.Sprintf("cannot import name %s from %s (unknown location)", nameRepr, modRepr) + pyerrors.SetImportErrorWithNameFrom(e.ts, msg, modName, "", name) + return errImportFromRaised + } + + origin, originFound, oerr := imp.SpecFileOrigin(spec) + if oerr != nil { + return oerr + } + + shadowing, sherr := imp.ModuleIsPossiblyShadowing(originFound, origin) + if sherr != nil { + return sherr + } + shadowingStdlib := false + if shadowing { + c, cerr := imp.StdlibModuleNamesContains(modNameOrUnknownObj) + if cerr != nil { + return cerr + } + shadowingStdlib = c + } + + // Fall back to __file__ for diagnostics when the spec carries no + // location origin and v is a module. + if !originFound { + if mod, ok := v.(*objects.Module); ok { + if f, ferr := mod.Dict().GetItem(objects.NewStr("__file__")); ferr == nil && f != nil { + if fs, ok := f.(*objects.Unicode); ok { + origin = fs.Value() + originFound = true + } + } + } + } + + var msg string + switch { + case shadowingStdlib: + originRepr, _ := objects.Repr(objects.NewStr(origin)) + msg = fmt.Sprintf("cannot import name %s from %s (consider renaming %s since it has the same name as the standard library module named %s and prevents importing that standard library module)", + nameRepr, modRepr, originRepr, modRepr) + default: + initializing, ierr := imp.SpecIsInitializing(spec) + if ierr != nil { + return ierr + } + switch { + case initializing && shadowing: + originRepr, _ := objects.Repr(objects.NewStr(origin)) + msg = fmt.Sprintf("cannot import name %s from %s (consider renaming %s if it has the same name as a library you intended to import)", + nameRepr, modRepr, originRepr) + case initializing && originFound: + msg = fmt.Sprintf("cannot import name %s from partially initialized module %s (most likely due to a circular import) (%s)", + nameRepr, modRepr, origin) + case initializing: + msg = fmt.Sprintf("cannot import name %s from partially initialized module %s (most likely due to a circular import)", + nameRepr, modRepr) + case originFound: + msg = fmt.Sprintf("cannot import name %s from %s (%s)", nameRepr, modRepr, origin) + default: + msg = fmt.Sprintf("cannot import name %s from %s (unknown location)", nameRepr, modRepr) + } + } + + originArg := "" + if originFound { + originArg = origin } - full := parentName + "." + name + pyerrors.SetImportErrorWithNameFrom(e.ts, msg, modName, originArg, name) + return errImportFromRaised +} - if cached, ok := imp.GetModule(full); ok { - return cached, nil +// handleFromlist ports _handle_fromlist: for a package module (one that +// carries __path__), force-import each fromlist entry that is not already +// an attribute so a later attribute read resolves the submodule. A `*` +// entry recurses over module.__all__; a non-str entry raises TypeError. +// +// CPython: Lib/importlib/_bootstrap.py:1409 _handle_fromlist +func (e *evalState) handleFromlist(mod objects.Object, fromlist objects.Object, recursive bool) error { + // _handle_fromlist runs only for packages (hasattr(module, '__path__')). + // __import__ guards the call with the same check, and a non-module + // cached entry never carries __path__, so it no-ops here. + // + // CPython: Lib/importlib/_bootstrap.py:1503 elif hasattr(module, '__path__') + if !recursive { + if _, present, herr := getOptionalAttr(e, mod, "__path__"); herr != nil { + return herr + } else if !present { + return nil + } } - // gopy extension: no _handle_fromlist runs during IMPORT_NAME, so - // the submodule may never have entered sys.modules. Force-import - // it here. CPython's _handle_fromlist (Lib/importlib/_bootstrap.py) - // performs the same _call_with_frames_removed(import_, ...) per - // fromlist entry. - exec := &vmExecutor{ts: e.ts, builtins: callerBuiltins(e.f)} - sub, ierr := imp.ImportModuleLevel(exec, full, "", 0) - if ierr != nil { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from %q: %w", name, parentName, ierr) + + items, err := iterToSlice(fromlist) + if err != nil { + return err } - return sub, nil + modName := "" + if nm, present, _ := getOptionalAttr(e, mod, "__name__"); present { + if s, ok := nm.(*objects.Unicode); ok { + modName = s.Value() + } + } + + for _, item := range items { + x, ok := item.(*objects.Unicode) + if !ok { + where := "``from list''" + if recursive { + where = modName + ".__all__" + } + return fmt.Errorf("TypeError: Item in %s must be str, not %s", where, item.Type().Name) + } + entry := x.Value() + switch entry { + case "*": + if !recursive { + if allObj, present, _ := getOptionalAttr(e, mod, "__all__"); present && allObj != nil { + if rerr := e.handleFromlist(mod, allObj, true); rerr != nil { + return rerr + } + } + } + default: + _, present, gerr := getOptionalAttr(e, mod, entry) + if gerr != nil { + return gerr + } + if present { + continue + } + fromName := modName + "." + entry + exec := &vmExecutor{ts: e.ts, builtins: callerBuiltins(e.f)} + if _, ierr := imp.ImportModuleLevel(exec, fromName, "", 0); ierr != nil { + // Backwards-compatibility: ignore a fromlist-triggered import + // of a submodule that simply does not exist, but only when the + // miss is for exactly this submodule. + // + // CPython: Lib/importlib/_bootstrap.py:1433 except ModuleNotFoundError + if errors.Is(ierr, imp.ErrModuleNotFound) { + if _, cached := imp.GetModule(fromName); !cached { + pyerrors.Clear(e.ts) + continue + } + } + return ierr + } + } + } + return nil } diff --git a/vm/eval_import_test.go b/vm/eval_import_test.go index bdfec5b97..abe40cd37 100644 --- a/vm/eval_import_test.go +++ b/vm/eval_import_test.go @@ -217,7 +217,11 @@ func TestImportLevelHelper(t *testing.T) { } } -// TestGlobalNameHelper pins globalName for nil, non-dict, and a dict with __name__. +// TestGlobalNameHelper pins globalName for nil, non-dict, and the +// __name__ anchoring rules. globalName computes the package anchor the +// way _calc___package__ does: a plain module strips its final dotted +// component, while a package (one carrying __path__) anchors at its own +// name. func TestGlobalNameHelper(t *testing.T) { if got := globalName(nil); got != "" { t.Errorf("globalName(nil) = %q, want \"\"", got) @@ -225,9 +229,23 @@ func TestGlobalNameHelper(t *testing.T) { if got := globalName(objects.NewStr("x")); got != "" { t.Errorf("globalName(str) = %q, want \"\"", got) } - d := objects.NewDict() - _ = d.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) - if got := globalName(d); got != "mypkg" { - t.Errorf("globalName(dict) = %q, want \"mypkg\"", got) + // A top-level module rpartitions to the empty package. + mod := objects.NewDict() + _ = mod.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) + if got := globalName(mod); got != "" { + t.Errorf("globalName(module) = %q, want \"\"", got) + } + // A submodule strips its final component. + sub := objects.NewDict() + _ = sub.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg.sub")) + if got := globalName(sub); got != "mypkg" { + t.Errorf("globalName(submodule) = %q, want \"mypkg\"", got) + } + // A package (carrying __path__) anchors at its own name. + pkg := objects.NewDict() + _ = pkg.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) + _ = pkg.SetItem(objects.NewStr("__path__"), objects.NewList(nil)) + if got := globalName(pkg); got != "mypkg" { + t.Errorf("globalName(package) = %q, want \"mypkg\"", got) } } diff --git a/vm/eval_resume.go b/vm/eval_resume.go index 02ce1a02d..8a38e12d6 100644 --- a/vm/eval_resume.go +++ b/vm/eval_resume.go @@ -30,9 +30,24 @@ func (e *evalState) handleResume(op compile.Opcode, oparg uint32) (next int, err // // CPython: Python/bytecodes.c:196 _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp) if interp := e.ts.Interp(); interp != nil && interp.Monitors != nil { + before := e.f.Code.Code[e.f.InstrPtr] if merr := monitor.Instrument(e.f.Code, interp.Monitors); merr != nil { return 0, merr } + // CPython's RESUME re-reads this_instr after _Py_Instrument and + // dispatches straight into INSTRUMENTED_RESUME, so the PY_START / + // PY_RESUME event fires before the body runs. When the walk just + // rewrote this slot (the frame began after sys.settrace and so + // missed the chance to instrument at entry), re-dispatch the same + // offset instead of advancing; the second pass fetches the now + // INSTRUMENTED_RESUME, fires the event, and the no-op re-instrument + // leaves the slot unchanged so this does not loop. + // + // CPython: Python/bytecodes.c:196 RESUME + if e.f.Code.Code[e.f.InstrPtr] != before && + compile.Opcode(e.f.Code.Code[e.f.InstrPtr]) == compile.INSTRUMENTED_RESUME { + return e.f.InstrPtr, nil + } } if oparg < 2 { if e.gilTimer != nil { diff --git a/vm/eval_unwind.go b/vm/eval_unwind.go index ba40d9c85..cb424ef04 100644 --- a/vm/eval_unwind.go +++ b/vm/eval_unwind.go @@ -309,7 +309,7 @@ func buildOSErrorFromGo(err error) *pyerrors.Exception { if errno == 0 { return nil } - return pyerrors.NewOSError(int(errno), strerrorString(errno), filename, filename2) + return pyerrors.NewOSError(winerrorToErrno(int(errno)), strerrorString(errno), filename, filename2) } // strerrorString renders the errno's message the way CPython's @@ -346,26 +346,26 @@ func promoteOSErrorByErrno(typ *objects.Type, err error) *objects.Type { if errors.As(err, &pathErr) { var errno syscall.Errno if errors.As(pathErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var linkErr *os.LinkError if errors.As(err, &linkErr) { var errno syscall.Errno if errors.As(linkErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var sysErr *os.SyscallError if errors.As(err, &sysErr) { var errno syscall.Errno if errors.As(sysErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var errno syscall.Errno if errors.As(err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } return typ } diff --git a/vm/import_delegate.go b/vm/import_delegate.go new file mode 100644 index 000000000..45acd5441 --- /dev/null +++ b/vm/import_delegate.go @@ -0,0 +1,127 @@ +package vm + +import ( + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/state" + "github.com/tamnd/gopy/traceback" +) + +// delegateImport routes an import through the live Python importlib +// machinery, the same way CPython's import_name looks up __import__ from +// the frame builtins and calls import_func(name, globals, locals, +// fromlist, level). The builtin __import__ resolves to +// _frozen_importlib.__import__ (interp->import_func, wired at bootstrap), +// which runs _find_and_load / _handle_fromlist and registers the result +// in the shared sys.modules. Delegating here keeps a single import path +// so a monkeypatched loader.exec_module fires and the traceback carries +// the frozen importlib frames. +// +// The returned ok is false when _frozen_importlib is not yet installed +// (early bootstrap, before _bootstrap._install has run), so the caller +// falls back to the Go import driver to load the bootstrap itself. +// +// CPython: Python/ceval.c:2898 import_name +// CPython: Lib/importlib/_bootstrap.py:1390 __import__ +func delegateImport(name string, globals, locals, fromlist objects.Object, level int) (objects.Object, bool, error) { + frozen, ok := imp.GetModule("_frozen_importlib") + if !ok { + return nil, false, nil + } + importFunc, err := objects.GetAttr(frozen, objects.NewStr("__import__")) + if err != nil { + return nil, false, nil //nolint:nilerr // missing __import__ means fall back to the Go driver. + } + if globals == nil { + globals = objects.None() + } + if locals == nil { + locals = objects.None() + } + if fromlist == nil { + fromlist = objects.None() + } + args := objects.NewTuple([]objects.Object{ + objects.NewStr(name), + globals, + locals, + fromlist, + objects.NewInt(int64(level)), + }) + mod, callErr := objects.Call(importFunc, args, nil) + if callErr != nil { + return nil, true, callErr + } + return mod, true, nil +} + +// importVerbose reports whether the interpreter runs with -v, which +// suppresses the importlib frame trimming so the full machinery shows. +// +// CPython: Python/import.c:3522 _PyInterpreterState_GetConfig(...)->verbose +func importVerbose() bool { + sysMod, ok := imp.GetModule("sys") + if !ok { + return false + } + flags, err := objects.GetAttr(sysMod, objects.NewStr("flags")) + if err != nil { + return false + } + v, err := objects.GetAttr(flags, objects.NewStr("verbose")) + if err != nil { + return false + } + if i, ok := v.(*objects.Int); ok { + n, _ := i.Int64() + return n != 0 + } + return false +} + +// removeImportlibFrames strips importlib frames from the traceback of the +// exception currently on the thread. If it is an ImportError, every +// importlib chunk is trimmed; otherwise only chunks that end with a call +// to _call_with_frames_removed are trimmed. Matches CPython's behavior of +// hiding the import machinery from user tracebacks. +// +// CPython: Python/import.c:3500 remove_importlib_frames +func removeImportlibFrames(ts *state.Thread) { + exc := pyerrors.Occurred(ts) + if exc == nil || importVerbose() { + return + } + const ( + removeFrames = "_call_with_frames_removed" + bootstrapFile = "" + externalFile = "" + ) + alwaysTrim := exc.ExcType != nil && objects.IsSubtype(exc.ExcType, pyerrors.PyExc_ImportError) + + // A dummy head node lets *outerLink overwrite the chain head uniformly, + // the way CPython threads prev_link/outer_link through PyObject** slots. + var dummy traceback.Traceback + dummy.Next = exc.TB + prevLink := &dummy.Next + var outerLink **traceback.Traceback + inImportlib := false + for tb := exc.TB; tb != nil; { + next := tb.Next + fn := tb.Entry.File + nowInImportlib := fn == bootstrapFile || fn == externalFile + if nowInImportlib && !inImportlib { + outerLink = prevLink + } + inImportlib = nowInImportlib + + if nowInImportlib && (alwaysTrim || tb.Entry.Name == removeFrames) { + *outerLink = next + prevLink = outerLink + } else { + prevLink = &tb.Next + } + tb = next + } + exc.TB = dummy.Next +} diff --git a/vm/import_level.go b/vm/import_level.go new file mode 100644 index 000000000..9431e5a85 --- /dev/null +++ b/vm/import_level.go @@ -0,0 +1,470 @@ +package vm + +import ( + "fmt" + "strings" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// importModuleLevelObject ports PyImport_ImportModuleLevelObject, the C +// body behind the builtin __import__. CPython describes it as +// "importlib.__import__() & _gcd_import(), ported to C for added +// performance": it resolves the absolute name, drives the live importlib +// _gcd_import / _find_and_load to load it, then performs the fromlist / +// dotted-head selection in C rather than in _bootstrap.__import__. +// +// Routing the builtin through this port (instead of calling +// _frozen_importlib.__import__ wholesale) matters for the dotted-head +// selection: the C code slices the standalone abs_name and re-reads +// sys.modules, raising KeyError("%R not in sys.modules as expected") when +// the entry is missing. The Python mirror instead reads module.__name__, +// which raises AttributeError when a caller has stuffed a non-module +// object into sys.modules (the test_malicious_relative_import regression +// guard, gh-134100). +// +// The returned ok is false when _frozen_importlib is not installed yet +// (early bootstrap), so currentImporter falls back to the Go driver. +// +// CPython: Python/import.c:3798 PyImport_ImportModuleLevelObject +func importModuleLevelObject(name string, globals objects.Object, fromlist objects.Object, level int) (objects.Object, bool, error) { + frozen, ok := imp.GetModule("_frozen_importlib") + if !ok { + return nil, false, nil + } + + // CPython: Python/import.c:3829 resolve_name / abs_name selection. + absName, packageStr, pkgObj, rerr := resolveImportContext(frozen, name, globals, level) + if rerr != nil { + return nil, true, rerr + } + + // CPython: Python/import.c:3842 import_get_module + import_ensure_initialized. + // When abs_name is already in sys.modules AND still initializing (another + // thread is mid-import on it), the C body waits for it via + // _bootstrap._lock_unlock_module instead of re-entering _find_and_load. + // That matters for concurrent circular imports: _lock_unlock_module + // CATCHES the _DeadlockError the per-module lock raises, whereas the + // _ModuleLockManager context inside _find_and_load lets it propagate and + // kill the importing thread. Skipping this fast path is the difference + // between test_threaded_import.test_circular_imports resolving (both + // threads finish) and one thread dying on an uncaught _DeadlockError. + // + // Only the still-initializing case takes this fast path. Every other + // case (cold miss or fully loaded) falls through to _gcd_import below, + // exactly as the C body calls import_find_and_load. + module, accepted, ferr := acceptInitializingModule(frozen, absName) + if ferr != nil { + return nil, true, ferr + } + if !accepted { + // CPython: Python/import.c:3718 import_find_and_load -> _gcd_import. + // Drive the live importlib _gcd_import to load (or return the cached) + // module, then perform the fromlist / dotted-head selection in C + // below rather than in _bootstrap.__import__. Routing through the + // Python __import__ here would re-run the dotted-head slice via + // module.__name__, which raises AttributeError instead of the + // expected KeyError when a caller stuffed a non-module into + // sys.modules (test_malicious_relative_import, gh-134100). + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err != nil { + return nil, true, err + } + var gcdArgs *objects.Tuple + if level > 0 { + gcdArgs = objects.NewTuple([]objects.Object{ + objects.NewStr(name), pkgObj, objects.NewInt(int64(level)), + }) + } else { + gcdArgs = objects.NewTuple([]objects.Object{objects.NewStr(name)}) + } + module, err = objects.Call(gcd, gcdArgs, nil) + if err != nil { + return nil, true, err + } + } + + // CPython: Python/import.c:3881 has_from = PyObject_IsTrue(fromlist). + hasFrom := false + if fromlist != nil && !objects.IsNone(fromlist) { + t, err := objects.IsTruthy(fromlist) + if err != nil { + return nil, true, err + } + hasFrom = t + } + if !hasFrom { + return headSelection(name, packageStr, level, module) + } + return fromlistSelection(frozen, module, fromlist) +} + +// resolveImportContext ports the abs_name / package resolution prologue of +// PyImport_ImportModuleLevelObject. For level>0 it runs +// _bootstrap._calc___package__ against the caller globals (carrying its +// DeprecationWarning / ImportWarning / KeyError / TypeError), derives the +// package string, and recomputes abs_name via _resolve_name; for level==0 it +// rejects an empty name with ValueError and uses name verbatim. pkgObj is the +// package object _gcd_import expects for a relative import (nil for level==0). +// +// CPython: Python/import.c:3829 PyImport_ImportModuleLevelObject (resolve) +// CPython: Lib/importlib/_bootstrap.py:1487 __import__ +func resolveImportContext(frozen objects.Object, name string, globals objects.Object, level int) (absName, packageStr string, pkgObj objects.Object, err error) { + if level > 0 { + // globals_ = globals if globals is not None else {}; _calc___package__ + // runs the __package__/__spec__/__name__ fallback against that dict. + g := globals + if g == nil || objects.IsNone(g) { + g = objects.NewDict() + } + calc, gerr := objects.GetAttr(frozen, objects.NewStr("_calc___package__")) + if gerr != nil { + return "", "", nil, gerr + } + pkgObj, err = objects.Call(calc, objects.NewTuple([]objects.Object{g}), nil) + if err != nil { + return "", "", nil, err + } + if u, isStr := pkgObj.(*objects.Unicode); isStr { + packageStr = u.Value() + } + absName = resolveImportName(name, packageStr, level) + return absName, packageStr, pkgObj, nil + } + // CPython: Python/import.c:3835 level == 0 requires a non-empty name. + if name == "" { + return "", "", nil, fmt.Errorf("ValueError: Empty module name") + } + return name, "", nil, nil +} + +// importViaDelegate is the IMPORT_NAME opcode's import path. It runs the same +// import_ensure_initialized still-initializing fast path as the builtin +// __import__ (so concurrent circular imports resolve instead of dying on an +// uncaught _DeadlockError), but otherwise delegates the load to +// _frozen_importlib.__import__ rather than driving _gcd_import + the C +// dotted-head selection directly. +// +// The distinction is a refcount one, not a semantic one. IMPORT_NAME applies +// DECREF_INPUTS to the module it pushes, and the long-standing delegateImport +// route returns an owned reference whose count that decref was proven against +// (#223). Routing IMPORT_NAME through importModuleLevelObject's _gcd_import + +// headSelection path produces a module whose net refcount differs, so the +// DECREF_INPUTS drops it to a GC-clearable state and a later collection +// tp_clears its globals out from under live code. The builtin __import__ has +// no DECREF_INPUTS, so it keeps the C-faithful importModuleLevelObject body +// (whose headSelection raises the gh-134100 KeyError); IMPORT_NAME keeps the +// delegate body it was proven against, with only the circular-import fast path +// prepended. +// +// CPython: Python/import.c:3842 import_ensure_initialized (cache fast path) +func importViaDelegate(name string, globals objects.Object, fromlist objects.Object, level int) (objects.Object, bool, error) { + frozen, ok := imp.GetModule("_frozen_importlib") + if !ok { + return nil, false, nil + } + absName, packageStr, _, rerr := resolveImportContext(frozen, name, globals, level) + if rerr != nil { + return nil, true, rerr + } + + // If abs_name is already in sys.modules and still initializing (another + // thread is mid-import on it), wait for it via _bootstrap._lock_unlock_module. + // _lock_unlock_module CATCHES the _DeadlockError a concurrent circular + // import raises, whereas re-entering _find_and_load's _ModuleLockManager + // would let it propagate and kill the thread. + // + // CPython: Python/import.c:3842 import_ensure_initialized (cache fast path) + raw, stillInit, werr := waitForInitializingModule(frozen, absName) + if werr != nil { + return nil, true, werr + } + + // When the module is STILL initializing after the wait, another thread + // holds the per-module lock and is itself blocked: re-entering + // _find_and_load via the delegate would re-acquire that lock and deadlock. + // Return the (partially initialized) cached module directly, exactly as + // CPython's import_ensure_initialized fast path does, then run the fromlist + // / dotted-head selection against it. This is the only path that must not + // delegate; it is reached only under genuine concurrent circular imports. + // + // CPython: Python/import.c:3851 PyImport_ImportModuleLevelObject (fast path) + if raw != nil && stillInit { + // import_get_module returns a NEW reference (PyMapping_GetOptionalItem); + // imp.GetModuleRaw borrows from sys.modules, so incref before the + // selection consumes it. + // + // CPython: Python/import.c:238 import_get_module + objects.Incref(raw) + hasFrom := false + if fromlist != nil && !objects.IsNone(fromlist) { + t, terr := objects.IsTruthy(fromlist) + if terr != nil { + return nil, true, terr + } + hasFrom = t + } + if !hasFrom { + return headSelection(name, packageStr, level, raw) + } + return fromlistSelection(frozen, raw, fromlist) + } + + // Otherwise (not cached, or the wait completed and the module is fully + // initialized) delegate. _frozen_importlib.__import__ runs _find_and_load + // and the fromlist / dotted-head selection itself, so its return is already + // the module CPython's import_name would push. This is the long-standing + // refcount-proven route (#223); IMPORT_NAME's DECREF_INPUTS was proven + // against the reference it owns. + mod, ok2, err := delegateImport(name, globals, objects.None(), fromlist, level) + if err != nil { + return nil, true, err + } + if !ok2 { + return nil, false, nil + } + return mod, true, nil +} + +// acceptInitializingModule ports the still-initializing arm of +// import_ensure_initialized. It returns (module, true, nil) only when +// sys.modules already holds a module for absName whose __spec__._initializing +// is True: another thread is mid-import on it. In that case it waits via +// _bootstrap._lock_unlock_module, which CATCHES the _DeadlockError a +// concurrent circular import raises, and the caller accepts the (possibly +// partially initialized) cached module instead of re-entering _find_and_load +// (whose _ModuleLockManager would let that _DeadlockError propagate and kill +// the thread). +// +// Every other case (absent, None, or a fully initialized cache hit) returns +// (nil, false, nil): the caller delegates to the live __import__, whose own +// _find_and_load optimization returns the module without locking. Restricting +// the special handling to the initializing case keeps the common import path +// byte-for-byte identical to the long-standing delegateImport route. +// +// CPython: Python/import.c:244 import_ensure_initialized +// CPython: Python/import.c:3842 PyImport_ImportModuleLevelObject (cache check) +func acceptInitializingModule(frozen objects.Object, absName string) (objects.Object, bool, error) { + raw, _, werr := waitForInitializingModule(frozen, absName) + if werr != nil { + return nil, false, werr + } + if raw == nil { + return nil, false, nil + } + // import_get_module returns a NEW reference (PyMapping_GetOptionalItem); + // imp.GetModuleRaw borrows from sys.modules (PyDict_GetItem semantics), so + // incref before returning or the C-faithful headSelection / fromlistSelection + // that follows in the builtin __import__ path under-counts the module and a + // later GC tp_clears its globals out from under live code. + // + // CPython: Python/import.c:238 import_get_module (Py_INCREF via GetOptionalItem) + objects.Incref(raw) + return raw, true, nil +} + +// waitForInitializingModule ports the still-initializing arm of +// import_ensure_initialized without taking ownership of the result. When +// sys.modules already holds a module for absName whose __spec__._initializing +// is True (another thread is mid-import on it), it waits via +// _bootstrap._lock_unlock_module (which CATCHES the _DeadlockError a concurrent +// circular import raises) and returns the borrowed cached module. The second +// return value reports whether the module is STILL initializing after the wait: +// True means the holding thread is itself blocked (a genuine circular-import +// deadlock the lock_unlock swallowed), so the caller must use the module +// directly instead of re-entering _find_and_load. Every other case (absent, +// None, no usable __spec__, or not initializing) returns (nil, false, nil). +// +// The returned reference is BORROWED from sys.modules. The caller increfs it +// before use. +// +// CPython: Python/import.c:244 import_ensure_initialized +func waitForInitializingModule(frozen objects.Object, absName string) (objects.Object, bool, error) { + raw, present := imp.GetModuleRaw(absName) + if !present || objects.IsNone(raw) { + return nil, false, nil + } + + // Optimization: only call _lock_unlock_module when __spec__._initializing + // is true (set before the module is stuffed in sys.modules). + // + // CPython: Python/import.c:249 import_ensure_initialized (_initializing check) + initializing, serr := specInitializing(raw) + if serr != nil { + return nil, false, serr + } + if !initializing { + return nil, false, nil + } + + // Wait until the module is done importing. _lock_unlock_module acquires + // then releases the per-module lock and CATCHES the _DeadlockError a + // concurrent circular import raises, accepting a partially initialized + // module rather than propagating the error. + // + // CPython: Python/import.c:267 _bootstrap._lock_unlock_module + lockUnlock, lerr := objects.GetAttr(frozen, objects.NewStr("_lock_unlock_module")) + if lerr != nil { + return nil, false, lerr + } + if _, cerr := objects.Call(lockUnlock, objects.NewTuple([]objects.Object{objects.NewStr(absName)}), nil); cerr != nil { + return nil, false, cerr + } + + // Verify the module is still in sys.modules. Another thread may have + // removed it (import failure) between the lookup and the initializing + // check; if so, signal nothing to wait on so the caller does a full import. + // + // CPython: Python/import.c:3851 mod_check != mod + check, stillPresent := imp.GetModuleRaw(absName) + if !stillPresent || check != raw { + return nil, false, nil + } + // Re-read _initializing after the wait. If it is still True the holding + // thread is blocked (circular-import deadlock the lock_unlock swallowed), + // and the caller must not re-enter _find_and_load. + stillInit, ierr := specInitializing(raw) + if ierr != nil { + return nil, false, ierr + } + return raw, stillInit, nil +} + +// specInitializing reports whether raw's __spec__._initializing is True. A +// missing or None __spec__ means the module cannot be mid-import. +// +// CPython: Python/import.c:249 import_ensure_initialized (_initializing check) +func specInitializing(raw objects.Object) (bool, error) { + spec, serr := objects.GetAttr(raw, objects.NewStr("__spec__")) + if serr != nil { + return false, nil //nolint:nilerr // missing __spec__ is a fall-back, not an error + } + if spec == nil || objects.IsNone(spec) { + return false, nil + } + return imp.SpecIsInitializing(spec) +} + +// headSelection mirrors the !has_from branch of +// PyImport_ImportModuleLevelObject: an absolute dotted import returns the +// top-level package, while a relative dotted import re-reads the +// already-loaded head from sys.modules by slicing the resolved abs_name. +// +// CPython: Python/import.c:3887 (!has_from branch) +func headSelection(name, packageStr string, level int, module objects.Object) (objects.Object, bool, error) { + if level != 0 && name == "" { + // CPython: Python/import.c:3895 (elif !name: final_mod = mod). + return module, true, nil + } + runes := []rune(name) + dot := indexRune(runes, '.') + if dot < 0 { + // CPython: Python/import.c:3897 (no dot, simple exit). + return module, true, nil + } + if level == 0 { + // CPython: Python/import.c:3903 re-import the front absolutely. + front := string(runes[:dot]) + return importModuleLevelObject(front, objects.None(), nil, 0) + } + // CPython: Python/import.c:3912 slice abs_name to its first `dot` + // components and re-read sys.modules. abs_name is the standalone + // resolved name, not module.__name__, so a non-module sys.modules entry + // surfaces as the KeyError below rather than an AttributeError. + absName := resolveImportName(name, packageStr, level) + absRunes := []rune(absName) + cutOff := len(runes) - dot + toReturn := string(absRunes[:len(absRunes)-cutOff]) + mod, err := imp.SysModules().GetItem(objects.NewStr(toReturn)) + if err == nil && mod != nil { + // GetItem borrows from sys.modules; import_get_module hands back a + // new reference, so incref before returning. + // + // CPython: Python/import.c:3917 import_get_module(to_return) + objects.Incref(mod) + } + if err != nil || mod == nil { + // CPython: Python/import.c:3924 KeyError "%R not in sys.modules + // as expected". + r, rerr := objects.Repr(objects.NewStr(toReturn)) + if rerr != nil { + r = "'" + toReturn + "'" + } + msg := fmt.Sprintf("%s not in sys.modules as expected", r) + exc := pyerrors.New(pyerrors.PyExc_KeyError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + return nil, true, objects.NewRaisedError(exc, "KeyError: "+msg) + } + return mod, true, nil +} + +// fromlistSelection mirrors the has_from branch: when the loaded module is +// a package (carries __path__) defer to importlib._handle_fromlist to +// force-import each requested submodule, otherwise return the module +// untouched. +// +// CPython: Python/import.c:3939 (has_from branch) +func fromlistSelection(frozen objects.Object, module objects.Object, fromlist objects.Object) (objects.Object, bool, error) { + hasPath, err := objects.HasAttrString(module, "__path__") + if err != nil { + return nil, true, err + } + if !hasPath { + return module, true, nil + } + // _bootstrap.__import__ passes _gcd_import as the import callable so + // _handle_fromlist loads `pkg.sub` by absolute name. fromlist reaches + // _handle_fromlist untouched: it iterates the object and raises the + // "Item in ``from list'' must be str" TypeError for a non-str entry, + // matching the C body which never pre-validates the elements. + // + // CPython: Lib/importlib/_bootstrap.py:1505 _handle_fromlist(module, + // fromlist, _gcd_import) + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err != nil { + return nil, true, err + } + handle, err := objects.GetAttr(frozen, objects.NewStr("_handle_fromlist")) + if err != nil { + return nil, true, err + } + res, err := objects.Call(handle, objects.NewTuple([]objects.Object{ + module, fromlist, gcd, + }), nil) + if err != nil { + return nil, true, err + } + return res, true, nil +} + +// resolveImportName mirrors _bootstrap._resolve_name: strip the trailing +// (level-1) dotted components from package, then re-attach name. It +// recomputes the abs_name that _gcd_import derived internally so +// headSelection can slice it. +// +// CPython: Lib/importlib/_bootstrap.py _resolve_name +func resolveImportName(name, pkg string, level int) string { + for i := 1; i < level; i++ { + dot := strings.LastIndexByte(pkg, '.') + if dot < 0 { + break + } + pkg = pkg[:dot] + } + if name == "" { + return pkg + } + return pkg + "." + name +} + +// indexRune returns the index of the first occurrence of r in runes, or +// -1. Matches PyUnicode_FindChar(..., direction=1) on code points. +func indexRune(runes []rune, r rune) int { + for i, c := range runes { + if c == r { + return i + } + } + return -1 +} diff --git a/vm/instrument_fire.go b/vm/instrument_fire.go index 670d7e44c..93bcdb267 100644 --- a/vm/instrument_fire.go +++ b/vm/instrument_fire.go @@ -47,6 +47,18 @@ func (e *evalState) fireInstrumented(op compile.Opcode, oparg uint32) error { } base := monitor.DeInstrument(op) ev := monitor.EventForOpcode(base) + // RESUME fires PY_START at entry (oparg 0) and PY_RESUME on a + // generator / coroutine re-entry (oparg > 0); the event is not in + // the static EventForOpcode table because it depends on the oparg. + // + // CPython: Python/bytecodes.c:245 _MONITOR_RESUME (oparg > 0) + if base == compile.RESUME { + if oparg > 0 { + ev = monitor.EventPyResume + } else { + ev = monitor.EventPyStart + } + } if int(ev) >= monitor.Events { return nil } @@ -92,6 +104,30 @@ func (e *evalState) handleInstrumentedLine() (compile.Opcode, error) { return disp.OriginalOpcode, nil } +// handleInstrumentedInstruction resolves an INSTRUMENTED_INSTRUCTION +// marker at the current instr pointer: it fires INSTRUCTION for any +// subscribed tool and returns the underlying opcode the dispatcher +// should run instead of the marker. Returns NOP when no per-instruction +// data is present. +// +// CPython: Python/instrumentation.c:1401 _Py_call_instrumentation_instruction +func (e *evalState) handleInstrumentedInstruction() (compile.Opcode, error) { + co := e.f.Code + var interp *monitor.InterpState + if it := e.ts.Interp(); it != nil { + interp = it.Monitors + } + instr := e.f.InstrPtr / 2 + next, err := monitor.CallInstrumentationInstruction(interp, co, instr) + if err != nil { + return compile.NOP, err + } + if next == 0 { + return compile.NOP, nil + } + return next, nil +} + // fireForEvent dispatches to the matching FireXxx entry point in the // monitor package. Currently covers the events whose argument // signature the eval loop can already supply; the rest land with @@ -107,6 +143,10 @@ func fireForEvent( e *evalState, ) error { switch ev { + case monitor.EventPyStart: + return monitor.FirePyStart(interp, state, co, offset) + case monitor.EventPyResume: + return monitor.FirePyResume(interp, state, co, offset) case monitor.EventPyReturn: retval := objects.None() if e.f.StackTop > 0 { diff --git a/vm/legacy_tracing.go b/vm/legacy_tracing.go index 531d1321d..18f9ea369 100644 --- a/vm/legacy_tracing.go +++ b/vm/legacy_tracing.go @@ -642,17 +642,19 @@ func SetTrace(ts *state.Thread, fn LegacyTraceFunc, arg objects.Object) (objects if err := setMonitoringTraceEvents(interp); err != nil { return old, err } - // Instrument the currently-executing frame's code so line events fire - // immediately, without waiting for the next RESUME. CPython does this - // after set_monitoring_trace_events when func != NULL. + // set_monitoring_trace_events stamps only the global event mask; in + // CPython the matching re-instrumentation happens one level down in + // _PyMonitoring_SetEvents, which runs instrument_all_executing_code_objects + // under stop-the-world. gopy's SetEvents only bumps the global + // version, and a frame picks new events up at its next RESUME, so a + // frame already past its RESUME (the one that called settrace and + // its live callers) would otherwise never see them. Walk the live + // stack and instrument each frame's code so line / return events + // fire on the current call chain. // - // CPython: Python/legacy_tracing.c:725 _Py_Instrument(current_frame code) - if fn != nil { - if f := frameStackFor(ts).Top(); f != nil { - if merr := monitor.Instrument(f.Code, interp); merr != nil { - return old, merr - } - } + // CPython: Python/instrumentation.c:1941 instrument_all_executing_code_objects + if err := instrumentExecutingFrames(ts, interp); err != nil { + return old, err } if interp.SysTracingThreads > 0 { if err := maybeSetOpcodeTrace(ts); err != nil { @@ -662,6 +664,27 @@ func SetTrace(ts *state.Thread, fn LegacyTraceFunc, arg objects.Object) (objects return old, nil } +// instrumentExecutingFrames re-instruments the code object of every +// frame currently on the thread's stack, the gopy stand-in for the +// instrument_all_executing_code_objects pass _PyMonitoring_SetEvents +// runs under stop-the-world. Walking the f_back chain from the top +// frame covers the frame that called settrace and every live caller, +// so events fire on the current call chain instead of waiting for a +// RESUME those frames have already passed. +// +// CPython: Python/instrumentation.c:1941 instrument_all_executing_code_objects +func instrumentExecutingFrames(ts *state.Thread, interp *monitor.InterpState) error { + for f := frameStackFor(ts).Top(); f != nil; f = f.Previous { + if f.Code == nil { + continue + } + if err := monitor.Instrument(f.Code, interp); err != nil { + return err + } + } + return nil +} + // maybeSetOpcodeTrace turns INSTRUMENTED_INSTRUCTION on for ts's // current frame when that frame requested opcode tracing. // @@ -673,3 +696,53 @@ func maybeSetOpcodeTrace(ts *state.Thread) error { } return setOpcodeTrace(f, true) } + +// setOpcodeTraceHook backs objects.SetOpcodeTraceHook. It is invoked +// when Python code assigns frame.f_trace_opcodes (bdb / pdb does this +// on the frame it is about to single-step). It toggles the local +// INSTRUCTION event on the frame's code and re-instruments the live +// call chain so the marker takes effect on the current instruction +// rather than only after the next RESUME the frame has already passed. +// +// CPython: Python/legacy_tracing.c:159 _PyEval_SetOpcodeTrace +func setOpcodeTraceHook(w *objects.Frame, enable bool) error { + if w == nil { + return nil + } + ip := w.Interp() + f, ok := ip.(*frame.Frame) + if !ok || f == nil { + return nil + } + // Mirror the wrapper's f_trace_opcodes onto the iframe so the line + // trampoline (callTraceFunc) re-arms opcode tracing each time it + // dispatches an event for this frame. + f.TraceOpcodes = enable + // CPython only installs the instrumentation when enabling AND a + // trace function is already set on the frame; otherwise the next + // dispatched event re-arms it via callTraceFunc. Disabling always + // tears the instrumentation down. + // + // CPython: Objects/frameobject.c:1148 frame_trace_opcodes_set_impl + if enable && w.Trace() == objects.None() { + return nil + } + if err := setOpcodeTrace(f, enable); err != nil { + return err + } + ts := currentThread() + if ts == nil { + return nil + } + interp := ts.Interp().Monitors + if interp == nil { + return nil + } + // SetLocalEvents only bumped the version; the executing frame is + // already past its RESUME, so walk the live stack and instrument + // each code object now, matching CPython's stop-the-world + // instrument_all_executing_code_objects. + // + // CPython: Python/instrumentation.c:1941 instrument_all_executing_code_objects + return instrumentExecutingFrames(ts, interp) +} diff --git a/vm/oserrno_other.go b/vm/oserrno_other.go new file mode 100644 index 000000000..850155120 --- /dev/null +++ b/vm/oserrno_other.go @@ -0,0 +1,7 @@ +//go:build !windows + +package vm + +// winerrorToErrno is the identity on non-Windows platforms: Go's +// syscall.Errno already carries the POSIX errno there. +func winerrorToErrno(errno int) int { return errno } diff --git a/vm/oserrno_windows.go b/vm/oserrno_windows.go new file mode 100644 index 000000000..49f755edf --- /dev/null +++ b/vm/oserrno_windows.go @@ -0,0 +1,159 @@ +//go:build windows + +package vm + +// winerrorToErrno maps a Windows system error code to the POSIX errno +// CPython stores in OSError.errno (OSError.winerror keeps the raw code). +// Go's syscall.Errno on Windows carries the raw WinAPI error, so without +// this translation `except FileNotFoundError` / `except FileExistsError` +// would never match (errnomap is keyed on POSIX errno). +// +// CPython: PC/errmap.h winerror_to_errno +func winerrorToErrno(winerror int) int { + // Unwrap FACILITY_WIN32 HRESULT errors. + if winerror&0xFFFF0000 == 0x80070000 { + winerror &= 0x0000FFFF + } + + // Winsock error codes (10000-11999) are errno values. + if winerror >= 10000 && winerror < 12000 { + switch winerror { + case 10004, // WSAEINTR + 10009, // WSAEBADF + 10013, // WSAEACCES + 10014, // WSAEFAULT + 10022, // WSAEINVAL + 10024: // WSAEMFILE + return winerror - 10000 + default: + return winerror + } + } + + switch winerror { + case 2, // ERROR_FILE_NOT_FOUND + 3, // ERROR_PATH_NOT_FOUND + 15, // ERROR_INVALID_DRIVE + 18, // ERROR_NO_MORE_FILES + 53, // ERROR_BAD_NETPATH + 67, // ERROR_BAD_NET_NAME + 161, // ERROR_BAD_PATHNAME + 206: // ERROR_FILENAME_EXCED_RANGE + return errENOENT + case 10: // ERROR_BAD_ENVIRONMENT + return errE2BIG + case 11, // ERROR_BAD_FORMAT + 188, // ERROR_INVALID_STARTING_CODESEG + 189, // ERROR_INVALID_STACKSEG + 190, // ERROR_INVALID_MODULETYPE + 191, // ERROR_INVALID_EXE_SIGNATURE + 192, // ERROR_EXE_MARKED_INVALID + 193, // ERROR_BAD_EXE_FORMAT + 194, // ERROR_ITERATED_DATA_EXCEEDS_64k + 195, // ERROR_INVALID_MINALLOCSIZE + 196, // ERROR_DYNLINK_FROM_INVALID_RING + 197, // ERROR_IOPL_NOT_ENABLED + 198, // ERROR_INVALID_SEGDPL + 199, // ERROR_AUTODATASEG_EXCEEDS_64k + 200, // ERROR_RING2SEG_MUST_BE_MOVABLE + 201, // ERROR_RELOC_CHAIN_XEEDS_SEGLIM + 202: // ERROR_INFLOOP_IN_RELOC_CHAIN + return errENOEXEC + case 6, // ERROR_INVALID_HANDLE + 114, // ERROR_INVALID_TARGET_HANDLE + 130: // ERROR_DIRECT_ACCESS_HANDLE + return errEBADF + case 128, // ERROR_WAIT_NO_CHILDREN + 129: // ERROR_CHILD_NOT_COMPLETE + return errECHILD + case 89, // ERROR_NO_PROC_SLOTS + 164, // ERROR_MAX_THRDS_REACHED + 215: // ERROR_NESTING_NOT_ALLOWED + return errEAGAIN + case 7, // ERROR_ARENA_TRASHED + 8, // ERROR_NOT_ENOUGH_MEMORY + 9, // ERROR_INVALID_BLOCK + 1816: // ERROR_NOT_ENOUGH_QUOTA + return errENOMEM + case 5, // ERROR_ACCESS_DENIED + 16, // ERROR_CURRENT_DIRECTORY + 19, // ERROR_WRITE_PROTECT + 20, // ERROR_BAD_UNIT + 21, // ERROR_NOT_READY + 22, // ERROR_BAD_COMMAND + 23, // ERROR_CRC + 24, // ERROR_BAD_LENGTH + 25, // ERROR_SEEK + 26, // ERROR_NOT_DOS_DISK + 27, // ERROR_SECTOR_NOT_FOUND + 28, // ERROR_OUT_OF_PAPER + 29, // ERROR_WRITE_FAULT + 30, // ERROR_READ_FAULT + 31, // ERROR_GEN_FAILURE + 32, // ERROR_SHARING_VIOLATION + 33, // ERROR_LOCK_VIOLATION + 34, // ERROR_WRONG_DISK + 36, // ERROR_SHARING_BUFFER_EXCEEDED + 65, // ERROR_NETWORK_ACCESS_DENIED + 82, // ERROR_CANNOT_MAKE + 83, // ERROR_FAIL_I24 + 108, // ERROR_DRIVE_LOCKED + 132, // ERROR_SEEK_ON_DEVICE + 158, // ERROR_NOT_LOCKED + 167, // ERROR_LOCK_FAILED + 35: // 35 (undefined) + return errEACCES + case 80, // ERROR_FILE_EXISTS + 183: // ERROR_ALREADY_EXISTS + return errEEXIST + case 17: // ERROR_NOT_SAME_DEVICE + return errEXDEV + case 267: // ERROR_DIRECTORY (bpo-12802) + return errENOTDIR + case 4: // ERROR_TOO_MANY_OPEN_FILES + return errEMFILE + case 112: // ERROR_DISK_FULL + return errENOSPC + case 109, // ERROR_BROKEN_PIPE + 232: // ERROR_NO_DATA (bpo-13063) + return errEPIPE + case 145: // ERROR_DIR_NOT_EMPTY + return errENOTEMPTY + case 1113: // ERROR_NO_UNICODE_TRANSLATION + return errEILSEQ + case 258: // WAIT_TIMEOUT + return errETIMEDOUT + case 1, // ERROR_INVALID_FUNCTION + 12, // ERROR_INVALID_ACCESS + 13, // ERROR_INVALID_DATA + 87, // ERROR_INVALID_PARAMETER + 131: // ERROR_NEGATIVE_SEEK + return errEINVAL + default: + return errEINVAL + } +} + +// POSIX errno values CPython maps Windows errors onto. Hard-coded to the +// standard POSIX numbers (not Go's Windows-side syscall constants, which +// are fabricated) so they line up with module/errno and errnomap. +const ( + errENOENT = 2 + errE2BIG = 7 + errENOEXEC = 8 + errEBADF = 9 + errECHILD = 10 + errEAGAIN = 11 + errENOMEM = 12 + errEACCES = 13 + errEEXIST = 17 + errEXDEV = 18 + errENOTDIR = 20 + errEINVAL = 22 + errEMFILE = 24 + errENOSPC = 28 + errEPIPE = 32 + errENOTEMPTY = 41 + errEILSEQ = 42 + errETIMEDOUT = 138 +) diff --git a/vm/sys_trace_builtins.go b/vm/sys_trace_builtins.go index b4d0b1baf..90e1c635a 100644 --- a/vm/sys_trace_builtins.go +++ b/vm/sys_trace_builtins.go @@ -37,20 +37,20 @@ var whatStrings = [8]string{ } // callTrampoline invokes a Python-level trace / profile callback -// with (frame, what, arg). Mirrors call_trampoline in CPython. +// with (frame, what, arg) and returns whatever the callback returned. +// Mirrors call_trampoline in CPython. // // CPython: Python/sysmodule.c:1071 call_trampoline -func callTrampoline(callback objects.Object, f *frame.Frame, what int, arg objects.Object) error { +func callTrampoline(callback objects.Object, f *frame.Frame, what int, arg objects.Object) (objects.Object, error) { if arg == nil { arg = objects.None() } if what < 0 || what >= len(whatStrings) { - return fmt.Errorf("invalid trace what %d", what) + return nil, fmt.Errorf("invalid trace what %d", what) } frameObj := objects.NewFrame(f) args := []objects.Object{frameObj, objects.NewStr(whatStrings[what]), arg} - _, err := objects.Vectorcall(callback, args, uint(len(args)), nil) - return err + return objects.Vectorcall(callback, args, uint(len(args)), nil) } // profileTrampoline is the LegacyTraceFunc SetProfile installs when @@ -61,7 +61,7 @@ func callTrampoline(callback objects.Object, f *frame.Frame, what int, arg objec // CPython: Python/sysmodule.c:1086 profile_trampoline func profileTrampoline(callback objects.Object) LegacyTraceFunc { return func(_ objects.Object, f *frame.Frame, what int, arg objects.Object) error { - err := callTrampoline(callback, f, what, arg) + _, err := callTrampoline(callback, f, what, arg) if err != nil { ts := currentThread() if ts != nil { @@ -72,23 +72,50 @@ func profileTrampoline(callback objects.Object) LegacyTraceFunc { } } -// traceTrampoline is the LegacyTraceFunc SetTrace installs. -// Mirrors trace_trampoline. The local-callback model is simplified: -// gopy's frame does not yet expose a per-frame f_trace slot for the -// trace function to mutate, so the install-time callable services -// every event until the user clears it again. +// traceTrampoline is the LegacyTraceFunc SetTrace installs. It is the +// faithful trace_trampoline: on a CALL event the install-time global +// callback runs and its return value becomes that frame's local trace +// function (f_trace); on every other event the frame's own f_trace is +// invoked, and nothing happens when it is NULL. This per-frame gate is +// why the frame that called settrace fires no events (no CALL fired +// for it, so its f_trace stays unset) while a frame whose f_trace was +// set explicitly still receives line and return events. +// +// gopy's frame split keeps f_trace on the Python-level wrapper +// (objects.Frame), so the trampoline reaches it through +// objects.NewFrame, which returns the wrapper already registered on +// the live activation record. // // CPython: Python/sysmodule.c:1101 trace_trampoline func traceTrampoline(callback objects.Object) LegacyTraceFunc { return func(_ objects.Object, f *frame.Frame, what int, arg objects.Object) error { - err := callTrampoline(callback, f, what, arg) + wrapper := objects.NewFrame(f) + var cb objects.Object + if what == PyTraceCall { + cb = callback + } else if local := wrapper.Trace(); local != objects.None() { + cb = local + } + if cb == nil { + return nil + } + result, err := callTrampoline(cb, f, what, arg) if err != nil { ts := currentThread() if ts != nil { _, _ = SetTrace(ts, nil, nil) } + wrapper.SetTrace(nil) + return err } - return err + // A None return leaves the existing f_trace in place; any other + // value installs it as the frame's local trace function. + // + // CPython: Python/sysmodule.c:1124 trace_trampoline (Py_XSETREF) + if result != objects.None() { + wrapper.SetTrace(result) + } + return nil } } diff --git a/website/docs/specs/1700/1731_modules_imports_panel.md b/website/docs/specs/1700/1731_modules_imports_panel.md new file mode 100644 index 000000000..5b1341fea --- /dev/null +++ b/website/docs/specs/1700/1731_modules_imports_panel.md @@ -0,0 +1,187 @@ +--- +id: "1731" +slug: 1731 +title: "1731: Modules / imports test panel — CPython 3.14 parity port" +sidebar_label: "1731 Modules imports panel" +description: "Audit and port of the Modules/imports test panel from spec 1700 (12 files plus test_import/, test_importlib/, test_module/) against CPython 3.14 under the spec 1726 zero-skip bridge." +--- + +## Status + +Active. Branch `feat/v0.13.5-spec-modules-imports`. + +Run under the [[1726]] bridge so every `@cpython_only` test executes on gopy +instead of being skipped. "No skip" means parity with CPython: if CPython +skips a test on this platform, gopy skips it too; everything else must pass. + +## Goal + +Drive every test in the spec 1700 Modules / imports panel to CPython 3.14 +parity via faithful CPython ports. No shims, no partial slices: when a gate +lands on a subsystem, port every function in that subsystem from CPython as the +single source of truth. + +Sources of truth: `$HOME/cpython-314/`. Every cited function is read from that +tree before porting. + +## Panel + +The panel is the 12 flat files plus the three directory suites. CPython 3.14.5 +runs all of the non-interpreter files green. + +| Test | CPython 3.14.5 | gopy (audit 2026-06-16) | +| --- | --- | --- | +| `test_module/` (dir) | OK | **OK (39 tests)** | +| `test_import/` (dir) | OK | **OK (118 tests, 4 skipped)** — 3 platform skips + `test_frozen_compat` (needs a frozen `_frozen_importlib`, P7) | +| `test_importlib/` (dir) | OK | 1346 tests; 0 failures, 0 errors, 63 skipped — threaded circular import and incomplete multi-phase init both closed; module-lock GC lifetime closed via tp_clear-from-delete_garbage. Run from a clean cwd: invoking `test_importlib.test_util` directly from the repo root puts `module/` (the Go module-port dir) on `sys.path[0]`, where `import module` then resolves as a PEP 420 namespace package and `test_find_submodule_in_module` no longer sees a `ModuleNotFoundError`. CPython fails identically from such a cwd; the canonical regrtest run uses a clean directory. | +| `test_modulefinder` | OK | **OK (17 tests)** | +| `test_pkg` | OK | **OK (8 tests)** | +| `test_pkgutil` | OK | **OK (21 tests)** | +| `test_pyclbr` | OK | **OK (6 tests)** | +| `test_pkgimport` | (covered by `test_import/`) | no flat file | +| `test_runpy` | OK | **OK (40 tests)** | +| `test_frozen` | OK | **OK (3/3)** — frozen test modules + override + `sys._stdlib_dir` shipped | +| `test_zipimport` | OK | **OK (91 tests, 4 skipped)** | +| `test_zipimport_support` | OK | **OK (4 tests)** — vendored `test.test_doctest`; pdb single-step under doctest now works after the opcode-tracing fix | +| `test_zipapp` | OK | **OK (35 tests)** | +| `test__interpchannels` | PEP 554 | deferred (see below) | +| `test__interpreters` | PEP 554 | deferred (`_interpreters.run_string` missing) | + +## Plan + +Phased, smallest-blast-radius first. Re-audit after each phase against +CPython 3.14.5 (counts and `-v` lists). + +- **P1 — `os.altsep` and the module namespace surface.** `os.altsep` is `None` + on POSIX; its absence blocks `test_pkgutil`, `test_zipimport`, + `test_zipimport_support`. The `test_pkg` `dir()` gap is a module-object + attribute surface issue (`__cached__`, `__doc__`, `__loader__`, `__spec__`). + Port these first. +- **P2 — pure-Python stdlib modules.** `modulefinder`, `pyclbr`, `zipapp` are + pure-Python `Lib/*.py`; vendor them and whatever import-machinery they lean + on. Confirm they run under gopy's import system. +- **P3 — frozen modules.** `test_frozen` needs `__hello__` and the frozen + module table. Port the frozen-module surface from CPython. +- **P4 — `test_runpy` residual.** Single ERROR in + `test_run_package_init_exceptions`; port the package-init exception path. +- **P5 — directory suites.** Re-audit `test_import/`, `test_importlib/`, + `test_module/` against CPython and close residuals. +- **P7 — live importlib finders (architectural).** gopy dispatches imports + Go-side: `sys.meta_path` is empty where CPython has + `[BuiltinImporter, FrozenImporter, PathFinder]`, and `importlib.machinery` + is a stub that does not re-export `PathFinder` / `FrozenImporter` / + `BuiltinImporter`. The Python finder classes in `_bootstrap.py` exist but + are not wired into `sys.meta_path`, and `_imp` is missing the functions the + full bootstrap drives (`extension_suffixes`, `find_frozen`, + `get_frozen_object`, `is_frozen_package`, `create_builtin`, `exec_builtin`, + `create_dynamic`, `exec_dynamic`, `_fix_co_filename`). This is the root of + the `test_import/`, `test_importlib/`, `test_modulefinder`, and `test_runpy` + residuals. Closing it means making the Python finders the real dispatch path + (populate `sys.meta_path`, port the `_imp` C functions, vendor the full + `_bootstrap_external.py` with `PathFinder`) instead of the Go-side shim. + This is a subsystem port on the scale of its own spec. + +## Notable fixes + +- `func_getattro` now increfs `__dict__` attribute reads + (`Objects/funcobject.c` Py_XINCREF). A list stored on a function (mock keeps + its `patchings` list this way) was emptied by `list_dealloc` after the first + read, so a shared decorator silently stopped patching across test classes. + This fixed `test_zipimport.test_checked_hash_based_change_pyc` in the + cross-class run. +- `_testcapi.config_get` / `config_getint` / `config_names` ported over a + `PyConfig_Get` spec table (`Python/initconfig.c`), fixing the two + `testTraceback` errors. +- **P6 — interpreters.** `test__interpreters` / `test__interpchannels` are + PEP 554 subinterpreters. Match CPython's behaviour: if CPython skips on this + build, gopy skips; otherwise port the `_interpreters` surface the tests reach. + +## Checklist + +- [x] P1: `os.altsep` +- [x] P1: module-object `dir()` surface (`__cached__`, `__doc__`, `__loader__`, `__spec__`) for `test_pkg` — `test_pkg` green (8 tests) +- [x] P2: vendor `modulefinder` — `test_modulefinder` green (17 tests) +- [x] P2: vendor `pyclbr` — `test_pyclbr` green (6 tests) +- [x] P2: vendor `zipapp` — `test_zipapp` green (35 tests) +- [x] P2: `test_pkgutil` green (21 tests) +- [x] `test_zipimport` green (91 tests): `func_getattro` incref + `config_get` port +- [x] `test_module/` green (39 tests) +- [x] P3: frozen `__hello__`/`__phello__` + aliases, frozen override, `sys._stdlib_dir` — `test_frozen` green (3/3) +- [x] P4: `test_runpy` green (40 tests) — package-init exception path closed +- [x] P5: `test_import/` runs all 118 tests without the threaded crash — `os.fstat`/`os.isatty` no longer borrow the fd in a finalizer-bearing `os.File` +- [x] P5: `test_import/` green — ported the single-phase extension cache (`_testsinglephase*` variants, `m_size` kinds, the extensions cache + `m_copy` reload), the gh-123950 circular import (`_testsinglephase_circular` via the `_gcd_import` import hook), and per-subinterpreter `sys.modules` isolation so the PEP 489 compat gate fires on re-import. 4 skips remain: 3 platform-specific, plus `test_frozen_compat`, which needs a frozen `_frozen_importlib` (P7) +- [x] P5: `test_module_with_large_stack` no longer flakes with `bad file descriptor` — `os.NewFile`/`os.OpenFile` arm the close finalizer on the unexported inner `*os.file`, so `SetFinalizer(f, nil)` on the outer handle was a no-op. A leaked borrowed-fd wrapper (subprocess pipes) would close a reused descriptor mid-write. `objects.ClearOSFileFinalizer` reaches the inner pointer; the `io` and `_posixsubprocess` borrows route through it +- [x] P5: re-audit `test_module/` — green (39 tests) +- [x] `test_doctest` green (71 tests) and `test_zipimport_support` green (4 tests): pdb single-stepping + under doctest needed faithful opcode (INSTRUCTION) tracing. The bug was in the monitoring shadow walk: + `add_tools`/`remove_tools` bailed when a slot's live byte was already `INSTRUMENTED_LINE` or + `INSTRUMENTED_INSTRUCTION`, so installing the global `PY_RETURN` event the legacy `sys.settrace` bridge + needs never reached the real opcode parked in the line / per-instruction side table. When `pdb`'s + `step` toggled `f_trace_opcodes` off, the slot restored to the plain opcode and the return event was + lost, so the debugger jumped back into `doctest.__run` instead of stopping at the function's + `--Return--`. Ported `instrument()` and `de_instrument()` (Python/instrumentation.c) to walk the live + byte through both side tables to the location CPython tracks as `opcode_ptr` and rewrite the + (de)instrumented opcode there. The `module/` dir, the directory, and the ZIP archive each run as their + own `__main__` via the new `pymain_get_importer` path in `cmd/gopy`. +- [x] P5: `test_importlib/` residuals — 1346 tests run, 0 failures + 0 errors. `test_all_locks` + passes: the collector grew a `tp_clear` slot that runs from `delete_garbage` (the cyclic-GC path, + after the collector has proven unreachability) instead of the eager refcount-zero dealloc path, so an + instance `__dict__` that pins a `_ModuleLock` is cleared at GC time and `_bootstrap._module_locks` + drains to zero. The eager path deliberately no longer clears, so an object the VM under-counts to zero + while still live is never cleared out from under its users. `test_incomplete_multi_phase_init_module` + passes once the `_testmultiphase` incomplete-init path is wired. `test_circular_imports` passes by + splitting the import entry points: the VM `IMPORT_NAME` opcode keeps the refcount-proven + `_frozen_importlib.__import__` delegate route (it applies `DECREF_INPUTS` to the module it pushes), + with the `import_ensure_initialized` still-initializing fast path prepended so a concurrent circular + import waits via `_bootstrap._lock_unlock_module` (which catches `_DeadlockError`) instead of dying on + an uncaught `_DeadlockError` inside `_find_and_load`'s `_ModuleLockManager`. The builtin `__import__` + keeps the full C-faithful `PyImport_ImportModuleLevelObject` body (`_gcd_import` + the dotted-head + `KeyError` for gh-134100), which a shared function could not satisfy without breaking one side or the + other. +- [x] P5: `test_zipimport.testZip64LargeFile` runs under the bare-file harness (`use_resources is None` + enables `largefile`). The reconstruction stitches the `>4 GiB` sparse zip back from + `zipimport_data/sparse-zip64-c0-*.part`; CPython ships three parts (offsets `0`, `0x1_0000_0000`, + `0x2_0000_0000`) and `test/cpython/zipimport_data/` carries all three, so the central directory near + the 8 GiB mark is present and the file parses. `testAFakeZlib` self-skips with `'zlib is a builtin + module'`: gopy statically links zlib, which is exactly the static-zlib build configuration CPython's + own comment says to skip on, so the 4-vs-2 skip delta against a dynamic-zlib CPython is faithful, not + a divergence. +- [x] P5: cyclic collector no longer reclaims `_frozen_importlib._blocking_on` + (`'_WeakValueDictionary' object has no attribute 'data'`) under `testZip64`'s heap churn. `sys.modules` + is held through a Go pointer the refcount pass cannot see, so `pin_roots` floated only the direct + module entries and trusted `move_unreachable` to resurrect the rest. gopy containers do not incref what + they store (instance `__dict__` among them), so `subtract_refs` over-decrements an interior node on the + `module -> module __dict__ -> _WeakValueDictionary -> instance __dict__` chain and a partition order that + fails to resurrect every hop drops a still-live object. `markReachableClosure` now walks the whole + strongly-reachable closure from the static roots and floats each candidate to `refs >= 1`, recursing + only through candidates so a young-generation collection stays as cheap as before. +- Note: `test_namespace_pkgs.SeparatedNamespacePackagesCreatedWhileRunning.test_invalidate_caches` and + `LoaderTests.test_path_indexable` fail when the file is run standalone (`gopy test_namespace_pkgs.py`), + but CPython 3.14 fails identically standalone — `PathFinder.invalidate_caches` does + `from importlib.metadata import MetadataPathFinder`, and the test's `sys.path` replacement strips the + stdlib, so the first-time `import json` under that restricted path raises `ModuleNotFoundError`. Under + the canonical package run (`python -m test test_importlib`) an earlier submodule imports + `importlib.metadata` while the path is unrestricted, so it stays cached and both tests pass. This is a + run-mode artifact, not a gopy defect; gopy matches CPython behavior in both modes. +- Note: `type_new` copies the class body into a transient dict, drains it onto the type, then drops the + copy. That copy captures every class method, whose `__globals__` pins the defining module dict, so the + drop has to release synchronously the way `dict_dealloc` does. `dropTransientDict` decrefs the copy and, + if that takes it to zero, clears its contents and untracks it on the spot. A plain `Decref` left it at + refcount zero but still tracked (gopy keeps refcount-zero non-finalizable containers tracked for the + weakref pass), so the next collection counted it as an extra cycle member and the `module/gc` unit tests + (`TestUserDelFiresDuringCycleCollect`) reported one too many reclaims. +- Note: `test_importlib.frozen.test_finder` compares the frozen loader's `filename` against + `os.path.join(STDLIB_DIR, '__hello__.py')`, where `STDLIB_DIR` is derived from where `test/support` + lives. gopy reports the live on-disk frozen path, matching CPython (which freezes `__hello__` with its + `Lib/__hello__.py` filename). The two agree when the suite runs in its natural location under the + stdlib (12/12 green); they only diverge if the corpus is relocated, which moves `STDLIB_DIR` away from + the real stdlib. Run-mode artifact, not a gopy defect. +- [x] P5: the regrtest runner drives the three directory suites the way CPython's regrtest does, with + `gopy -m unittest test.`, instead of looking for a non-existent `/.py` entry point. + The command runs from the corpus directory so the repo-root `module/` Go-port tree does not shadow + stdlib imports on `sys.path[0]` (otherwise `find_spec('module.name')` resolves `module/` as a PEP 420 + namespace package and the two `test_find_submodule_in_module` rows stop raising `ModuleNotFoundError`). + `TestModulesImportsPanelPackages` pins all three: `test_import` 118/118, `test_module` 39/39, + `test_importlib` 1346/1346. +- [ ] P7: live importlib finders on `sys.meta_path` + `_imp` C functions (architectural) +- [ ] P6: `test__interpreters` / `test__interpchannels` parity with CPython skip/run