Todo: 集成多平台 解决因SaiNiu线程抢占资源问题 本地提交测试环境打包 和 正式打包脚本与正式环境打包bat 提交Python32环境包 改进多日志文件生成情况修改打包日志细节

This commit is contained in:
2025-09-18 15:52:03 +08:00
parent 8b9fc925fa
commit 7cfc0c22b7
7608 changed files with 2424791 additions and 25 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,977 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Find external dependencies of binary libraries.
"""
import ctypes.util
import functools
import os
import pathlib
import re
import sys
import sysconfig
import subprocess
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import dylib, utils
from PyInstaller.utils.win32 import winutils
# The macOS-specific helpers are imported only when running on macOS.
if compat.is_darwin:
    import PyInstaller.utils.osx as osxutils

logger = logging.getLogger(__name__)

# Machine type of the python interpreter executable. Determined only on Windows, where it is compared against the
# machine type of candidate DLLs during library path resolution; stays None on all other platforms.
_exe_machine_type = winutils.get_pe_file_machine_type(compat.python_executable) if compat.is_win else None
#- High-level binary dependency analysis
def _get_paths_for_parent_directory_preservation():
    """
    Return list of paths that serve as prefixes for parent-directory preservation of collected binaries and/or
    shared libraries. If a binary is collected from a location that starts with a path from this list, the relative
    directory structure is preserved within the frozen application bundle; otherwise, the binary is collected to the
    frozen application's top-level directory.

    Returns:
        List of `pathlib.Path` objects (existing directories only), sorted by decreasing number of path components.
    """

    # Use only site-packages paths. We have no control over contents of `sys.path`, so using all paths from that may
    # lead to unintended behavior in corner cases. For example, if `sys.path` contained the drive root (see #7028),
    # all paths that do not match some other sub-path rooted in that drive will end up recognized as relative to the
    # drive root. In such case, any DLL collected from `c:\Windows\system32` will be collected into `Windows\system32`
    # sub-directory; ucrt DLLs collected from MSVC or Windows SDK installed in `c:\Program Files\...` will end up
    # collected into `Program Files\...` subdirectory; etc.
    #
    # On the other hand, the DLL parent directory preservation is primarily aimed at packages installed via PyPI
    # wheels, which are typically installed into site-packages. Therefore, limiting the directory preservation for
    # shared libraries collected from site-packages should do the trick, and should be reasonably safe.
    import site

    # Discard empty entries *before* computing the resolved variants below: `pathlib.Path('').resolve()` evaluates to
    # the current working directory, which would otherwise end up in the returned list.
    orig_paths = [
        pathlib.Path(path) for path in (*site.getsitepackages(), site.getusersitepackages()) if path
    ]

    # Explicitly excluded paths. `site.getsitepackages` seems to include `sys.prefix`, which we need to exclude, to
    # avoid issues with DLLs in its sub-directories. We need both resolved and unresolved variant to handle cases
    # where `base_prefix` itself is a symbolic link (e.g., `scoop`-installed python on Windows, see #8023).
    excluded_paths = {
        pathlib.Path(sys.base_prefix),
        pathlib.Path(sys.base_prefix).resolve(),
        pathlib.Path(sys.prefix),
        pathlib.Path(sys.prefix).resolve(),
    }

    # For each path in orig_paths, append a resolved variant. This helps with linux venv where we need to consider
    # both `venv/lib/python3.11/site-packages` and `venv/lib64/python3.11/site-packages` and `lib64` is a symlink
    # to `lib`.
    orig_paths += [path.resolve() for path in orig_paths]

    # Keep only existing directories (this filters out, e.g., /path/to/python3x.zip and non-existent paths) that are
    # not explicitly excluded. Using a set removes the duplicates introduced by the resolved variants.
    paths = {path for path in orig_paths if path.is_dir() and path not in excluded_paths}

    # Sort by length (in term of path components) to ensure match against the longest common prefix (for example, match
    # /path/to/venv/lib/site-packages instead of /path/to/venv when both paths are in site paths).
    return sorted(paths, key=lambda x: len(x.parents), reverse=True)
def _select_destination_directory(src_filename, parent_dir_preservation_paths):
    """
    Choose the destination (relative path within the frozen application) for the given source file.

    Args:
        src_filename:
            Path object of the file being collected.
        parent_dir_preservation_paths:
            Prefix paths for which the parent directory structure is to be preserved; the first prefix that is an
            ancestor of `src_filename` wins.

    Returns:
        Path of `src_filename` relative to the first matching prefix, or just its base name (a string) when none
        of the prefixes is an ancestor of the file.
    """
    ancestors = src_filename.parents
    matching_prefix = next(
        (prefix for prefix in parent_dir_preservation_paths if prefix in ancestors),
        None,
    )
    if matching_prefix is None:
        # No prefix applies; collect into top-level directory.
        return src_filename.name
    # Collect into corresponding sub-directory.
    return src_filename.relative_to(matching_prefix)
def binary_dependency_analysis(binaries, search_paths=None, symlink_suppression_patterns=None):
    """
    Perform binary dependency analysis on the given TOC list of collected binaries, by recursively scanning each binary
    for linked dependencies (shared library imports). Returns new TOC list that contains both original entries and their
    binary dependencies.

    Additional search paths for dependencies' full path resolution may be supplied via optional argument.

    Args:
        binaries:
            List of `(dest_name, src_name, typecode)` TOC tuples to analyze. The list itself is not modified.
        search_paths:
            Optional list of directories, passed on to `get_imports` as fall-back search locations.
        symlink_suppression_patterns:
            Optional iterable of path patterns. A dependency whose source path matches any of them (via
            `pathlib.PurePath.match`) does not receive a symbolic link in the top-level application directory.
            None is treated as "no patterns".

    Returns:
        New list of TOC tuples: the input entries followed by discovered 'BINARY' (and, on non-Windows, 'SYMLINK')
        entries.
    """

    # The argument is optional; normalize it once, so that the default (None) does not blow up when we iterate over
    # the patterns below. Materializing into a list also allows single-pass iterables to be checked repeatedly.
    symlink_suppression_patterns = list(symlink_suppression_patterns or [])

    # Get all path prefixes for binaries' parent-directory preservation. For binaries collected from packages in (for
    # example) site-packages directory, we should try to preserve the parent directory structure.
    parent_dir_preservation_paths = _get_paths_for_parent_directory_preservation()

    # Keep track of processed binaries and processed dependencies.
    processed_binaries = set()
    processed_dependencies = set()

    # Keep track of unresolved dependencies, in order to defer the missing-library warnings until after everything has
    # been processed. This allows us to suppress warnings for dependencies that end up being collected anyway; for
    # details, see the end of this function.
    missing_dependencies = []

    # Populate output TOC with input binaries - this also serves as TODO list, as we iterate over it while appending
    # new entries at the end.
    output_toc = binaries[:]
    for dest_name, src_name, typecode in output_toc:
        # Do not process symbolic links (already present in input TOC list, or added during analysis below).
        if typecode == 'SYMLINK':
            continue

        # Keep track of processed binaries, to avoid unnecessarily repeating analysis of the same file. Use pathlib.Path
        # to avoid having to worry about case normalization.
        src_path = pathlib.Path(src_name)
        if src_path in processed_binaries:
            continue
        processed_binaries.add(src_path)

        logger.debug("Analyzing binary %r", src_name)

        # Analyze imports (linked dependencies)
        for dep_name, dep_src_path in get_imports(src_name, search_paths):
            logger.debug("Processing dependency, name: %r, resolved path: %r", dep_name, dep_src_path)

            # Skip unresolved dependencies. Defer the missing-library warnings until after binary dependency analysis
            # is complete.
            if not dep_src_path:
                missing_dependencies.append((dep_name, src_name))
                continue

            # Compare resolved dependency against global inclusion/exclusion rules.
            if not dylib.include_library(dep_src_path):
                logger.debug("Skipping dependency %r due to global exclusion rules.", dep_src_path)
                continue

            dep_src_path = pathlib.Path(dep_src_path)  # Turn into pathlib.Path for subsequent processing

            # Avoid processing this dependency if we have already processed it.
            if dep_src_path in processed_dependencies:
                logger.debug("Skipping dependency %r due to prior processing.", str(dep_src_path))
                continue
            processed_dependencies.add(dep_src_path)

            # Try to preserve parent directory structure, if applicable.
            # NOTE: do not resolve the source path, because on macOS and linux, it may be a versioned .so (e.g.,
            # libsomething.so.1, pointing at libsomething.so.1.2.3), and we need to collect it under original name!
            dep_dest_path = _select_destination_directory(dep_src_path, parent_dir_preservation_paths)
            dep_dest_path = pathlib.PurePath(dep_dest_path)  # Might be a str() if it is just a basename...

            # If we are collecting library into top-level directory on macOS, check whether it comes from a
            # .framework bundle. If it does, re-create the .framework bundle in the top-level directory
            # instead.
            if compat.is_darwin and dep_dest_path.parent == pathlib.PurePath('.'):
                if osxutils.is_framework_bundle_lib(dep_src_path):
                    # dep_src_path is parent_path/Name.framework/Versions/Current/Name
                    framework_parent_path = dep_src_path.parent.parent.parent.parent
                    dep_dest_path = pathlib.PurePath(dep_src_path.relative_to(framework_parent_path))

            logger.debug("Collecting dependency %r as %r.", str(dep_src_path), str(dep_dest_path))
            output_toc.append((str(dep_dest_path), str(dep_src_path), 'BINARY'))

            # On non-Windows, if we are not collecting the binary into application's top-level directory ('.'),
            # add a symbolic link from top-level directory to the actual location. This is to accommodate
            # LD_LIBRARY_PATH being set to the top-level application directory on linux (although library search
            # should be mostly done via rpaths, so this might be redundant) and to accommodate library path
            # rewriting on macOS, which assumes that the library was collected into top-level directory.
            if compat.is_win:
                # We do not use symlinks on Windows.
                pass
            elif dep_dest_path.parent == pathlib.PurePath('.'):
                # The shared library itself is being collected into top-level application directory.
                pass
            elif any(dep_src_path.match(pattern) for pattern in symlink_suppression_patterns):
                # Honor symlink suppression patterns specified by hooks.
                logger.debug(
                    "Skipping symbolic link from %r to top-level application directory due to source path matching one "
                    "of symlink suppression path patterns.", str(dep_dest_path)
                )
            else:
                logger.debug("Adding symbolic link from %r to top-level application directory.", str(dep_dest_path))
                output_toc.append((str(dep_dest_path.name), str(dep_dest_path), 'SYMLINK'))

    # Display warnings about missing dependencies
    seen_binaries = {
        os.path.normcase(os.path.basename(src_name))
        for dest_name, src_name, typecode in output_toc
        if typecode != 'SYMLINK'
    }
    for dependency_name, referring_binary in missing_dependencies:
        # Ignore libraries that we would not collect in the first place.
        if not dylib.include_library(dependency_name):
            continue
        # Apply global warning suppression rules.
        if not dylib.warn_missing_lib(dependency_name):
            continue
        # If the binary with a matching basename happens to be among the discovered binaries, suppress the message as
        # well. This might happen either because the library was collected by some other mechanism (for example, via
        # hook, or supplied by the user), or because it was discovered during the analysis of another binary (which,
        # for example, had properly set run-paths on Linux/macOS or was located next to that other analyzed binary on
        # Windows).
        if os.path.normcase(os.path.basename(dependency_name)) in seen_binaries:
            continue
        logger.warning("Library not found: could not resolve %r, dependency of %r.", dependency_name, referring_binary)

    return output_toc
#- Low-level import analysis
def get_imports(filename, search_paths=None):
    """
    Analyze the given binary file (shared library or executable), and obtain the list of shared libraries it imports
    (i.e., link-time dependencies).

    Returns set of tuples (name, fullpath). The name component is the referenced name, and on macOS, may not be just
    a base name. If the library's full path cannot be resolved, fullpath element is None.

    Additional list of search paths may be specified via `search_paths`, to be used as a fall-back when the
    platform-specific resolution mechanism fails to resolve a library fullpath.
    """
    if compat.is_win:
        # Manifest files are not binaries and have no imports. Return an empty *set*, so that the return type is the
        # same as on all other code paths.
        if str(filename).lower().endswith(".manifest"):
            return set()
        return _get_imports_pefile(filename, search_paths)
    elif compat.is_darwin:
        return _get_imports_macholib(filename, search_paths)
    else:
        return _get_imports_ldd(filename, search_paths)
def _get_imports_pefile(filename, search_paths):
    """
    Windows-specific helper for `get_imports`, which uses the `pefile` library to walk through PE header.

    Args:
        filename:
            Path to the PE file (DLL or executable) to analyze.
        search_paths:
            Optional sequence of additional directories in which to look for the referenced DLLs (may be None).

    Returns:
        Set of `(name, fullpath)` tuples, where fullpath is whatever `resolve_library_path` returns for the name.
    """
    import pefile

    output = set()

    # By default, pefile library parses all PE information. We are only interested in the list of dependent dlls.
    # Performance is improved by reading only needed information. https://code.google.com/p/pefile/wiki/UsageExamples
    pe = pefile.PE(filename, fast_load=True)
    try:
        pe.parse_data_directories(
            directories=[
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            ],
            forwarded_exports_only=True,
            import_dllnames_only=True,
        )

        # If a library has no binary dependencies, pe.DIRECTORY_ENTRY_IMPORT does not exist.
        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
            output.add(entry.dll.decode('utf-8'))

        # We must also read the exports table to find forwarded symbols:
        # http://blogs.msdn.com/b/oldnewthing/archive/2006/07/19/671238.aspx
        exported_symbols = getattr(pe, 'DIRECTORY_ENTRY_EXPORT', None)
        if exported_symbols:
            for symbol in exported_symbols.symbols:
                if symbol.forwarder is not None:
                    # symbol.forwarder is a bytes object. Convert it to a string.
                    forwarder = symbol.forwarder.decode('utf-8')
                    # symbol.forwarder is for example 'KERNEL32.EnterCriticalSection'
                    dll = forwarder.split('.')[0]
                    output.add(dll + ".dll")
    finally:
        # Always release the PE object, even if parsing of the data directories fails part-way through.
        pe.close()

    # Attempt to resolve full paths to referenced DLLs. Always add the input binary's parent directory to the search
    # paths. The caller-supplied paths are copied into a list, so that any sequence type (or None) is accepted.
    search_paths = [os.path.dirname(filename)] + list(search_paths or [])
    return {(lib, resolve_library_path(lib, search_paths)) for lib in output}
def _get_imports_ldd(filename, search_paths):
    """
    Helper for `get_imports` on Linux and other POSIX-like platforms (macOS excluded): runs `ldd` on the given binary
    and parses its output.

    Args:
        filename:
            Path to the shared library or executable to analyze.
        search_paths:
            Optional list of directories that are searched when `ldd` does not provide a usable library path.

    Returns:
        Set of `(name, fullpath)` tuples; fullpath is None for libraries that could not be resolved.
    """
    # Output of ldd varies between platforms, so the line-matching expression is platform-specific.
    if compat.is_aix:
        # Match libs of the form
        #   'archivelib.a(objectmember.so/.o)'
        # or
        #   'sharedlib.so'
        # Will not match the fake lib '/unix'
        line_regex = r"^\s*(((?P<libarchive>(.*\.a))(?P<objectmember>\(.*\)))|((?P<libshared>(.*\.so))))$"
    elif compat.is_hpux or compat.is_solar:
        # Match libs of the form
        #   'sharedlib.so => full-path-to-lib
        # e.g.
        #   'libpython2.7.so => /usr/local/lib/hpux32/libpython2.7.so' (HP-UX)
        #   'libpython2.7.so.1.0 => /usr/local/lib/libpython2.7.so.1.0' (Solaris)
        # On Solaris, this will not match the platform specific libs starting with '/platform'
        line_regex = r"^\s+(.*)\s+=>\s+(.*)$"
    elif compat.is_linux:
        # Match libs of the form
        #   libpython3.13.so.1.0 => /home/brenainn/.pyenv/versions/3.13.0/lib/libpython3.13.so.1.0 (0x00007a9e15800000)
        # or
        #   /tmp/python/install/bin/../lib/libpython3.13.so.1.0 (0x00007b9489c82000)
        line_regex = r"^\s*(?:(.*?)\s+=>\s+)?(.*?)\s+\(.*\)"
    else:
        line_regex = r"\s*(.*?)\s+=>\s+(.*?)\s+\(.*\)"
    ldd_line = re.compile(line_regex)

    # Resolve symlinks since GNU ldd contains a bug in processing a symlink to a binary
    # using $ORIGIN: https://sourceware.org/bugzilla/show_bug.cgi?id=25263
    ldd_result = subprocess.run(
        ['ldd', os.path.realpath(filename)],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding='utf-8',
    )

    def _is_ignored_stderr_line(text):
        # Blank lines carry no information.
        if not text:
            return True
        # Python extensions (including stdlib ones) are not linked against python.so but rely on Python's symbols
        # having already been loaded into symbol space at runtime. musl's ldd issues a series of harmless warnings to
        # stderr telling us that those symbols are unfindable.
        if text.startswith("Error relocating ") and text.endswith(" symbol not found"):
            return True
        # Shared libraries shipped in PyPI wheels often lack the executable bits, which makes ldd complain.
        if text.startswith("ldd: warning: you do not have execution permission for "):
            return True
        # Emitted when the file is not a binary at all, or is a static binary.
        return "not a dynamic executable" in text

    # Propagate whatever remains after the known-harmless messages have been filtered out.
    ldd_warnings = [text for text in ldd_result.stderr.splitlines() if not _is_ignored_stderr_line(text)]
    if ldd_warnings:
        logger.warning("ldd warnings for %r:\n%s", filename, "\n".join(ldd_warnings))

    output = set()
    for line in ldd_result.stdout.splitlines():
        match = ldd_line.search(line)
        if match:
            if compat.is_aix:
                archive = match.group('libarchive')
                if archive:
                    # We matched an archive lib with a request for a particular embedded shared object.
                    #   'archivelib.a(objectmember.so/.o)'
                    lib = archive
                    name = os.path.basename(lib) + match.group('objectmember')
                else:
                    # We matched a stand-alone shared library.
                    #   'sharedlib.so'
                    lib = match.group('libshared')
                    name = os.path.basename(lib)
            else:
                name, lib = match.group(1), match.group(2)
                if not compat.is_hpux:
                    # Lines without the "name =>" part provide only the path; derive the name from it.
                    name = name or os.path.basename(lib)

            # Skip all ld variants listed https://sourceware.org/glibc/wiki/ABIList
            # plus musl's ld-musl-*.so.*.
            if compat.is_linux and re.fullmatch(
                r"ld(64)?(-linux|-musl)?(-.+)?\.so(\..+)?", os.path.basename(lib)
            ):
                continue

            # linux-gate is a fake library which does not exist and should be ignored. See also:
            # http://www.trilithium.com/johan/2005/08/linux-gate/
            if name.startswith(('linux-gate', 'linux-vdso')):
                continue

            # Exclude Windows system library on cygwin.
            if compat.is_cygwin and lib.lower().startswith('/cygdrive/c/windows/system'):
                continue

            # Reset library path if it does not exist
            if not os.path.exists(lib):
                lib = None
        elif line.endswith("not found"):
            # On glibc-based linux distributions, missing libraries are marked with name.so => not found
            if line.count('=>') != 1:
                continue
            name = line.split('=>')[0].strip()
            lib = None
        else:
            # TODO: should we warn about unprocessed lines?
            continue

        # Fall back to searching the supplied search paths, if any.
        if not lib:
            lib = _resolve_library_path_in_search_paths(
                os.path.basename(name),  # Search for basename of the referenced name.
                search_paths,
            )

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if lib:
            lib = os.path.normpath(lib)

        # Return referenced name as-is instead of computing a basename, to provide additional context when library
        # cannot be resolved.
        output.add((name, lib))

    return output
def _get_imports_macholib(filename, search_paths):
    """
    macOS-specific helper for `get_imports`, which uses `macholib` to analyze library load commands in Mach-O headers.

    Args:
        filename:
            Path to the Mach-O binary to analyze.
        search_paths:
            Optional list of directories that are searched when a referenced library cannot be resolved otherwise.

    Returns:
        Set of `(referenced_name, fullpath)` tuples; the referenced name is returned as found in the binary (it may
        contain `@rpath`, `@loader_path` or `@executable_path`), and fullpath is None for unresolved libraries.
    """
    from macholib.dyld import dyld_find
    from macholib.mach_o import LC_RPATH
    from macholib.MachO import MachO

    try:
        from macholib.dyld import _dyld_shared_cache_contains_path
    except ImportError:
        _dyld_shared_cache_contains_path = None

    output = set()

    # Parent directory of the input binary and parent directory of python executable, used to substitute @loader_path
    # and @executable_path. The macOS dylib loader (dyld) fully resolves the symbolic links when using @loader_path
    # and @executable_path references, so we need to do the same using `os.path.realpath`.
    bin_path = os.path.dirname(os.path.realpath(filename))
    python_bin = os.path.realpath(sys.executable)
    python_bin_path = os.path.dirname(python_bin)

    def _get_referenced_libs(m):
        # Collect referenced libraries from MachO object.
        referenced_libs = set()
        for header in m.headers:
            for idx, name, lib in header.walkRelocatables():
                referenced_libs.add(lib)
        return referenced_libs

    def _get_run_paths(m):
        # Find LC_RPATH commands to collect rpaths from MachO object.
        # macholib does not handle @rpath, so we need to handle run paths ourselves.
        run_paths = []
        for header in m.headers:
            for command in header.commands:
                # A command is a tuple like:
                #   (<macholib.mach_o.load_command object at 0x>,
                #    <macholib.mach_o.rpath_command object at 0x>,
                #    '../lib\x00\x00')
                cmd_type = command[0].cmd
                if cmd_type == LC_RPATH:
                    rpath = command[2].decode('utf-8')
                    # Remove trailing '\x00' characters. E.g., '../lib\x00\x00'
                    rpath = rpath.rstrip('\x00')
                    # If run path starts with @, ensure it starts with either @loader_path or @executable_path.
                    # We cannot process anything else.
                    if rpath.startswith("@") and not rpath.startswith(("@executable_path", "@loader_path")):
                        logger.warning("Unsupported rpath format %r found in binary %r - ignoring...", rpath, filename)
                        continue
                    run_paths.append(rpath)
        return run_paths

    @functools.lru_cache
    def get_run_paths_and_referenced_libs(filename):
        # Walk through Mach-O headers, and collect all referenced libraries and run paths.
        m = MachO(filename)
        return _get_referenced_libs(m), _get_run_paths(m)

    @functools.lru_cache
    def get_run_paths(filename):
        # Walk through Mach-O headers, and collect only run paths.
        return _get_run_paths(MachO(filename))

    # Collect referenced libraries and run paths from the input binary.
    referenced_libs, run_paths = get_run_paths_and_referenced_libs(filename)

    # On macOS, run paths (rpaths) are inherited from the executable that loads the given shared library (or from the
    # shared library that loads the given shared library). This means that shared libraries and python binary extensions
    # can reference other shared libraries using @rpath without having set any run paths themselves.
    #
    # In order to simulate the run path inheritance that happens in unfrozen python programs, we need to augment the
    # run paths from the given binary with those set by the python interpreter executable (`sys.executable`). Anaconda
    # python, for example, sets the run path on the python executable to `@loader_path/../lib`, which allows python
    # extensions to reference shared libraries in the Anaconda environment's `lib` directory via only `@rpath`
    # (for example, the `_ssl` extension can reference the OpenSSL library as `@rpath/libssl.3.dylib`). In another
    # example, python executable has its run path set to the top-level directory of its .framework bundle; in this
    # case the `ssl` extension references the OpenSSL library as `@rpath/Versions/3.10/lib/libssl.1.1.dylib`.
    run_paths += get_run_paths(python_bin)

    # This fallback should be fully superseded by the above recovery of run paths from python executable; but for now,
    # keep it around in case of unforeseen corner cases.
    run_paths.append(os.path.join(compat.base_prefix, 'lib'))

    # De-duplicate run_paths while preserving their order.
    run_paths = list(dict.fromkeys(run_paths))

    def _resolve_using_path(lib):
        # Absolute paths should not be resolved; we should just check whether the library exists or not. This used to
        # be done using macholib's dyld_find() as well (as it properly handles system libraries that are hidden on
        # Big Sur and later), but it turns out that even if given an absolute path, it gives precedence to search paths
        # from DYLD_LIBRARY_PATH. This leads to confusing errors when directory in DYLD_LIBRARY_PATH contains a file
        # (shared library or data file) that happens to have the same name as a library from a system framework.
        if os.path.isabs(lib):
            if _dyld_shared_cache_contains_path is not None and _dyld_shared_cache_contains_path(lib):
                return lib
            if os.path.isfile(lib):
                return lib
            return None

        try:
            return dyld_find(lib)
        except ValueError:
            return None

    def _resolve_using_loader_path(lib, bin_path, python_bin_path):
        # Strictly speaking, @loader_path should be anchored to parent directory of analyzed binary (`bin_path`), while
        # @executable_path should be anchored to the parent directory of the process' executable. Typically, this would
        # be python executable (`python_bin_path`). Unless we are analyzing a collected 3rd party executable; in that
        # case, `bin_path` is correct option. So we first try resolving using `bin_path`, and then fall back to
        # `python_bin_path`. This does not account for transitive run paths of higher-order dependencies, but there is
        # only so much we can do here...
        #
        # NOTE: do not use macholib's `dyld_find`, because its fallback search locations might end up resolving wrong
        # instance of the library! For example, if our `bin_path` and `python_bin_path` are anchored in an Anaconda
        # python environment and the candidate library path does not exist (because we are calling this function when
        # trying to resolve @rpath with multiple candidate run paths), we do not want to fall back to eponymous library
        # that happens to be present in the Homebrew python environment...
        if lib.startswith('@loader_path/'):
            lib = lib[len('@loader_path/'):]
        elif lib.startswith('@executable_path/'):
            lib = lib[len('@executable_path/'):]

        # Try resolving with binary's path first...
        resolved_lib = _resolve_using_path(os.path.join(bin_path, lib))
        if resolved_lib is not None:
            return resolved_lib

        # ... and fall-back to resolving with python executable's path
        return _resolve_using_path(os.path.join(python_bin_path, lib))

    # Try to resolve full path of the referenced libraries.
    for referenced_lib in referenced_libs:
        resolved_lib = None

        # If path starts with @rpath, we have to handle it ourselves.
        if referenced_lib.startswith('@rpath'):
            lib = os.path.join(*referenced_lib.split(os.sep)[1:])  # Remove the @rpath/ prefix

            # Try all run paths.
            for run_path in run_paths:
                # Join the path.
                lib_path = os.path.join(run_path, lib)

                if lib_path.startswith(("@executable_path", "@loader_path")):
                    # Run path starts with @executable_path or @loader_path.
                    lib_path = _resolve_using_loader_path(lib_path, bin_path, python_bin_path)
                else:
                    # If run path was relative, anchor it to binary's location. The joined path must be assigned
                    # back; otherwise the relative path is passed on unanchored.
                    if not os.path.isabs(lib_path):
                        lib_path = os.path.join(bin_path, lib_path)
                    lib_path = _resolve_using_path(lib_path)

                if lib_path and os.path.exists(lib_path):
                    resolved_lib = lib_path
                    break
        else:
            if referenced_lib.startswith(("@executable_path", "@loader_path")):
                resolved_lib = _resolve_using_loader_path(referenced_lib, bin_path, python_bin_path)
            else:
                resolved_lib = _resolve_using_path(referenced_lib)

        # Fall back to searching the supplied search paths, if any.
        if not resolved_lib:
            resolved_lib = _resolve_library_path_in_search_paths(
                os.path.basename(referenced_lib),  # Search for basename of the referenced name.
                search_paths,
            )

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if resolved_lib:
            resolved_lib = os.path.normpath(resolved_lib)

        # Return referenced library name as-is instead of computing a basename. Full referenced name carries additional
        # information that might be useful for the caller to determine how to deal with unresolved library (e.g., ignore
        # unresolved libraries that are supposed to be located in system-wide directories).
        output.add((referenced_lib, resolved_lib))

    return output
#- Library full path resolution
def resolve_library_path(name, search_paths=None):
    """
    Resolve the given library name into full path of the library.

    An absolute path is returned unchanged, without any processing. Otherwise, the lookup combines the
    platform-specific mechanism with the optionally-provided list of search paths. Returns None if the library cannot
    be resolved.
    """
    # Absolute path: nothing to resolve.
    if os.path.isabs(name):
        return name

    if compat.is_unix:
        # The platform-specific helper takes precedence; the supplied search paths (if any) serve as fall-back.
        return _resolve_library_path_unix(name) or _resolve_library_path_in_search_paths(name, search_paths)

    if compat.is_win:
        # The caller-supplied search paths (if any) take precedence.
        from_search_paths = _resolve_library_path_in_search_paths(name, search_paths)
        if from_search_paths:
            return from_search_paths

        # Fall back to default Windows search paths, using the PATH environment variable (which should also include
        # the system paths, such as c:\windows and c:\windows\system32)
        path_entries = compat.getenv('PATH', '').split(os.pathsep)
        return _resolve_library_path_in_search_paths(name, [entry for entry in path_entries if entry])

    # Any other platform: defer to ctypes.
    return ctypes.util.find_library(name)
# Compatibility aliases for hooks from contributed hooks repository. All of these now point to the high-level
# `resolve_library_path`, so they take the same arguments and return the same value as that function.
findLibrary = resolve_library_path
findSystemLibrary = resolve_library_path
def _resolve_library_path_in_search_paths(name, search_paths=None):
    """
    Low-level helper that looks for the given library name in the given list of directories.

    Args:
        name:
            File name of the library to look for.
        search_paths:
            Directories to search, in order of precedence. May be None or empty.

    Returns:
        Normalized full path of the first suitable match, or None if there is none.
    """
    if not search_paths:
        return None

    for directory in search_paths:
        candidate = os.path.join(directory, name)
        if not os.path.isfile(candidate):
            continue

        # On Windows, ensure that architecture matches that of running python interpreter.
        if compat.is_win:
            try:
                candidate_machine_type = winutils.get_pe_file_machine_type(candidate)
            except Exception:
                # A search path might contain a DLL that we cannot analyze; for example, a stub file. Skip over.
                continue
            if candidate_machine_type != _exe_machine_type:
                continue

        return os.path.normpath(candidate)

    return None
def _resolve_library_path_unix(name):
    """
    UNIX-specific helper for resolving library path.

    Emulates the algorithm used by dlopen. `name` must include the prefix, e.g., ``libpython2.4.so``.

    The search order is: the platform's library-path environment variable, the ldconfig cache, and finally a list of
    known library directories.

    Returns:
        Full path to the library, or None if it could not be found.
    """
    assert compat.is_unix, "Current implementation for Unix only (Linux, Solaris, AIX, FreeBSD)"

    if name.endswith('.so') or '.so.' in name:
        # We have been given full library name that includes suffix. Use `_resolve_library_path_in_search_paths` to find
        # the exact match.
        lib_search_func = _resolve_library_path_in_search_paths
    else:
        # We have been given a library name without suffix. Use `_which_library` as search function, which will try to
        # find library with matching basename.
        lib_search_func = _which_library

    # Look in the LD_LIBRARY_PATH according to platform.
    if compat.is_aix:
        lp = compat.getenv('LIBPATH', '')
    elif compat.is_darwin:
        lp = compat.getenv('DYLD_LIBRARY_PATH', '')
    else:
        lp = compat.getenv('LD_LIBRARY_PATH', '')
    lib = lib_search_func(name, filter(None, lp.split(os.pathsep)))

    # Look in /etc/ld.so.cache
    # Solaris does not have /sbin/ldconfig. Just check if this file exists.
    if lib is None:
        utils.load_ldconfig_cache()
        lib = utils.LDCONFIG_CACHE.get(name)
        # The cache may refer to a file that is no longer there. Treat such an entry as a miss and continue with the
        # known library directories below, instead of failing with an AssertionError (or, with assertions disabled,
        # returning a path that does not exist).
        if lib and not os.path.isfile(lib):
            logger.debug("Ignoring ldconfig cache entry %r for %r: file does not exist.", lib, name)
            lib = None

    # Look in the known safe paths.
    if lib is None:
        # Architecture independent locations.
        paths = ['/lib', '/usr/lib']
        # Architecture dependent locations.
        if compat.architecture == '32bit':
            paths.extend(['/lib32', '/usr/lib32'])
        else:
            paths.extend(['/lib64', '/usr/lib64'])
        # Machine dependent locations.
        if compat.machine == 'intel':
            if compat.architecture == '32bit':
                paths.extend(['/usr/lib/i386-linux-gnu'])
            else:
                paths.extend(['/usr/lib/x86_64-linux-gnu'])

        # On Debian/Ubuntu /usr/bin/python is linked statically with libpython. Newer Debian/Ubuntu with multiarch
        # support puts the libpythonX.Y.so in paths like /usr/lib/i386-linux-gnu/. Try to query the arch-specific
        # sub-directory, if available.
        arch_subdir = sysconfig.get_config_var('multiarchsubdir')
        if arch_subdir:
            arch_subdir = os.path.basename(arch_subdir)
            paths.append(os.path.join('/usr/lib', arch_subdir))
        else:
            logger.debug('Multiarch directory not detected.')

        # Termux (a Ubuntu like subsystem for Android) has an additional libraries directory.
        if os.path.isdir('/data/data/com.termux/files/usr/lib'):
            paths.append('/data/data/com.termux/files/usr/lib')

        if compat.is_aix:
            paths.append('/opt/freeware/lib')
        elif compat.is_hpux:
            if compat.architecture == '32bit':
                paths.append('/usr/local/lib/hpux32')
            else:
                paths.append('/usr/local/lib/hpux64')
        elif compat.is_freebsd or compat.is_openbsd:
            paths.append('/usr/local/lib')
        lib = lib_search_func(name, paths)

    return lib
def _which_library(name, dirs):
    """
    Search for a shared library in a list of directories.

    Args:
        name:
            The library name including the `lib` prefix but excluding any `.so` suffix.
        dirs:
            An iterable of folders to search in.

    Returns:
        The path to the library if found or None otherwise.
    """
    matcher = _library_matcher(name)
    # Consider only entries that are actual directories; `os.listdir` raises on an existing path that is a regular
    # file, and such an entry cannot contain a library anyway.
    for path in filter(os.path.isdir, dirs):
        for _path in os.listdir(path):
            if matcher(_path):
                return os.path.join(path, _path)
    return None
def _library_matcher(name):
    """
    Create a callable that matches libraries if **name** is a valid library prefix for input library full names.

    The returned callable takes a file name and returns a match object if the file name consists of **name**,
    optionally followed by digits, followed by a dot (e.g., `libfoo` matches `libfoo.so` and `libfoo1.so.2`);
    otherwise it returns None.
    """
    # The name is literal text, not a pattern: escape it, so that characters that are special in regular expressions
    # (for example, the pluses in `libstdc++` or the dot in `libpython3.10`) are matched verbatim.
    return re.compile(re.escape(name) + r"[0-9]*\.").match
#- Python shared library search
def get_python_library_path():
    """
    Locate the Python shared library that belongs to the running interpreter.

    Returns the full path to the Python dynamic library, or None when it cannot be found.

    PyInstaller has to collect this library so that the bootloader can load it, import the Python C API symbols, and
    use them to set up the embedded interpreter.

    The library name is typically fixed (`python3.X.dll` on Windows, `libpython3.X.so` on Unix systems,
    `libpython3.X.dylib` on macOS shared-library builds, and `Python.framework/Python` for framework builds), and its
    location can usually be inferred from the interpreter executable when that executable is dynamically linked
    against it.

    Some setups need extra handling; for example, some Debian-based Linux distributions link the interpreter
    executable statically against the Python library while also shipping a shared-library variant for external users.
    """
    def _find_lib_in_libdirs(*libdirs):
        # Directories are tried in the given order; within each directory, every known library name is tried.
        candidates = (os.path.join(libdir, name) for libdir in libdirs for name in compat.PYDYLIB_NAMES)
        for candidate in candidates:
            if os.path.exists(candidate):
                # Resolve symbolic links so that the result is consistent with the linker-based search; e.g., on
                # POSIX systems the linker resolves unversioned names (python3.X.so) to versioned ones
                # (libpython3.X.so.1.0), because the former are symbolic links to the latter. See #6831.
                resolved = os.path.realpath(candidate)
                if os.path.exists(resolved):
                    return resolved
        return None

    # Microsoft App Store Python: check compat.base_prefix first. compat.python_executable resolves to the actual
    # python.exe, but that file carries a relative library reference that cannot be resolved properly here.
    if compat.is_ms_app_store:
        store_lib = _find_lib_in_libdirs(compat.base_prefix)
        if store_lib:
            return store_lib

    # Ask the interpreter executable which libraries it imports. This only works when the Python library is not
    # statically linked into the executable.
    imported_libraries = get_imports(compat.python_executable)  # (name, fullpath) tuples
    for _, lib_path in imported_libraries:
        # Unresolved imports have no path and are skipped.
        if lib_path is not None and os.path.normcase(os.path.basename(lib_path)) in compat.PYDYLIB_NAMES:
            # Python library found. Return the absolute path to it.
            return lib_path

    # Work around for Python venv having VERSION.dll rather than pythonXY.dll
    if compat.is_win:
        imported_names = {os.path.normcase(lib_name) for lib_name, _ in imported_libraries}
        if 'version.dll' in imported_names:
            pydll = 'python%d%d.dll' % sys.version_info[:2]
            return resolve_library_path(pydll, [os.path.dirname(compat.python_executable)])

    # Fall back to `sys.base_prefix` and its `lib` sub-directory. This covers installations for which the location
    # could not be inferred above; Anaconda Python, `uv` and `rye` Python, etc.
    prefix_lib = _find_lib_in_libdirs(compat.base_prefix, os.path.join(compat.base_prefix, 'lib'))
    if prefix_lib:
        return prefix_lib

    # On Unix-like systems, search the configured library locations as the very last resort. This primarily caters
    # to Debian-packaged Python; it must come last so that the shared library of a system-installed Python is not
    # collected when the current interpreter is really a different build of the same version (e.g., `uv` or `rye`).
    if compat.is_unix:
        for name in compat.PYDYLIB_NAMES:
            system_lib = resolve_library_path(name)
            if system_lib:
                return system_lib

    # Python library NOT found. Return None and let the caller deal with this.
    return None
#- Binary vs data (re)classification
def classify_binary_vs_data(filename):
    """
    Decide whether the given file is a BINARY or a DATA file, using the platform-specific classifier.

    Returns the string 'BINARY' or 'DATA', or None when the file cannot be classified (the path is not an existing
    regular file, a required tool is missing, or classification failed for another reason).
    """
    if os.path.isfile(filename):
        # Delegate to the implementation selected for the current platform.
        return _classify_binary_vs_data(filename)
    # Only existing regular files can be classified.
    return None
if compat.is_linux:

    def _classify_binary_vs_data(filename):
        # Cheap pre-filter: look at the ELF magic first, so that `objdump` is not spawned for every data file.
        try:
            with open(filename, 'rb') as stream:
                magic = stream.read(4)
        except Exception:
            return None

        if magic != b"\x7FELF":
            return "DATA"

        # The magic says "ELF"; let `objdump` confirm that the file is a valid ELF file. In the future, the
        # architecture could additionally be checked against the running platform.
        try:
            result = subprocess.run(
                ['objdump', '-a', filename],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.DEVNULL,
                encoding='utf8',
            )
        except Exception:
            return None  # Failed to run `objdump` or `objdump` unavailable.

        if result.returncode == 0:
            return 'BINARY'
        return 'DATA'

elif compat.is_win:

    def _classify_binary_vs_data(filename):
        import pefile

        # Cheap pre-filter: the MZ magic lets the majority of data files be classified without parsing.
        try:
            with open(filename, 'rb') as stream:
                magic = stream.read(2)
        except Exception:
            return None

        if magic != b"MZ":
            return "DATA"

        # The file counts as a binary only if `pefile` manages to open it.
        try:
            with pefile.PE(filename, fast_load=True):
                pass
        except pefile.PEFormatError:
            return 'DATA'
        except Exception:
            # Any other failure means that no classification could be made.
            return None
        return 'BINARY'

elif compat.is_darwin:

    def _classify_binary_vs_data(filename):
        # The file counts as a binary only if `macholib` manages to open it.
        import macholib.MachO

        try:
            macholib.MachO.MachO(filename)
        except Exception:
            # TODO: catch only `ValueError`?
            return 'DATA'
        return 'BINARY'

else:

    def _classify_binary_vs_data(filename):
        # Classification not implemented for the platform.
        return None

View File

@@ -0,0 +1,346 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2021-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Tools for searching bytecode for key statements that indicate the need for additional resources, such as data files
and package metadata.
By *bytecode* I mean the ``code`` object given by ``compile()``, accessible from the ``__code__`` attribute of any
non-builtin function or, in PyInstallerLand, the ``PyiModuleGraph.node("some.module").code`` attribute. The best
guide for bytecode format I have found is the disassembler reference: https://docs.python.org/3/library/dis.html
This parser implementation aims to combine the flexibility and speed of regex with the clarity of the output of
``dis.dis(code)``. It has not achieved the 2nd, but C'est la vie...
The biggest clarity killer here is the ``EXTENDED_ARG`` opcode which can appear almost anywhere and therefore needs
to be tiptoed around at every step. If this code needs to expand significantly, I would recommend an upgrade to a
regex-based grammar parsing library such as Reparse. This way, little steps like unpacking ``EXTENDED_ARG`` can be
defined once then simply referenced, forming a nice hierarchy rather than being copied everywhere they are needed.
"""
import dis
import re
from types import CodeType
from typing import Pattern
from PyInstaller import compat
# Mapping of opcode name to opcode number.
# Python 3.11 introduced specialized opcodes that are missing from opcode.opmap (and the equivalent dis.opmap); dis
# keeps a private map of all opcodes, _all_opmap, which is preferred whenever it exists.
try:
    opmap = dis._all_opmap
except AttributeError:
    opmap = dis.opmap
def _instruction_to_regex(x: str):
    """
    Translate a human-readable opcode name into its single opcode byte, escaped for use inside a regex.
    """
    opcode = opmap[x]
    raw_byte = bytes((opcode,))
    return re.escape(raw_byte)
def bytecode_regex(pattern: bytes, flags=re.VERBOSE | re.DOTALL):
    """
    Compile a regex for matching Python bytecode.

    This is a very thin wrapper around :func:`re.compile` with two conveniences:

    * Every opcode name wrapped in backticks is replaced by the corresponding (regex-escaped) opcode byte.
    * VERBOSE mode is on by default, so that patterns may contain whitespace and comments.

    The goal is to let patterns resemble the output of :func:`dis.dis`, which is far more readable than raw byte
    strings.
    """
    assert isinstance(pattern, bytes)

    def _substitute(found):
        # Group 1 is the opcode name between the backticks.
        return _instruction_to_regex(found[1].decode())

    expanded = re.sub(rb"`(\w+)`", _substitute, pattern)
    return re.compile(expanded, flags=flags)
def finditer(pattern: Pattern, string: bytes):
    """
    Yield the matches of ``pattern.finditer(string)`` that begin on an even byte (i.e., ``match.start()`` is a
    multiple of 2).

    Matches that begin on an odd byte are false positives, where the argument byte of a bytecode pair was mistaken
    for an opcode; they are dropped. The input is passed through ``_cleanup_bytecode_string()`` before scanning.
    """
    assert isinstance(string, bytes)
    cleaned = _cleanup_bytecode_string(string)

    scanner = pattern.finditer(cleaned)
    while scanner is not None:
        restart_at = None
        for hit in scanner:
            if hit.start() % 2:
                # The match starts on an odd byte, so it is a false positive. There is a slim chance that a genuine
                # match overlaps it; since finditer() never reports overlapping matches, that one would be lost.
                # Abandon this scan and start a new one at the next even byte.
                restart_at = hit.start() + 1
                break
            # The match starts on an OPCODE; pass it on.
            yield hit
        # Either the scan ran to completion (stop), or it was abandoned (rescan from the recorded offset).
        scanner = None if restart_at is None else pattern.finditer(cleaned, restart_at)
# Opcodes involved in function calls with constant arguments. The differences between python versions are handled by
# variables below, which are then used to construct the _call_function_bytecode regex.
# NOTE1: the _OPCODES_* entries are typically used in (non-capturing) groups that match the opcode plus an arbitrary
# argument. But because the entries themselves may contain more than one opcode (with OR operator between them), they
# themselves need to be enclosed in another (non-capturing) group. E.g., "(?:(?:_OPCODES_FUNCTION_GLOBAL).)".
# NOTE2: _OPCODES_EXTENDED_ARG2 is an exception, as it is used as a list of opcodes to exclude, i.e.,
# "[^_OPCODES_EXTENDED_ARG2]". Therefore, multiple opcodes are not separated by the OR operator.
# NOTE3: every branch below also defines _cleanup_bytecode_string(), the pre-processing step that finditer() applies
# to the raw bytecode before matching.
# NOTE(review): the compat.is_py3XX flags are defined in PyInstaller.compat (not visible here); presumably each means
# "running on Python 3.XX or newer" - confirm there.
if not compat.is_py311:
    # Python 3.7 introduced two new function-related opcodes, LOAD_METHOD and CALL_METHOD
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
    _OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
    _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
    _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
    _OPCODES_FUNCTION_CALL = rb"`CALL_FUNCTION`|`CALL_METHOD`|`CALL_FUNCTION_EX`"

    def _cleanup_bytecode_string(bytecode):
        return bytecode  # Nothing to do here
elif not compat.is_py312:
    # Python 3.11 removed CALL_FUNCTION and CALL_METHOD, and replaced them with PRECALL + CALL instruction sequence.
    # As both PRECALL and CALL have the same parameter (the argument count), we need to match only up to the PRECALL.
    # The CALL_FUNCTION_EX is still present.
    # From Python 3.11b1 on, there is an EXTENDED_ARG_QUICK specialization opcode present.
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`|`EXTENDED_ARG_QUICK`"
    _OPCODES_EXTENDED_ARG2 = rb"`EXTENDED_ARG``EXTENDED_ARG_QUICK`"  # Special case; see note above the if/else block!
    _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
    _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
    _OPCODES_FUNCTION_CALL = rb"`PRECALL`|`CALL_FUNCTION_EX`"

    # Starting with python 3.11, the bytecode is peppered with CACHE instructions (which dis module conveniently hides
    # unless show_caches=True is used). Dealing with these CACHE instructions in regex rules is going to render them
    # unreadable, so instead we pre-process the bytecode and filter the offending opcodes out.
    # The filter consumes the bytecode two bytes at a time: a CACHE pair lands in group 1, any other pair in group 2.
    _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")

    def _cleanup_bytecode_string(bytecode):
        # Substituting with group 2 keeps ordinary pairs and replaces CACHE pairs with nothing.
        return _cache_instruction_filter.sub(rb"\2", bytecode)
else:
    # Python 3.12 merged EXTENDED_ARG_QUICK back in to EXTENDED_ARG, and LOAD_METHOD in to LOAD_ATTR
    # PRECALL is no longer a valid key
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
    _OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
    _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`"
    _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
    _OPCODES_FUNCTION_CALL = rb"`CALL`|`CALL_FUNCTION_EX`"

    # In Python 3.13, PUSH_NULL opcode is emitted after the LOAD_NAME (and after LOAD_ATTR opcode(s), if applicable).
    # In python 3.11 and 3.12, it was emitted before the LOAD_NAME, and thus fell outside of our regex matching; now,
    # we have to deal with it. But, instead of trying to add it to matching rules and adjusting the post-processing
    # to deal with it, we opt to filter them out (at the same time as we filter out CACHE opcodes), and leave the rest
    # of processing untouched.
    if compat.is_py313:
        # Group 1 = CACHE pair, group 2 = PUSH_NULL pair, group 3 = any other pair.
        _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(`PUSH_NULL`.)|(..)")

        def _cleanup_bytecode_string(bytecode):
            # Substituting with group 3 drops both CACHE and PUSH_NULL pairs.
            return _cache_instruction_filter.sub(rb"\3", bytecode)
    else:
        # Group 1 = CACHE pair, group 2 = any other pair.
        _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")

        def _cleanup_bytecode_string(bytecode):
            # Substituting with group 2 drops CACHE pairs.
            return _cache_instruction_filter.sub(rb"\2", bytecode)
# Pattern for a complete call on constant arguments. It has four capture groups, which function_calls() unpacks in
# this order: (1) the load of the root name, (2) zero or more attribute/method loads, (3) zero or more constant
# argument loads, and (4) the call instruction itself. Each instruction may be preceded by EXTENDED_ARG pairs.
# language=PythonVerboseRegExp
_call_function_bytecode = bytecode_regex(
rb"""
# Matches `global_function('some', 'constant', 'arguments')`.
# Load the global function. In code with >256 of names, this may require extended name references.
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:(?:""" + _OPCODES_FUNCTION_GLOBAL + rb""").)
)
# For foo.bar.whizz(), the above is the 'foo', below is the 'bar.whizz' (one opcode per name component, each
# possibly preceded by name reference extension).
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_LOAD + rb""").
)*
)
# Load however many arguments it takes. These (for now) must all be constants.
# Again, code with >256 constants may need extended enumeration.
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_ARGS + rb""").
)*
)
# Call the function. If opcode is CALL_FUNCTION_EX, the parameter are flags. For other opcodes, the parameter
# is the argument count (which may be > 256).
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_CALL + rb""").
)
"""
)
# Pattern for one instruction together with its leading EXTENDED_ARG pairs; loads() uses it with findall() to split a
# run of consecutive instructions into individual (extended) instructions.
# language=PythonVerboseRegExp
_extended_arg_bytecode = bytecode_regex(
rb"""(
# Arbitrary number of EXTENDED_ARG pairs.
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
# Followed by some other instruction (usually a LOAD).
[^""" + _OPCODES_EXTENDED_ARG2 + rb"""].
)"""
)
def extended_arguments(extended_args: bytes):
    """
    Decode the (possibly extended) integer that references a name or a constant.

    The input is expected to be a bytecode snippet of the form::

        EXTENDED_ARG    ?      # Zero or more of these.
        LOAD_xxx        ?      # Any of LOAD_NAME/LOAD_CONST/LOAD_METHOD/...

    The ? bytes (every second byte of the snippet), read as one big-endian number, give the result.
    """
    value = 0
    for argument_byte in extended_args[1::2]:
        # Each further argument byte contributes the next lower-order 8 bits.
        value = (value << 8) | argument_byte
    return value
def load(raw: bytes, code: CodeType) -> str:
    """
    Resolve one (possibly extended) LOAD_xxx instruction to the object it loads.
    """
    # The combined argument of the instruction and its EXTENDED_ARG prefixes.
    index = extended_arguments(raw)
    # The opcode of the final instruction decides which table of the code object the argument indexes.
    opcode = raw[-2]

    if opcode == opmap["LOAD_FAST"]:
        # A local variable.
        return code.co_varnames[index]
    if opcode == opmap["LOAD_CONST"]:
        # A literal.
        return code.co_consts[index]

    # Everything else is a global name. For LOAD_GLOBAL in python 3.11, and for LOAD_ATTR in python 3.12, it is
    # namei>>1 that indexes the name table.
    shifted = (compat.is_py311 and opcode == opmap["LOAD_GLOBAL"]) or \
        (compat.is_py312 and opcode == opmap["LOAD_ATTR"])
    if shifted:
        return code.co_names[index >> 1]
    return code.co_names[index]
def loads(raw: bytes, code: CodeType) -> list:
    """
    Resolve a run of consecutive LOAD_xxx instructions; i.e., load() applied to each instruction in turn.

    Useful for unpacking a function's parameters or a chain of nested attributes ``(foo.bar.pop.whack)``.
    """
    # Split the run into individual instructions, each with its own EXTENDED_ARG prefixes.
    instructions = _extended_arg_bytecode.findall(raw)
    return [load(instruction, code) for instruction in instructions]
def function_calls(code: CodeType) -> list:
    """
    Collect every function call on constant arguments found in a code object, as (function name, arguments) tuples.
    """
    calls = []
    for hit in finditer(_call_function_bytecode, code.co_code):
        root_raw, attrs_raw, args_raw, call_raw = hit.groups()

        # For foo(), the root is 'foo' and there are no attribute loads. For foo.bar.whizz(), the root is 'foo' and
        # the attribute loads supply the rest of the dotted name.
        name_parts = [load(root_raw, code)] + loads(attrs_raw, code)
        function = ".".join(name_parts)

        args = loads(args_raw, code)
        # Flags for CALL_FUNCTION_EX; the argument count for every other call opcode.
        call_argument = extended_arguments(call_raw)

        if call_raw[0] == opmap['CALL_FUNCTION_EX']:
            if call_argument != 0:
                # Keyword arguments present. Unhandled at the moment.
                continue
            # In calls with const arguments, a single tuple holds all values.
            if len(args) != 1 or not isinstance(args[0], tuple):
                continue
            args = list(args[0])
        elif call_argument != len(args):
            # This happens if there are variable or keyword arguments. Bail out in either case.
            continue

        calls.append((function, args))
    return calls
def search_recursively(search: callable, code: CodeType, _memo=None) -> dict:
    """
    Run a search function on a code object and on every code object nested in its constants (function
    definitions), returning a dict that maps each visited code object to its search result.
    """
    memo = {} if _memo is None else _memo
    if code in memo:
        # Already visited; neither search again nor descend again.
        return memo

    memo[code] = search(code)
    nested = (const for const in code.co_consts if isinstance(const, CodeType))
    for child in nested:
        search_recursively(search, child, memo)
    return memo
def recursive_function_calls(code: CodeType) -> dict:
    """
    Collect function calls on constant arguments from a code object and from all code objects nested within it
    (function definitions and bodies of comprehension loops).
    """
    calls_by_code = search_recursively(function_calls, code)
    return calls_by_code
def any_alias(full_name: str):
    """Yield the names under which a fully qualified Python name may be referenced.

    >>> list(any_alias("foo.bar.wizz"))
    ['foo.bar.wizz', 'bar.wizz', 'wizz']

    This crudely captures uses of wizz() under any of
    ::
        import foo
        foo.bar.wizz()
    ::
        from foo import bar
        bar.wizz()
    ::
        from foo.bar import wizz
        wizz()

    However, it will fail for any form of aliases and quite likely find false matches.
    """
    components = full_name.split('.')
    # Drop one more leading component on every step.
    for start in range(len(components)):
        yield ".".join(components[start:])

View File

@@ -0,0 +1,370 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Manipulation of dynamic libraries.
"""
import os
import pathlib
import re
from PyInstaller import compat
import PyInstaller.log as logging
logger = logging.getLogger(__name__)
# Ignoring some system libraries speeds up packaging process
# Each entry is a regular expression; the entries are later OR-ed together by MatchList and matched, ignoring case,
# against the start of a library's base name.
_excludes = {
    # Ignore annoying warnings with Windows system DLLs.
    #
    # 'W: library kernel32.dll required via ctypes not found'
    # 'W: library coredll.dll required via ctypes not found'
    #
    # These DLLs have to be ignored on all operating systems, because they might be resolved when scanning code for
    # ctypes dependencies.
    r'advapi32\.dll',
    r'ws2_32\.dll',
    r'gdi32\.dll',
    r'oleaut32\.dll',
    r'shell32\.dll',
    r'ole32\.dll',
    r'coredll\.dll',
    r'crypt32\.dll',
    r'kernel32',
    r'kernel32\.dll',
    r'msvcrt\.dll',
    r'rpcrt4\.dll',
    r'user32\.dll',
    # Some modules try to import the Python library, e.g., pyreadline.console.console
    # NOTE(review): the `%s` placeholders below are never formatted anywhere in the visible code, so as written this
    # pattern only matches the literal text `python%s%s` - confirm the intent.
    r'python\%s\%s',
}
# Regex includes - overrides excludes. Include list is used only to override specific libraries from exclude list.
_includes = set()
# Regular expressions for libraries that must be collected on Windows even though they would otherwise be excluded.
_win_includes = {
    # We need to allow collection of Visual Studio C++ (VC) runtime DLLs from system directories in order to avoid
    # missing DLL errors when the frozen application is run on a system that does not have the corresponding VC
    # runtime installed. The VC runtime DLLs may be dependencies of python shared library itself or of extension
    # modules provided by 3rd party packages.
    # Visual Studio 2010 (VC10) runtime
    # http://msdn.microsoft.com/en-us/library/8kche8ah(v=vs.100).aspx
    r'atl100\.dll',
    r'msvcr100\.dll',
    r'msvcp100\.dll',
    r'mfc100\.dll',
    r'mfc100u\.dll',
    r'mfcmifc80\.dll',
    r'mfcm100\.dll',
    r'mfcm100u\.dll',
    # Visual Studio 2012 (VC11) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2013/2012-redistribution-vs
    #
    # VC110.ATL
    r'atl110\.dll',
    # VC110.CRT
    r'msvcp110\.dll',
    r'msvcr110\.dll',
    r'vccorlib110\.dll',
    # VC110.CXXAMP
    r'vcamp110\.dll',
    # VC110.MFC
    r'mfc110\.dll',
    r'mfc110u\.dll',
    r'mfcm110\.dll',
    r'mfcm110u\.dll',
    # VC110.MFCLOC
    r'mfc110chs\.dll',
    r'mfc110cht\.dll',
    r'mfc110enu\.dll',
    r'mfc110esn\.dll',
    r'mfc110deu\.dll',
    r'mfc110fra\.dll',
    r'mfc110ita\.dll',
    r'mfc110jpn\.dll',
    r'mfc110kor\.dll',
    r'mfc110rus\.dll',
    # VC110.OpenMP
    r'vcomp110\.dll',
    # DIA SDK
    r'msdia110\.dll',
    # Visual Studio 2013 (VC12) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2013/2013-redistribution-vs
    #
    # VC120.CRT
    r'msvcp120\.dll',
    r'msvcr120\.dll',
    r'vccorlib120\.dll',
    # VC120.CXXAMP
    r'vcamp120\.dll',
    # VC120.MFC
    r'mfc120\.dll',
    r'mfc120u\.dll',
    r'mfcm120\.dll',
    r'mfcm120u\.dll',
    # VC120.MFCLOC
    r'mfc120chs\.dll',
    r'mfc120cht\.dll',
    r'mfc120deu\.dll',
    r'mfc120enu\.dll',
    r'mfc120esn\.dll',
    r'mfc120fra\.dll',
    r'mfc120ita\.dll',
    r'mfc120jpn\.dll',
    r'mfc120kor\.dll',
    r'mfc120rus\.dll',
    # VC120.OPENMP
    r'vcomp120\.dll',
    # DIA SDK
    r'msdia120\.dll',
    # Cpp REST Windows SDK
    # (The dot before `winrt` is escaped so that it matches only a literal dot, like every other dot in this list.)
    r'casablanca120\.winrt\.dll',
    # Mobile Services Cpp Client
    r'zumosdk120\.winrt\.dll',
    # Cpp REST SDK
    r'casablanca120\.dll',
    # Universal C Runtime Library (since Visual Studio 2015)
    #
    # NOTE: these should be put under a switch, as they need not to be bundled if deployment target is Windows 10
    #       and later, as "UCRT is now a system component in Windows 10 and later, managed by Windows Update".
    #       (https://docs.microsoft.com/en-us/cpp/windows/determining-which-dlls-to-redistribute?view=msvc-170)
    #       And as discovered in #6326, Windows prefers system-installed version over the bundled one, anyway
    #       (see https://docs.microsoft.com/en-us/cpp/windows/universal-crt-deployment?view=msvc-170#local-deployment).
    r'api-ms-win-core.*',
    r'api-ms-win-crt.*',
    r'ucrtbase\.dll',
    # Visual Studio 2015/2017/2019/2022 (VC14) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2022/redistribution
    #
    # VC141.CRT/VC142.CRT/VC143.CRT
    r'concrt140\.dll',
    r'msvcp140\.dll',
    r'msvcp140_1\.dll',
    r'msvcp140_2\.dll',
    r'msvcp140_atomic_wait\.dll',
    r'msvcp140_codecvt_ids\.dll',
    r'vccorlib140\.dll',
    r'vcruntime140\.dll',
    r'vcruntime140_1\.dll',
    # VC141.CXXAMP/VC142.CXXAMP/VC143.CXXAMP
    r'vcamp140\.dll',
    # VC141.OpenMP/VC142.OpenMP/VC143.OpenMP
    r'vcomp140\.dll',
    # DIA SDK
    r'msdia140\.dll',
    # Allow pythonNN.dll, pythoncomNN.dll, pywintypesNN.dll
    r'py(?:thon(?:com(?:loader)?)?|wintypes)\d+\.dll',
}
# Additional exclude patterns that are merged into the global exclude list on Windows only.
_win_excludes = {
    # On Windows, only .dll files can be loaded.
    # NOTE(review): MatchList applies these with re.match(), which anchors only the start of the name, so these two
    # patterns also match names that merely contain `.so` or `.dylib` - confirm that this is intended.
    r'.*\.so',
    r'.*\.dylib',
    # MS assembly excludes
    r'Microsoft\.Windows\.Common-Controls',
}
# Exclude patterns that are merged into the global exclude list on *nix platforms other than AIX. The optional
# `(\..*)?` suffix lets a pattern cover both the plain `.so` name and versioned names such as `.so.6`.
_unix_excludes = {
    r'libc\.so(\..*)?',
    r'libdl\.so(\..*)?',
    r'libm\.so(\..*)?',
    r'libpthread\.so(\..*)?',
    r'librt\.so(\..*)?',
    r'libthread_db\.so(\..*)?',
    # glibc regex excludes.
    r'ld-linux\.so(\..*)?',
    r'libBrokenLocale\.so(\..*)?',
    r'libanl\.so(\..*)?',
    r'libcidn\.so(\..*)?',
    r'libcrypt\.so(\..*)?',
    r'libnsl\.so(\..*)?',
    r'libnss_compat.*\.so(\..*)?',
    r'libnss_dns.*\.so(\..*)?',
    r'libnss_files.*\.so(\..*)?',
    r'libnss_hesiod.*\.so(\..*)?',
    r'libnss_nis.*\.so(\..*)?',
    r'libnss_nisplus.*\.so(\..*)?',
    r'libresolv\.so(\..*)?',
    r'libutil\.so(\..*)?',
    # graphical interface libraries come with graphical stack (see libglvnd)
    r'libE?(Open)?GLX?(ESv1_CM|ESv2)?(dispatch)?\.so(\..*)?',
    r'libdrm\.so(\..*)?',
    # a subset of libraries included as part of the Nvidia Linux Graphics Driver as of 520.56.06:
    # https://download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/installedcomponents.html
    r'nvidia_drv\.so',
    r'libglxserver_nvidia\.so(\..*)?',
    r'libnvidia-egl-(gbm|wayland)\.so(\..*)?',
    r'libnvidia-(cfg|compiler|e?glcore|glsi|glvkspirv|rtcore|allocator|tls|ml)\.so(\..*)?',
    r'lib(EGL|GLX)_nvidia\.so(\..*)?',
    # libcuda.so, libcuda.so.1, and libcuda.so.{version} are run-time part of NVIDIA driver, and should not be
    # collected, as they need to match the rest of driver components on the target system.
    r'libcuda\.so(\..*)?',
    r'libcudadebugger\.so(\..*)?',
    # libxcb-dri changes ABI frequently (e.g.: between Ubuntu LTS releases) and is usually installed as dependency of
    # the graphics stack anyway. No need to bundle it.
    r'libxcb\.so(\..*)?',
    r'libxcb-dri.*\.so(\..*)?',
    # A system running a Wayland compositor should already have these libraries, in versions that (unlike bundled
    # copies) should not conflict with the system drivers.
    r'libwayland.*\.so(\..*)?',
}
# Exclude patterns that are merged into the global exclude list on AIX, in place of the common *nix excludes. Each
# entry is a regular expression for a library archive name.
_aix_excludes = {
    r'libbz2\.a',
    r'libc\.a',
    r'libC\.a',
    r'libcrypt\.a',
    r'libdl\.a',
    r'libintl\.a',
    r'libpthreads\.a',
    # A single backslash escapes the dot. With the doubled backslash used previously, the pattern required a literal
    # backslash character in the file name, so `librt.a` was never matched.
    r'librt\.a',
    r'librtl\.a',
    r'libz\.a',
}
# Exclude patterns that are merged into the global exclude list on Cygwin.
_cygwin_excludes = {
    r'cygwin1\.dll',
}
# Extend the global include/exclude sets with the platform-specific sets defined above. Exactly one branch applies;
# the order of the checks matters, because the AIX branch has to be tested before the generic *nix branch.
if compat.is_win:
    _includes |= _win_includes
    _excludes |= _win_excludes
elif compat.is_cygwin:
    _excludes |= _cygwin_excludes
elif compat.is_aix:
    # The exclude list for AIX differs from other *nix platforms.
    _excludes |= _aix_excludes
elif compat.is_unix:
    # Common excludes for *nix platforms -- except AIX.
    _excludes |= _unix_excludes
class MatchList:
    """
    A list of regular expressions that library names can be checked against.

    All entries are OR-ed into one case-insensitive pattern, which is matched against the start of a library's base
    name. A list without entries matches nothing.
    """
    def __init__(self, entries):
        if entries:
            self._regex = re.compile('|'.join(entries), re.I)
        else:
            self._regex = None

    def check_library(self, libname):
        """
        Return a match object if the base name of `libname` matches any entry; a false value (None or False)
        otherwise.
        """
        if not self._regex:
            return False
        basename = os.path.basename(libname)
        return self._regex.match(basename)
# Build the module-level `exclude_list` and `include_list` matchers. macOS and Windows use MatchList subclasses that
# add location-based exclusion on top of the name-based patterns; all other platforms use plain MatchList objects.
if compat.is_darwin:
    import macholib.util

    class MacExcludeList(MatchList):
        """Exclude list that additionally excludes libraries residing in standard system locations."""
        def __init__(self, entries):
            super().__init__(entries)

        def check_library(self, libname):
            # Try the global exclude list.
            result = super().check_library(libname)
            if result:
                return result

            # Exclude libraries in standard system locations.
            return macholib.util.in_system_path(libname)

    exclude_list = MacExcludeList(_excludes)
    include_list = MatchList(_includes)

elif compat.is_win:
    from PyInstaller.utils.win32 import winutils

    class WinExcludeList(MatchList):
        """Exclude list that additionally excludes libraries residing under the Windows directory."""
        def __init__(self, entries):
            super().__init__(entries)

            self._windows_dir = pathlib.Path(winutils.get_windows_dir()).resolve()

            # When running as SYSTEM user, the home directory is `%WINDIR%\system32\config\systemprofile`.
            self._home_dir = pathlib.Path.home().resolve()
            # True when the home directory itself is located under the Windows directory.
            self._system_home = self._windows_dir in self._home_dir.parents

        def check_library(self, libname):
            # Try the global exclude list. The global exclude list contains lower-cased names, so lower-case the input
            # for case-normalized comparison.
            result = super().check_library(libname.lower())
            if result:
                return result

            # Exclude everything from the Windows directory by default; but allow contents of user's home directory if
            # that happens to be rooted under Windows directory (e.g., when running PyInstaller as SYSTEM user).
            lib_fullpath = pathlib.Path(libname).resolve()
            exclude = self._windows_dir in lib_fullpath.parents
            if exclude and self._system_home and self._home_dir in lib_fullpath.parents:
                exclude = False
            return exclude

    exclude_list = WinExcludeList(_excludes)
    include_list = MatchList(_includes)

else:
    exclude_list = MatchList(_excludes)
    include_list = MatchList(_includes)
# Wine built-in DLLs that have already been warned about; include_library() uses it to warn only once per DLL.
_seen_wine_dlls = set()


def include_library(libname):
    """
    Decide whether the given dynamic library should be bundled with the application.

    Returns True if the library should be included, and False if it should be left out.
    """
    # A library is dropped when the exclude list matches it and the include list does not override that.
    excluded = exclude_list.check_library(libname) and not include_list.check_library(libname)
    if excluded:
        return False

    # Everything that remains is included, except for Wine built-in DLLs when running under Wine. Normally, leaving
    # a DLL out produces an incomplete bundle and run-time errors on targets that lack the DLL. Collecting Wine
    # built-in DLLs is worse, though: they usually are Wine's own implementation of low-level functionality and
    # cannot be used on actual Windows (i.e., system libraries from the C:\Windows\system32 directory that might end
    # up collected due to the ``_win_includes`` list; a prominent example are the VC runtime DLLs, which Wine
    # implements itself unless the user explicitly installs Microsoft's VC redistributable package in their Wine
    # environment). Leaving them out improves the chances of the bundle running on Windows, or at least turns the
    # failure into the "standard" missing DLL problem, which is easier to debug. Running under Wine itself should
    # be unaffected, as the excluded DLLs are available there.
    if not (compat.is_win_wine and compat.is_wine_dll(libname)):
        return True

    # Warn only once per DLL. Note that this point is reached only for DLLs that would otherwise have been included.
    if libname not in _seen_wine_dlls:
        logger.warning("Excluding Wine built-in DLL: %s", libname)
        _seen_wine_dlls.add(libname)
    return False
# Patterns for suppressing warnings about missing dynamically linked libraries
# The list is filled in conditionally, depending on the platform, and then compiled into a single MatchList.
_warning_suppressions = []
# On some systems (e.g., openwrt), libc.so might point to ldd. Suppress warnings about it.
if compat.is_linux:
    _warning_suppressions.append(r'ldd')
# Suppress warnings about unresolvable UCRT DLLs (see issue #1566) on Windows 10 and 11.
if compat.is_win_10 or compat.is_win_11:
    _warning_suppressions.append(r'api-ms-win-.*\.dll')
# With no patterns collected, the resulting MatchList matches nothing, so no warning is suppressed.
missing_lib_warning_suppression_list = MatchList(_warning_suppressions)
def warn_missing_lib(libname):
    """
    Tell whether a missing-library warning should be shown for the given library name (or full path).

    Returns False if the name matches one of the warning suppression patterns, and True otherwise.
    """
    suppressed = missing_lib_warning_suppression_list.check_library(libname)
    return not suppressed

View File

@@ -0,0 +1,582 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Code related to processing of import hooks.
"""
import glob
import os.path
import sys
import weakref
import re
from PyInstaller import log as logging
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.compat import importlib_load_source
from PyInstaller.depend.imphookapi import PostGraphAPI
from PyInstaller.exceptions import ImportErrorWhenRunningHook
logger = logging.getLogger(__name__)
class ModuleHookCache(dict):
    """
    Cache of lazily loadable hook script objects.

    This cache is implemented as a `dict` subclass. Once constructed, it maps the fully-qualified name of every module
    that has at least one hook script in the passed hook directories to the single `ModuleHook` instance selected for
    that module (the one with the highest priority). As a `dict` subclass, all cached module names and hooks are
    accessible via standard dictionary operations.

    Attributes
    ----------
    module_graph : ModuleGraph
        Weak reference proxy to the current module graph.
    _hook_module_name_prefix : str
        String prefixing the names of all in-memory modules lazily loaded from cached hook scripts. See also the
        `hook_module_name_prefix` parameter passed to the `ModuleHook.__init__()` method.
    """

    # 0-based identifier unique to the next `ModuleHookCache` to be instantiated.
    #
    # This identifier is incremented on each instantiation of a new `ModuleHookCache`, so that the in-memory modules
    # of lazily loaded hook scripts in each cache live in their own cache-specific namespace and cannot collide with
    # in-memory modules belonging to other caches.
    _cache_id_next = 0

    def __init__(self, module_graph, hook_dirs):
        """
        Cache all hook scripts in the passed directories.

        Parameters
        ----------
        module_graph : ModuleGraph
            Current module graph. Must be weakly referenceable, as only a weak reference proxy to it is stored.
        hook_dirs : list
            List of 2-tuples `(hook_dir, default_priority)`, where `hook_dir` is the absolute or relative path of a
            directory containing **hook scripts** (i.e., Python scripts with filenames matching
            `hook-{module_name}.py`, where `{module_name}` is the module hooked by that script) and
            `default_priority` is the priority assigned to hooks from that directory unless the hook script
            overrides it.

        Raises
        ----------
        FileNotFoundError
            If any of the passed hook directories does not exist.
        """
        super().__init__()

        # To avoid circular references and hence increased memory consumption, a weak rather than strong reference is
        # stored to the passed graph. Since this graph is guaranteed to live longer than this cache,
        # this is guaranteed to be safe.
        self.module_graph = weakref.proxy(module_graph)

        # String unique to this cache prefixing the names of all in-memory modules lazily loaded from cached hook
        # scripts, privatized for safety.
        self._hook_module_name_prefix = '__PyInstaller_hooks_{}_'.format(ModuleHookCache._cache_id_next)
        ModuleHookCache._cache_id_next += 1

        # Cache all hook scripts in the passed directories.
        self._cache_hook_dirs(hook_dirs)

    def _cache_hook_dirs(self, hook_dirs):
        """
        Cache all hook scripts in the passed directories, keeping a single hook per module.

        When several hook scripts target the same module, the hook with the highest `priority` is kept. If several
        hooks share the highest priority, the one that was discovered first (in the order of `hook_dirs`) is kept.

        Parameters
        ----------
        hook_dirs : list
            List of 2-tuples `(hook_dir, default_priority)`; see `__init__()`.

        Raises
        ----------
        FileNotFoundError
            If any of the passed hook directories does not exist.
        """
        for hook_dir, default_priority in hook_dirs:
            # Canonicalize this directory's path and validate its existence.
            hook_dir = os.path.abspath(hook_dir)
            if not os.path.isdir(hook_dir):
                raise FileNotFoundError('Hook directory "{}" not found.'.format(hook_dir))

            # For each hook script in this directory...
            for hook_filename in glob.glob(os.path.join(hook_dir, 'hook-*.py')):
                # Fully-qualified name of this hook's corresponding module, constructed by removing the "hook-" prefix
                # and ".py" suffix.
                module_name = os.path.basename(hook_filename)[5:-3]

                # Lazily loadable hook object.
                module_hook = ModuleHook(
                    module_graph=self.module_graph,
                    module_name=module_name,
                    hook_filename=hook_filename,
                    hook_module_name_prefix=self._hook_module_name_prefix,
                    default_priority=default_priority,
                )

                # Temporarily collect all candidate hooks for this module in a list.
                self.setdefault(module_name, []).append(module_hook)

        # Post-processing: only one hook instance per module is allowed, so replace each candidate list with the
        # selected hook. Only the values of existing keys are replaced, which is safe while iterating.
        for module_name, hooks in self.items():
            if len(hooks) == 1:
                # Single candidate; no need to query (and thus read from the hook file) its priority.
                self[module_name] = hooks[0]
            else:
                # Highest priority wins; max() returns the first maximal element, so ties are resolved in favor of
                # the hook that was cached first.
                self[module_name] = max(hooks, key=lambda hook: hook.priority)

    def remove_modules(self, *module_names):
        """
        Remove the passed modules and the hook scripts cached for these modules from this cache.

        Names that are not present in this cache are silently ignored.

        Parameters
        ----------
        module_names : list
            List of all fully-qualified module names to be removed.
        """
        for module_name in module_names:
            # Remove our reference, if available.
            module_hook = self.pop(module_name, None)
            if module_hook is None:
                continue

            # Unload this module's hook script module from memory. Since these are top-level pure-Python modules
            # cached only in the "sys.modules" dictionary, popping them from this dictionary suffices to garbage
            # collect them.
            sys.modules.pop(module_hook.hook_module_name, None)
def _module_collection_mode_sanitizer(value):
if isinstance(value, dict):
# Hook set a dictionary; use it as-is
return value
elif isinstance(value, str):
# Hook set a mode string; convert to a dictionary and assign the string to `None` (= the hooked module).
return {None: value}
raise ValueError(f"Invalid module collection mode setting value: {value!r}")
def _bindepend_symlink_suppression_sanitizer(value):
if isinstance(value, (list, set)):
# Hook set a list or a set; use it as-is
return set(value)
elif isinstance(value, str):
# Hook set a string; create a set with single element.
return set([value])
raise ValueError(f"Invalid value for bindepend_symlink_suppression: {value!r}")
# Dictionary mapping the names of magic attributes required by the "ModuleHook" class to 2-tuples "(default_type,
# sanitizer_func)", where:
#
# * "default_type" is a zero-argument callable (usually a type such as `set`, but any callable works) that is called
#   to produce the value of that attribute when the lazily loaded hook script does not define it, or defines it as
#   "None".
# * "sanitizer_func" is the callable sanitizing the original value of that attribute defined by that hook into a
#   safer value consumable by "ModuleHook" callers if any or "None" if the original value requires no sanitization.
#   The sanitizer is applied only to values defined by the hook script, never to the default value.
#
# To avoid subtleties in the ModuleHook.__getattr__() method, this dictionary is declared as a module rather than a
# class attribute. If declared as a class attribute and then undefined (...for whatever reason), attempting to access
# this attribute from that method would produce infinite recursion.
_MAGIC_MODULE_HOOK_ATTRS = {
    # Collections in which order is insignificant. This includes:
    #
    # * "datas", sanitized from hook-style 2-tuple lists defined by hooks into TOC-style 2-tuple sets consumable by
    #   "ModuleHook" callers.
    # * "binaries", sanitized in the same way.
    # * "excludedimports", which defaults to an empty set but is otherwise exposed exactly as defined by the hook.
    'datas': (set, format_binaries_and_datas),
    'binaries': (set, format_binaries_and_datas),
    'excludedimports': (set, None),

    # Collections in which order is significant. This includes:
    #
    # * "hiddenimports", as order of importation is significant. On module importation, hook scripts are loaded and hook
    #   functions declared by these scripts are called. As these scripts and functions can have side effects dependent
    #   on module importation order, module importation itself can have side effects dependent on this order!
    'hiddenimports': (list, None),

    # Flags. The default factory yields `True`; a value defined by the hook is coerced with `bool()`.
    'warn_on_missing_hiddenimports': (lambda: True, bool),

    # Package/module collection mode dictionary.
    'module_collection_mode': (dict, _module_collection_mode_sanitizer),

    # Path patterns for suppression of symbolic links created by binary dependency analysis.
    'bindepend_symlink_suppression': (set, _bindepend_symlink_suppression_sanitizer),
}
class ModuleHook:
    """
    Cached object encapsulating a lazy loadable hook script.

    This object exposes public attributes (e.g., `datas`) of the underlying hook script as attributes of the same
    name of this object. On the first access of any such attribute, this hook script is lazily loaded and executed,
    and its attributes are copied into instance variables that are reused on subsequent accesses. These dynamic
    attributes are referred to as "magic." All other static attributes of this object (e.g., `hook_module_name`) are
    referred to as "non-magic."

    Attributes (Magic)
    ----------
    datas : set
        Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external non-executable files required by
        the module being hooked, converted from the `datas` list of hook-style 2-tuples `(source_dir_or_glob,
        target_dir)` defined by this hook script.
    binaries : set
        Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external executable files required by the
        module being hooked, converted from the `binaries` list of hook-style 2-tuples `(source_dir_or_glob,
        target_dir)` defined by this hook script.
    excludedimports : set
        Fully-qualified names of all modules imported by the module being hooked to be ignored rather than imported
        from that module. Defaults to an empty set if the hook script does not define `excludedimports`; a value
        defined by the hook script is exposed as is, without conversion. These modules will only be "locally" rather
        than "globally" ignored. These modules will remain importable from all modules other than the module being
        hooked.
    hiddenimports : list
        List of the fully-qualified names of all modules imported by the module being hooked that are _not_
        automatically detectable by PyInstaller (usually due to being dynamically imported in that module). This is
        a list rather than a set, because the order of importation is significant. Defaults to an empty list; a
        value defined by the hook script is exposed as is.
    warn_on_missing_hiddenimports : bool
        Boolean flag indicating whether missing hidden imports from the hook should generate warnings or not. This
        behavior is enabled by default, but individual hooks can opt out of it.
    module_collection_mode : dict
        A dictionary of package/module names and their corresponding collection mode strings ('pyz', 'pyc', 'py',
        'pyz+py', 'py+pyz').
    bindepend_symlink_suppression : set
        A set of paths or path patterns corresponding to shared libraries for which binary dependency analysis should
        not create symbolic links into top-level application directory.

    Attributes (Non-magic)
    ----------
    module_graph : ModuleGraph
        Current module graph (a weak reference proxy).
    module_name : str
        Name of the module hooked by this hook script.
    hook_filename : str
        Absolute or relative path of this hook script.
    hook_module_name : str
        Name of the in-memory module of this hook script's interpreted contents.
    _loaded : bool
        Whether this hook script has been loaded and executed at least once.
    _has_hook_function : bool
        Whether the loaded hook script defines a `hook()` function. Only meaningful once `_loaded` is `True`.
    _hook_module : module
        In-memory module of this hook script's interpreted contents _or_ `None` if the hook script has not been
        loaded yet, or if the module reference was released after loading (see `_load_hook_module()`).
    _default_priority : int
        Default (location-based) priority for this hook.
    priority : int
        Actual priority for this hook, lazily read from the hook file on first access. Might be different from
        `_default_priority` if hook file specifies the hook priority override.
    """

    #-- Magic --

    def __init__(self, module_graph, module_name, hook_filename, hook_module_name_prefix, default_priority):
        """
        Initialize this metadata.

        Parameters
        ----------
        module_graph : ModuleGraph
            Current module graph. Must be a weak reference proxy; see `ModuleHookCache.__init__()`.
        module_name : str
            Name of the module hooked by this hook script.
        hook_filename : str
            Absolute or relative path of this hook script.
        hook_module_name_prefix : str
            String prefixing the name of the in-memory module for this hook script. To avoid namespace clashes with
            similar modules created by other `ModuleHook` objects in other `ModuleHookCache` containers, this string
            _must_ be unique to the `ModuleHookCache` container containing this `ModuleHook` object. If this string
            is non-unique, an existing in-memory module will be erroneously reused when lazily loading this hook
            script, thus erroneously resanitizing previously sanitized hook script attributes (e.g., `datas`) with
            the `format_binaries_and_datas()` helper.
        default_priority : int
            Default, location-based priority for this hook. Used to select active hook when multiple hooks are defined
            for the same module.
        """
        # Note that the passed module graph is already a weak reference, avoiding circular reference issues. See
        # ModuleHookCache.__init__().
        assert isinstance(module_graph, weakref.ProxyTypes), (
            f"module_graph must be a weak reference proxy, not {type(module_graph).__name__!r}"
        )
        self.module_graph = module_graph
        self.module_name = module_name
        self.hook_filename = hook_filename

        # Default priority; used as fall-back for dynamic `hook_priority` attribute.
        self._default_priority = default_priority

        # Name of the in-memory module fabricated to refer to this hook script.
        self.hook_module_name = hook_module_name_prefix + self.module_name.replace('.', '_')

        # Attributes subsequently defined by the _load_hook_module() method.
        self._loaded = False
        self._has_hook_function = False
        self._hook_module = None

    def __getattr__(self, attr_name):
        """
        Get the magic attribute with the passed name (e.g., `datas`) from this lazily loaded hook script if any _or_
        raise `AttributeError` otherwise.

        This special method is called only for attributes _not_ already defined by this object. This includes
        undefined attributes and the first attempt to access magic attributes or the `priority` attribute.

        This special method is _not_ called for subsequent attempts to access these attributes. The first attempt to
        access them defines corresponding instance variables accessible via the `self.__dict__` instance dictionary
        (e.g., as `self.datas`) without calling this method. This approach also allows magic attributes to be
        deleted from this object _without_ defining the `__delattr__()` special method.

        See Also
        ----------
        Class docstring for supported magic attributes.
        """
        if attr_name == 'priority':
            # If attribute is part of hook metadata, read metadata from hook script and return the attribute value.
            # Reading the metadata always defines `self.priority`, so the lookup below cannot recurse.
            self._load_hook_metadata()
            return getattr(self, attr_name)

        if attr_name in _MAGIC_MODULE_HOOK_ATTRS and not self._loaded:
            # If attribute is hook's magic attribute, load and run the hook script, and return the attribute value.
            self._load_hook_module()
            return getattr(self, attr_name)

        # This is an undefined attribute (or a magic attribute that was deleted after loading). Raise an exception.
        raise AttributeError(attr_name)

    def __setattr__(self, attr_name, attr_value):
        """
        Set the attribute with the passed name to the passed value.

        If this is a magic attribute, this hook script will be lazily loaded before setting this attribute. Unlike
        `__getattr__()`, this special method is called to set _any_ attribute -- including magic, non-magic,
        and undefined attributes.

        See Also
        ----------
        Class docstring for supported magic attributes.
        """
        # If this is a magic attribute, initialize this attribute by lazy loading this hook script before overwriting
        # this attribute. This is a no-op if the hook script has already been loaded.
        if attr_name in _MAGIC_MODULE_HOOK_ATTRS:
            self._load_hook_module()

        # Set this attribute to the passed value. To avoid recursion, the superclass method rather than setattr() is
        # called.
        return super().__setattr__(attr_name, attr_value)

    #-- Loading --

    def _load_hook_metadata(self):
        """
        Load hook metadata from its source file.

        Sets `self.priority` to the default priority and then scans the hook file for the first line carrying a
        priority override directive. If the directive's value is not a valid integer, a warning is logged and the
        default priority is kept.
        """
        self.priority = self._default_priority

        # Priority override pattern: `# $PyInstaller-Hook-Priority: <value>`
        priority_pattern = re.compile(r"^\s*#\s*\$PyInstaller-Hook-Priority:\s*(?P<value>[\S]+)")

        with open(self.hook_filename, "r", encoding="utf-8") as f:
            for line in f:
                # Attempt to match and parse hook priority directive
                m = priority_pattern.match(line)
                if m is None:
                    continue

                try:
                    self.priority = int(m.group('value'))
                except ValueError:
                    # int() on a string can only fail with ValueError; keep the default priority in that case.
                    logger.warning("Failed to parse hook priority value string: %r!", m.group('value'), exc_info=True)

                # Currently, this is our only line of interest, so we can stop the search here.
                return

    def _load_hook_module(self, keep_module_ref=False):
        """
        Lazily load this hook script into an in-memory private module.

        This method executes the hook script and exposes each magic attribute as an instance variable of the same
        name on this object. For magic attributes _not_ defined by this hook script (or defined as `None`), the
        instance variable is assigned a default value. For some magic attributes defined by this hook script, the
        value is first passed through a sanitizer that transforms it into an object more readily and safely consumed
        by external callers.

        Parameters
        ----------
        keep_module_ref : bool
            If `False` (the default), the reference to the loaded hook module is released once its attributes have
            been copied. If `True`, the reference is kept in `self._hook_module`, so that the `hook()` function of
            the hook script can be called later. If the hook script was previously loaded without keeping the
            reference, passing `True` loads and executes the hook script again.

        Raises
        ----------
        ImportErrorWhenRunningHook
            If executing the hook script raises `ImportError`.

        See Also
        ----------
        Class docstring for supported attributes.
        """
        # If this hook script module has already been loaded (and its module reference is available, if the caller
        # requires it), noop.
        if self._loaded and (self._hook_module is not None or not keep_module_ref):
            return

        # Load and execute the hook script. Even if mechanisms from the import machinery are used, this does not import
        # the hook as the module.
        hook_path, hook_basename = os.path.split(self.hook_filename)
        logger.info('Processing standard module hook %r from %r', hook_basename, hook_path)
        try:
            self._hook_module = importlib_load_source(self.hook_module_name, self.hook_filename)
        except ImportError as exc:
            logger.debug("Hook failed with:", exc_info=True)
            raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename) from exc

        # Mark as loaded. This must happen before the magic attributes are assigned below, as assigning a magic
        # attribute re-enters this method via __setattr__().
        self._loaded = True

        # Check if module has hook() function.
        self._has_hook_function = hasattr(self._hook_module, 'hook')

        # Copy hook script attributes into magic attributes exposed as instance variables of the current "ModuleHook"
        # instance.
        for attr_name, (default_type, sanitizer_func) in _MAGIC_MODULE_HOOK_ATTRS.items():
            # Unsanitized value of this attribute.
            attr_value = getattr(self._hook_module, attr_name, None)

            # If this attribute is undefined, expose a sane default instead.
            if attr_value is None:
                attr_value = default_type()
            # Else if this attribute requires sanitization, do so.
            elif sanitizer_func is not None:
                attr_value = sanitizer_func(attr_value)
            # Else, expose the unsanitized value of this attribute.

            # Expose this attribute as an instance variable of the same name.
            setattr(self, attr_name, attr_value)

        # If module_collection_mode has an entry with None key, reassign it to the hooked module's name.
        self.module_collection_mode = {
            key if key is not None else self.module_name: value
            for key, value in self.module_collection_mode.items()
        }

        # Release the module if we do not need the reference. This is the case when hook is loaded during the analysis
        # rather as part of the post-graph operations.
        if not keep_module_ref:
            self._hook_module = None

    #-- Hooks --

    def post_graph(self, analysis):
        """
        Call the **post-graph hook** (i.e., `hook()` function) defined by this hook script, if any.

        This method is intended to be called _after_ the module graph for this application is constructed.

        Parameters
        ----------
        analysis: build_main.Analysis
            Analysis that calls the hook
        """
        # Lazily load this hook script into an in-memory module.
        # The script might have been loaded before during modulegraph analysis; in that case, it needs to be reloaded
        # only if it provides a hook() function.
        if not self._loaded or self._has_hook_function:
            # Keep module reference when loading the hook, so we can call its hook function!
            self._load_hook_module(keep_module_ref=True)

        # Call this hook script's hook() function, which modifies attributes accessed by subsequent methods and
        # hence must be called first.
        self._process_hook_func(analysis)

        # Order is insignificant here.
        self._process_hidden_imports()

    def _process_hook_func(self, analysis):
        """
        Call this hook's `hook()` function if defined.

        Parameters
        ----------
        analysis: build_main.Analysis
            Analysis that calls the hook

        Raises
        ----------
        ImportErrorWhenRunningHook
            If the `hook()` function raises `ImportError`.
        """
        # If this hook script defines no hook() function (or its module reference is not available), noop.
        if not hasattr(self._hook_module, 'hook'):
            return

        # Call this hook() function.
        hook_api = PostGraphAPI(module_name=self.module_name, module_graph=self.module_graph, analysis=analysis)
        try:
            self._hook_module.hook(hook_api)
        except ImportError as exc:
            logger.debug("Hook failed with:", exc_info=True)
            raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename) from exc

        # Update all magic attributes modified by the prior call.
        self.datas.update(hook_api._added_datas)
        self.binaries.update(hook_api._added_binaries)
        self.hiddenimports.extend(hook_api._added_imports)
        self.module_collection_mode.update(hook_api._module_collection_mode)
        self.bindepend_symlink_suppression.update(hook_api._bindepend_symlink_suppression)

        # FIXME: `hook_api._deleted_imports` should be appended to `self.excludedimports` and used to suppress module
        # import during the modulegraph construction rather than handled here. However, for that to work, the `hook()`
        # function needs to be ran during modulegraph construction instead of in post-processing (and this in turn
        # requires additional code refactoring in order to be able to pass `analysis` to `PostGraphAPI` object at
        # that point). So once the modulegraph rewrite is complete, remove the code block below.
        for deleted_module_name in hook_api._deleted_imports:
            # Remove the graph link between the hooked module and item. This removes the 'item' node from the graph if
            # no other links go to it (no other modules import it)
            self.module_graph.removeReference(hook_api.node, deleted_module_name)

    def _process_hidden_imports(self):
        """
        Add all imports listed in this hook script's `hiddenimports` attribute to the module graph as if directly
        imported by this hooked module.

        These imports are typically _not_ implicitly detectable by PyInstaller and hence must be explicitly defined
        by hook scripts.
        """
        # For each hidden import required by the module being hooked...
        for import_module_name in self.hiddenimports:
            try:
                # Graph node for this module. Do not implicitly create namespace packages for non-existent packages.
                caller = self.module_graph.find_node(self.module_name, create_nspkg=False)

                # Manually import this hidden import from this module.
                self.module_graph.import_hook(import_module_name, caller)
            # If this hidden import is unimportable, print a non-fatal warning. Hidden imports often become
            # desynchronized from upstream packages and hence are only "soft" recommendations.
            except ImportError:
                if self.warn_on_missing_hiddenimports:
                    logger.warning('Hidden import "%s" not found!', import_module_name)
class AdditionalFilesCache:
    """
    Per-module record of the binaries and datas that were contributed while import hooks were processed.
    """

    def __init__(self):
        # Both dictionaries map a module name to the list of entries collected for that module.
        self._binaries = {}
        self._datas = {}

    def add(self, modname, binaries, datas):
        """
        Append the passed binaries and datas to the entries recorded for the given module name.

        Either collection may be `None` (or empty); the module is registered in both tables regardless.
        """
        self._binaries.setdefault(modname, []).extend(binaries or [])
        self._datas.setdefault(modname, []).extend(datas or [])

    def __contains__(self, name):
        """
        Tell whether anything has been recorded for the given module name.
        """
        if name in self._binaries:
            return True
        return name in self._datas

    def binaries(self, modname):
        """
        Return the list of binaries recorded for the given module name (empty list if none).
        """
        return self._binaries.get(modname, [])

    def datas(self, modname):
        """
        Return the list of datas recorded for the given module name (empty list if none).
        """
        return self._datas.get(modname, [])

View File

@@ -0,0 +1,486 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Classes facilitating communication between PyInstaller and import hooks.
PyInstaller passes instances of classes defined by this module to corresponding functions defined by external import
hooks, which commonly modify the contents of these instances before returning. PyInstaller then detects and converts
these modifications into appropriate operations on the current `PyiModuleGraph` instance, thus modifying which
modules will be frozen into the executable.
"""
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.lib.modulegraph.modulegraph import (RuntimeModule, RuntimePackage)
class PreSafeImportModuleAPI:
    """
    Metadata communicating changes made by the current **pre-safe import module hook** back to PyInstaller. Such a
    hook is run immediately _before_ a call to `ModuleGraph._safe_import_module()`, which recursively adds the hooked
    module, package, or C extension and all transitive imports thereof to the module graph.

    Pre-safe import module hooks _must_ define a `pre_safe_import_module()` function accepting an instance of this
    class, whose attributes describe the subsequent `ModuleGraph._safe_import_module()` call creating the hooked
    module's graph node.

    Each pre-safe import module hook is run _only_ on the first attempt to create the hooked module's graph node and
    then subsequently ignored. If this hook successfully creates that graph node, the subsequent
    `ModuleGraph._safe_import_module()` call will observe this fact and silently return without attempting to
    recreate that graph node.

    Pre-safe import module hooks are typically used to create graph nodes for **runtime modules** (i.e.,
    modules dynamically defined at runtime). Most modules are physically defined in external `.py`-suffixed scripts.
    Some modules, however, are dynamically defined at runtime (e.g., `six.moves`, dynamically defined by the
    physically defined `six.py` module). `ModuleGraph`, however, only parses `import` statements residing in external
    scripts; it is _not_ a full-fledged, Turing-complete Python interpreter and hence has no means of parsing
    `import` statements performed by runtime modules existing only in-memory.

    'With great power comes great responsibility.'

    Attributes (Immutable)
    ----------------------------
    The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
    _will_ result in a raised exception:

    module_graph : PyiModuleGraph
        Current module graph.
    parent_package : Package
        Graph node for the package providing this module _or_ `None` if this module is a top-level module.

    Attributes (Mutable)
    -----------------------------
    The following attributes are editable.

    module_basename : str
        Unqualified name of the module to be imported (e.g., `text`).
    module_name : str
        Fully-qualified name of this module (e.g., `email.mime.text`).
    """

    def __init__(self, module_graph, module_basename, module_name, parent_package):
        # Read-only state, exposed solely through the getter-only properties below.
        self._module_graph = module_graph
        self._parent_package = parent_package
        # Editable state, stored as plain public attributes.
        self.module_basename = module_basename
        self.module_name = module_name

    # Read-only accessors; setters are intentionally not provided.
    @property
    def module_graph(self):
        """
        Current module graph.
        """
        return self._module_graph

    @property
    def parent_package(self):
        """
        Parent Package of this node.
        """
        return self._parent_package

    def add_runtime_module(self, module_name):
        """
        Register a graph node for a non-package Python module of the passed name that is dynamically defined at
        runtime.

        Most modules are statically defined on-disk as standard Python files. Some modules, however, are dynamically
        defined in-memory at runtime (e.g., `gi.repository.Gst`, dynamically defined by the statically defined
        `gi.repository.__init__` module).

        Because the added node is _not_ a package, all attempts to import submodules from this module in `from`-style
        import statements (e.g., the `queue` submodule in `from six.moves import queue`) will be silently ignored. To
        circumvent this, simply call `add_runtime_package()` instead.

        Parameters
        ----------
        module_name : str
            Fully-qualified name of this module (e.g., `gi.repository.Gst`).

        Examples
        ----------
        This method is typically called by `pre_safe_import_module()` hooks, e.g.:

            def pre_safe_import_module(api):
                api.add_runtime_module(api.module_name)
        """
        runtime_node = RuntimeModule(module_name)
        self._module_graph.add_module(runtime_node)

    def add_runtime_package(self, package_name):
        """
        Register a graph node for a non-namespace Python package of the passed name that is dynamically defined at
        runtime.

        Most packages are statically defined on-disk as standard subdirectories containing `__init__.py` files. Some
        packages, however, are dynamically defined in-memory at runtime (e.g., `six.moves`, dynamically defined by
        the statically defined `six` module).

        All attributes imported from the added package in `from`-style import statements that are submodules of this
        package (e.g., the `queue` submodule in `from six.moves import queue`) will be imported rather than ignored.

        Parameters
        ----------
        package_name : str
            Fully-qualified name of this package (e.g., `six.moves`).

        Examples
        ----------
        This method is typically called by `pre_safe_import_module()` hooks, e.g.:

            def pre_safe_import_module(api):
                api.add_runtime_package(api.module_name)
        """
        runtime_node = RuntimePackage(package_name)
        self._module_graph.add_module(runtime_node)

    def add_alias_module(self, real_module_name, alias_module_name):
        """
        Make the module graph treat `alias_module_name` as an alias of `real_module_name`.

        This method ensures that the next call to findNode() given the alias name will resolve this alias. This
        includes importing and adding a graph node for the real module if needed, as well as adding a reference from
        the alias to the real module.

        Parameters
        ----------
        real_module_name : str
            Fully-qualified name of the **existing module** (i.e., the module being aliased).
        alias_module_name : str
            Fully-qualified name of the **non-existent module** (i.e., the alias to be created).
        """
        graph = self._module_graph
        graph.alias_module(real_module_name, alias_module_name)

    def append_package_path(self, directory):
        """
        Register an extra directory for the `__path__` of the package named by `module_name`.

        Modulegraph does a good job at simulating Python's import machinery, but it cannot handle the modifications
        that packages make to their `__path__` at runtime. Therefore, extra paths can be registered for a package
        through this method, and they will be honored by the module graph.

        Parameters
        ----------
        directory : str
            Absolute or relative path of the directory to be appended to this package's `__path__` attribute.
        """
        graph = self._module_graph
        graph.append_package_path(self.module_name, directory)
class PreFindModulePathAPI:
    """
    Metadata communicating changes made by the current **pre-find module path hook** back to PyInstaller. Such a hook
    is run immediately _before_ a call to `ModuleGraph._find_module_path()` finding the hooked module's absolute path.

    Pre-find module path hooks _must_ define a `pre_find_module_path()` function accepting an instance of this class,
    whose attributes describe the subsequent `ModuleGraph._find_module_path()` call to be performed.

    Pre-find module path hooks are typically used to change the absolute path from which a module will be
    subsequently imported and thus frozen into the executable. To do so, hooks may overwrite the default
    `search_dirs` list of the absolute paths of all directories to be searched for that module: e.g.,

        def pre_find_module_path(api):
            api.search_dirs = ['/the/one/true/package/providing/this/module']

    Each pre-find module path hook is run _only_ on the first call to `ModuleGraph._find_module_path()` for the
    corresponding module.

    Attributes
    ----------
    The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
    respected by PyInstaller:

    search_dirs : list
        List of the absolute paths of all directories to be searched for this module (in order). Searching will halt
        at the first directory containing this module.

    Attributes (Immutable)
    ----------
    The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
    _will_ result in a raised exception:

    module_name : str
        Fully-qualified name of this module.
    module_graph : PyiModuleGraph
        Current module graph. For efficiency, the graph object itself is technically mutable. To preserve graph
        integrity, it should nonetheless _never_ be modified. While read-only `PyiModuleGraph` methods (e.g.,
        `findNode()`) are safely callable from within pre-find module path hooks, methods modifying the graph are
        _not_. If graph modifications are required, consider an alternative type of hook (e.g., pre-import module
        hooks).
    """

    def __init__(self, module_graph, module_name, search_dirs):
        # Read-only state, exposed solely through the getter-only properties below.
        self._module_graph = module_graph
        self._module_name = module_name
        # Editable state: hooks may replace or modify this list.
        self.search_dirs = search_dirs

    # Read-only accessors; setters are intentionally not provided.
    @property
    def module_graph(self):
        """
        Current module graph.
        """
        return self._module_graph

    @property
    def module_name(self):
        """
        Fully-qualified name of this module.
        """
        return self._module_name
class PostGraphAPI:
    """
    Metadata communicating changes made by the current **post-graph hook** (i.e., hook run for a specific module
    transitively imported by the current application _after_ the module graph of all `import` statements performed by
    this application has been constructed) back to PyInstaller.

    Post-graph hooks may optionally define a `post_graph()` function accepting an instance of this class,
    whose attributes describe the current state of the module graph and the hooked module's graph node.

    Attributes (Mutable)
    ----------
    The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
    respected by PyInstaller:

    module_graph : PyiModuleGraph
        Current module graph.
    module : Node
        Graph node for the currently hooked module.

    'With great power comes great responsibility.'

    Attributes (Immutable)
    ----------
    The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
    _will_ result in a raised exception:

    __name__ : str
        Fully-qualified name of this module (e.g., `six.moves.tkinter`).
    __file__ : str
        Absolute path of this module. If this module is:
        * A standard (rather than namespace) package, this is the absolute path of this package's directory.
        * A namespace (rather than standard) package, this is the abstract placeholder `-`. (Don't ask. Don't tell.)
        * A non-package module or C extension, this is the absolute path of the corresponding file.
    __path__ : tuple
        Tuple of the absolute paths of all directories comprising this package if this module is a package _or_
        `None` otherwise. If this module is a standard (rather than namespace) package, this tuple contains only the
        absolute path of this package's directory.
    co : code
        Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).
    analysis : build_main.Analysis
        The Analysis that loaded the hook.

    Attributes (Private)
    ----------
    The following attributes are technically mutable but private, and hence should _never_ be externally accessed or
    modified by hooks. Call the corresponding public methods instead:

    _added_datas : list
        List of the `(name, path)` 2-tuples or TOC objects of all external data files required by the current hook,
        defaulting to the empty list. This is equivalent to the global `datas` hook attribute.
    _added_imports : list
        List of the fully-qualified names of all modules imported by the current hook, defaulting to the empty list.
        This is equivalent to the global `hiddenimports` hook attribute.
    _added_binaries : list
        List of the `(name, path)` 2-tuples or TOC objects of all external C extensions imported by the current hook,
        defaulting to the empty list. This is equivalent to the global `binaries` hook attribute.
    _deleted_imports : list
        List of the fully-qualified names of all modules removed from the imports of the hooked module by the current
        hook, defaulting to the empty list. This is equivalent to the global `excludedimports` hook attribute.
    _module_collection_mode : dict
        Dictionary of package/module names and their corresponding collection mode strings. This is equivalent to the
        global `module_collection_mode` hook attribute.
    _bindepend_symlink_suppression : set
        A set of paths or path patterns corresponding to shared libraries for which binary dependency analysis should
        not generate symbolic links into top-level application directory.
    """
    def __init__(self, module_name, module_graph, analysis):
        # Mutable attributes.
        self.module_graph = module_graph
        self.module = module_graph.find_node(module_name)
        assert self.module is not None  # should not occur

        # Immutable attributes. The triple-underscore names end in two underscores and are therefore not subject to
        # name mangling; they back the read-only `__name__`, `__file__`, and `__path__` properties below.
        self.___name__ = module_name
        self.___file__ = self.module.filename
        self._co = self.module.code
        self._analysis = analysis

        # To enforce immutability, convert this module's package path if any into an immutable tuple.
        package_path = self.module.packagepath
        self.___path__ = tuple(package_path) if package_path is not None else None

        #FIXME: Refactor "_added_datas", "_added_binaries", and "_deleted_imports" into sets. Since order of
        #import is important, "_added_imports" must remain a list.

        # Private attributes.
        self._added_binaries = []
        self._added_datas = []
        self._added_imports = []
        self._deleted_imports = []
        self._module_collection_mode = {}
        self._bindepend_symlink_suppression = set()

    # Immutable properties. No corresponding setters are defined.
    @property
    def __file__(self):
        """
        Absolute path of this module's file.
        """
        return self.___file__

    @property
    def __path__(self):
        """
        Tuple of the absolute paths of all directories comprising this package if this module is a package _or_
        `None` otherwise. If this module is a standard (rather than namespace) package, this tuple contains only the
        absolute path of this package's directory.
        """
        return self.___path__

    @property
    def __name__(self):
        """
        Fully-qualified name of this module (e.g., `six.moves.tkinter`).
        """
        return self.___name__

    @property
    def co(self):
        """
        Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).
        """
        return self._co

    @property
    def analysis(self):
        """
        build_main.Analysis that calls the hook.
        """
        return self._analysis

    # Obsolete immutable properties provided to preserve backward compatibility.
    @property
    def name(self):
        """
        Fully-qualified name of this module (e.g., `six.moves.tkinter`).

        **This property has been deprecated by the `__name__` property.**
        """
        return self.___name__

    @property
    def graph(self):
        """
        Current module graph.

        **This property has been deprecated by the `module_graph` property.**
        """
        return self.module_graph

    @property
    def node(self):
        """
        Graph node for the currently hooked module.

        **This property has been deprecated by the `module` property.**
        """
        return self.module

    # TODO: This incorrectly returns the list of the graph nodes of all modules *TRANSITIVELY* (rather than directly)
    # imported by this module. Unfortunately, this implies that most uses of this property are currently broken
    # (e.g., "hook-PIL.SpiderImagePlugin.py"). We only require this for the aforementioned hook, so contemplate
    # alternative approaches.
    @property
    def imports(self):
        """
        Graph nodes produced by `module_graph.iter_graph()` when started at this module's node.

        NOTE: per the TODO above, this covers modules imported *transitively* by this module, not only the ones it
        imports directly.
        """
        return self.module_graph.iter_graph(start=self.module)

    def add_imports(self, *module_names):
        """
        Add all Python modules whose fully-qualified names are in the passed list as "hidden imports" upon which the
        current module depends.

        This is equivalent to appending such names to the hook-specific `hiddenimports` attribute.
        """
        # Append such names to the current list of all such names.
        self._added_imports.extend(module_names)

    def del_imports(self, *module_names):
        """
        Remove the named fully-qualified modules from the set of imports (either hidden or visible) upon which the
        current module depends.

        This is equivalent to appending such names to the hook-specific `excludedimports` attribute.
        """
        self._deleted_imports.extend(module_names)

    @staticmethod
    def _to_dest_src_entries(entries):
        """
        Normalize hook-supplied `binaries`/`datas` entries into `(dest_name, src_name)` pairs.
        """
        # Detect TOC 3-tuple list by checking the length of the first entry; such entries are already in
        # `(dest_name, src_name, typecode)` order, so only the typecode needs to be dropped.
        if entries and len(entries[0]) == 3:
            return [entry[:2] for entry in entries]
        # NOTE: `format_binaries_and_datas` changes tuples from input format `(src_name, dest_name)` to output
        # format `(dest_name, src_name)`.
        return format_binaries_and_datas(entries)

    def add_binaries(self, binaries):
        """
        Add all external dynamic libraries in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the
        current module. This is equivalent to adding to the global `binaries` hook attribute.

        For convenience, the `binaries` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`.
        """
        self._added_binaries.extend(self._to_dest_src_entries(binaries))

    def add_datas(self, datas):
        """
        Add all external data files in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the
        current module. This is equivalent to adding to the global `datas` hook attribute.

        For convenience, the `datas` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`.
        """
        self._added_datas.extend(self._to_dest_src_entries(datas))

    def set_module_collection_mode(self, name, mode):
        """
        Set the package/module collection mode for the specified module name. If `name` is `None`, the hooked
        module/package name is used. `mode` can be one of valid mode strings (`'pyz'`, `'pyc'`, `'py'`, `'pyz+py'`,
        `'py+pyz'`) or `None`, which clears the setting for the module/package - but only within this hook's context!

        Clearing a name for which no mode has been set within this hook is a no-op.
        """
        if name is None:
            name = self.__name__
        if mode is None:
            # Use a default so that clearing a never-set entry does not raise KeyError.
            self._module_collection_mode.pop(name, None)
        else:
            self._module_collection_mode[name] = mode

    def add_bindepend_symlink_suppression_pattern(self, pattern):
        """
        Add the given path or path pattern to the set of patterns that prevent binary dependency analysis from creating
        a symbolic link to the top-level application directory.
        """
        self._bindepend_symlink_suppression.add(pattern)

View File

@@ -0,0 +1,344 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utility functions related to analyzing/bundling dependencies.
"""
import ctypes.util
import os
import re
import shutil
from types import CodeType
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import bytecode
from PyInstaller.depend.dylib import include_library
from PyInstaller.exceptions import ExecCommandFailed
logger = logging.getLogger(__name__)
def scan_code_for_ctypes(co):
    """
    Return the resolved ctypes library dependencies of the given code object.

    The names detected in `co` are de-duplicated, falsy entries and entries that are not plain basenames are dropped
    (the latter with a warning), and the remainder is passed to `_resolveCtypesImports()`, whose result is returned.
    """
    candidates = set(__recursively_scan_code_objects_for_ctypes(co))

    # Walk over a snapshot, because rejected entries are removed from the set while we go.
    for candidate in tuple(candidates):
        if not candidate:
            # The name might in some cases be None; some Python modules (e.g., PyObjC.objc._bridgesupport) contain
            # code like `dll = ctypes.CDLL(None)`. Such values have to be removed too.
            candidates.remove(candidate)
            continue

        if os.path.basename(candidate) == candidate:
            # Plain basename - keep it.
            continue

        # The library has been requested with something else than its basename; drop the entry and warn the user -
        # PyInstaller would need to patch the compiled pyc file to make it work correctly!
        # TODO make these warnings show up somewhere.
        try:
            source_file = co.co_filename
        except Exception:
            source_file = 'UNKNOWN'
        logger.warning(
            "Ignoring %s imported from %s - only basenames are supported with ctypes imports!", candidate, source_file
        )
        candidates.remove(candidate)

    return _resolveCtypesImports(candidates)
def __recursively_scan_code_objects_for_ctypes(code: CodeType):
    """
    Heuristically detect ctypes dependencies of the given code object (nested code objects included).

    Returns a list with the names of the binaries detected as dependencies; the list may contain duplicates.
    """
    from PyInstaller.depend.bytecode import any_alias, search_recursively

    # Every spelling under which the ctypes library loaders may be referenced.
    dll_loader_names = set()
    for loader in (
        "ctypes.CDLL",
        "ctypes.cdll.LoadLibrary",
        "ctypes.WinDLL",
        "ctypes.windll.LoadLibrary",
        "ctypes.OleDLL",
        "ctypes.oledll.LoadLibrary",
        "ctypes.PyDLL",
        "ctypes.pydll.LoadLibrary",
    ):
        dll_loader_names.update(any_alias(loader))
    find_library_names = set(any_alias("ctypes.util.find_library"))

    detected = []

    for call_list in bytecode.recursive_function_calls(code).values():
        for func_name, func_args in call_list:
            # Only calls with exactly one argument, which must be a string, are of interest.
            if len(func_args) != 1:
                continue
            argument = func_args[0]
            if not isinstance(argument, str):
                continue

            if func_name in dll_loader_names:
                # ctypes.*DLL() or ctypes.*dll.LoadLibrary()
                detected.append(argument)
            elif func_name in find_library_names:
                # ctypes.util.find_library() needs to be handled separately, because we need to resolve the library
                # base name given as the argument (without prefix and suffix, e.g. 'gs') into corresponding full name
                # (e.g., 'libgs.so.9').
                if not argument:
                    continue
                try:  # guards against the ctypes bug https://github.com/python/cpython/issues/93094
                    resolved = ctypes.util.find_library(argument)
                except FileNotFoundError:
                    resolved = None
                    logger.warning(
                        'ctypes.util.find_library raised a FileNotFoundError. '
                        'Supressing and assuming no lib with the name "%s" was found.', argument
                    )
                if resolved:
                    # On Windows, `find_library` may return a full pathname. See issue #1934.
                    detected.append(os.path.basename(resolved))

    # The above handles any flavour of function/class call. We still need to capture the (albeit rarely used) case of
    # loading libraries with ctypes.cdll's getattr.
    for attribute_libs in search_recursively(_scan_code_for_ctypes_getattr, code).values():
        detected.extend(attribute_libs)

    return detected
# Bytecode pattern built with `bytecode.bytecode_regex()` from the opcode fragments exported by the `bytecode` module.
# It has two capturing groups: the first covers the load of the leading name ('foo'), the second covers the loads of
# the attribute names that follow ('bar' or 'bar.whizz'). `_scan_code_for_ctypes_getattr()` unpacks `match.groups()`
# into exactly these two parts.
# NOTE(review): the `#` lines inside the bytes literal are part of the pattern text, not Python comments; presumably
# `bytecode_regex()` compiles the pattern in verbose mode - confirm there before editing them.
_ctypes_getattr_regex = bytecode.bytecode_regex(
rb"""
# Matches 'foo.bar' or 'foo.bar.whizz'.
# Load the 'foo'.
(
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_GLOBAL + rb""").
)
# Load the 'bar.whizz' (one opcode per name component, each possibly preceded by name reference extension).
(
(?:
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_LOAD + rb""").
)+
)
"""
)
def _scan_code_for_ctypes_getattr(code: CodeType):
    """
    Yield ``<library_name>.dll`` for every attribute-style library access (e.g., ``ctypes.cdll.library_name``) found
    in the bytecode of the given code object.
    """
    loader_namespaces = ("cdll", "oledll", "pydll", "windll")

    for hit in bytecode.finditer(_ctypes_getattr_regex, code.co_code):
        raw_base, raw_chain = hit.groups()
        base = bytecode.load(raw_base, code)
        chain = bytecode.loads(raw_chain, code)

        # Attribute chains ending in `LoadLibrary` are not library names; skip them here.
        if chain and chain[-1] == "LoadLibrary":
            continue

        depth = len(chain)
        if depth == 1 and base in loader_namespaces:
            # Capture `from ctypes import ole; ole.dll_name`.
            yield chain[0] + ".dll"
        elif depth == 2 and base == "ctypes" and chain[0] in loader_namespaces:
            # Capture `import ctypes; ctypes.ole.dll_name`.
            yield chain[1] + ".dll"
# TODO: reuse this code with modulegraph implementation.
def _resolveCtypesImports(cbinaries):
    """
    Complete ctypes BINARY entries for modules with their full path.

    Input is an iterable of C binary names (as collected by `scan_code_for_ctypes`). Output is a list of
    ``(name, full_path, 'BINARY')`` tuples ready to be appended to the ``binaries`` of a module. Names that cannot be
    resolved, or that are rejected by `include_library()`, are left out.

    This function temporarily extends PATH, LD_LIBRARY_PATH or DYLD_LIBRARY_PATH (depending on the platform) by
    CONF['pathex'], so that shared libraries are searched there, too. The previous value of the variable is restored
    before returning, also when resolving an entry raises an exception.

    Example (illustrative; the resolved path depends on the system):
    >>> _resolveCtypesImports(['libgs.so'])
    [('libgs.so', '/usr/lib/libgs.so', 'BINARY')]
    """
    from ctypes.util import find_library
    from PyInstaller.config import CONF

    if compat.is_unix:
        envvar = "LD_LIBRARY_PATH"
    elif compat.is_darwin:
        envvar = "DYLD_LIBRARY_PATH"
    else:
        envvar = "PATH"

    def _setPaths():
        # Prepend CONF['pathex'] to the search-path variable and return its previous value (None if it was unset).
        path = os.pathsep.join(CONF['pathex'])
        old = compat.getenv(envvar)
        if old is not None:
            path = os.pathsep.join((path, old))
        compat.setenv(envvar, path)
        return old

    def _restorePaths(old):
        # Put the search-path variable back into the state recorded by _setPaths().
        if old is None:
            compat.unsetenv(envvar)
        else:
            compat.setenv(envvar, old)

    ret = []

    # Try to locate the shared library on the disk. This is done by calling ctypes.util.find_library with
    # ImportTracker's local paths temporarily prepended to the library search paths (and restored after the call).
    old = _setPaths()
    try:
        for cbin in cbinaries:
            try:
                # There is an issue with find_library() where it can run into errors trying to locate the library. See
                # #5734.
                cpath = find_library(os.path.splitext(cbin)[0])
            except FileNotFoundError:
                # In these cases, find_library() should return None.
                cpath = None
            if compat.is_unix or compat.is_cygwin:
                # CAVEAT: find_library() is not the correct function. ctype's documentation says that it is meant to
                # resolve only the filename (as a *compiler* does) not the full path. Anyway, it works well enough on
                # Windows and macOS. On Linux, we need to implement more code to find out the full path.
                if cpath is None:
                    cpath = cbin
                # "man ld.so" says that we should first search LD_LIBRARY_PATH and then the ldcache.
                for d in compat.getenv(envvar, '').split(os.pathsep):
                    if os.path.isfile(os.path.join(d, cpath)):
                        cpath = os.path.join(d, cpath)
                        break
                else:
                    # Not found in any search-path directory; fall back to the ldconfig cache.
                    if LDCONFIG_CACHE is None:
                        load_ldconfig_cache()
                    if cpath in LDCONFIG_CACHE:
                        cpath = LDCONFIG_CACHE[cpath]
                        assert os.path.isfile(cpath)
                    else:
                        cpath = None
            if cpath is None:
                # Skip warning message if cbin (basename of library) is ignored. This prevents messages like:
                # 'W: library kernel32.dll required via ctypes not found'
                if not include_library(cbin):
                    continue
                # On non-Windows, automatically ignore all ctypes-based references to DLL files. This complements the
                # above check, which might not match potential case variations (e.g., `KERNEL32.dll`, instead of
                # `kernel32.dll`) due to case-sensitivity of the matching that is in effect on non-Windows platforms.
                if (not compat.is_win and not compat.is_cygwin) and cbin.lower().endswith('.dll'):
                    continue
                logger.warning("Library %s required via ctypes not found", cbin)
            else:
                if not include_library(cpath):
                    continue
                ret.append((cbin, cpath, "BINARY"))
    finally:
        # Always undo the environment change, so that a failure while resolving one entry does not leave the modified
        # search path behind for the rest of the process.
        _restorePaths(old)
    return ret
# Module-level cache filled in by `load_ldconfig_cache()`: `None` until that function has run, afterwards a dict
# mapping library names to the paths reported by `ldconfig` (an empty dict if `ldconfig` is unavailable or fails).
LDCONFIG_CACHE = None  # cache the output of `/sbin/ldconfig -p`
def load_ldconfig_cache():
    """
    Populate the module-level `LDCONFIG_CACHE` from the output of `ldconfig`, so that the command is run only once.

    The output contains thousands of libraries, and running the command for every dylib would be expensive. If the
    cache has already been populated, this function does nothing. If `ldconfig` is not applicable, cannot be found,
    or fails to run, the cache is set to an empty dict.
    """
    global LDCONFIG_CACHE

    if LDCONFIG_CACHE is not None:
        return

    # Cygwin: ldconfig is not available, but we might be re-using general POSIX codepaths and end up here.
    # Musl: deliberately does not use ldconfig; the executable either does not exist or is a functionless executable
    # which, on calling with any arguments, simply tells you that those arguments are invalid.
    if compat.is_cygwin or compat.is_musl:
        LDCONFIG_CACHE = {}
        return

    ldconfig = shutil.which('ldconfig')
    if ldconfig is None:
        # Not found in $PATH; search some fixed directories. Simply use a second call instead of fiddling around with
        # checks for empty env-vars and string-concat.
        ldconfig = shutil.which('ldconfig', path='/usr/sbin:/sbin:/usr/bin:/bin')
        if ldconfig is None:
            # Still could not find the 'ldconfig' command.
            LDCONFIG_CACHE = {}
            return

    bsd_format = compat.is_freebsd or compat.is_openbsd
    if bsd_format:
        # This has a quite different format than other Unixes:
        # [vagrant@freebsd-10 ~]$ ldconfig -r
        # /var/run/ld-elf.so.hints:
        #     search directories: /lib:/usr/lib:/usr/lib/compat:...
        #     0:-lgeom.5 => /lib/libgeom.so.5
        #   184:-lpython2.7.1 => /usr/local/lib/libpython2.7.so.1
        ldconfig_arg = '-r'
        header_line_count = 2
        pattern = re.compile(r'^\s+\d+:-l(\S+)(\s.*)? => (\S+)')
    else:
        # Skip first line of the library list because it is just an informative line and might contain localized
        # characters. Example of first line with locale set to cs_CZ.UTF-8:
        #$ /sbin/ldconfig -p
        #V keši „/etc/ld.so.cache“ nalezeno knihoven: 2799
        #      libzvbi.so.0 (libc6,x86-64) => /lib64/libzvbi.so.0
        #      libzvbi-chains.so.0 (libc6,x86-64) => /lib64/libzvbi-chains.so.0
        ldconfig_arg = '-p'
        header_line_count = 1
        pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')

    try:
        output = compat.exec_command(ldconfig, ldconfig_arg)
    except ExecCommandFailed:
        logger.warning("Failed to execute ldconfig. Disabling LD cache.")
        LDCONFIG_CACHE = {}
        return

    entries = output.strip().splitlines()[header_line_count:]

    LDCONFIG_CACHE = {}
    for entry in entries:
        # :fixme: this assumes library names do not contain whitespace
        match = pattern.match(entry)

        if match is None:
            # Abnormal line of output: warn about it (unless it is the harmless line from #5540), then skip it.
            if not re.search("Cache generated by:", entry):
                logger.warning("Unrecognised line of output %r from ldconfig", entry)
            continue

        # The last of the three groups of either pattern is the library path.
        lib_path = match.group(3)
        if bsd_format:
            # Insert `.so` at the end of the lib's basename. soname and filename may have (different) trailing
            # versions. We assume the `.so` in the filename to mark the end of the lib's basename.
            base = os.path.basename(lib_path).split('.so', 1)[0]
            lib_name = 'lib' + match.group(1)
            assert lib_name.startswith(base)
            lib_name = base + '.so' + lib_name[len(base):]
        else:
            lib_name = match.group(1)

        # ldconfig may know about several versions of the same lib, e.g., different arch, different libc, etc.
        # Use the first entry.
        LDCONFIG_CACHE.setdefault(lib_name, lib_path)