"""Guess a pinned requirements.txt for a Python file or directory.

The script collects the top-level imports of the target, drops standard
library modules, maps the remaining import names to PyPI package names and
picks, for each package, the newest release published on or before the
target's reference date (last git commit date, else filesystem mtime).
"""

import argparse
import ast
import datetime
import importlib.metadata
import logging
import os
import re
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Set

import git
import requests
import vermin
from bs4 import BeautifulSoup

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Seconds to wait for PyPI before giving up on a single HTTP request.
_REQUEST_TIMEOUT = 10

# Top-level directories of the stdlib location that are not stdlib modules.
_NON_STDLIB_DIRS = frozenset(
    {"test", "tests", "site-packages", "dist-packages", "__pycache__"}
)


def get_file_min_version(file_path):
    """Return vermin's minimum-version string for a file, or 'unknown'.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        The value of ``vermin.version_strings(vermin.detect(contents))``, or
        the string ``'unknown'`` when the file cannot be read or analysed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            contents = f.read()
        return vermin.version_strings(vermin.detect(contents))
    except Exception as e:
        # Best effort: the original fallback value was unreachable.
        logger.warning(f"Error detecting minimum version for {file_path}: {e}")
        return 'unknown'


class ImportVisitor(ast.NodeVisitor):
    """AST visitor to find all imports in a Python file."""

    def __init__(self):
        # Top-level module names seen so far (e.g. "os" for "os.path").
        self.imports = set()

    def visit_Import(self, node):
        """Handle 'import x' statements."""
        for name in node.names:
            # Get the top-level module name
            self.imports.add(name.name.split(".")[0])

    def visit_ImportFrom(self, node):
        """Handle 'from x import y' statements."""
        # level == 0 means an absolute import; relative imports refer to the
        # project itself and never to a PyPI package.
        if node.level == 0 and node.module:
            # Get the top-level module name
            self.imports.add(node.module.split(".")[0])


def get_file_imports(file_path: str) -> Set[str]:
    """Parse a Python file and extract all import names.

    Args:
        file_path: Path of the Python source file.

    Returns:
        The set of top-level module names imported anywhere in the file;
        an empty set when the file cannot be read or parsed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            tree = ast.parse(f.read(), filename=file_path)
        visitor = ImportVisitor()
        visitor.visit(tree)
        return visitor.imports
    except Exception as e:
        print(f"Error parsing imports from {file_path}: {e}")
        return set()


def get_all_imports(path: str) -> Set[str]:
    """Get all imports from a file or directory.

    Args:
        path: A ``.py`` file, or a directory that is walked recursively.

    Returns:
        The union of the top-level import names of every ``.py`` file found.
    """
    imports = set()
    if os.path.isfile(path):
        if path.endswith(".py"):
            imports.update(get_file_imports(path))
        return imports

    for root, _, files in os.walk(path):
        for file in files:
            if file.endswith(".py"):
                imports.update(get_file_imports(os.path.join(root, file)))
    return imports


def _scan_stdlib_dir(stdlib_path: str) -> Set[str]:
    """Return the top-level module names found directly in a stdlib directory."""
    names = set()
    try:
        entries = list(os.scandir(stdlib_path))
    except OSError:
        # The guessed path may simply not exist on this platform.
        return names

    for entry in entries:
        if entry.is_dir():
            if entry.name in _NON_STDLIB_DIRS:
                continue
            if entry.name == "lib-dynload":
                # Compiled extension modules: "<name>.cpython-XY-....so".
                try:
                    names.update(
                        ext.name.split(".")[0]
                        for ext in os.scandir(entry.path)
                        if ext.is_file()
                    )
                except OSError:
                    pass
            elif os.path.isfile(os.path.join(entry.path, "__init__.py")):
                names.add(entry.name)
        elif entry.name.endswith(".py"):
            names.add(entry.name[: -len(".py")])
    return names


def get_stdlib_modules() -> set:
    """Get a set of all Python standard library module names.

    The interpreter's own lists (``sys.builtin_module_names`` and, where
    available, ``sys.stdlib_module_names``) are used first because a scan of
    ``.py`` files alone misses built-in and extension modules such as
    ``math`` or ``time``. A scan of the stdlib directories is kept as a
    fallback for interpreters without ``sys.stdlib_module_names``.

    Returns:
        A set of top-level standard library module names.
    """
    stdlib_modules = set(sys.builtin_module_names)
    stdlib_modules.update(getattr(sys, "stdlib_module_names", ()))

    # Get Python's stdlib directory
    stdlib_paths = [os.path.dirname(os.__file__)]
    if hasattr(sys, "base_prefix"):
        stdlib_paths.append(
            os.path.join(
                sys.base_prefix,
                "lib",
                f"python{sys.version_info.major}.{sys.version_info.minor}",
            )
        )

    try:
        for stdlib_path in stdlib_paths:
            stdlib_modules.update(_scan_stdlib_dir(stdlib_path))
    except Exception as e:
        print(f"Error getting stdlib modules: {e}")

    # Add some common built-in modules
    stdlib_modules.update(
        [
            "sys",
            "builtins",
            "_thread",
            "_signal",
            "_sre",
            "_ast",
            "_operator",
            "_symtable",
            "_random",
            "_pickle",
            "_datetime",
            "_elementtree",
        ]
    )
    return stdlib_modules


def get_import_to_pkg_mapping() -> Dict[str, str]:
    """Create a mapping of import names to PyPI package names.

    The built-in table is extended with the ``top_level.txt`` entries of the
    locally installed distributions; built-in entries take precedence.

    Returns:
        A dict mapping import name to PyPI distribution name. The built-in
        table is always returned, even when reading local metadata fails.
    """
    # Common known mappings for imports:pypi_name
    # NOTE(review): keys containing "." or "/" can never match, because the
    # import scanner in this file only yields top-level module names.
    KNOWN_MAPPINGS = {
        "bs4": "beautifulsoup4",
        "PIL": "pillow",
        "dotenv": "python-dotenv",
        "yaml": "pyyaml",
        "mx": "mxnet",
        "cairo": "pycairo",
        "dateutil": "python-dateutil",
        "lxml.etree": "lxml",
        "nacl": "pynacl",
        "pkg_resources": "setuptools",
        "googleapiclient": "google-api-python-client",
        "psycopg2": "psycopg2-binary",
        "MySQLdb1": "mysqlclient",
        "git": "GitPython",
        "winreg": "windows-curses",
        "win32com": "pywin32",
        "win32api": "pywin32",
        "pythoncom": "pywin32",
        "scipy.io": "scipy",
        "skimage": "scikit-image",
        "sklearn.external.joblib": "joblib",
        "wx": "wxPython",
        "sounddevice": "python-sounddevice",
        "magic": "python-magic",
        "dbus": "dbus-python",
        "keyring": "keyring",
        "tabulate-stubs": "types-tabulate",
        "seaborn-stubs": "types-seaborn",
        "stripe-stubs": "types-stripe",
        "flake8_2020-stubs": "types-flake8-2020",
        "click_spinner-stubs": "types-click-spinner",
        "pysftp-stubs": "types-pysftp",
        "retry-stubs": "types-retry",
        "regex-stubs": "types-regex",
        "zxcvbn-stubs": "types-zxcvbn",
        "debian": "importlib-metadata",
        "importlib_metadata": "importlib-metadata",
        "opentracing-stubs": "types-opentracing",
        "flask_socketio-stubs": "types-Flask-SocketIO",
        "ttkthemes-stubs": "types-ttkthemes",
        "humanfriendly-stubs": "types-humanfriendly",
        "flake8_simplify-stubs": "types-flake8-simplify",
        "peewee-stubs": "types-peewee",
        "pika-stubs": "types-pika-ts",
        "docutils-stubs": "types-docutils",
        "PIL-stubs": "types-Pillow",
        "xmltodict-stubs": "types-xmltodict",
        "pep8ext_naming-stubs": "types-pep8-naming",
        "playsound-stubs": "types-playsound",
        "pycurl-stubs": "types-pycurl",
        "httplib2-stubs": "types-httplib2",
        "LanguageSelector": "language-selector",
        "language_support_pkgs": "language-selector",
        "pytest_lazyfixture-stubs": "types-pytest-lazy-fixture",
        "fpdf-stubs": "types-fpdf2",
        "croniter-stubs": "types-croniter",
        "dateutil-stubs": "types-python-dateutil",
        "xdg": "pyxdg",
        "wtforms-stubs": "types-WTForms",
        "tensorflow-stubs": "types-tensorflow",
        "OpenSSL": "pyOpenSSL",
        "google-stubs": "types-google-cloud-ndb",
        "zope": "zope.interface",
        "ujson-stubs": "types-ujson",
        "toml-stubs": "types-toml",
        "toposort-stubs": "types-toposort",
        "markdown": "Markdown",
        "zstd-stubs": "types-zstd",
        "boltons-stubs": "types-boltons",
        "decorator-stubs": "types-decorator",
        "mypy_extensions-stubs": "types-mypy-extensions",
        "pynput-stubs": "types-pynput",
        "translationstring-stubs": "types-translationstring",
        "pymysql-stubs": "types-PyMySQL",
        "whatthepatch-stubs": "types-whatthepatch",
        "src": "kiwisolver",
        "aws_xray_sdk-stubs": "types-aws-xray-sdk",
        "caldav-stubs": "types-caldav",
        "paho-stubs": "types-paho-mqtt",
        "mock-stubs": "types-mock",
        "matplotlib_inline": "matplotlib-inline",
        "IPython": "ipython",
        "consolemenu-stubs": "types-console-menu",
        "passpy-stubs": "types-passpy",
        "ibm_db-stubs": "types-ibm-db",
        "brlapi": "Brlapi",
        "tree_sitter_languages-stubs": "types-tree-sitter-languages",
        "_pytest": "pytest",
        "py": "pytest",
        "neovim": "pynvim",
        "pyasn1-stubs": "types-pyasn1",
        "systemd": "systemd-python",
        "systemd/_daemon": "systemd-python",
        "systemd/_journal": "systemd-python",
        "systemd/_reader": "systemd-python",
        "systemd/id128": "systemd-python",
        "systemd/login": "systemd-python",
        "flake8_plugin_utils-stubs": "types-flake8-plugin-utils",
        "pkg_resources-stubs": "types-setuptools",
        "setuptools-stubs": "types-setuptools",
        "isympy": "sympy",
        "six-stubs": "types-six",
        "html5lib-stubs": "types-html5lib",
        "singledispatch-stubs": "types-singledispatch",
        "uwsgi-stubs": "types-uWSGI",
        "uwsgidecorators-stubs": "types-uWSGI",
        "bs4-stubs": "types-beautifulsoup4",
        "cups": "pycups",
        "polib-stubs": "types-polib",
        "serial-stubs": "types-pyserial",
        "openpyxl-stubs": "types-openpyxl",
        "jsonschema-stubs": "types-jsonschema",
        "redis-stubs": "types-redis",
        "pytest_flask": "pytest-flask",
        "portpicker-stubs": "types-portpicker",
        "pure_eval": "pure-eval",
        "pygments-stubs": "types-Pygments",
        "cachetools-stubs": "types-cachetools",
        "psutil-stubs": "types-psutil",
        "deprecated-stubs": "types-Deprecated",
        "Xlib-stubs": "types-python-xlib",
        "axi": "apt-xapian-index",
        "flask_cors-stubs": "types-Flask-Cors",
        "gdb-stubs": "types-gdb",
        "vobject-stubs": "types-vobject",
        "yaml-stubs": "types-PyYAML",
        "simplejson-stubs": "types-simplejson",
        "Cryptodome": "pycryptodomex",
        "flake8_builtins-stubs": "types-flake8-builtins",
        "netaddr-stubs": "types-netaddr",
        "hgdemandimport": "mercurial",
        "hgext": "mercurial",
        "hgext3rd": "mercurial",
        "flake8_rst_docstrings-stubs": "types-flake8-rst-docstrings",
        "automat": "Automat",
        "nmap-stubs": "types-python-nmap",
        "serial": "pyserial",
        "_pyrsistent_version": "pyrsistent",
        "pvectorc": "pyrsistent",
        "tree_sitter-stubs": "types-tree-sitter",
        "deb822": "python-debian",
        "debian_bundle": "python-debian",
        "secretstorage": "SecretStorage",
        "ldap3-stubs": "types-ldap3",
        "jwt": "PyJWT",
        "_dbus_bindings": "dbus-python",
        "_dbus_glib_bindings": "dbus-python",
        "oauthlib-stubs": "types-oauthlib",
        "MySQLdb-stubs": "types-mysqlclient",
        "waitress-stubs": "types-waitress",
        "Xlib": "python-xlib",
        "jks-stubs": "types-pyjks",
        "qrcode-stubs": "types-qrcode",
        "xattr": "pyxattr",
        "slugify-stubs": "types-python-slugify",
        "mypy_extensions": "mypy-extensions",
        "cronlog-stubs": "types-python-crontab",
        "crontab-stubs": "types-python-crontab",
        "crontabs-stubs": "types-python-crontab",
        "entrypoints-stubs": "types-entrypoints",
        "dateparser-stubs": "types-dateparser",
        "dateparser_data-stubs": "types-dateparser",
        "_win32typing-stubs": "types-pywin32",
        "afxres-stubs": "types-pywin32",
        "commctrl-stubs": "types-pywin32",
        "dde-stubs": "types-pywin32",
        "isapi-stubs": "types-pywin32",
        "mmapfile-stubs": "types-pywin32",
        "mmsystem-stubs": "types-pywin32",
        "ntsecuritycon-stubs": "types-pywin32",
        "odbc-stubs": "types-pywin32",
        "perfmon-stubs": "types-pywin32",
        "pythoncom-stubs": "types-pywin32",
        "pythonwin-stubs": "types-pywin32",
        "pywintypes-stubs": "types-pywin32",
        "regutil-stubs": "types-pywin32",
        "servicemanager-stubs": "types-pywin32",
        "sspicon-stubs": "types-pywin32",
        "timer-stubs": "types-pywin32",
        "win2kras-stubs": "types-pywin32",
        "win32-stubs": "types-pywin32",
        "win32api-stubs": "types-pywin32",
        "win32clipboard-stubs": "types-pywin32",
        "win32com-stubs": "types-pywin32",
        "win32comext-stubs": "types-pywin32",
        "win32con-stubs": "types-pywin32",
        "win32console-stubs": "types-pywin32",
        "win32cred-stubs": "types-pywin32",
        "win32crypt-stubs": "types-pywin32",
        "win32cryptcon-stubs": "types-pywin32",
        "win32event-stubs": "types-pywin32",
        "win32evtlog-stubs": "types-pywin32",
        "win32evtlogutil-stubs": "types-pywin32",
        "win32file-stubs": "types-pywin32",
        "win32gui-stubs": "types-pywin32",
        "win32gui_struct-stubs": "types-pywin32",
        "win32help-stubs": "types-pywin32",
        "win32inet-stubs": "types-pywin32",
        "win32inetcon-stubs": "types-pywin32",
        "win32job-stubs": "types-pywin32",
        "win32lz-stubs": "types-pywin32",
        "win32net-stubs": "types-pywin32",
        "win32netcon-stubs": "types-pywin32",
        "win32pdh-stubs": "types-pywin32",
        "win32pdhquery-stubs": "types-pywin32",
        "win32pipe-stubs": "types-pywin32",
        "win32print-stubs": "types-pywin32",
        "win32process-stubs": "types-pywin32",
        "win32profile-stubs": "types-pywin32",
        "win32ras-stubs": "types-pywin32",
        "win32security-stubs": "types-pywin32",
        "win32service-stubs": "types-pywin32",
        "win32serviceutil-stubs": "types-pywin32",
        "win32timezone-stubs": "types-pywin32",
        "win32trace-stubs": "types-pywin32",
        "win32transaction-stubs": "types-pywin32",
        "win32ts-stubs": "types-pywin32",
        "win32ui-stubs": "types-pywin32",
        "win32uiole-stubs": "types-pywin32",
        "win32wnet-stubs": "types-pywin32",
        "winerror-stubs": "types-pywin32",
        "winioctlcon-stubs": "types-pywin32",
        "winnt-stubs": "types-pywin32",
        "winperf-stubs": "types-pywin32",
        "winxpgui-stubs": "types-pywin32",
        "winxptheme-stubs": "types-pywin32",
        "pyflakes-stubs": "types-pyflakes",
        "prompt_toolkit": "prompt-toolkit",
        "icu": "PyICU",
        "youtube_dl": "youtube-dl",
        "pexpect-stubs": "types-pexpect",
        "jaraco": "jaraco.classes",
        "fontTools": "fonttools",
        "_yaml": "PyYAML",
        "xdg-stubs": "types-pyxdg",
        "NvidiaDetector": "ubuntu-drivers-common",
        "Quirks": "ubuntu-drivers-common",
        "UbuntuDrivers": "ubuntu-drivers-common",
        "flake8_typing_imports-stubs": "types-flake8-typing-imports",
        "webob-stubs": "types-WebOb",
        "distro_info": "distro-info",
        "exifread-stubs": "types-ExifRead",
        "jose-stubs": "types-python-jose",
        "farmhash-stubs": "types-pyfarmhash",
        "fuse": "fuse-python",
        "fuseparts": "fuse-python",
        "click_default_group-stubs": "types-click-default-group",
        "parsimonious-stubs": "types-parsimonious",
        "markdown-stubs": "types-Markdown",
        "usersettings-stubs": "types-usersettings",
        "chevron-stubs": "types-chevron",
        "cloudinit": "cloud-init",
        "lazr": "lazr.restfulclient",
        "pyautogui-stubs": "types-PyAutoGUI",
        "aiofiles-stubs": "types-aiofiles",
        "posix1e": "pylibacl",
        "stack_data": "stack-data",
        "hdbcli-stubs": "types-hdbcli",
        "first-stubs": "types-first",
        "pyscreeze-stubs": "types-PyScreeze",
        "_bcrypt": "bcrypt",
        "datemath-stubs": "types-python-datemath",
        "freetype": "freetype-py",
        "commonmark-stubs": "types-commonmark",
        "_cffi_backend-stubs": "types-cffi",
        "cffi-stubs": "types-cffi",
        "jack-stubs": "types-JACK-Client",
        "uaclient": "ubuntu-pro-client",
        "s2clientprotocol-stubs": "types-s2clientprotocol",
        "pluggy-stubs": "types-pluggy",
        "apt": "python-apt",
        "apt_inst": "python-apt",
        "apt_pkg": "python-apt",
        "aptsources": "python-apt",
        "OpenSSL-stubs": "types-pyOpenSSL",
        "tzlocal-stubs": "types-tzlocal",
        "flask_migrate-stubs": "types-Flask-Migrate",
        "pytz-stubs": "types-pytz",
        "babel": "Babel",
        "PyQt5": "PyQt5-sip",
        "bugbear-stubs": "types-flake8-bugbear",
        "pyrfc3339": "pyRFC3339",
        "influxdb_client-stubs": "types-influxdb-client",
        "untangle-stubs": "types-untangle",
        "stdlib_list-stubs": "types-stdlib-list",
        "passlib-stubs": "types-passlib",
        "jmespath-stubs": "types-jmespath",
        "docopt-stubs": "types-docopt",
        "dockerfile_parse-stubs": "types-dockerfile-parse",
        "PyInstaller-stubs": "types-pyinstaller",
        "pyi_splash-stubs": "types-pyinstaller",
        "configargparse": "ConfigArgParse",
        "ssh_import_id": "ssh-import-id",
        "pycocotools-stubs": "types-pycocotools",
        "paramiko-stubs": "types-paramiko",
        "flake8_docstrings-stubs": "types-flake8-docstrings",
        "psycopg2-stubs": "types-psycopg2",
        "workalendar-stubs": "types-workalendar",
        "websockets/extensions": "websockets",
        "websockets/legacy": "websockets",
        "inifile-stubs": "types-inifile",
        "gflags-stubs": "types-python-gflags",
        "editdistance-stubs": "types-editdistance",
        "colorama-stubs": "types-colorama",
        "sass-stubs": "types-libsass",
        "sassutils-stubs": "types-libsass",
        "tqdm-stubs": "types-tqdm",
        "_distutils_hack": "setuptools",
        "braintree-stubs": "types-braintree",
        "pyaudio-stubs": "types-pyaudio",
        "google": "protobuf",
        "keyboard-stubs": "types-keyboard",
        "jinja2": "Jinja2",
        "send2trash-stubs": "types-Send2Trash",
        "pyrfc3339-stubs": "types-pyRFC3339",
        "bleach-stubs": "types-bleach",
        "_brotli": "Brotli",
        "brotli": "Brotli",
        "slumber-stubs": "types-slumber",
        "requests-stubs": "types-requests",
        "markupsafe": "MarkupSafe",
        "usbcreator": "usb-creator",
        "boto-stubs": "types-boto",
        "greenlet-stubs": "types-greenlet",
        "validate": "configobj",
        "requests_oauthlib-stubs": "types-requests-oauthlib",
        "mpl_toolkits": "matplotlib",
        "pylab": "matplotlib",
    }

    mappings = KNOWN_MAPPINGS.copy()

    # using locally installed packages, we can try to add their package
    # names/invocation
    try:
        for dist in importlib.metadata.distributions():
            try:
                dist_name = dist.metadata["Name"]
                # read_text returns None when the distribution ships no
                # top_level.txt (common for modern wheels).
                top_level_text = dist.read_text("top_level.txt") or ""
                for name in top_level_text.split():
                    if name not in mappings and name != dist_name:
                        mappings[name] = dist_name
            except Exception as e:
                logger.debug(f"Error processing metadata for {dist}: {e}")
                continue
    except Exception as e:
        # Local metadata is only a bonus; the built-in table is still valid.
        logger.debug(f"Error getting package mappings: {e}")
    return mappings


def _pypi_project_exists(name: str) -> bool:
    """Return True when https://pypi.org/project/<name>/ answers with 200."""
    url = f"https://pypi.org/project/{name}/"
    try:
        # PyPI redirects non-canonical spellings to the canonical project
        # URL, so redirects must be followed to see the final status.
        response = requests.head(
            url, allow_redirects=True, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as e:
        logger.warning(f"Error checking package {name}: {e}")
        return False
    return response.status_code == 200


def verify_package_exists(package_name: str) -> Optional[str]:
    """
    Verify if a package exists on PyPI and return the correct name.

    The exact name is tried first, then its PEP 503 normalized form
    (lower-case, runs of ``-``, ``_`` and ``.`` collapsed to ``-``).

    Args:
        package_name: Candidate PyPI project name.

    Returns:
        The name under which the project was found, or None if package
        doesn't exist (or PyPI could not be reached).
    """
    # First try the exact name
    if _pypi_project_exists(package_name):
        return package_name

    # If exact name doesn't work, try normalized name
    normalized_name = re.sub(r"[-_.]+", "-", package_name).lower()
    if normalized_name != package_name and _pypi_project_exists(normalized_name):
        return normalized_name

    return None


def resolve_package_name(import_name: str, mapping: Dict[str, str]) -> Optional[str]:
    """
    Resolve an import name to a PyPI package name.

    Args:
        import_name: Top-level module name as written in an import.
        mapping: Import-name to package-name table consulted first.

    Returns:
        The verified PyPI project name, or None if no package can be found.
    """
    # Check mapping first
    if import_name in mapping:
        verified = verify_package_exists(mapping[import_name])
        if verified:
            return verified

    # Try import name directly; verify_package_exists also tries the
    # normalized spelling, so no separate normalized lookup is needed.
    return verify_package_exists(import_name)


def _parse_release_div(
    release_div, package_name: str
) -> Optional[Tuple[str, datetime.datetime]]:
    """Turn one PyPI release <div> into (version, date), or None to skip it."""
    # Skip if release is yanked
    if "yanked" in release_div.get("class", []):
        return None

    version = release_div.find("p", class_="release__version")
    date = release_div.find("p", class_="release__version-date")
    if not (version and date):
        return None

    version_text = version.text.strip()
    # Skip pre-releases. This is a plain substring test on the element text,
    # so it also matches "alpha"/"beta" and any text containing "a" or "b".
    lowered = version_text.lower()
    if any(pre in lowered for pre in ("a", "b", "rc", "dev")):
        return None

    date_text = date.text.strip()
    try:
        release_date = datetime.datetime.strptime(date_text, "%b %d, %Y")
    except ValueError as e:
        logger.warning(f"Error parsing date {date_text} for {package_name}: {e}")
        return None
    return version_text, release_date


def get_package_releases(package_name: str) -> List[Tuple[str, datetime.datetime]]:
    """Fetch and parse package release history from PyPI.

    Args:
        package_name: PyPI project name.

    Returns:
        A list of ``(version, release_date)`` tuples in page order, without
        yanked releases and pre-releases; empty when the page cannot be
        fetched or contains no parseable release.
    """
    url = f"https://pypi.org/project/{package_name}/#history"
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        logging.warning(f"Error fetching release history for {package_name}: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    releases = []
    for release_div in soup.find_all("div", class_="release"):
        try:
            parsed = _parse_release_div(release_div, package_name)
        except Exception as e:
            # One malformed entry must not discard the rest of the history.
            logger.warning(f"Error processing release for {package_name}: {e}")
            continue
        if parsed is not None:
            releases.append(parsed)
    return releases


def find_compatible_version(
    releases: List[Tuple[str, datetime.datetime]], file_date: datetime.datetime
) -> Optional[str]:
    """Find the newest package version released on or before the file date.

    Args:
        releases: ``(version, release_date)`` tuples.
        file_date: Reference date; releases after it are ignored.

    Returns:
        The version string of the latest qualifying release, or None when
        no release qualifies.
    """
    try:
        compatible_releases = [
            (version, date) for version, date in releases if date <= file_date
        ]
        if not compatible_releases:
            return None
        return max(compatible_releases, key=lambda x: x[1])[0]
    except TypeError as e:
        # Raised when the dates cannot be compared (e.g. naive vs. aware).
        logger.info(f"Error finding compatible version: {e}")
        return None


def get_file_dates(path: str) -> Tuple[datetime.datetime, Optional[datetime.datetime]]:
    """Get filesystem modification date and last git commit date for a path.

    Args:
        path: File or directory to inspect.

    Returns:
        ``(mod_date, git_date)``. ``git_date`` is None when the path is not
        in a git repository or has no commits. If the path cannot be
        stat'ed, ``(datetime.datetime.now(), None)`` is returned.
    """
    try:
        # Get filesystem modification date
        mod_timestamp = Path(path).stat().st_mtime
        mod_date = datetime.datetime.fromtimestamp(mod_timestamp)
    except (OSError, OverflowError, ValueError) as e:
        logger.warning(f"Error getting file dates for {path}: {e}")
        return datetime.datetime.now(), None

    # Try to get git commit date
    git_date = None
    try:
        repo = git.Repo(path, search_parent_directories=True)
        # git runs in the repository's working tree, so a path relative to
        # the current directory must be made absolute first.
        latest = next(
            repo.iter_commits(paths=os.path.abspath(path), max_count=1), None
        )
        if latest is not None:
            git_date = datetime.datetime.fromtimestamp(latest.committed_date)
    except Exception as e:
        # Not a repository, git missing, path outside the work tree, ...
        logger.debug(f"Could not get latest git commit reference date: {e}")
        git_date = None

    return mod_date, git_date


help_description = """
Guess python3 requirements.txt based on file or git commit dates, by looking
at the local installed packages and comparing with release dates on
https://PyPI.org.

WARNING!: For the file date comparison, it uses the modify date. If you made
a local change to the file, you will need to use touch to restore the old date.
"""


def _write_requirements(
    output_path: str, requirements: List[str], unresolved_imports: List[str]
) -> None:
    """Write the pinned requirements and the unresolved imports to a file."""
    lines = [
        "# To use: do `python -m venv .venv` then `source .venv/bin/activate` "
        f"then `pip install -r {output_path}`"
    ]
    lines.extend(sorted(requirements))
    if unresolved_imports:
        lines.append("# Unresolved Requirements:")
        lines.extend(f"#{unresolved}==???" for unresolved in sorted(unresolved_imports))
    with open(output_path, "w", encoding="utf-8") as f:
        # Every entry ends with a newline so the header comment can never
        # swallow the first requirement.
        f.write("\n".join(lines) + "\n")


def main():
    """Command-line entry point: analyse a path and write guessed requirements.

    Exits with status 1 when an unexpected error occurs.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter, description=help_description
    )
    parser.add_argument("path", help="File or directory path to analyze")
    parser.add_argument(
        "--output",
        default="requirements_guess.txt",
        help="Output file path (default: requirements_guess.txt)",
    )
    args = parser.parse_args()

    try:
        # Get all imports
        imports = get_all_imports(args.path)
        if not imports:
            print("No imports found.")
            return

        # Filter out stdlib modules
        stdlib_modules = get_stdlib_modules()
        imports = {imp for imp in imports if imp not in stdlib_modules}
        if not imports:
            print("No non-stdlib imports found.")
            return

        # Get import to package mapping
        import_mapping = get_import_to_pkg_mapping()

        # Get dates; the git commit date wins over the filesystem mtime
        file_date, git_date = get_file_dates(args.path)
        reference_date = git_date if git_date else file_date
        print(f"Using reference date: {reference_date}")

        # Process each import
        requirements = []
        unresolved_imports = []
        for import_name in sorted(imports):
            print(f"\nProcessing import: {import_name}")

            # Try to resolve package name
            package_name = resolve_package_name(import_name, import_mapping)
            if not package_name:
                print(
                    f"Warning: Could not find package for import '{import_name}' on PyPI"
                )
                unresolved_imports.append(import_name)
                continue

            print(f"Found package: {package_name}")
            releases = get_package_releases(package_name)
            if not releases:
                print(f"Warning: Could not fetch release history for {package_name}")
                continue

            version = find_compatible_version(releases, reference_date)
            if version:
                req_line = f"{package_name}=={version} # https://pypi.org/project/{package_name}/"
                requirements.append(req_line)
                print(f"Found likely compatible version: {package_name}=={version}")
            else:
                print(f"Warning: No compatible version found for {package_name}")

        # Write requirements.txt
        if requirements:
            _write_requirements(args.output, requirements, unresolved_imports)
            print(f"\nWritten {len(requirements)} packages to {args.output}")
        else:
            print("\nNo requirements to write.")

        # Report unresolved imports
        if unresolved_imports:
            print(
                "\nWARNING: The following imports could not be resolved to PyPI packages:"
            )
            for imp in sorted(unresolved_imports):
                print(f"  - {imp}")
            print(
                "\nPlease manually verify these imports and add them to requirements.txt if needed."
            )
    except Exception as e:
        print(f"Error in main execution: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()