From aa6581a80545029ac7daa8ae5a276d774037d9b7 Mon Sep 17 00:00:00 2001 From: Tyler Date: Wed, 25 Dec 2024 15:05:39 -0800 Subject: [PATCH] Initial Commit --- .gitignore | 3 + app.py | 201 +++++++++++ logs/app.log | 0 req_generator.py | 745 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 + requirements_guess.txt | 4 + templates/index.html | 254 ++++++++++++++ uwsgi.ini | 8 + 8 files changed, 1219 insertions(+) create mode 100644 .gitignore create mode 100644 app.py create mode 100644 logs/app.log create mode 100644 req_generator.py create mode 100644 requirements.txt create mode 100644 requirements_guess.txt create mode 100644 templates/index.html create mode 100644 uwsgi.ini diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d449405 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.venv +__pycache__ + diff --git a/app.py b/app.py new file mode 100644 index 0000000..4425af5 --- /dev/null +++ b/app.py @@ -0,0 +1,201 @@ +from flask import Flask, render_template, request, jsonify, send_file +import os +import time +import tempfile +import zipfile +import io +from werkzeug.utils import secure_filename +from req_generator import ( + get_all_imports, get_stdlib_modules, get_import_to_pkg_mapping, + resolve_package_name, get_package_releases, find_compatible_version, + get_file_dates, get_file_min_version +) +import logging +from logging.handlers import RotatingFileHandler + + +# 1.file date is like right now, how can we preserve the uploaded file mod +# date? Or change to use file creation date? +# 2.it would be better to have a local pypi repository (bandersnatch?) +# to reduce chances of getting too many request errors while scraping +# OR just scrape every so often and build a list of packages and their +# versions, and do a cron job to refresh it every week or so. 
+ +app = Flask(__name__) +app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024 # 10MB max file size + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +os.makedirs('logs', exist_ok=True) +file_handler = RotatingFileHandler('logs/app.log', maxBytes=1024*1024, backupCount=5) +file_handler.setLevel(logging.INFO) +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) + +ALLOWED_EXTENSIONS = {'py', 'zip'} + +def allowed_file(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + +def process_python_file(file_path): + ''' + Tries to get requirements and python version for this file + ''' + # Get all imports + imports = get_all_imports(file_path) + + if not imports: + return {"error": "No imports found."} + + # Filter out stdlib modules + stdlib_modules = get_stdlib_modules() + imports = {imp for imp in imports if imp not in stdlib_modules} + + if not imports: + return {"error": "No non-stdlib imports found."} + interpreter_version = get_file_min_version(file_path=file_path) + + # Get import to package mapping + import_mapping = get_import_to_pkg_mapping() + + # Get dates + file_date, git_date = get_file_dates(file_path) + reference_date = git_date if git_date else file_date + + # Process each import + requirements = [] + unresolved_imports = [] + + for import_name in sorted(imports): + # Try to resolve package name + package_name = resolve_package_name(import_name, import_mapping) + + if not package_name: + unresolved_imports.append(import_name) + continue + + releases = get_package_releases(package_name) + + if not releases: + continue + + version = find_compatible_version(releases, reference_date) + if version: + req_line = f"{package_name}=={version} # https://pypi.org/project/{package_name}/" + requirements.append(req_line) + + return { + "requirements": requirements, + "unresolved": unresolved_imports, + "reference_date": reference_date.isoformat(), + "interpreter_version": 
interpreter_version + } + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/analyze', methods=['POST']) +def analyze(): + if 'file' not in request.files: + return jsonify({"error": "No file provided"}), 400 + + file = request.files['file'] + if file.filename == '': + return jsonify({"error": "No file selected"}), 400 + + if not allowed_file(file.filename): + return jsonify({"error": "Invalid file type"}), 400 + + with tempfile.TemporaryDirectory() as temp_dir: + if file.filename.endswith('.zip'): + scanned_files = [] + # Handle zip file, preserving original file dates from the archive + zip_path = os.path.join(temp_dir, 'upload.zip') + file.save(zip_path) + + requirements = [] + unresolved = set() + min_py_vers = [] + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(temp_dir) + + # Get list of all files in zip archive + zip_info_list = zip_ref.filelist + + # Create mapping of filenames to their dates from the archive + file_dates = { + os.path.join(temp_dir, info.filename): info.date_time + for info in zip_info_list + } + + for root, _, files in os.walk(temp_dir): + for filename in files: + if filename.endswith('.py'): + file_path = os.path.join(root, filename) + # Set the file's modification time to match the zip archive + if file_path in file_dates: + dt = file_dates[file_path] + timestamp = time.mktime(dt + (0, 0, 0)) # Add hours, minutes, seconds + os.utime(file_path, (timestamp, timestamp)) + result = process_python_file(file_path) + scanned_files.append(file_path + " | Vermin reported version: " + result["interpreter_version"]) + if "error" not in result: + requirements.extend(result["requirements"]) + unresolved.update(result["unresolved"]) + min_py_vers.append(result["interpreter_version"]) + + + if not requirements and not unresolved: + return jsonify({"error": "No Python files found in zip"}), 400 + logging.info(f"Analyzed {len(scanned_files)} with {len(requirements)} requirements. 
Unresolved: {unresolved}") + return jsonify({ + "requirements": sorted(set(requirements)), + "unresolved": sorted(list(unresolved)), + "file_list": sorted(list(scanned_files)), + #"interpreter_version": " | ".join(min_py_vers) + }) + + else: + # Handle single Python file, we need to use our custom header + # X-Last-Modified-Date (UTC String Format) + file_path = os.path.join(temp_dir, secure_filename(file.filename)) + file.save(file_path) + try: + lastModifiedStr = request.headers.get("X-Last-Modified-Date") + modification_time = time.strptime(lastModifiedStr, "%a, %d %b %Y %H:%M:%S GMT") + modification_timestamp = time.mktime(modification_time) + os.utime(file_path, (modification_timestamp, modification_timestamp)) + except Exception as e: + logging.debug(e) + result = process_python_file(file_path) + + if "error" in result: + return jsonify({"error": result["error"]}), 400 + + return jsonify(result) + +@app.route('/download', methods=['POST']) +def download(): + data = request.json + if not data or 'requirements' not in data: + return jsonify({"error": "No requirements provided"}), 400 + + content = "\n".join(data['requirements']) + if 'unresolved' in data and data['unresolved']: + content += "\n\n# Unresolved Requirements:\n" + content += "\n".join(f"#{imp}==???" 
for imp in data['unresolved']) + + buffer = io.BytesIO() + buffer.write(content.encode('utf-8')) + buffer.seek(0) + + return send_file( + buffer, + mimetype='text/plain', + as_attachment=True, + download_name='requirements.txt' + ) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/logs/app.log b/logs/app.log new file mode 100644 index 0000000..e69de29 diff --git a/req_generator.py b/req_generator.py new file mode 100644 index 0000000..880db76 --- /dev/null +++ b/req_generator.py @@ -0,0 +1,745 @@ +import os +import sys +import git +import requests +import datetime +import ast +from bs4 import BeautifulSoup +from pathlib import Path +import argparse +from typing import Dict, List, Optional, Tuple, Set +import importlib.metadata +import vermin +import logging +from logging.handlers import RotatingFileHandler + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + + +def get_file_min_version(file_path): + with open(file_path, 'r') as f: + contents = f.read() + return vermin.version_strings(vermin.detect(contents)) + return 'unknown' + +class ImportVisitor(ast.NodeVisitor): + """AST visitor to find all imports in a Python file.""" + + def __init__(self): + self.imports = set() + + def visit_Import(self, node): + """Handle 'import x' statements.""" + for name in node.names: + # Get the top-level module name + self.imports.add(name.name.split(".")[0]) + + def visit_ImportFrom(self, node): + """Handle 'from x import y' statements.""" + if node.level == 0: # Absolute import + # Get the top-level module name + self.imports.add(node.module.split(".")[0]) + + +def get_file_imports(file_path: str) -> Set[str]: + """Parse a Python file and extract all import names.""" + try: + with open(file_path, "r", encoding="utf-8") as f: + tree = ast.parse(f.read(), filename=file_path) + visitor = ImportVisitor() + visitor.visit(tree) + return visitor.imports + except Exception as e: + print(f"Error parsing imports from {file_path}: {e}") + return set() + + +def 
get_all_imports(path: str) -> Set[str]: + """Get all imports from a file or directory.""" + imports = set() + + if os.path.isfile(path): + if path.endswith(".py"): + imports.update(get_file_imports(path)) + else: + for root, _, files in os.walk(path): + for file in files: + if file.endswith(".py"): + file_path = os.path.join(root, file) + imports.update(get_file_imports(file_path)) + + return imports + + +def get_stdlib_modules() -> set: + """Get a set of all Python standard library module names.""" + stdlib_modules = set() + + # Get Python's stdlib directory + stdlib_paths = [os.path.dirname(os.__file__)] + if hasattr(sys, "base_prefix"): + stdlib_paths.append( + os.path.join( + sys.base_prefix, + "lib", + f"python{sys.version_info.major}.{sys.version_info.minor}", + ) + ) + + try: + for stdlib_path in stdlib_paths: + # Walk through the standard library directories + for root, dirs, files in os.walk(stdlib_path): + # Skip test directories + if "test" in root or "tests" in root: + continue + + for file in files: + if file.endswith(".py"): + # Convert path to module name + rel_path = os.path.relpath( + os.path.join(root, file), stdlib_path + ) + module_path = os.path.splitext(rel_path)[0] + module_name = module_path.replace(os.sep, ".") + stdlib_modules.add(module_name.split(".")[0]) + + # Add some common built-in modules + stdlib_modules.update( + [ + "sys", + "builtins", + "_thread", + "_signal", + "_sre", + "_ast", + "_operator", + "_symtable", + "_random", + "_pickle", + "_datetime", + "_elementtree", + ] + ) + except Exception as e: + print(f"Error getting stdlib modules: {e}") + + return stdlib_modules + + +def get_import_to_pkg_mapping() -> Dict[str, str]: + """Create a mapping of import names to PyPI package names.""" + # Common known mappings for imports:pypi_name + KNOWN_MAPPINGS = { + "bs4": "beautifulsoup4", + "PIL": "pillow", + "dotenv": "python-dotenv", + "yaml": "pyyaml", + "mx": "mxnet", + "cairo": "pycairo", + "dateutil": "python-dateutil", + 
"lxml.etree": "lxml", + "nacl": "pynacl", + "pkg_resources": "setuptools", + "googleapiclient": "google-api-python-client", + "psycopg2": "psycopg2-binary", + "MySQLdb1": "mysqlclient", + "git": "GitPython", + "winreg": "windows-curses", + "win32com": "pywin32", + "win32api": "pywin32", + "pythoncom": "pywin32", + "scipy.io": "scipy", + "skimage": "scikit-image", + "sklearn.external.joblib": "joblib", + "wx": "wxPython", + "sounddevice": "python-sounddevice", + "magic": "python-magic", + "dbus": "dbus-python", + "keyring": "keyring", + "tabulate-stubs": "types-tabulate", + "seaborn-stubs": "types-seaborn", + "stripe-stubs": "types-stripe", + "flake8_2020-stubs": "types-flake8-2020", + "click_spinner-stubs": "types-click-spinner", + "pysftp-stubs": "types-pysftp", + "retry-stubs": "types-retry", + "regex-stubs": "types-regex", + "zxcvbn-stubs": "types-zxcvbn", + "debian": "importlib-metadata", + "importlib_metadata": "importlib-metadata", + "opentracing-stubs": "types-opentracing", + "flask_socketio-stubs": "types-Flask-SocketIO", + "ttkthemes-stubs": "types-ttkthemes", + "humanfriendly-stubs": "types-humanfriendly", + "flake8_simplify-stubs": "types-flake8-simplify", + "peewee-stubs": "types-peewee", + "pika-stubs": "types-pika-ts", + "docutils-stubs": "types-docutils", + "PIL-stubs": "types-Pillow", + "xmltodict-stubs": "types-xmltodict", + "pep8ext_naming-stubs": "types-pep8-naming", + "playsound-stubs": "types-playsound", + "pycurl-stubs": "types-pycurl", + "httplib2-stubs": "types-httplib2", + "LanguageSelector": "language-selector", + "language_support_pkgs": "language-selector", + "pytest_lazyfixture-stubs": "types-pytest-lazy-fixture", + "fpdf-stubs": "types-fpdf2", + "croniter-stubs": "types-croniter", + "dateutil-stubs": "types-python-dateutil", + "xdg": "pyxdg", + "wtforms-stubs": "types-WTForms", + "tensorflow-stubs": "types-tensorflow", + "OpenSSL": "pyOpenSSL", + "google-stubs": "types-google-cloud-ndb", + "zope": "zope.interface", + "ujson-stubs": 
"types-ujson", + "toml-stubs": "types-toml", + "toposort-stubs": "types-toposort", + "markdown": "Markdown", + "zstd-stubs": "types-zstd", + "boltons-stubs": "types-boltons", + "decorator-stubs": "types-decorator", + "mypy_extensions-stubs": "types-mypy-extensions", + "pynput-stubs": "types-pynput", + "translationstring-stubs": "types-translationstring", + "pymysql-stubs": "types-PyMySQL", + "whatthepatch-stubs": "types-whatthepatch", + "src": "kiwisolver", + "aws_xray_sdk-stubs": "types-aws-xray-sdk", + "caldav-stubs": "types-caldav", + "paho-stubs": "types-paho-mqtt", + "mock-stubs": "types-mock", + "matplotlib_inline": "matplotlib-inline", + "IPython": "ipython", + "consolemenu-stubs": "types-console-menu", + "passpy-stubs": "types-passpy", + "ibm_db-stubs": "types-ibm-db", + "brlapi": "Brlapi", + "tree_sitter_languages-stubs": "types-tree-sitter-languages", + "_pytest": "pytest", + "py": "pytest", + "neovim": "pynvim", + "pyasn1-stubs": "types-pyasn1", + "systemd": "systemd-python", + "systemd/_daemon": "systemd-python", + "systemd/_journal": "systemd-python", + "systemd/_reader": "systemd-python", + "systemd/id128": "systemd-python", + "systemd/login": "systemd-python", + "flake8_plugin_utils-stubs": "types-flake8-plugin-utils", + "pkg_resources-stubs": "types-setuptools", + "setuptools-stubs": "types-setuptools", + "isympy": "sympy", + "six-stubs": "types-six", + "html5lib-stubs": "types-html5lib", + "singledispatch-stubs": "types-singledispatch", + "uwsgi-stubs": "types-uWSGI", + "uwsgidecorators-stubs": "types-uWSGI", + "bs4-stubs": "types-beautifulsoup4", + "cups": "pycups", + "polib-stubs": "types-polib", + "serial-stubs": "types-pyserial", + "openpyxl-stubs": "types-openpyxl", + "jsonschema-stubs": "types-jsonschema", + "redis-stubs": "types-redis", + "pytest_flask": "pytest-flask", + "portpicker-stubs": "types-portpicker", + "pure_eval": "pure-eval", + "pygments-stubs": "types-Pygments", + "cachetools-stubs": "types-cachetools", + "psutil-stubs": 
"types-psutil", + "deprecated-stubs": "types-Deprecated", + "Xlib-stubs": "types-python-xlib", + "axi": "apt-xapian-index", + "flask_cors-stubs": "types-Flask-Cors", + "gdb-stubs": "types-gdb", + "vobject-stubs": "types-vobject", + "yaml-stubs": "types-PyYAML", + "simplejson-stubs": "types-simplejson", + "Cryptodome": "pycryptodomex", + "flake8_builtins-stubs": "types-flake8-builtins", + "netaddr-stubs": "types-netaddr", + "hgdemandimport": "mercurial", + "hgext": "mercurial", + "hgext3rd": "mercurial", + "flake8_rst_docstrings-stubs": "types-flake8-rst-docstrings", + "automat": "Automat", + "nmap-stubs": "types-python-nmap", + "serial": "pyserial", + "_pyrsistent_version": "pyrsistent", + "pvectorc": "pyrsistent", + "tree_sitter-stubs": "types-tree-sitter", + "deb822": "python-debian", + "debian_bundle": "python-debian", + "secretstorage": "SecretStorage", + "ldap3-stubs": "types-ldap3", + "jwt": "PyJWT", + "_dbus_bindings": "dbus-python", + "_dbus_glib_bindings": "dbus-python", + "oauthlib-stubs": "types-oauthlib", + "MySQLdb-stubs": "types-mysqlclient", + "waitress-stubs": "types-waitress", + "Xlib": "python-xlib", + "jks-stubs": "types-pyjks", + "qrcode-stubs": "types-qrcode", + "xattr": "pyxattr", + "slugify-stubs": "types-python-slugify", + "mypy_extensions": "mypy-extensions", + "cronlog-stubs": "types-python-crontab", + "crontab-stubs": "types-python-crontab", + "crontabs-stubs": "types-python-crontab", + "entrypoints-stubs": "types-entrypoints", + "dateparser-stubs": "types-dateparser", + "dateparser_data-stubs": "types-dateparser", + "_win32typing-stubs": "types-pywin32", + "afxres-stubs": "types-pywin32", + "commctrl-stubs": "types-pywin32", + "dde-stubs": "types-pywin32", + "isapi-stubs": "types-pywin32", + "mmapfile-stubs": "types-pywin32", + "mmsystem-stubs": "types-pywin32", + "ntsecuritycon-stubs": "types-pywin32", + "odbc-stubs": "types-pywin32", + "perfmon-stubs": "types-pywin32", + "pythoncom-stubs": "types-pywin32", + "pythonwin-stubs": 
"types-pywin32", + "pywintypes-stubs": "types-pywin32", + "regutil-stubs": "types-pywin32", + "servicemanager-stubs": "types-pywin32", + "sspicon-stubs": "types-pywin32", + "timer-stubs": "types-pywin32", + "win2kras-stubs": "types-pywin32", + "win32-stubs": "types-pywin32", + "win32api-stubs": "types-pywin32", + "win32clipboard-stubs": "types-pywin32", + "win32com-stubs": "types-pywin32", + "win32comext-stubs": "types-pywin32", + "win32con-stubs": "types-pywin32", + "win32console-stubs": "types-pywin32", + "win32cred-stubs": "types-pywin32", + "win32crypt-stubs": "types-pywin32", + "win32cryptcon-stubs": "types-pywin32", + "win32event-stubs": "types-pywin32", + "win32evtlog-stubs": "types-pywin32", + "win32evtlogutil-stubs": "types-pywin32", + "win32file-stubs": "types-pywin32", + "win32gui-stubs": "types-pywin32", + "win32gui_struct-stubs": "types-pywin32", + "win32help-stubs": "types-pywin32", + "win32inet-stubs": "types-pywin32", + "win32inetcon-stubs": "types-pywin32", + "win32job-stubs": "types-pywin32", + "win32lz-stubs": "types-pywin32", + "win32net-stubs": "types-pywin32", + "win32netcon-stubs": "types-pywin32", + "win32pdh-stubs": "types-pywin32", + "win32pdhquery-stubs": "types-pywin32", + "win32pipe-stubs": "types-pywin32", + "win32print-stubs": "types-pywin32", + "win32process-stubs": "types-pywin32", + "win32profile-stubs": "types-pywin32", + "win32ras-stubs": "types-pywin32", + "win32security-stubs": "types-pywin32", + "win32service-stubs": "types-pywin32", + "win32serviceutil-stubs": "types-pywin32", + "win32timezone-stubs": "types-pywin32", + "win32trace-stubs": "types-pywin32", + "win32transaction-stubs": "types-pywin32", + "win32ts-stubs": "types-pywin32", + "win32ui-stubs": "types-pywin32", + "win32uiole-stubs": "types-pywin32", + "win32wnet-stubs": "types-pywin32", + "winerror-stubs": "types-pywin32", + "winioctlcon-stubs": "types-pywin32", + "winnt-stubs": "types-pywin32", + "winperf-stubs": "types-pywin32", + "winxpgui-stubs": 
"types-pywin32", + "winxptheme-stubs": "types-pywin32", + "pyflakes-stubs": "types-pyflakes", + "prompt_toolkit": "prompt-toolkit", + "icu": "PyICU", + "youtube_dl": "youtube-dl", + "pexpect-stubs": "types-pexpect", + "jaraco": "jaraco.classes", + "fontTools": "fonttools", + "_yaml": "PyYAML", + "xdg-stubs": "types-pyxdg", + "NvidiaDetector": "ubuntu-drivers-common", + "Quirks": "ubuntu-drivers-common", + "UbuntuDrivers": "ubuntu-drivers-common", + "flake8_typing_imports-stubs": "types-flake8-typing-imports", + "webob-stubs": "types-WebOb", + "distro_info": "distro-info", + "exifread-stubs": "types-ExifRead", + "jose-stubs": "types-python-jose", + "farmhash-stubs": "types-pyfarmhash", + "fuse": "fuse-python", + "fuseparts": "fuse-python", + "click_default_group-stubs": "types-click-default-group", + "parsimonious-stubs": "types-parsimonious", + "markdown-stubs": "types-Markdown", + "usersettings-stubs": "types-usersettings", + "chevron-stubs": "types-chevron", + "cloudinit": "cloud-init", + "lazr": "lazr.restfulclient", + "pyautogui-stubs": "types-PyAutoGUI", + "aiofiles-stubs": "types-aiofiles", + "posix1e": "pylibacl", + "stack_data": "stack-data", + "hdbcli-stubs": "types-hdbcli", + "first-stubs": "types-first", + "pyscreeze-stubs": "types-PyScreeze", + "_bcrypt": "bcrypt", + "datemath-stubs": "types-python-datemath", + "freetype": "freetype-py", + "commonmark-stubs": "types-commonmark", + "_cffi_backend-stubs": "types-cffi", + "cffi-stubs": "types-cffi", + "jack-stubs": "types-JACK-Client", + "uaclient": "ubuntu-pro-client", + "s2clientprotocol-stubs": "types-s2clientprotocol", + "pluggy-stubs": "types-pluggy", + "apt": "python-apt", + "apt_inst": "python-apt", + "apt_pkg": "python-apt", + "aptsources": "python-apt", + "OpenSSL-stubs": "types-pyOpenSSL", + "tzlocal-stubs": "types-tzlocal", + "flask_migrate-stubs": "types-Flask-Migrate", + "pytz-stubs": "types-pytz", + "babel": "Babel", + "PyQt5": "PyQt5-sip", + "bugbear-stubs": "types-flake8-bugbear", + 
"pyrfc3339": "pyRFC3339", + "influxdb_client-stubs": "types-influxdb-client", + "untangle-stubs": "types-untangle", + "stdlib_list-stubs": "types-stdlib-list", + "passlib-stubs": "types-passlib", + "jmespath-stubs": "types-jmespath", + "docopt-stubs": "types-docopt", + "dockerfile_parse-stubs": "types-dockerfile-parse", + "PyInstaller-stubs": "types-pyinstaller", + "pyi_splash-stubs": "types-pyinstaller", + "configargparse": "ConfigArgParse", + "ssh_import_id": "ssh-import-id", + "pycocotools-stubs": "types-pycocotools", + "paramiko-stubs": "types-paramiko", + "flake8_docstrings-stubs": "types-flake8-docstrings", + "psycopg2-stubs": "types-psycopg2", + "workalendar-stubs": "types-workalendar", + "websockets/extensions": "websockets", + "websockets/legacy": "websockets", + "inifile-stubs": "types-inifile", + "gflags-stubs": "types-python-gflags", + "editdistance-stubs": "types-editdistance", + "colorama-stubs": "types-colorama", + "sass-stubs": "types-libsass", + "sassutils-stubs": "types-libsass", + "tqdm-stubs": "types-tqdm", + "_distutils_hack": "setuptools", + "braintree-stubs": "types-braintree", + "pyaudio-stubs": "types-pyaudio", + "google": "protobuf", + "keyboard-stubs": "types-keyboard", + "jinja2": "Jinja2", + "send2trash-stubs": "types-Send2Trash", + "pyrfc3339-stubs": "types-pyRFC3339", + "bleach-stubs": "types-bleach", + "_brotli": "Brotli", + "brotli": "Brotli", + "slumber-stubs": "types-slumber", + "requests-stubs": "types-requests", + "markupsafe": "MarkupSafe", + "usbcreator": "usb-creator", + "boto-stubs": "types-boto", + "greenlet-stubs": "types-greenlet", + "validate": "configobj", + "requests_oauthlib-stubs": "types-requests-oauthlib", + "mpl_toolkits": "matplotlib", + "pylab": "matplotlib", + } + + mappings = KNOWN_MAPPINGS.copy() + # using locally installed packages, we can try to add their package names/invocation + try: + for dist in importlib.metadata.distributions(): + try: + top_level_names = dist.read_text("top_level.txt").split() + for 
name in top_level_names: + if name not in mappings and name != dist.metadata["Name"]: + mappings[name] = dist.metadata["Name"] + # print(f'Updated local mapping for {name}') + except Exception as e: + continue + # print(f"Error processing metadata for {dist}: {e}") + except Exception as e: + return + #print(f"Error getting package mappings: {e}") + + return mappings + + +def verify_package_exists(package_name: str) -> Optional[str]: + """ + Verify if a package exists on PyPI and return the correct name. + Returns None if package doesn't exist. + """ + # first try to see if the local pip can resolve a name + + + # First try the exact name + url = f"https://pypi.org/project/{package_name}/" + try: + response = requests.head(url) + if response.status_code == 200: + return package_name + except requests.RequestException as e: + logging.info(f"Error checking package {package_name}: {e}") + + # If exact name doesn't work, try normalized name + normalized_name = package_name.lower().replace("_", "-") + if normalized_name != package_name: + url = f"https://pypi.org/project/{normalized_name}/" + try: + response = requests.head(url) + if response.status_code == 200: + return normalized_name + except requests.RequestException as e: + print(f"Error checking normalized package name {normalized_name}: {e}") + + return None + + +def resolve_package_name(import_name: str, mapping: Dict[str, str]) -> Optional[str]: + """ + Resolve an import name to a PyPI package name. + Returns None if no package can be found. 
+ """ + # Check mapping first + if import_name in mapping: + pkg_name = mapping[import_name] + if verify_package_exists(pkg_name): + return pkg_name + + # Try import name directly + if verify_package_exists(import_name): + return import_name + + # Try normalized version of import name + normalized_name = import_name.lower().replace("_", "-") + if verify_package_exists(normalized_name): + return normalized_name + + return + + +def get_package_releases(package_name: str) -> List[Tuple[str, datetime.datetime]]: + """Fetch and parse package release history from PyPI.""" + url = f"https://pypi.org/project/{package_name}/#history" + try: + response = requests.get(url) + response.raise_for_status() + except requests.RequestException as e: + logging.warning(f"Error fetching release history for {package_name}: {e}") + return [] + + soup = BeautifulSoup(response.text, "html.parser") + releases = [] + + for release_div in soup.find_all("div", class_="release"): + try: + # Skip if release is yanked + if "yanked" in release_div.get("class", []): + continue + + version = release_div.find("p", class_="release__version") + date = release_div.find("p", class_="release__version-date") + + if not (version and date): + continue + + version_text = version.text.strip() + + # Skip pre-releases + if any( + pre in version_text.lower() + for pre in ["a", "b", "rc", "dev", "alpha", "beta"] + ): + continue + + try: + date_text = date.text.strip() + release_date = datetime.datetime.strptime(date_text, "%b %d, %Y") + releases.append((version_text, release_date)) + except ValueError as e: + logger.warning(f"Error parsing date {date_text} for {package_name}: {e}") + continue + + except Exception as e: + logger.warning(f"Error processing release for {package_name}: {e}") + continue + + return releases + + +def find_compatible_version( + releases: List[Tuple[str, datetime.datetime]], file_date: datetime.datetime +) -> Optional[str]: + """Find the newest package version that's older than the file 
date.""" + try: + compatible_releases = [ + (version, date) for version, date in releases if date <= file_date + ] + + if not compatible_releases: + return None + + return max(compatible_releases, key=lambda x: x[1])[0] + except Exception as e: + logger.info(f"Error finding compatible version: {e}") + return None + + +def get_file_dates(path: str) -> Tuple[datetime.datetime, Optional[datetime.datetime]]: + """Get filesystem modification date and last git commit date for a path.""" + try: + # Get filesystem modification date + path_obj = Path(path) + mod_timestamp = path_obj.stat().st_mtime + mod_date = datetime.datetime.fromtimestamp(mod_timestamp) + + # Try to get git commit date + try: + repo = git.Repo(path, search_parent_directories=True) + commits = list(repo.iter_commits(paths=path)) + if commits: + git_date = datetime.datetime.fromtimestamp(commits[0].committed_date) + else: + git_date = None + except: + git_date = None + #print("Could not get latest git commit reference date.") + + #print(f"Got date {mod_date} from {path}") + return mod_date, git_date + except Exception as e: + #print(f"Error getting file dates for {path}: {e}") + return datetime.datetime.now(), None + + +help_description = """ +Guess python3 requirements.txt based on file or git commit dates, +by looking at the local installed packages and comparing with release dates +on https://PyPI.org. + +WARNING!: For the file date comparison, it uses the modify date. If you made a local +change to the file, you will need to use touch to restore the old date.' 
+""" + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, description=help_description + ) + parser.add_argument("path", help="File or directory path to analyze") + parser.add_argument( + "--output", + default="requirements_guess.txt", + help="Output file path (default: requirements_guess.txt)", + ) + args = parser.parse_args() + + try: + # Get all imports + imports = get_all_imports(args.path) + if not imports: + print("No imports found.") + return + + # Filter out stdlib modules + stdlib_modules = get_stdlib_modules() + imports = {imp for imp in imports if imp not in stdlib_modules} + + if not imports: + print("No non-stdlib imports found.") + return + + # Get import to package mapping + import_mapping = get_import_to_pkg_mapping() + + # Get dates + file_date, git_date = get_file_dates(args.path) + reference_date = git_date if git_date else file_date + print(f"Using reference date: {reference_date}") + + # Process each import + requirements = [] + unresolved_imports = [] + + for import_name in sorted(imports): + print(f"\nProcessing import: {import_name}") + + # Try to resolve package name + package_name = resolve_package_name(import_name, import_mapping) + + if not package_name: + print( + f"Warning: Could not find package for import '{import_name}' on PyPI" + ) + unresolved_imports.append(import_name) + continue + + print(f"Found package: {package_name}") + releases = get_package_releases(package_name) + + if not releases: + print(f"Warning: Could not fetch release history for {package_name}") + continue + + version = find_compatible_version(releases, reference_date) + if version: + req_line = f"{package_name}=={version} # https://pypi.org/project/{package_name}/" + requirements.append(req_line) + print(f"Found likely compatible version: {package_name}=={version}") + else: + + print(f"Warning: No compatible version found for {package_name}") + + # Write requirements.txt + if requirements: + with open(args.output, 
"w") as f: + f.write("# To use: do `python -m venv .venv` then `source .venv/bin/activate` then `pip install -r requirements.txt`") + f.write("\n".join(sorted(requirements))) + f.write("\n") + if unresolved_imports: + unresolved_imports.sort() + f.write("# Unresolved Requirements:\n") + for unresolved in unresolved_imports: + f.write(f"#{unresolved}==???") + f.write("\n") + + print(f"\nWritten {len(requirements)} packages to {args.output}") + else: + print("\nNo requirements to write.") + + # Report unresolved imports + if unresolved_imports: + print( + "\nWARNING: The following imports could not be resolved to PyPI packages:" + ) + for imp in sorted(unresolved_imports): + print(f" - {imp}") + print( + "\nPlease manually verify these imports and add them to requirements.txt if needed." + ) + + except Exception as e: + print(f"Error in main execution: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1be8098 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +GitPython==3.1.43 # https://pypi.org/project/GitPython/ +beautifulsoup4==4.12.3 # https://pypi.org/project/beautifulsoup4/ +requests==2.32.3 # https://pypi.org/project/requests/ +vermin==1.6.0 # https://pypi.org/project/vermin/ diff --git a/requirements_guess.txt b/requirements_guess.txt new file mode 100644 index 0000000..cd50669 --- /dev/null +++ b/requirements_guess.txt @@ -0,0 +1,4 @@ +# To use: do `python -m venv .venv` then `source .venv/bin/activate` then `pip install -r requirements.txt`GitPython==3.1.43 # https://pypi.org/project/GitPython/ +beautifulsoup4==4.12.3 # https://pypi.org/project/beautifulsoup4/ +requests==2.32.3 # https://pypi.org/project/requests/ +vermin==1.6.0 # https://pypi.org/project/vermin/ diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..eff32e9 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,254 @@ + + + + + + Python requirements.txt 
Generator + + + + +
+
+

Python requirements.txt Generator

+
+

About this tool

+

+ This tool is used to help you find out what original python package versions were used if you are missing a requirements.txt or it is + not included in the program's documentation. In most cases, it will get you 99% of the way there, so deprecations and behavior changes + won't bite you as hard. Take these estimations with a grain of salt.

+
+
    +
  • 1. Use an original unmodified file - due to JS limitations (skill issue), it cannot use the creation date and it will improperly resolve + unless there's a git repo in a .zip. +
  • +
  • 2. Relative imports may not be resolvable - you will need to check this manually
  • +
  • 2a. Some relative imports with common generic names (tools, utils) will incorrectly resolve to a pip package when they are actually + a relative import. +
  • +
+
+

+ It is up to you, the user, to double check your imports. This tool is just to get you close. +

+
+
+
+
+

Drag and drop your Python file or ZIP archive here

+

(or click to select)

+

Maximum file size: 10MB

+
+
+ +
+ + +
+

Privacy or whatever

+
    +
  • 1. Although processing happens server-side, we don't keep your uploaded source code. It's stored in a temporary + directory then cleaned up after. Might port this to be a WASM project to learn it, if it's possible to do so, so all processing happens + on your device and I don't risk getting rate limited by pypi.org. +
  • +
    +
  • 2. Only normal browser logs are kept in rotation to see if something is broken. You specifically are not tracked.
  • +
  • +
  • +
+
+
+
+ + + + \ No newline at end of file diff --git a/uwsgi.ini b/uwsgi.ini new file mode 100644 index 0000000..389ec33 --- /dev/null +++ b/uwsgi.ini @@ -0,0 +1,8 @@ +[uwsgi] +module = app:app +master = true +processes = 4 +socket = 0.0.0.0:5000 +chmod-socket = 660 +vacuum = true +die-on-term = true \ No newline at end of file