Unified version parsing for filenames, URLs, and version strings.
This module provides centralized version extraction and normalization functionality
that can be used across the entire application for consistent version handling.
VersionParser
Unified version parsing for filenames, URLs, and version strings.
clean_filename_for_version_extraction(filename)
Clean a filename by removing git hashes and other variable identifiers.
This prevents those tokens from interfering with version extraction.
Source code in src/appimage_updater/core/version_parser.py
def clean_filename_for_version_extraction(self, filename: str) -> str:
    """Strip non-version tokens from a filename before version extraction.

    Removes, in this order: a trailing ``.AppImage`` extension (any case),
    hyphen-delimited abbreviated git commit hashes, architecture identifiers
    and platform identifiers. Runs of hyphens left behind are collapsed and
    leading/trailing hyphens are trimmed.

    Only tokens introduced by a hyphen and followed by a hyphen or the end
    of the name are removed; tokens delimited by ``_`` or ``.`` are left
    untouched.

    Args:
        filename: File name to clean, e.g. ``"App-1.2.3-x86_64.AppImage"``.

    Returns:
        The cleaned name, e.g. ``"App-1.2.3"``. May be an empty string if
        nothing but removable tokens was present.
    """
    # Remove file extension
    cleaned = re.sub(r"\.AppImage$", "", filename, flags=re.IGNORECASE)

    # Remove abbreviated git commit hashes (6-8 hex characters, typically 7)
    # so that parts of a hash are not mistaken for version numbers.
    # The first lookahead requires at least one a-f letter in the token:
    # a purely numeric 6-8 digit segment is far more likely to be a date
    # stamp or build number (e.g. "-20240115"), which IS version
    # information and must survive cleaning.
    cleaned = re.sub(r"-(?=[0-9]*[a-fA-F])[a-fA-F0-9]{6,8}(?=-|$)", "", cleaned)

    # Remove architecture identifiers that might contain numbers. Matched
    # case-insensitively, like the extension and platform tokens, and
    # including aarch64/armv7l whose digits would otherwise be left behind.
    cleaned = re.sub(
        r"-(x86_64|amd64|i386|i686|aarch64|arm64|armv7l?|armhf)(?=-|$)",
        "",
        cleaned,
        flags=re.IGNORECASE,
    )

    # Remove platform identifiers
    cleaned = re.sub(
        r"-(linux|win32|win64|windows|macos|darwin)(?=-|$)",
        "",
        cleaned,
        flags=re.IGNORECASE,
    )

    # Clean up any double hyphens or leading/trailing hyphens
    cleaned = re.sub(r"-+", "-", cleaned)
    return cleaned.strip("-")
extract_version_from_filename(filename)
Extract the version from a filename using common patterns.
Source code in src/appimage_updater/core/version_parser.py
def extract_version_from_filename(self, filename: str) -> str | None:
    """Return the normalized version found in ``filename``, if any.

    The filename is first passed through
    ``clean_filename_for_version_extraction`` so that git hashes and other
    variable identifiers cannot produce false matches. The extractor
    methods are then tried one at a time, most specific first; the first
    truthy result is passed to the module-scope
    ``normalize_version_string`` and returned.

    Args:
        filename: File name to inspect.

    Returns:
        The normalized version string, or ``None`` when no extractor
        produced a result.
    """
    searchable = self.clean_filename_for_version_extraction(filename)

    # Order matters: earlier extractors take precedence over later ones.
    extractors: tuple[Callable[[str], str | None], ...] = (
        self._extract_prerelease_version,
        self._extract_date_version,
        self._extract_semantic_version,
        self._extract_two_part_version,
        self._extract_single_number_version,
    )

    # Lazy generator: extractors after the first hit are never invoked.
    attempts = (extract(searchable) for extract in extractors)
    raw_version = next((hit for hit in attempts if hit), None)

    return normalize_version_string(raw_version) if raw_version else None
generate_flexible_pattern_from_filename(filename)
Generate a flexible regex pattern from a filename by eliminating variable identifiers.
Source code in src/appimage_updater/core/version_parser.py
def generate_flexible_pattern_from_filename(self, filename: str) -> str:
    """Build a case-insensitive regex that matches other releases of a file.

    The variable parts of ``filename`` are stripped: the ``.AppImage``
    extension, hyphen-delimited git hashes, architecture and platform
    identifiers, and the version number. What remains is escaped and
    used as a fixed prefix.

    Only tokens introduced by a hyphen and followed by a hyphen or the end
    of the name are stripped.

    Args:
        filename: A representative file name, e.g.
            ``"MyApp-v1.2.3-x86_64.AppImage"``.

    Returns:
        A pattern of the form ``(?i)^<escaped base>.*\\.AppImage$``, e.g.
        ``(?i)^MyApp.*\\.AppImage$``.
    """
    # Remove file extension to work with base name
    base_name = re.sub(r"\.AppImage$", "", filename, flags=re.IGNORECASE)

    # Eliminate git commit hashes (6-8 hex characters, typically 7)
    base_name = re.sub(r"-[a-fA-F0-9]{6,8}(?=-|$)", "", base_name)

    # Eliminate architecture identifiers. Matched case-insensitively, like
    # the extension and platform tokens, and including aarch64/armv7l.
    base_name = re.sub(
        r"-(x86_64|amd64|i386|i686|aarch64|arm64|armv7l?|armhf)(?=-|$)",
        "",
        base_name,
        flags=re.IGNORECASE,
    )

    # Eliminate platform identifiers
    base_name = re.sub(
        r"-(linux|win32|win64|windows|macos|darwin)(?=-|$)",
        "",
        base_name,
        flags=re.IGNORECASE,
    )

    # Eliminate version numbers: optional "v" prefix, two or more dotted
    # numeric components, and an optional single "-suffix" (e.g. "-beta").
    # Without the "v" prefix and extra components, names such as
    # "App-v1.2.3" or "App-1.2.3.4" kept their version in the pattern and
    # therefore never matched a newer release.
    base_name = re.sub(
        r"-[vV]?\d+(?:\.\d+)+(?:-[a-zA-Z0-9]+)?(?=-|$)",
        "",
        base_name,
    )

    # Clean up any double hyphens or leading/trailing hyphens
    base_name = re.sub(r"-+", "-", base_name).strip("-")

    # Flexible pattern: the cleaned base name followed by any suffixes
    escaped_base = re.escape(base_name)
    return f"(?i)^{escaped_base}.*\\.AppImage$"
normalize_version_string(version)
Normalize version strings consistently.
Source code in src/appimage_updater/core/version_parser.py
def normalize_version_string(self, version: str) -> str:
    """Return ``version`` in normalized form.

    Thin method wrapper: inside the method body the bare name
    ``normalize_version_string`` resolves to the module-scope callable
    (not to this method), which performs the actual normalization.

    Args:
        version: Version string to normalize.

    Returns:
        Whatever the module-scope ``normalize_version_string`` returns for
        ``version``.
    """
    normalized = normalize_version_string(version)
    return normalized