Fixed info plugin's exclusion logic (#6874)

Added more information to platform file
Added processed directory indicator
Changed slash removal in resolved patterns
Changed regex match function to search to support subprojects
Constrained dynamic patterns to the root
Reverted build assets gathering dc808ca90a
Reverted info.gitignore file 79129d5cde
This commit is contained in:
Kamil Krzyśków 2024-03-12 03:55:15 +01:00 committed by GitHub
parent f3252381da
commit 2d3982489d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 169 additions and 157 deletions

View File

@ -1,37 +0,0 @@
# Custom .gitignore-like file
#
# The difference is that those are regex patterns, which will be compared
# against directory and file names case-sensitively. The plugin uses the
# external https://pypi.org/project/regex/ module.
#
# Additional remarks for pattern creation:
# - The compared paths will be always in POSIX format.
# - Each directory path will have a / at the end to allow to distinguish them
# from files.
# - Patterns for dynamic or custom paths like Virtual Environments (venv) or
# build site directories are created during plugin runtime.
# Byte-compiled / optimized / DLL files
# Python cache directory
.*__pycache__/
# macOS
.*\.DS_Store
# .dotfiles in the root directory
^/\.[^/]+$
# Generated files and folders
^/.*\.zip
# Allow .github or .devcontainer directories
# Exclude .cache files and folders
# Exclude known IDE directories
.*\.cache/?
^/\.vscode/
^/\.vs/
^/\.idea/

View File

@ -0,0 +1,27 @@
def get_exclusion_patterns():
"""
Regex patterns, which will be compared against directory and file names
case-sensitively. https://docs.python.org/3/library/re.html#re.search is the
matching function and scans the whole string to find any pattern match. Used
with the https://pypi.org/project/regex/ module.
Additional remarks for pattern creation:
- The compared paths will be always in POSIX format.
- Each directory path will have a / at the end to allow to distinguish them
from files.
- Patterns for dynamic or custom paths like Virtual Environments (venv) or
build site directories are created during plugin runtime.
"""
return [
r"/__pycache__/", # Python cache directory
r"/\.DS_Store$", # macOS
r"/[^/]+\.zip$", # Generated files and folders
r"/[^/]*\.cache($|/)", # .cache files and folders
r"/\.vscode/", # Common autogenerated IDE directories
r"/\.vs/",
r"/\.idea/",
]

View File

@ -39,6 +39,7 @@ from mkdocs.utils import get_yaml_loader
from zipfile import ZipFile, ZIP_DEFLATED
from .config import InfoConfig
from .patterns import get_exclusion_patterns
# -----------------------------------------------------------------------------
# Classes
@ -56,6 +57,7 @@ class InfoPlugin(BasePlugin[InfoConfig]):
# Initialize empty members
self.exclusion_patterns = []
self.excluded_entries = []
# Determine whether we're serving the site
def on_startup(self, *, command, dirty):
@ -183,15 +185,18 @@ class InfoPlugin(BasePlugin[InfoConfig]):
example, _ = os.path.splitext(example)
example = "-".join([present, slugify(example, "-")])
# Load exclusion patterns
self.exclusion_patterns = _load_exclusion_patterns()
# Get local copy of the exclusion patterns
self.exclusion_patterns = get_exclusion_patterns()
self.excluded_entries = []
# Exclude the site_dir at project root
if config.site_dir.startswith(os.getcwd()):
self.exclusion_patterns.append(_resolve_pattern(config.site_dir))
# Exclude the site-packages directory
for path in site.getsitepackages():
# Exclude the Virtual Environment directory. site.getsitepackages() has
# inconsistent results across operating systems, and relies on the
# PREFIXES that will contain the absolute path to the activated venv.
for path in site.PREFIXES:
if path.startswith(os.getcwd()):
self.exclusion_patterns.append(_resolve_pattern(path))
@ -211,24 +216,17 @@ class InfoPlugin(BasePlugin[InfoConfig]):
files: list[str] = []
with ZipFile(archive, "a", ZIP_DEFLATED, False) as f:
for abs_root, dirnames, filenames in os.walk(os.getcwd()):
# Set and print progress indicator
indicator = f"Processing: {abs_root}"
print(indicator, end="\r", flush=True)
# Prune the folders in-place to prevent their processing
for name in list(dirnames):
# Resolve the absolute directory path
path = os.path.join(abs_root, name)
# Exclude the directory and all subdirectories
if self._is_excluded(_resolve_pattern(path)):
dirnames.remove(name)
continue
# Projects, which don't use the projects plugin for
# multi-language support could have separate build folders
# for each config file or language. Therefore, we exclude
# them with the assumption a site_dir contains the sitemap
# file. Example of such a setup: https://t.ly/DLQcy
sitemap_gz = os.path.join(path, "sitemap.xml.gz")
if os.path.exists(sitemap_gz):
log.debug(f"Excluded site_dir: {path}")
if self._is_excluded(path):
dirnames.remove(name)
# Write files to the in-memory archive
@ -237,13 +235,16 @@ class InfoPlugin(BasePlugin[InfoConfig]):
path = os.path.join(abs_root, name)
# Exclude the file
if self._is_excluded(_resolve_pattern(path)):
if self._is_excluded(path):
continue
# Resolve the relative path to create a matching structure
path = os.path.relpath(path, os.path.curdir)
f.write(path, os.path.join(example, path))
# Clear the line for the next indicator
print(" " * len(indicator), end="\r", flush=True)
# Add information on installed packages
f.writestr(
os.path.join(example, "requirements.lock.txt"),
@ -261,11 +262,14 @@ class InfoPlugin(BasePlugin[InfoConfig]):
"system": platform.platform(),
"architecture": platform.architecture(),
"python": platform.python_version(),
"cwd": os.getcwd(),
"command": " ".join([
sys.argv[0].rsplit(os.sep, 1)[-1],
*sys.argv[1:]
]),
"sys.path": sys.path
"env:$PYTHONPATH": os.getenv("PYTHONPATH", ""),
"sys.path": sys.path,
"excluded_entries": self.excluded_entries
},
default = str,
indent = 2
@ -363,10 +367,10 @@ class InfoPlugin(BasePlugin[InfoConfig]):
print(Style.NORMAL)
for path in outside_root:
print(f" {path}")
print(" \nTo assure that all project files are found please adjust")
print("\n To assure that all project files are found please adjust")
print(" your config or file structure and put everything within the")
print(" root directory of the project.\n")
print(" Please also make sure `mkdocs build` is run in the actual")
print(" root directory of the project.")
print("\n Please also make sure `mkdocs build` is run in the actual")
print(" root directory of the project.")
print(Style.RESET_ALL)
@ -374,13 +378,34 @@ class InfoPlugin(BasePlugin[InfoConfig]):
if self.config.archive_stop_on_violation:
sys.exit(1)
# Exclude files which we don't want in our zip file
def _is_excluded(self, posix_path: str) -> bool:
# Check if path is excluded and should be omitted from the zip. Use pattern
# matching for files and folders, and lookahead specific files in folders to
# skip them. Side effect: Save excluded paths to save them in the zip file.
def _is_excluded(self, abspath: str) -> bool:
# Resolve the path into POSIX format to match the patterns
pattern_path = _resolve_pattern(abspath, return_path = True)
for pattern in self.exclusion_patterns:
if regex.match(pattern, posix_path):
log.debug(f"Excluded pattern '{pattern}': {posix_path}")
if regex.search(pattern, pattern_path):
log.debug(f"Excluded pattern '{pattern}': {abspath}")
self.excluded_entries.append(f"{pattern} - {pattern_path}")
return True
# File exclusion should be limited to pattern matching
if os.path.isfile(abspath):
return False
# Projects, which don't use the projects plugin for multi-language
# support could have separate build folders for each config file or
# language. Therefore, we exclude them with the assumption a site_dir
# contains the sitemap file. Example of such a setup: https://t.ly/DLQcy
sitemap_gz = os.path.join(abspath, "sitemap.xml.gz")
if os.path.exists(sitemap_gz):
log.debug(f"Excluded site_dir: {abspath}")
self.excluded_entries.append(f"sitemap.xml.gz - {pattern_path}")
return True
return False
# -----------------------------------------------------------------------------
@ -435,31 +460,22 @@ def _load_yaml(abs_src_path: str):
return result
# Load info.gitignore, ignore any empty lines or # comments
def _load_exclusion_patterns(path: str = None):
if path is None:
path = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(path, "info.gitignore")
with open(path, encoding = "utf-8") as file:
lines = map(str.strip, file.readlines())
return [line for line in lines if line and not line.startswith("#")]
# Get a normalized POSIX path for the pattern matching with removed current
# working directory prefix. Directory paths end with a '/' to allow more control
# in the pattern creation for files and directories.
def _resolve_pattern(abspath: str):
path = abspath.replace(os.getcwd(), "", 1).replace(os.sep, "/")
# in the pattern creation for files and directories. The patterns are matched
# using the search function, so they are prefixed with ^ for specificity.
def _resolve_pattern(abspath: str, return_path: bool = False):
path = abspath.replace(os.getcwd(), "", 1)
path = path.replace(os.sep, "/").rstrip("/")
if not path:
return "/"
# Check abspath, as the file needs to exist
if not os.path.isfile(abspath):
return path.rstrip("/") + "/"
path = path + "/"
return path
return path if return_path else f"^{path}"
# Get project configuration with resolved absolute paths for validation
def _get_project_config(project_config_file: str):

View File

@ -1,37 +0,0 @@
# Custom .gitignore-like file
#
# The difference is that those are regex patterns, which will be compared
# against directory and file names case-sensitively. The plugin uses the
# external https://pypi.org/project/regex/ module.
#
# Additional remarks for pattern creation:
# - The compared paths will be always in POSIX format.
# - Each directory path will have a / at the end to allow to distinguish them
# from files.
# - Patterns for dynamic or custom paths like Virtual Environments (venv) or
# build site directories are created during plugin runtime.
# Byte-compiled / optimized / DLL files
# Python cache directory
.*__pycache__/
# macOS
.*\.DS_Store
# .dotfiles in the root directory
^/\.[^/]+$
# Generated files and folders
^/.*\.zip
# Allow .github or .devcontainer directories
# Exclude .cache files and folders
# Exclude known IDE directories
.*\.cache/?
^/\.vscode/
^/\.vs/
^/\.idea/

View File

@ -0,0 +1,27 @@
def get_exclusion_patterns():
"""
Regex patterns, which will be compared against directory and file names
case-sensitively. https://docs.python.org/3/library/re.html#re.search is the
matching function and scans the whole string to find any pattern match. Used
with the https://pypi.org/project/regex/ module.
Additional remarks for pattern creation:
- The compared paths will be always in POSIX format.
- Each directory path will have a / at the end to allow to distinguish them
from files.
- Patterns for dynamic or custom paths like Virtual Environments (venv) or
build site directories are created during plugin runtime.
"""
return [
r"/__pycache__/", # Python cache directory
r"/\.DS_Store$", # macOS
r"/[^/]+\.zip$", # Generated files and folders
r"/[^/]*\.cache($|/)", # .cache files and folders
r"/\.vscode/", # Common autogenerated IDE directories
r"/\.vs/",
r"/\.idea/",
]

View File

@ -39,6 +39,7 @@ from mkdocs.utils import get_yaml_loader
from zipfile import ZipFile, ZIP_DEFLATED
from .config import InfoConfig
from .patterns import get_exclusion_patterns
# -----------------------------------------------------------------------------
# Classes
@ -56,6 +57,7 @@ class InfoPlugin(BasePlugin[InfoConfig]):
# Initialize empty members
self.exclusion_patterns = []
self.excluded_entries = []
# Determine whether we're serving the site
def on_startup(self, *, command, dirty):
@ -183,15 +185,18 @@ class InfoPlugin(BasePlugin[InfoConfig]):
example, _ = os.path.splitext(example)
example = "-".join([present, slugify(example, "-")])
# Load exclusion patterns
self.exclusion_patterns = _load_exclusion_patterns()
# Get local copy of the exclusion patterns
self.exclusion_patterns = get_exclusion_patterns()
self.excluded_entries = []
# Exclude the site_dir at project root
if config.site_dir.startswith(os.getcwd()):
self.exclusion_patterns.append(_resolve_pattern(config.site_dir))
# Exclude the site-packages directory
for path in site.getsitepackages():
# Exclude the Virtual Environment directory. site.getsitepackages() has
# inconsistent results across operating systems, and relies on the
# PREFIXES that will contain the absolute path to the activated venv.
for path in site.PREFIXES:
if path.startswith(os.getcwd()):
self.exclusion_patterns.append(_resolve_pattern(path))
@ -211,24 +216,17 @@ class InfoPlugin(BasePlugin[InfoConfig]):
files: list[str] = []
with ZipFile(archive, "a", ZIP_DEFLATED, False) as f:
for abs_root, dirnames, filenames in os.walk(os.getcwd()):
# Set and print progress indicator
indicator = f"Processing: {abs_root}"
print(indicator, end="\r", flush=True)
# Prune the folders in-place to prevent their processing
for name in list(dirnames):
# Resolve the absolute directory path
path = os.path.join(abs_root, name)
# Exclude the directory and all subdirectories
if self._is_excluded(_resolve_pattern(path)):
dirnames.remove(name)
continue
# Projects, which don't use the projects plugin for
# multi-language support could have separate build folders
# for each config file or language. Therefore, we exclude
# them with the assumption a site_dir contains the sitemap
# file. Example of such a setup: https://t.ly/DLQcy
sitemap_gz = os.path.join(path, "sitemap.xml.gz")
if os.path.exists(sitemap_gz):
log.debug(f"Excluded site_dir: {path}")
if self._is_excluded(path):
dirnames.remove(name)
# Write files to the in-memory archive
@ -237,13 +235,16 @@ class InfoPlugin(BasePlugin[InfoConfig]):
path = os.path.join(abs_root, name)
# Exclude the file
if self._is_excluded(_resolve_pattern(path)):
if self._is_excluded(path):
continue
# Resolve the relative path to create a matching structure
path = os.path.relpath(path, os.path.curdir)
f.write(path, os.path.join(example, path))
# Clear the line for the next indicator
print(" " * len(indicator), end="\r", flush=True)
# Add information on installed packages
f.writestr(
os.path.join(example, "requirements.lock.txt"),
@ -261,11 +262,14 @@ class InfoPlugin(BasePlugin[InfoConfig]):
"system": platform.platform(),
"architecture": platform.architecture(),
"python": platform.python_version(),
"cwd": os.getcwd(),
"command": " ".join([
sys.argv[0].rsplit(os.sep, 1)[-1],
*sys.argv[1:]
]),
"sys.path": sys.path
"env:$PYTHONPATH": os.getenv("PYTHONPATH", ""),
"sys.path": sys.path,
"excluded_entries": self.excluded_entries
},
default = str,
indent = 2
@ -363,10 +367,10 @@ class InfoPlugin(BasePlugin[InfoConfig]):
print(Style.NORMAL)
for path in outside_root:
print(f" {path}")
print(" \nTo assure that all project files are found please adjust")
print("\n To assure that all project files are found please adjust")
print(" your config or file structure and put everything within the")
print(" root directory of the project.\n")
print(" Please also make sure `mkdocs build` is run in the actual")
print(" root directory of the project.")
print("\n Please also make sure `mkdocs build` is run in the actual")
print(" root directory of the project.")
print(Style.RESET_ALL)
@ -374,13 +378,34 @@ class InfoPlugin(BasePlugin[InfoConfig]):
if self.config.archive_stop_on_violation:
sys.exit(1)
# Exclude files which we don't want in our zip file
def _is_excluded(self, posix_path: str) -> bool:
# Check if path is excluded and should be omitted from the zip. Use pattern
# matching for files and folders, and lookahead specific files in folders to
# skip them. Side effect: Save excluded paths to save them in the zip file.
def _is_excluded(self, abspath: str) -> bool:
# Resolve the path into POSIX format to match the patterns
pattern_path = _resolve_pattern(abspath, return_path = True)
for pattern in self.exclusion_patterns:
if regex.match(pattern, posix_path):
log.debug(f"Excluded pattern '{pattern}': {posix_path}")
if regex.search(pattern, pattern_path):
log.debug(f"Excluded pattern '{pattern}': {abspath}")
self.excluded_entries.append(f"{pattern} - {pattern_path}")
return True
# File exclusion should be limited to pattern matching
if os.path.isfile(abspath):
return False
# Projects, which don't use the projects plugin for multi-language
# support could have separate build folders for each config file or
# language. Therefore, we exclude them with the assumption a site_dir
# contains the sitemap file. Example of such a setup: https://t.ly/DLQcy
sitemap_gz = os.path.join(abspath, "sitemap.xml.gz")
if os.path.exists(sitemap_gz):
log.debug(f"Excluded site_dir: {abspath}")
self.excluded_entries.append(f"sitemap.xml.gz - {pattern_path}")
return True
return False
# -----------------------------------------------------------------------------
@ -435,31 +460,22 @@ def _load_yaml(abs_src_path: str):
return result
# Load info.gitignore, ignore any empty lines or # comments
def _load_exclusion_patterns(path: str = None):
if path is None:
path = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(path, "info.gitignore")
with open(path, encoding = "utf-8") as file:
lines = map(str.strip, file.readlines())
return [line for line in lines if line and not line.startswith("#")]
# Get a normalized POSIX path for the pattern matching with removed current
# working directory prefix. Directory paths end with a '/' to allow more control
# in the pattern creation for files and directories.
def _resolve_pattern(abspath: str):
path = abspath.replace(os.getcwd(), "", 1).replace(os.sep, "/")
# in the pattern creation for files and directories. The patterns are matched
# using the search function, so they are prefixed with ^ for specificity.
def _resolve_pattern(abspath: str, return_path: bool = False):
path = abspath.replace(os.getcwd(), "", 1)
path = path.replace(os.sep, "/").rstrip("/")
if not path:
return "/"
# Check abspath, as the file needs to exist
if not os.path.isfile(abspath):
return path.rstrip("/") + "/"
path = path + "/"
return path
return path if return_path else f"^{path}"
# Get project configuration with resolved absolute paths for validation
def _get_project_config(project_config_file: str):

View File

@ -149,7 +149,7 @@ const assets$ = concat(
})),
/* Copy images and configurations */
...["**/*.{jpg,png,svg,yml,gitignore}"]
...["**/*.{jpg,png,svg,yml}"]
.map(pattern => copyAll(pattern, {
from: "src",
to: base