Skip to content

Commit

Permalink
feat: add strain configuration validation step with schema and repo c…
Browse files Browse the repository at this point in the history
…hecks
  • Loading branch information
magajh committed Nov 8, 2024
1 parent d857639 commit d74f046
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 3 deletions.
14 changes: 11 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,14 @@ jobs:
env:
TUTOR_ROOT: ${{ github.workspace }}/strains/${{ inputs.STRAIN_PATH }}
TUTOR_PLUGINS_ROOT: ${{ github.workspace }}/strains/${{ inputs.STRAIN_PATH }}/plugins
CONFIG_FILE: strains/${{ inputs.STRAIN_PATH }}/config.yml

steps:
- name: Checkout Picasso repository for utility scripts
uses: actions/checkout@v4
with:
repository: edunext/picasso
ref: main
ref: mjh/repo-and-syntx-analysis
path: picasso

- name: Checkout strains repository for build configurations
Expand All @@ -73,14 +74,21 @@ jobs:

- name: Install necessary dependencies
run: |
pip install pyyaml
pip install pyyaml schema
- name: Validate strain configuration
working-directory: ${{ github.workspace }}
env:
STRAIN_VALIDATION_SCRIPTS_PATH: picasso/.github/workflows/scripts/strain_validation
run: |
python $STRAIN_VALIDATION_SCRIPTS_PATH/validate_schema.py $CONFIG_FILE
python $STRAIN_VALIDATION_SCRIPTS_PATH/validate_repos.py $CONFIG_FILE
- name: Get Tutor Configurations from config.yml and set them as an environment variable
working-directory: ${{ github.workspace }}
env:
REQUIRED_KEYS: TUTOR_VERSION
OPTIONAL_KEYS: DOCKER_REGISTRY
CONFIG_FILE: strains/${{ inputs.STRAIN_PATH }}/config.yml
SCRIPT_PATH: picasso/.github/workflows/scripts/get_tutor_config.py
run: |
ENV_VARS=$(python $SCRIPT_PATH --config-file $CONFIG_FILE --required-keys $REQUIRED_KEYS --optional-keys $OPTIONAL_KEYS)
Expand Down
99 changes: 99 additions & 0 deletions .github/workflows/scripts/strain_validation/validate_repos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
Validate repository URLs and configurations in the strain config file.
"""
from __future__ import annotations

import argparse
import logging
import re
from collections.abc import Sequence
from typing import Callable, Dict, List, Any
from io import TextIOWrapper

import requests
import yaml

LOG = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


def validate_repo_url(url: str) -> bool:
"""Validate a repository URL by making a GET request."""
try:
response = requests.get(url, timeout=5)
response.raise_for_status()
return True
except requests.exceptions.RequestException:
LOG.exception("Exception while checking repo URL: %s", url)
return False


def check_edx_platform_repo(data: Dict[str, Any]) -> bool:
"""Check the edx_platform_repository URL in the YAML data."""
edx_platform_repository = data.get('EDX_PLATFORM_REPOSITORY', "").rstrip('.git')
edx_platform_version = data.get('EDX_PLATFORM_VERSION', "")
url = f"{edx_platform_repository}/tree/{edx_platform_version}"
if not validate_repo_url(url):
LOG.error("Failed to validate edx_platform_repository URL: %s", url)
return False
return True


def check_openedx_extra_pip_req_repos(data: Dict[str, Any]) -> bool:
"""Check additional pip requirement repos in the YAML data."""
pattern = r"git\+(https?://\S+?)(?:#|$)"
for repo in data.get('OPENEDX_EXTRA_PIP_REQUIREMENTS', []):
match = re.search(pattern, repo)
if match:
url = match.group(1).replace('@', '/tree/').replace('.git', '')
if not validate_repo_url(url):
LOG.error("Failed to validate OPENEDX_EXTRA_PIP_REQUIREMENTS URL: %s", url)
return False
return True


def validate_data(data: Dict[str, Any], checks: List[Callable[[Dict[str, Any]], bool]]) -> bool:
"""Run all provided validation checks on the YAML data."""
return all(check(data) for check in checks)


def main(argv: Sequence[str] | None = None) -> int:
"""
Entry point for validating repository URLs in a strain configuration file.
This function parses command-line arguments to load a YAML file, performs
validation checks on specific repository URLs, and logs results. If any
validation fails, an error code is returned.
Args:
argv (Sequence[str] | None): Optional sequence of command-line arguments.
If None, arguments will be taken from sys.argv.
Returns:
int: 0 if all URLs are validated successfully, 1 if any validation fails.
"""
parser = argparse.ArgumentParser(description="Validate repository URLs in strain config file.")
parser.add_argument("file", type=argparse.FileType("r"), nargs="+")
args = parser.parse_args(argv)

strain_file: TextIOWrapper = args.file[0]
try:
data = yaml.safe_load(strain_file)
except yaml.YAMLError:
LOG.exception("Error loading YAML data.")
return 1

checks = [
check_edx_platform_repo,
check_openedx_extra_pip_req_repos
]

if not validate_data(data, checks):
return 1

LOG.info("All repository URLs validated successfully.")
return 0


if __name__ == "__main__":
raise SystemExit(main())
150 changes: 150 additions & 0 deletions .github/workflows/scripts/strain_validation/validate_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env python
"""
Validate syntax in a YAML file based on specific rules.
"""

from __future__ import annotations

import argparse
import logging
import re
from collections.abc import Sequence
from io import TextIOWrapper
from typing import Any, Dict

import yaml
from schema import Schema, And, Optional, Use, SchemaError, Regex

LOG = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

TUTOR_VERSION_REGEX = r"^v\d+\.\d+\.\d+$"
EMPTY_LIST_ERROR_MSG = "{} must be a non-empty list of non-empty strings"


def perform_extra_validations(data: Dict[str, Any]) -> Dict[str, Any]:
"""
Perform additional validations on the strain configuration data.
Checks for custom conditions not covered by the schema:
- Ensure either PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME is defined
if PICASSO_THEMES exists.
Args:
data (Dict[str, Any]): The YAML data dictionary.
Returns:
Dict[str, Any]: Validated data dictionary.
Raises:
SchemaError: If required conditions are not met.
"""
if "PICASSO_THEMES" in data and not ("PICASSO_THEMES_NAME" in data or "PICASSO_DEFAULT_SITE_THEME" in data):
raise SchemaError("PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME must be defined.")
return data

STRAIN_SCHEMA = Schema(
Use(perform_extra_validations),
{
"TUTOR_VERSION": And(
str,
Regex(TUTOR_VERSION_REGEX, error="TUTOR_VERSION must be in the format vX.Y.Z (e.g., v5.3.0)")
),
Optional(Regex(r"^PICASSO_.+_DPKG$")): {
"name": And(str, len),
"repo": And(str, len),
"version": And(str, len)
},
Optional("PICASSO_THEMES"): And(
list,
len,
[
{
"name": And(str, len),
"repo": And(str, len),
"version": And(str, len)
}
]
),
Optional("PICASSO_THEMES_NAME"): And(
list,
len,
lambda x: all(isinstance(item, str) and item for item in x),
error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEMES_NAME"),
),
Optional("PICASSO_THEME_DIRS"): And(
list,
len,
lambda x: all(isinstance(item, str) and item for item in x),
error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEME_DIRS"),
)
},
ignore_extra_keys=True
)

def validate_with_warnings(data: Dict[str, Any]) -> bool:
"""
Validate the data against the strain schema and log warnings for missing optional keys.
Args:
data (Dict[str, Any]): The loaded YAML data to validate.
Returns:
bool: True if validation is successful; otherwise, False.
Logs warnings for missing optional keys such as PICASSO_THEMES and PICASSO_THEMES_NAME.
"""
try:
STRAIN_SCHEMA.validate(data)
if not data.get("PICASSO_THEMES"):
LOG.warning("No PICASSO_THEMES key found; themes will not be enabled.")
if not data.get("PICASSO_THEMES") and not data.get("PICASSO_THEMES_NAME"):
LOG.warning("No PICASSO_THEMES_NAME key found; default themes will be used.")
LOG.info("Strain syntax and structure validation completed successfully.")
return True
except SchemaError as e:
LOG.error("Schema validation failed: %s", e)
return False

def validate_yaml_file(yaml_file: TextIOWrapper) -> bool:
"""
Load and validate YAML file structure against the defined schema.
Args:
yaml_file (TextIOWrapper): Opened YAML file for reading.
Returns:
bool: True if YAML content is valid; otherwise, False.
Logs syntax errors in the YAML structure.
"""
try:
config_yml = yaml.safe_load(yaml_file)
return validate_with_warnings(config_yml)
except yaml.YAMLError as yaml_error:
LOG.error("YAML syntax error: %s", yaml_error)
return False

def main(argv: Sequence[str] | None = None) -> int:
"""
Execute syntax checks on a configuration file for strains.
Args:
argv (Sequence[str] | None): Command-line arguments.
Returns:
int: 0 if configuration file is valid; 1 if invalid.
"""
parser = argparse.ArgumentParser(description="Validate YAML file syntax and strain schema.")
parser.add_argument("file", type=argparse.FileType("r"), nargs="+", help="YAML file to validate.")
args = parser.parse_args(argv)

strain_file: TextIOWrapper = args.file[0]
if not validate_yaml_file(strain_file):
return 1

return 0

if __name__ == "__main__":
raise SystemExit(main())

0 comments on commit d74f046

Please sign in to comment.