-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add validation step for strain configuration schema and repository URLs #35
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,99 @@ | ||||||
""" | ||||||
Validate repository URLs and configurations in the strain config file. | ||||||
""" | ||||||
Comment on lines
+1
to
+3
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please add here how this script it's supposed to be used? As we've done here: https://github.com/eduNEXT/picasso/blob/main/.github/workflows/scripts/get_tutor_config.py#L1-L15 |
||||||
from __future__ import annotations | ||||||
|
||||||
import argparse | ||||||
import logging | ||||||
import re | ||||||
from collections.abc import Sequence | ||||||
from typing import Callable, Dict, List, Any | ||||||
from io import TextIOWrapper | ||||||
|
||||||
import requests | ||||||
import yaml | ||||||
|
||||||
LOG = logging.getLogger(__name__) | ||||||
logging.basicConfig(level=logging.INFO) | ||||||
|
||||||
|
||||||
def validate_repo_url(url: str) -> bool: | ||||||
"""Validate a repository URL by making a GET request.""" | ||||||
try: | ||||||
response = requests.get(url, timeout=5) | ||||||
response.raise_for_status() | ||||||
return True | ||||||
except requests.exceptions.RequestException: | ||||||
LOG.exception("Exception while checking repo URL: %s", url) | ||||||
return False | ||||||
|
||||||
|
||||||
def check_edx_platform_repo(data: Dict[str, Any]) -> bool: | ||||||
"""Check the edx_platform_repository URL in the YAML data.""" | ||||||
edx_platform_repository = data.get('EDX_PLATFORM_REPOSITORY', "").rstrip('.git') | ||||||
edx_platform_version = data.get('EDX_PLATFORM_VERSION', "") | ||||||
url = f"{edx_platform_repository}/tree/{edx_platform_version}" | ||||||
if not validate_repo_url(url): | ||||||
LOG.error("Failed to validate edx_platform_repository URL: %s", url) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
How do we know if the EDX_PLATFORM_REPOSITORY exists but not the EDX_PLATFORM_VERSION? |
||||||
return False | ||||||
return True | ||||||
|
||||||
|
||||||
def check_openedx_extra_pip_req_repos(data: Dict[str, Any]) -> bool: | ||||||
"""Check additional pip requirement repos in the YAML data.""" | ||||||
pattern = r"git\+(https?://\S+?)(?:#|$)" | ||||||
for repo in data.get('OPENEDX_EXTRA_PIP_REQUIREMENTS', []): | ||||||
match = re.search(pattern, repo) | ||||||
if match: | ||||||
url = match.group(1).replace('@', '/tree/').replace('.git', '') | ||||||
if not validate_repo_url(url): | ||||||
LOG.error("Failed to validate OPENEDX_EXTRA_PIP_REQUIREMENTS URL: %s", url) | ||||||
return False | ||||||
return True | ||||||
|
||||||
|
||||||
def validate_data(data: Dict[str, Any], checks: List[Callable[[Dict[str, Any]], bool]]) -> bool: | ||||||
"""Run all provided validation checks on the YAML data.""" | ||||||
return all(check(data) for check in checks) | ||||||
|
||||||
|
||||||
def main(argv: Sequence[str] | None = None) -> int: | ||||||
""" | ||||||
Entry point for validating repository URLs in a strain configuration file. | ||||||
|
||||||
This function parses command-line arguments to load a YAML file, performs | ||||||
validation checks on specific repository URLs, and logs results. If any | ||||||
validation fails, an error code is returned. | ||||||
|
||||||
Args: | ||||||
argv (Sequence[str] | None): Optional sequence of command-line arguments. | ||||||
If None, arguments will be taken from sys.argv. | ||||||
|
||||||
Returns: | ||||||
int: 0 if all URLs are validated successfully, 1 if any validation fails. | ||||||
""" | ||||||
parser = argparse.ArgumentParser(description="Validate repository URLs in strain config file.") | ||||||
parser.add_argument("file", type=argparse.FileType("r"), nargs="+") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same as I comment here |
||||||
args = parser.parse_args(argv) | ||||||
|
||||||
strain_file: TextIOWrapper = args.file[0] | ||||||
try: | ||||||
data = yaml.safe_load(strain_file) | ||||||
except yaml.YAMLError: | ||||||
LOG.exception("Error loading YAML data.") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like how you're using logs for getting more details about failures. I'll see how we can fit this into the other scripts. Thank you! |
||||||
return 1 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
checks = [ | ||||||
check_edx_platform_repo, | ||||||
check_openedx_extra_pip_req_repos | ||||||
] | ||||||
|
||||||
if not validate_data(data, checks): | ||||||
return 1 | ||||||
|
||||||
LOG.info("All repository URLs validated successfully.") | ||||||
return 0 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
|
||||||
if __name__ == "__main__": | ||||||
raise SystemExit(main()) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Validate syntax in a YAML file based on specific rules. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import argparse | ||
import logging | ||
from collections.abc import Sequence | ||
from io import TextIOWrapper | ||
from typing import Any, Dict | ||
|
||
import yaml | ||
from schema import Schema, And, Optional, Use, SchemaError, Regex | ||
|
||
LOG = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
TUTOR_VERSION_REGEX = r"^v\d+\.\d+\.\d+$" | ||
EMPTY_LIST_ERROR_MSG = "{} must be a non-empty list of non-empty strings" | ||
|
||
|
||
def perform_extra_validations(data: Dict[str, Any]) -> Dict[str, Any]: | ||
""" | ||
Perform additional validations on the strain configuration data. | ||
|
||
Checks for custom conditions not covered by the schema: | ||
|
||
- Ensure either PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME is defined | ||
if PICASSO_THEMES exists. | ||
|
||
Args: | ||
data (Dict[str, Any]): The YAML data dictionary. | ||
|
||
Returns: | ||
Dict[str, Any]: Validated data dictionary. | ||
|
||
Raises: | ||
SchemaError: If required conditions are not met. | ||
Comment on lines
+33
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use this format for all functions? I think this one's missing: https://github.com/eduNEXT/picasso/pull/35/files#diff-1fe9dc0e10d81110fac939c4576da08ead8a873a353cf04a3cff270831958499R42 |
||
""" | ||
if "PICASSO_THEMES" in data and not ("PICASSO_THEMES_NAME" in data or "PICASSO_DEFAULT_SITE_THEME" in data): | ||
raise SchemaError("PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME must be defined.") | ||
return data | ||
Comment on lines
+42
to
+44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't this be checkec using the schema somehow? |
||
|
||
STRAIN_SCHEMA = Schema( | ||
Use(perform_extra_validations), | ||
{ | ||
"TUTOR_VERSION": And( | ||
str, | ||
Regex(TUTOR_VERSION_REGEX, error="TUTOR_VERSION must be in the format vX.Y.Z (e.g., v5.3.0)") | ||
Comment on lines
+49
to
+51
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, TUTOR_VERSION should not include the v-prefix. See this issue for more context: #20 |
||
), | ||
Optional(Regex(r"^PICASSO_.+_DPKG$")): { | ||
"name": And(str, len), | ||
"repo": And(str, len), | ||
"version": And(str, len) | ||
}, | ||
Optional("PICASSO_THEMES"): And( | ||
list, | ||
len, | ||
[ | ||
{ | ||
"name": And(str, len), | ||
"repo": And(str, len), | ||
"version": And(str, len) | ||
} | ||
] | ||
), | ||
Optional("PICASSO_THEMES_NAME"): And( | ||
list, | ||
len, | ||
lambda x: all(isinstance(item, str) and item for item in x), | ||
error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEMES_NAME"), | ||
), | ||
Optional("PICASSO_THEME_DIRS"): And( | ||
list, | ||
len, | ||
lambda x: all(isinstance(item, str) and item for item in x), | ||
error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEME_DIRS"), | ||
) | ||
Comment on lines
+53
to
+80
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was under the impression this could be done in tutor-contrib-picasso instead when enabling themes or private packages, was that an approach considered during the implementation? Or is this something that can only be done during the building process and not when executing extra-commands? |
||
}, | ||
ignore_extra_keys=True | ||
) | ||
|
||
def validate_with_warnings(data: Dict[str, Any]) -> bool: | ||
""" | ||
Validate the data against the strain schema and log warnings for missing optional keys. | ||
|
||
Args: | ||
data (Dict[str, Any]): The loaded YAML data to validate. | ||
|
||
Returns: | ||
bool: True if validation is successful; otherwise, False. | ||
|
||
Logs warnings for missing optional keys such as PICASSO_THEMES and PICASSO_THEMES_NAME. | ||
""" | ||
try: | ||
STRAIN_SCHEMA.validate(data) | ||
if not data.get("PICASSO_THEMES"): | ||
LOG.warning("No PICASSO_THEMES key found; themes will not be enabled.") | ||
if not data.get("PICASSO_THEMES") and not data.get("PICASSO_THEMES_NAME"): | ||
LOG.warning("No PICASSO_THEMES_NAME key found; default themes will be used.") | ||
LOG.info("Strain syntax and structure validation completed successfully.") | ||
return True | ||
except SchemaError as e: | ||
LOG.error("Schema validation failed: %s", e) | ||
return False | ||
|
||
def validate_yaml_file(yaml_file: TextIOWrapper) -> bool: | ||
""" | ||
Load and validate YAML file structure against the defined schema. | ||
|
||
Args: | ||
yaml_file (TextIOWrapper): Opened YAML file for reading. | ||
|
||
Returns: | ||
bool: True if YAML content is valid; otherwise, False. | ||
|
||
Logs syntax errors in the YAML structure. | ||
""" | ||
try: | ||
config_yml = yaml.safe_load(yaml_file) | ||
return validate_with_warnings(config_yml) | ||
except yaml.YAMLError as yaml_error: | ||
LOG.error("YAML syntax error: %s", yaml_error) | ||
return False | ||
|
||
def main(argv: Sequence[str] | None = None) -> int: | ||
""" | ||
Execute syntax checks on a configuration file for strains. | ||
|
||
Args: | ||
argv (Sequence[str] | None): Command-line arguments. | ||
|
||
Returns: | ||
int: 0 if configuration file is valid; 1 if invalid. | ||
""" | ||
parser = argparse.ArgumentParser(description="Validate YAML file syntax and strain schema.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we validate the arguments in another function similar to how it is done here? |
||
parser.add_argument("file", type=argparse.FileType("r"), nargs="+", help="YAML file to validate.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the |
||
args = parser.parse_args(argv) | ||
|
||
strain_file: TextIOWrapper = args.file[0] | ||
if not validate_yaml_file(strain_file): | ||
return 1 | ||
|
||
return 0 | ||
|
||
if __name__ == "__main__": | ||
raise SystemExit(main()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'll need to merge #30 so we can use the requirements file instead. I'll put a pin on this so I don't forget.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mariajgrimaldi agree, I left my approval in #30 so we can close it