diff --git a/tested/internationalization/nl.yaml b/tested/internationalization/nl.yaml
index 6d6fcebf..13ee515e 100644
--- a/tested/internationalization/nl.yaml
+++ b/tested/internationalization/nl.yaml
@@ -16,13 +16,11 @@ nl:
       runtime: "Runtime error"
       unexpected: "Onverwachte uitvoer"
       programmed:
-        student:
-          default: >-
-            Er ging iets fout op bij het evalueren van de oplossing.
-            Meld dit aan de lesgever!
-        result: "Het resultaat van de geprogrammeerde evaluatie is ongeldig:"
-        stdout: "Dit werd geproduceerd op stdout:"
-        stderr: "Dit werd geproduceerd op stderr:"
+        student: >-
+          Er ging iets fout bij het evalueren van de oplossing.
+          Meld dit aan de lesgever!
+        stdout: "Het evalueren van de oplossing genereerde deze uitvoer op stdout:"
+        stderr: "Het evalueren van de oplossing genereerde deze uitvoer op stderr:"
       specific:
         student:
           default: >-
diff --git a/tested/oracles/programmed.py b/tested/oracles/programmed.py
index 41c00bbb..938a7c2d 100644
--- a/tested/oracles/programmed.py
+++ b/tested/oracles/programmed.py
@@ -68,19 +68,24 @@ def _catch_output() -> Generator[tuple[StringIO, StringIO], None, None]:
         sys.stderr = old_stderr
 
 
-def _evaluate_programmed(
-    bundle: Bundle,
-    oracle: CustomCheckOracle,
-    context: OracleContext,
-) -> BaseExecutionResult | BooleanEvalResult:
-    """
-    Run the custom evaluation. Concerning structure and execution, the custom
-    oracle is very similar to the execution of the whole evaluation. It a
-    mini-evaluation if you will.
+def _execute_custom_check_function(
+    bundle: Bundle, oracle: CustomCheckOracle, context: OracleContext
+) -> tuple[BooleanEvalResult | None, str, str]:
     """
-    _logger.debug("Doing evaluation in Python mode.")
+    Execute a custom check function, returning the captured stdout and stderr if
+    the execution got that far.
+
+    This function can raise various errors, depending on where in the process it
+    fails: invalid code in the oracle raises a SyntaxError, and any exception
+    raised by the custom check function itself propagates to the caller.
 
-    # Create a configs bundle for the language of the oracle.
+    :param bundle: The bundle of the original execution.
+    :param oracle: The oracle that is executing.
+    :param context: The context of said oracle.
+
+    :return: A tuple (result, stdout, stderr); the result itself may be None.
+    """
+    # Create a config bundle for Python, the programming language of the oracle.
     eval_bundle = create_bundle(bundle.config, bundle.out, bundle.suite, "python")
 
     # Path to the oracle.
@@ -102,33 +107,53 @@
         "__tested_context__": ConvertedOracleContext.from_context(eval_bundle, context),
     }
     exec("import sys\n" "sys.modules['evaluation_utils'] = __tested_test__", global_env)
-    # Make the oracle available.
+
+    # Make the oracle available. This will fail on syntax errors.
     exec(evaluator_code, global_env)
 
-    # Since we pass a class value, we don't want to
+    # Create the function we will call.
     check_function_call = FunctionCall(
         type=FunctionType.FUNCTION,
         name=oracle.function.name,
         arguments=[Identifier("__tested_context__"), *oracle.arguments],
     )
+    # The actual code for calling the function.
     literal_function_call = generate_statement(eval_bundle, check_function_call)
+    # Call the function while intercepting all output.
+    with _catch_output() as (stdout_, stderr_):
+        exec(f"__tested_test__result = {literal_function_call}", global_env)
+    result_ = cast(BooleanEvalResult | None, global_env["__tested_test__result"])
+    stdout_ = stdout_.getvalue()
+    stderr_ = stderr_.getvalue()
+
+    return result_, stdout_, stderr_
+
+
+def _evaluate_programmed(
+    bundle: Bundle,
+    oracle: CustomCheckOracle,
+    context: OracleContext,
+) -> BaseExecutionResult | BooleanEvalResult:
+    """
+    Run the custom evaluation. The actual execution is delegated to a helper
+    function; this function mainly provides the error handling around it.
+    """
+
+    result_ = None
+    stdout_ = None
+    stderr_ = None
     messages = []
-    # noinspection PyBroadException
     try:
-        with _catch_output() as (stdout_, stderr_):
-            exec(f"__tested_test__result = {literal_function_call}", global_env)
-        result_ = cast(BooleanEvalResult | None, global_env["__tested_test__result"])
-        stdout_ = stdout_.getvalue()
-        stderr_ = stderr_.getvalue()
-    except Exception as e:
+        result_, stdout_, stderr_ = _execute_custom_check_function(
+            bundle, oracle, context
+        )
+    except SyntaxError as e:
+        # The oracle code itself might not be valid Python.
         _logger.exception(e)
-        result_ = None
-        stdout_ = None
-        stderr_ = None
         messages.append(
             ExtendedMessage(
-                description="The custom check oracle failed with the following exception:",
+                description="The custom check oracle failed with the following syntax error:",
                 format="text",
                 permission=Permission.STAFF,
             )
         )
@@ -137,43 +162,31 @@
         messages.append(
             ExtendedMessage(description=tb, format="code", permission=Permission.STAFF)
         )
-
-    if stdout_:
-        messages.append(
-            ExtendedMessage(
-                description=get_i18n_string("judge.programmed.produced.stdout"),
-                format="text",
-            )
-        )
-        messages.append(ExtendedMessage(description=stdout_, format="code"))
-    if stderr_:
+    except Exception as e:
+        _logger.exception(e)
         messages.append(
             ExtendedMessage(
-                description=get_i18n_string("judge.programmed.produced.stderr"),
+                description="The custom check oracle failed with the following exception:",
                 format="text",
-                permission=Permission.STUDENT,
+                permission=Permission.STAFF,
             )
         )
+        tb = traceback.format_exc()
         messages.append(
-            ExtendedMessage(
-                description=stderr_, format="code", permission=Permission.STAFF
-            )
+            ExtendedMessage(description=tb, format="code", permission=Permission.STAFF)
         )
+    if stdout_:
+        messages.append(get_i18n_string("judge.programmed.produced.stdout"))
+        messages.append(ExtendedMessage(description=stdout_, format="code"))
+    if stderr_:
+        messages.append(get_i18n_string("judge.programmed.produced.stderr"))
+        messages.append(ExtendedMessage(description=stderr_, format="code"))
+    # If the result is None, the oracle is broken.
     if result_ is None:
-        messages.append(
-            ExtendedMessage(
-                description=get_i18n_string("judge.programmed.student"), format="text"
-            )
-        )
-        messages.append(
-            ExtendedMessage(
-                description=get_i18n_string("judge.programmed.failed"),
-                format="text",
-                permission=Permission.STAFF,
-            )
-        )
+        messages.append(get_i18n_string("judge.programmed.student"))
+        messages.append("The custom check oracle did not produce a valid return value.")
     return BooleanEvalResult(
         result=Status.INTERNAL_ERROR,
         readable_expected=None,
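
For reference, the capture-and-exec pattern that _execute_custom_check_function relies on can be reduced to a small self-contained sketch (illustrative names only, not the judge's actual implementation):

    import sys
    from contextlib import contextmanager
    from io import StringIO

    @contextmanager
    def catch_output():
        # Temporarily swap sys.stdout/sys.stderr for in-memory buffers,
        # restoring the originals even if the exec'd code raises.
        old_out, old_err = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = StringIO(), StringIO()
        try:
            yield sys.stdout, sys.stderr
        finally:
            sys.stdout, sys.stderr = old_out, old_err

    global_env = {}
    # exec'ing invalid code raises a SyntaxError here, before anything runs.
    exec("def check():\n    print('debug output')\n    return True", global_env)
    with catch_output() as (out, err):
        # A crashing check function raises out of this exec; the buffers
        # still hold whatever was printed before the crash.
        exec("result = check()", global_env)
    print(global_env["result"], repr(out.getvalue()))  # True 'debug output\n'
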
diff --git a/tests/exercises/echo-function/evaluation/evaluator.py b/tests/exercises/echo-function/evaluation/evaluator.py
index 02304728..453be3e4 100644
--- a/tests/exercises/echo-function/evaluation/evaluator.py
+++ b/tests/exercises/echo-function/evaluation/evaluator.py
@@ -18,3 +18,7 @@ def evaluate_value_dsl(context):
         dsl_expected="{5, 5}", dsl_actual="{4, 4}"
     )
+
+
+def evaluate_runtime_crash(context):
+    return len(context) / 0
diff --git a/tests/exercises/echo-function/evaluation/evaluator_syntax_error.py b/tests/exercises/echo-function/evaluation/evaluator_syntax_error.py
new file mode 100644
index 00000000..1c9398e7
--- /dev/null
+++ b/tests/exercises/echo-function/evaluation/evaluator_syntax_error.py
@@ -0,0 +1,6 @@
+# noinspection PyUnresolvedReferences
+from evaluation_utils import EvaluationResult, Message
+
+
+evaluate(context):
+    return len(context) / 0
diff --git a/tests/exercises/echo-function/evaluation/programmed_crash.yaml b/tests/exercises/echo-function/evaluation/programmed_crash.yaml
new file mode 100644
index 00000000..644d338a
--- /dev/null
+++ b/tests/exercises/echo-function/evaluation/programmed_crash.yaml
@@ -0,0 +1,9 @@
+- tab: "My tab"
+  contexts:
+    - testcases:
+        - expression: 'echo("input-1")'
+          return: !oracle
+            oracle: "custom_check"
+            file: "evaluator.py"
+            name: "evaluate_runtime_crash"
+            value: "input-2"
diff --git a/tests/exercises/echo-function/evaluation/programmed_missing.yaml b/tests/exercises/echo-function/evaluation/programmed_missing.yaml
new file mode 100644
index 00000000..29e62b1a
--- /dev/null
+++ b/tests/exercises/echo-function/evaluation/programmed_missing.yaml
@@ -0,0 +1,9 @@
+- tab: "My tab"
+  contexts:
+    - testcases:
+        - expression: 'echo("input-1")'
+          return: !oracle
+            oracle: "custom_check"
+            file: "evaluator.py"
+            name: "this_does_not_exist"
+            value: "input-2"
diff --git a/tests/exercises/echo-function/evaluation/programmed_syntax_error.yaml b/tests/exercises/echo-function/evaluation/programmed_syntax_error.yaml
new file mode 100644
index 00000000..2ff4d24f
--- /dev/null
+++ b/tests/exercises/echo-function/evaluation/programmed_syntax_error.yaml
@@ -0,0 +1,9 @@
+- tab: "My tab"
+  contexts:
+    - testcases:
+        - expression: 'echo("input-1")'
+          return: !oracle
+            oracle: "custom_check"
+            file: "evaluator_syntax_error.py"
+            name: "evaluate"
+            value: "input-2"
diff --git a/tests/language_markers.py b/tests/language_markers.py
new file mode 100644
index 00000000..747b6584
--- /dev/null
+++ b/tests/language_markers.py
@@ -0,0 +1,15 @@
+import pytest
+
+COMPILE_LANGUAGES = [
+    "python",
+    "java",
+    "c",
+    "kotlin",
+    pytest.param("haskell", marks=pytest.mark.haskell),
+    "csharp",
+]
+ALL_SPECIFIC_LANGUAGES = COMPILE_LANGUAGES + [
+    "javascript",
+    pytest.param("runhaskell", marks=pytest.mark.haskell),
+]
+ALL_LANGUAGES = ALL_SPECIFIC_LANGUAGES + ["bash"]
diff --git a/tests/test_functionality.py b/tests/test_functionality.py
index 3cf3b3c2..9434ad98 100644
--- a/tests/test_functionality.py
+++ b/tests/test_functionality.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 
 import pytest
 
+from language_markers import ALL_LANGUAGES, ALL_SPECIFIC_LANGUAGES
 from tested.configs import create_bundle
 from tested.datatypes import BasicBooleanTypes, BasicNumericTypes, BasicStringTypes
@@ -29,20 +30,6 @@
 from tested.testsuite import Context, MainInput, Suite, Tab, Testcase, TextData
 from tests.manual_utils import assert_valid_output, configuration, execute_config
 
-COMPILE_LANGUAGES = [
-    "python",
-    "java",
-    "c",
-    "kotlin",
-    pytest.param("haskell", marks=pytest.mark.haskell),
-    "csharp",
-]
-ALL_SPECIFIC_LANGUAGES = COMPILE_LANGUAGES + [
-    "javascript",
-    pytest.param("runhaskell", marks=pytest.mark.haskell),
-]
-ALL_LANGUAGES = ALL_SPECIFIC_LANGUAGES + ["bash"]
-
 quotes = {
     "python": "'",
     "java": '"',
@@ -94,31 +81,6 @@ def test_io_exercise_wrong(language: str, tmp_path: Path, pytestconfig):
     assert updates.find_status_enum() == ["wrong"]
 
 
-@pytest.mark.parametrize("language", ALL_LANGUAGES)
-def test_simple_programmed_eval(language: str, tmp_path: Path, pytestconfig):
-    conf = configuration(
-        pytestconfig,
-        "echo",
-        language,
-        tmp_path,
-        "one-programmed-correct.tson",
-        "correct",
-    )
-    result = execute_config(conf)
-    updates = assert_valid_output(result, pytestconfig)
-    assert updates.find_status_enum() == ["correct"]
-
-
-@pytest.mark.parametrize("language", ALL_LANGUAGES)
-def test_simple_programmed_eval_wrong(language: str, tmp_path: Path, pytestconfig):
-    conf = configuration(
-        pytestconfig, "echo", language, tmp_path, "one-programmed-wrong.tson", "correct"
-    )
-    result = execute_config(conf)
-    updates = assert_valid_output(result, pytestconfig)
-    assert updates.find_status_enum() == ["wrong"]
-
-
 @pytest.mark.parametrize("language", ALL_LANGUAGES)
 def test_io_function_exercise(language: str, tmp_path: Path, pytestconfig):
     conf = configuration(
@@ -278,22 +240,6 @@ def test_specific_evaluation(language: str, tmp_path: Path, pytestconfig):
     assert len(updates.find_all("append-message")) == 2
 
 
-@pytest.mark.parametrize("language", ALL_LANGUAGES)
-def test_programmed_evaluation(language: str, tmp_path: Path, pytestconfig):
-    conf = configuration(
-        pytestconfig,
-        "echo-function",
-        language,
-        tmp_path,
-        "programmed.tson",
-        "correct",
-    )
-    result = execute_config(conf)
-    updates = assert_valid_output(result, pytestconfig)
-    assert updates.find_status_enum() == ["correct"]
-    assert len(updates.find_all("append-message"))
-
-
 @pytest.mark.parametrize(
     "lang",
     [
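
Moving the language lists into tests/language_markers.py lets multiple test modules share a single definition. The pytest.param(..., marks=pytest.mark.haskell) entries tag individual parametrized cases, so the Haskell cases can be selected or deselected as a group. A minimal sketch of how a consumer behaves (the test name is hypothetical):

    import pytest
    from language_markers import ALL_LANGUAGES

    @pytest.mark.parametrize("language", ALL_LANGUAGES)
    def test_example(language: str):
        # The "haskell" and "runhaskell" cases carry pytest.mark.haskell,
        # so `pytest -m haskell` runs only those, and
        # `pytest -m "not haskell"` skips them; plain strings are unmarked.
        assert isinstance(language, str)
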
+""" + +from pathlib import Path + +import pytest +from language_markers import ALL_LANGUAGES + +from tests.manual_utils import assert_valid_output, configuration, execute_config + + +@pytest.mark.parametrize("language", ALL_LANGUAGES) +def test_custom_check_function_stdout(language: str, tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, + "echo", + language, + tmp_path, + "one-programmed-correct.tson", + "correct", + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["correct"] + + +def test_custom_check_function_stdout_wrong_result(tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, "echo", "python", tmp_path, "one-programmed-wrong.tson", "correct" + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["wrong"] + + +@pytest.mark.parametrize("language", ALL_LANGUAGES) +def test_custom_check_function_return(language: str, tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, + "echo-function", + language, + tmp_path, + "programmed.tson", + "correct", + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["correct"] + assert len(updates.find_all("append-message")) + + +def test_custom_check_function_runtime_crash(tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, + "echo-function", + "python", + tmp_path, + "programmed_crash.yaml", + "correct", + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["internal error"] + assert len(updates.find_all("append-message")) == 4 + + +def test_custom_check_function_syntax_error(tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, + "echo-function", + "python", + tmp_path, + "programmed_syntax_error.yaml", + "correct", + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["internal error"] + assert len(updates.find_all("append-message")) == 4 + + +def test_missing_custom_check_function(tmp_path: Path, pytestconfig): + conf = configuration( + pytestconfig, + "echo-function", + "python", + tmp_path, + "programmed_missing.yaml", + "correct", + ) + result = execute_config(conf) + updates = assert_valid_output(result, pytestconfig) + assert updates.find_status_enum() == ["internal error"] + assert len(updates.find_all("append-message")) == 4