Skip to content

Commit

Permalink
[3.11] pythongh-107450: Check for overflow in the tokenizer and fix overflow test (pythonGH-110832)
Browse files Browse the repository at this point in the history

(cherry picked from commit a1ac559)

Co-authored-by: Lysandros Nikolaou <[email protected]>
Co-authored-by: Filipe Laíns <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>
  • Loading branch information
3 people committed Oct 16, 2023
1 parent 6502a13 commit 01760a7
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 29 deletions.
39 changes: 20 additions & 19 deletions Include/errcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
extern "C" {
#endif


/* Error codes passed around between file input, tokenizer, parser and
interpreter. This is necessary so we can turn them into Python
exceptions at a higher level. Note that some errors have a
Expand All @@ -13,24 +12,26 @@ extern "C" {
the parser only returns E_EOF when it hits EOF immediately, and it
never returns E_OK. */

#define E_OK 10 /* No error */
#define E_EOF 11 /* End Of File */
#define E_INTR 12 /* Interrupted */
#define E_TOKEN 13 /* Bad token */
#define E_SYNTAX 14 /* Syntax error */
#define E_NOMEM 15 /* Ran out of memory */
#define E_DONE 16 /* Parsing complete */
#define E_ERROR 17 /* Execution error */
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
#define E_OVERFLOW 19 /* Node had too many children */
#define E_TOODEEP 20 /* Too many indentation levels */
#define E_DEDENT 21 /* No matching outer block for dedent */
#define E_DECODE 22 /* Error in decoding into Unicode */
#define E_EOFS 23 /* EOF in triple-quoted string */
#define E_EOLS 24 /* EOL in single-quoted string */
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
#define E_BADSINGLE 27 /* Ill-formed single statement input */
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
#define E_OK 10 /* No error */
#define E_EOF 11 /* End Of File */
#define E_INTR 12 /* Interrupted */
#define E_TOKEN 13 /* Bad token */
#define E_SYNTAX 14 /* Syntax error */
#define E_NOMEM 15 /* Ran out of memory */
#define E_DONE 16 /* Parsing complete */
#define E_ERROR 17 /* Execution error */
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
#define E_OVERFLOW 19 /* Node had too many children */
#define E_TOODEEP 20 /* Too many indentation levels */
#define E_DEDENT 21 /* No matching outer block for dedent */
#define E_DECODE 22 /* Error in decoding into Unicode */
#define E_EOFS 23 /* EOF in triple-quoted string */
#define E_EOLS 24 /* EOL in single-quoted string */
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
#define E_BADSINGLE 27 /* Ill-formed single statement input */
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
#define E_COLUMNOVERFLOW 29 /* Column offset overflow */


#ifdef __cplusplus
}
Expand Down
16 changes: 12 additions & 4 deletions Lib/test/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
from test.support.warnings_helper import check_warnings
from test import support

try:
from _testcapi import INT_MAX
except ImportError:
INT_MAX = 2**31 - 1



class NaiveException(Exception):
def __init__(self, x):
Expand Down Expand Up @@ -318,11 +324,13 @@ def baz():
check('(yield i) = 2', 1, 2)
check('def f(*):\n pass', 1, 7)

@unittest.skipIf(INT_MAX >= sys.maxsize, "Downcasting to int is safe for col_offset")
@support.requires_resource('cpu')
@support.bigmemtest(support._2G, memuse=1.5)
def testMemoryErrorBigSource(self, _size):
with self.assertRaises(OverflowError):
exec(f"if True:\n {' ' * 2**31}print('hello world')")
@support.bigmemtest(INT_MAX, memuse=2, dry_run=False)
def testMemoryErrorBigSource(self, size):
src = b"if True:\n%*s" % (size, b"pass")
with self.assertRaisesRegex(OverflowError, "Parser column offset overflow"):
compile(src, '<fragment>', 'exec')

@cpython_only
def testSettingException(self):
Expand Down
10 changes: 4 additions & 6 deletions Parser/pegen_errors.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ _Pypegen_tokenizer_error(Parser *p)
msg = "unexpected character after line continuation character";
break;
}
case E_COLUMNOVERFLOW:
PyErr_SetString(PyExc_OverflowError,
"Parser column offset overflow - source line is too big");
return -1;
default:
msg = "unknown parsing error";
}
Expand Down Expand Up @@ -224,12 +228,6 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
col_offset = 0;
} else {
const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
if (p->tok->cur - start > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"Parser column offset overflow - source line is too big");
p->error_indicator = 1;
return NULL;
}
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
}
} else {
Expand Down
4 changes: 4 additions & 0 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,10 @@ tok_nextc(struct tok_state *tok)
int rc;
for (;;) {
if (tok->cur != tok->inp) {
if (tok->cur - tok->buf >= INT_MAX) {
tok->done = E_COLUMNOVERFLOW;
return EOF;
}
return Py_CHARMASK(*tok->cur++); /* Fast path */
}
if (tok->done != E_OK) {
Expand Down

0 comments on commit 01760a7

Please sign in to comment.