Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Correct Word Splitting/Globbing #2142

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
4 changes: 2 additions & 2 deletions osh/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@ def SplitForWordEval(self, s, ifs=None):
Also used by the explicit shSplit() function.
"""
sp = self._GetSplitter(ifs=ifs)
spans = sp.Split(s, True)
#spans = sp.Split(s, True)

# Note: pass allow_escape=False so \ isn't special
#spans = sp.Split(s, False)
spans = sp.Split(s, False)

if 0:
for span in spans:
Expand Down
138 changes: 91 additions & 47 deletions osh/word_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
sh_lvalue,
sh_lvalue_t,
)
from core import bash_impl
from core import error
from core import pyos
from core import pyutil
Expand Down Expand Up @@ -504,6 +503,64 @@ def Eval(self, part):
return result



class FrameEvaluator(object):
    """Joins and splits the pieces of one word frame into fragments.

    Each piece of a frame carries a string plus `quoted` and `do_split`
    flags.  Pieces are concatenated into the current fragment; a piece with
    do_split set is first passed through the splitter, and every boundary
    the splitter reports starts a new fragment.

    NOTE(review): the text immediately before/after a do_split piece is
    glued onto its first/last field, so this relies on SplitForWordEval()
    to signal leading/trailing separators -- confirm against the splitter.
    """

    def __init__(self, splitter):
        # type: (SplitContext) -> None
        # Completed fragments for the frame being evaluated.
        self.frags = []  # type: List[str]
        # Parts of the fragment currently being built; joined by _NextFrag().
        self.frag = []  # type: List[str]
        self.splitter = splitter

    def _Append(self, text):
        # type: (str) -> None
        """Add text to the fragment currently being built."""
        self.frag.append(text)

    def _NextFrag(self):
        # type: () -> None
        """Finish the current fragment, if anything was appended to it.

        Note that an appended empty string still counts: a lone '' piece
        produces one empty fragment.
        """
        if len(self.frag):
            self.frags.append("".join(self.frag))
            self.frag = []

    def _AppendField(self, text, quoted, will_glob):
        # type: (str, bool, bool) -> None
        """Append text that needs no further splitting.

        Quoted text is passed through glob_.GlobEscape() when the result
        will later be globbed (presumably so that it matches literally).
        """
        if quoted and will_glob:
            self._Append(glob_.GlobEscape(text))
        else:
            self._Append(text)

    def _Split(self, piece, quoted, do_split, will_glob):
        # type: (str, bool, bool, bool) -> None
        """Add one piece to the fragments, splitting it if do_split is set.

        Args:
          piece: the string value of the piece
          quoted: whether the piece was quoted
          do_split: whether the piece is subject to splitting
          will_glob: whether the resulting fragments will be globbed
        """
        if not do_split:
            self._AppendField(piece, quoted, will_glob)
            return

        # A word like /$exists/$notexists/ should eval to ['/someval//'] not
        # ['/someval/', '/']
        if len(piece) == 0:
            return

        fields = self.splitter.SplitForWordEval(piece)
        if len(fields) == 0:
            # A non-empty piece that yields no fields (presumably made only
            # of separators) still ends the fragment.
            self._NextFrag()
            return

        last = len(fields) - 1
        for i, field in enumerate(fields):
            self._AppendField(field, quoted, will_glob)

            # Every field but the last ends a fragment; the last one stays
            # open so the following piece can be joined onto it.
            if i != last:
                self._NextFrag()

    def Eval(self, frame, will_glob):
        # type: (List[Piece], bool) -> List[str]
        """Evaluate a frame and return its list of fragments.

        Args:
          frame: the pieces making up one word
          will_glob: whether the fragments will be globbed afterwards;
            controls glob-escaping of quoted pieces

        Returns:
          A new list of fragment strings, owned by the caller.
        """
        # Start from fresh lists rather than clearing in place: the list
        # returned by the previous call may still be held by its caller, and
        # must not be emptied behind its back.
        self.frags = []
        self.frag = []

        for piece in frame:
            self._Split(piece.s, piece.quoted, piece.do_split, will_glob)

        self._NextFrag()

        return self.frags


class AbstractWordEvaluator(StringWordEvaluator):
"""Abstract base class for word evaluators.

Expand Down Expand Up @@ -538,6 +595,8 @@ def __init__(

self.globber = glob_.Globber(exec_opts)

self.frame_ev = FrameEvaluator(self.splitter)

def CheckCircularDeps(self):
# type: () -> None
raise NotImplementedError()
Expand Down Expand Up @@ -790,15 +849,19 @@ def _Length(self, val, token):

elif case(value_e.BashArray):
val = cast(value.BashArray, UP_val)
length = bash_impl.BashArray_Length(val)

elif case(value_e.BashAssoc):
val = cast(value.BashAssoc, UP_val)
length = bash_impl.BashAssoc_Length(val)
# There can be empty placeholder values in the array.
length = 0
for s in val.strs:
if s is not None:
length += 1

elif case(value_e.SparseArray):
val = cast(value.SparseArray, UP_val)
length = bash_impl.SparseArray_Length(val)
length = len(val.d)

elif case(value_e.BashAssoc):
val = cast(value.BashAssoc, UP_val)
length = len(val.d)

else:
raise error.TypeErr(
Expand Down Expand Up @@ -1974,7 +2037,12 @@ def _EvalWordFrame(self, frame, argv):
all_quoted = True
any_quoted = False

#log('--- frame %s', frame)
if 0:
log('---')
log('FRAME')
for i, piece in enumerate(frame):
log('(%d) %s', i, piece)
log('')

for piece in frame:
if len(piece.s):
Expand All @@ -1998,31 +2066,7 @@ def _EvalWordFrame(self, frame, argv):
return

will_glob = not self.exec_opts.noglob()

if 0:
log('---')
log('FRAME')
for i, piece in enumerate(frame):
log('(%d) %s', i, piece)
log('')

# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
frags = [] # type: List[str]
for piece in frame:
if will_glob and piece.quoted:
frag = glob_.GlobEscape(piece.s)
else:
# If we have a literal \, then we turn it into \\\\.
# Splitting takes \\\\ -> \\
# Globbing takes \\ to \ if it doesn't match
frag = _BackslashEscape(piece.s)

if piece.do_split:
frag = _BackslashEscape(frag)
else:
frag = self.splitter.Escape(frag)

frags.append(frag)
frags = self.frame_ev.Eval(frame, will_glob)

if 0:
log('---')
Expand All @@ -2031,27 +2075,27 @@ def _EvalWordFrame(self, frame, argv):
log('(%d) %s', i, frag)
log('')

flat = ''.join(frags)
#log('flat: %r', flat)

args = self.splitter.SplitForWordEval(flat)

# space=' '; argv $space"". We have a quoted part, but we CANNOT elide.
# Add it back and don't bother globbing.
if len(args) == 0 and any_quoted:
argv.append('')
if not len(frags):
return

#log('split args: %r', args)
for a in args:
if glob_.LooksLikeGlob(a):
n = self.globber.Expand(a, argv)
for frag in frags:
if glob_.LooksLikeGlob(frag):
n = self.globber.Expand(frag, argv)
if n < 0:
# TODO: location info, with span IDs carried through the frame
raise error.FailGlob('Pattern %r matched no files' % a,
loc.Missing)
elif will_glob:
argv.append(glob_.GlobUnescape(frag))
else:
argv.append(glob_.GlobUnescape(a))
argv.append(frag)

if 0:
log('---')
log('ARGV')
for i, arg in enumerate(argv):
log('(%d) %s', i, arg)
log('')

def _EvalWordToArgv(self, w):
# type: (CompoundWord) -> List[str]
Expand Down
Loading