Skip to content

Commit

Permalink
Fix handling of weird file names in git_status plugin (#1408)
Browse files Browse the repository at this point in the history
  • Loading branch information
Akuli authored Oct 13, 2023
1 parent 2f70548 commit a2ca2cd
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 1 deletion.
45 changes: 44 additions & 1 deletion porcupine/plugins/git_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import logging
import os
import re
import subprocess
import sys
import time
Expand All @@ -28,6 +29,48 @@
git_pool = ThreadPoolExecutor(max_workers=os.cpu_count())


# Assuming utf-8 file system encoding, git outputs "\303\266rkki\303\244inen.txt"
# with the quotes when it means örkkiäinen.txt.
#
# The \xxx means byte xxx specified as octal. First digit is 0-3 because the
# biggest possible byte value is 255, which is 0o377 octal.
#
# Because this would be too easy, Git also special-cases some characters. For
# example, tabs come out as \t rather than \011.
# Maps each short backslash escape that Git may emit inside a quoted path to
# the single byte it stands for. Git quotes paths the same way C escapes
# control characters, so every byte that has a one-letter escape in C shows up
# in that form instead of as \xxx octal. Keys are the escaped form (backslash
# included), values are the real byte.
_SPECIAL_ESCAPES = {
    b"\\a": b"\a",  # \a = bell (byte 7)
    b"\\b": b"\b",  # \b = backspace (byte 8)
    b"\\t": b"\t",  # \t = tab
    b"\\n": b"\n",  # \n = newline
    b"\\v": b"\v",  # \v = vertical tab (byte 11)
    b"\\f": b"\f",  # \f = form feed (byte 12)
    b"\\r": b"\r",  # \r = CR byte (part of CRLF newline: \r\n)
    b'\\"': b'"',  # \" = quote
    b"\\\\": b"\\",  # \\ = literal backslash (not path separator)
}

# Matches one escape sequence: either a three-digit octal byte (\000 - \377)
# or any of the short escapes above. Every alternative starts with a backslash
# and differs in the character after it, so the order of alternatives does not
# matter. An escaped backslash is consumed as a pair, which keeps the
# character following it from being mistaken for the start of an escape.
_ESCAPE_REGEX = rb"\\[0-3][0-7][0-7]|" + b"|".join(map(re.escape, _SPECIAL_ESCAPES.keys()))


def _handle_special_git_escape(match: re.Match[bytes]) -> bytes:
    """Return the single byte that one matched Git escape sequence stands for.

    Intended as the replacement callback for ``re.sub`` with ``_ESCAPE_REGEX``.
    The matched text is looked up in ``_SPECIAL_ESCAPES`` first; anything not
    found there is treated as a backslash followed by octal digits.
    """
    escape = match.group(0)
    replacement = _SPECIAL_ESCAPES.get(escape)
    if replacement is not None:
        return replacement

    # Not a short escape, so it is an octal one: b"\123" --> bytes([0o123])
    octal_digits = escape[1:]
    return bytes([int(octal_digits, 8)])


def _parse_ascii_path_from_git(ascii_str: str) -> Path:
    """Convert a path as printed by Git into a ``Path``.

    A string that is not wrapped in double quotes is used as is. A string
    wrapped in double quotes has the quotes removed and every escape sequence
    matched by ``_ESCAPE_REGEX`` replaced with the byte it represents; the
    resulting bytes are then decoded with the file system encoding.

    ``ascii_str`` must contain only ASCII characters.
    """
    assert ascii_str.isascii()

    is_quoted = ascii_str.startswith('"') and ascii_str.endswith('"')
    if not is_quoted:
        return Path(ascii_str)

    escaped_name = ascii_str[1:-1].encode("ascii")
    raw_name = re.sub(_ESCAPE_REGEX, _handle_special_git_escape, escaped_name)

    # Undecodable bytes are replaced rather than raising, so that one weird
    # file name does not prevent other files from working properly.
    fs_encoding = sys.getfilesystemencoding()
    return Path(raw_name.decode(fs_encoding, errors="replace"))


def run_git_status(project_root: Path) -> dict[Path, str]:
try:
start = time.perf_counter()
Expand Down Expand Up @@ -57,7 +100,7 @@ def run_git_status(project_root: Path) -> dict[Path, str]:
# Show .git as ignored, even though it actually isn't
result = {project_root / ".git": "git_ignored"}
for line in run_result.stdout.splitlines():
path = project_root / line[3:]
path = project_root / _parse_ascii_path_from_git(line[3:])
if line[1] == "M":
result[path] = "git_modified"
elif line[1] == " ":
Expand Down
33 changes: 33 additions & 0 deletions tests/test_git_status_plugin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import shutil
import subprocess
import sys
from functools import partial
from pathlib import Path

import pytest

from porcupine import get_tab_manager
from porcupine.plugins.git_status import run_git_status


@pytest.mark.skipif(shutil.which("git") is None, reason="git not found")
Expand All @@ -29,6 +31,37 @@ def test_added_and_modified_content(tree, tmp_path, monkeypatch):
assert set(tree.item(project_id, "tags")) == {"git_added"}


# File names used to parametrize test_funny_paths. The first group is used on
# every platform.
weird_filenames = [
    "foo bar.txt",
    "foo'bar.txt",
    "örkkimörkkiäinen.ö",
    "bigyó.txt",
    "2π.txt",
]

if sys.platform != "win32":
    weird_filenames.extend(
        [
            # Each "Windows-forbidden" character: https://stackoverflow.com/a/31976060
            "foo<bar.txt",
            "foo>bar.txt",
            "foo:bar.txt",
            'foo"bar.txt',
            r"foo\bar.txt",
            r"foo\123.txt",  # not a special escape code, only looks like it
            "foo|bar.txt",
            "foo?bar.txt",
            "foo*bar.txt",
            # Not mentioned in linked stackoverflow answer but still doesn't work on Windows
            "foo\tbar.txt",
            "foo\nbar.txt",
            "foo\rbar.txt",
        ]
    )


@pytest.mark.skipif(shutil.which("git") is None, reason="git not found")
@pytest.mark.parametrize("filename", weird_filenames)
def test_funny_paths(tmp_path, filename):
    """Check that a staged file with an unusual name is reported as added."""
    weird_file = tmp_path / filename
    weird_file.write_text("blah")

    # Create a repository around the file and stage everything in it.
    git_steps = (
        (["git", "init", "--quiet"], {"stdout": subprocess.DEVNULL}),
        (["git", "add", "."], {}),
    )
    for command, extra_options in git_steps:
        subprocess.check_call(command, cwd=tmp_path, **extra_options)

    # The key must be the real path, not Git's quoted/escaped spelling of it.
    assert run_git_status(tmp_path)[weird_file] == "git_added"


@pytest.mark.skipif(shutil.which("git") is None, reason="git not found")
def test_merge_conflict(tree, tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
Expand Down

0 comments on commit a2ca2cd

Please sign in to comment.