Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to seed processing script #1100

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 68 additions & 22 deletions gfauto/gfauto/process_seeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@


def process_chunk( # pylint: disable=too-many-locals;
chunk_num: int, chunk: Set[str], log_files: List[Path], output_file: TextIO
chunk_num: int, chunk: Set[str], log_files: List[Path], only_show_num_signatures: bool, look_for_spirv_opt_errors: bool, look_for_llpc_errors: bool, output_file: TextIO
) -> None:

log(f"\nChunk {chunk_num}:")
output_file.write(f"\nChunk {chunk_num}:\n")
if not only_show_num_signatures:
output_file.write(f"\nChunk {chunk_num}:\n")

unique_signatures: Set[str] = set()

Expand All @@ -50,30 +51,52 @@ def process_chunk( # pylint: disable=too-many-locals;
end_line = 0
found_bug = False
for i, line in enumerate(lines):
match = re.fullmatch(r"STATUS (\w+)\n", line)
if not match:
continue
status = match.group(1)
if status == "SUCCESS":
start_line = i + 1
continue
found_bug = True
end_line = i + 1
break
if look_for_spirv_opt_errors:
match = re.match(r"Command failed: \['/usr/bin/catchsegv', .*spirv-opt'", line)
if match:
found_bug = True
start_line = i + 1
# TODO: Is there a reliable way to find the end of a spirv-opt failure?
Comment on lines +58 to +59
Copy link
Collaborator

@paulthomson paulthomson Nov 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe:

  • Don't break once a bug is found (for the cases you added).
  • If found_bug is set, handle the remaining lines differently:
    • Look for "Exec (verbose): ..." to find the next "reduce" command, set the end line based on this, and then break.
    • If we find "gfauto_interestingness_test: finished ...", then the output above it is actually output from part of a reduction, so something has gone wrong.
    • If we find nothing else and reach the end of the file, that is also fine: it just means no reduction was run and the file ends immediately after the "Command failed:" output.

end_line = len(lines) - 1
break
elif look_for_llpc_errors:
match = re.match(r"Command failed: \['/usr/bin/catchsegv', '.*amdllpc", line)
if match:
found_bug = True
start_line = i + 1
# TODO: Is there a reliable way to find the end of an LLPC failure?
end_line = len(lines) - 1
break
else:
match = re.fullmatch(r"STATUS (\w+)\n", line)
if not match:
continue
status = match.group(1)
if status == "SUCCESS":
start_line = i + 1
continue
found_bug = True
end_line = i + 1
break

if not found_bug:
continue

failure_log = "\n".join(lines[start_line:end_line])
failure_log = "".join(lines[start_line:end_line])

signature = signature_util.get_signature_from_log_contents(failure_log)

unique_signatures.add(signature)

# Print the signatures.
for signature in sorted(unique_signatures):
log(signature)
output_file.write(f"{signature}\n")
if only_show_num_signatures:
# Print number of unique signatures.
log(str(len(unique_signatures)) + '\n')
output_file.write(f"{len(unique_signatures)}\n")
else:
# Print the unique signatures.
for signature in sorted(unique_signatures):
log(signature)
output_file.write(f"{signature}\n")


def main() -> None:
Expand All @@ -87,30 +110,53 @@ def main() -> None:
"--out", help="Output file.", default="signatures_chunked.txt",
)

parser.add_argument(
"--chunk_size", help="Chunk size.", type=int, default=1_000,

)

parser.add_argument(
"--only_show_num_signatures", help="Output only the number of distinct signatures per chunk, one integer per line. This is useful for gathering statistics.", default=False, action='store_true'
)

parser.add_argument(
"--spirv_opt", help="Look for spirv-opt errors.", default=False, action='store_true'
)

parser.add_argument(
"--llpc", help="Look for LLPC errors.", default=False, action='store_true'
)

parsed_args = parser.parse_args(sys.argv[1:])

seed_file: Path = Path(parsed_args.seed_file)
output_file: Path = Path(parsed_args.out)
chunk_size: int = parsed_args.chunk_size
only_show_num_signatures: bool = parsed_args.only_show_num_signatures
look_for_spirv_opt_errors: bool = parsed_args.spirv_opt
look_for_llpc_errors: bool = parsed_args.llpc

check(not (look_for_spirv_opt_errors and look_for_llpc_errors), AssertionError("At most one of --spirv_opt and --llpc can be used"))

# Get a list of all log files.
log_files: List[Path] = sorted(Path().glob("log_*.txt"))

# Get chunks of seeds and call process_chunk.
seeds: List[str] = util.file_read_text(seed_file).split()

check(len(seeds) == 10_000, AssertionError("Expected 10,000 seeds."))
check((len(seeds) % chunk_size) == 0, AssertionError("The number of seeds should be a multiple of chunk_size."))

with util.file_open_text(output_file, "w") as output:
index = 0
for chunk_num in range(0, 10):
for chunk_num in range(0, len(seeds) // chunk_size):
chunk: Set[str] = set()
for _ in range(0, 1_000):
for _ in range(0, chunk_size):
chunk.add(seeds[index])
index += 1
process_chunk(chunk_num, chunk, log_files, output)
process_chunk(chunk_num, chunk, log_files, only_show_num_signatures, look_for_spirv_opt_errors, look_for_llpc_errors, output)

check(
index == 10_000, AssertionError("Expected to have processed 10,000 seeds.")
index == len(seeds), AssertionError("Expected to have processed all seeds.")
)


Expand Down