Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start working on main ctapipe cli tool #2371

Closed
wants to merge 15 commits into from
68 changes: 68 additions & 0 deletions ctapipe/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from traitlets.config import Application

from .version import __version__


class MainTool(Application):
"""
Main entry point for ctapipe, provides other tools as subcommands
"""

name = "ctapipe"
version = __version__

subcommands = {
"process": (
kosack marked this conversation as resolved.
Show resolved Hide resolved
"ctapipe.tools.process.ProcessorTool",
"ctapipe event-wise data processing",
),
"apply-models": (
"ctapipe.tools.apply_models.ApplyModels",
"Apply trained machine learning models",
),
"train": (
"ctapipe.tools.train.TrainTool",
"train various reconstruction models",
),
"merge": (
"ctapipe.tools.merge.MergeTool",
"Merge multiple ctapipe output files into one",
),
"fileinfo": (
"ctapipe.tools.fileinfo.FileInfoTool",
"Obtain metadata and other information from ctapipe output files",
),
"info": (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing "fileinfo" tool

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed a few commits to convert ctapipe-fileinfo into a Tool and include it in this list. So this should be fixed now.

"ctapipe.tools.info.InfoTool",
"Print information about ctapipe and the current installation",
),
"quickstart": (
"ctapipe.tools.quickstart.QuickStartTool",
"Create a directory with example configuration files",
),
"dump-instrument": (
"ctapipe.tools.dump_instrument.DumpInstrumentTool",
"read the subarray description from any file with an EventSource\nand write "
"it to a set of output files.",
),
}

def start(self):
    """
    Run the selected sub-command.

    When no sub-command was resolved, report the first unrecognised
    argument (if any), list the available sub-commands and exit with
    status 1.
    """
    no_subcommand = self.subapp is None

    if no_subcommand:
        if self.extra_args:
            unknown = self.extra_args[0]
            print(f"Unknown sub-command {unknown}\n\n")

        self.print_subcommands()
        self.exit(1)

    self.subapp.run()

def write_provenance(self):
    """Intentionally empty: this tool should not write any provenance log."""
    return None


def main():
    """Command-line entry point: launch an instance of ``MainTool``."""
    MainTool.launch_instance()


if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion ctapipe/core/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,9 @@ def run(self, argv=None, raises=False):

# check for any traitlets warnings using our custom handler
if len(self.trait_warning_handler.errors) > 0:
raise ToolConfigurationError("Found config errors")
raise ToolConfigurationError(
f"Found config errors: {self.trait_warning_handler.errors}"
)

# remove handler to not impact performance with regex matching
self.log.removeHandler(self.trait_warning_handler)
Expand Down
197 changes: 130 additions & 67 deletions ctapipe/tools/fileinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
Display information about ctapipe output files (DL1 or DL2)
"""

from inspect import cleandoc
from pathlib import Path

import tables
import yaml
from astropy.table import Table
from ctapipe.tools.utils import get_parser

from ..core import Tool, traits
from ..core.provenance import Provenance


def unflatten(dictionary, separator=" "):
""" turn flattened dict keys into nested """
"""turn flattened dict keys into nested"""
hierarch_dict = dict()
for key, value in dictionary.items():
parts = key.split(separator)
Expand All @@ -23,79 +27,138 @@ def unflatten(dictionary, separator=" "):
return hierarch_dict


def fileinfo(args):
"""
Display information about ctapipe output files (DL1 or DL2 in HDF5 format).
Optionally create an index table from all headers
"""

files = [] # accumulated info for table output

for filename in args.files:
info = {}

# prevent failure if a non-file is given (e.g. a directory)
if Path(filename).is_file() is False:
info[filename] = "not a file"

elif tables.is_hdf5_file(filename) is not True:
info[filename] = "unknown file type"
else:
try:
with tables.open_file(filename, mode="r") as infile:
# pylint: disable=W0212,E1101
attrs = {
name: str(infile.root._v_attrs[name])
for name in infile.root._v_attrs._f_list()
}
if args.flat:
info[filename] = attrs.copy()
else:
info[filename] = unflatten(attrs)
class FileInfoTool(Tool):
"""Extract metadata and other information from ctapipe output files"""

if args.output_table:
attrs["PATH"] = filename
files.append(attrs)
name = "ctapipe-fileinfo"
description = __doc__
examples = cleandoc(
"""To get YAML output of all metadata in HDF5 files in the
current directory

except tables.exceptions.HDF5ExtError as err:
info[filename] = f"ERROR {err}"
> ctapipe fileinfo *.h5

print(yaml.dump(info, indent=4))
Generate an index table of all metadata: Note that you can
use any table format allowed by astropy.table. However, formats
with metadata like fits or ecsv are recommended.

if args.output_table:
if args.output_table.endswith(".fits") or args.output_table.endswith(
".fits.gz"
):
files = [{k: v.encode("utf-8") for k, v in info.items()} for info in files]
> ctapipe fileinfo --output-table index.fits *.h5"""
)

table = Table(files)
table.write(args.output_table, format=args.table_format, overwrite=True)
# Files to inspect. ``setup`` validates the positional command-line
# arguments with this trait and appends them to the list.
input_files = traits.List(
    traits.Path(exists=True, directory_ok=False),
    default_value=[],
    help=(
        "Input ctapipe HDF5 files. These can also be "
        "specified as positional command-line arguments."
    ),
).tag(config=True)

# When set, ``start`` reports each file's attributes as a flat mapping
# instead of nesting them with ``unflatten``.
flat = traits.Bool(False, help="Flatten metadata hierarchy").tag(config=True)

# Optional index table. The default of ``None`` disables table output in
# ``start``.
output_table = traits.Path(
    None,
    exists=False,
    directory_ok=False,
    file_ok=True,
    allow_none=True,
    help=(
        "Filename of output index table with all file information. "
        "This can be in any format supported by astropy.table. The output format is "
        "guessed from the filename, or you can specify it explicitly using the "
        "table_format option. E.g: 'index.ecsv', 'index.fits', 'index.html'. "
    ),
).tag(config=True)

# Passed as ``format=`` to ``Table.write`` in ``start``; ``None`` leaves the
# choice of format to the writer.
table_format = traits.Unicode(
    None,
    allow_none=True,
    help="Table format for output-table if not automatically guessed from the filename",
).tag(config=True)

# Command-line aliases, given as (short, long) option names, for the traits
# declared above.
aliases = {
    ("i", "input-files"): "FileInfoTool.input_files",
    ("T", "table-format"): "FileInfoTool.table_format",
    ("o", "output-table"): "FileInfoTool.output_table",
}

# Command-line switch that sets ``flat`` to True.
flags = {
    "flat": ({"FileInfoTool": {"flat": True}}, "Flatten metadata hierarchy"),
}

def setup(self):
# Get input Files from positional arguments
positional_input_files = self.__class__.input_files.validate_elements(
self, self.extra_args
)
self.input_files.extend(positional_input_files)

def start(self):
    """
    Print the metadata of each input file as YAML and optionally index it.

    For every entry of ``input_files`` a one-key mapping (filename ->
    result) is printed: the root attributes of the HDF5 file (nested via
    ``unflatten`` unless ``flat`` is set), or a short message when the
    path is not a file, is not HDF5, or cannot be opened.

    If ``output_table`` is set, the attributes of all readable files
    (plus a ``PATH`` column) are written to that table.
    """
    table_rows = []  # one attribute dict per readable file, for the index table

    for input_path in self.input_files:
        filename = str(input_path)
        report = {}

        if Path(filename).is_file() is False:
            # e.g. a directory: report it instead of failing
            report[filename] = "not a file"
        elif tables.is_hdf5_file(filename) is not True:
            report[filename] = "unknown file type"
        else:
            try:
                with tables.open_file(filename, mode="r") as h5file:
                    Provenance().add_input_file(
                        filename, role="ctapipe-fileinfo input file"
                    )
                    # pylint: disable=W0212,E1101
                    root_attrs = h5file.root._v_attrs
                    attrs = {
                        key: str(root_attrs[key]) for key in root_attrs._f_list()
                    }
                    report[filename] = (
                        attrs.copy() if self.flat else unflatten(attrs)
                    )

                    if self.output_table:
                        attrs["PATH"] = filename
                        table_rows.append(attrs)
            except tables.exceptions.HDF5ExtError as err:
                report[filename] = f"ERROR {err}"

        print(yaml.dump(report, indent=4))

    if not self.output_table:
        return

    if ".fits" in self.output_table.suffixes:
        # FITS output needs the values encoded as bytes, otherwise the
        # conversion fails
        table_rows = [
            {key: value.encode("utf-8") for key, value in row.items()}
            for row in table_rows
        ]

    index = Table(table_rows)
    index.write(
        self.output_table, format=self.table_format, overwrite=self.overwrite
    )
    Provenance().add_output_file(self.output_table, role="ctapipe-fileinfo table")

def finish(self):
    """Intentionally a no-op."""
    return None


def main():
""" display info """
parser = get_parser(fileinfo)
parser.add_argument(
"files",
metavar="FILENAME",
type=str,
nargs="+",
help="filenames of files in ctapipe format",
)
parser.add_argument(
"-o", "--output-table", help="generate output file in tabular format"
)
parser.add_argument(
"-T",
"--table-format",
help="table format of output-table if not automatically guessed from filename",
)
parser.add_argument(
"-f", "--flat", action="store_true", help="show flat header hierarchy"
)
args = parser.parse_args()

fileinfo(args)
"""display info"""
tool = FileInfoTool()
tool.run()


if __name__ == "__main__":
Expand Down
Loading