Skip to content

Commit

Permalink
Merge branch 'HewlettPackard:master' into Tensorflow
Browse files Browse the repository at this point in the history
  • Loading branch information
varkha-d-sharma authored Feb 21, 2024
2 parents 7ab2b03 + 72734c8 commit 867c5fb
Show file tree
Hide file tree
Showing 12 changed files with 386 additions and 35 deletions.
317 changes: 297 additions & 20 deletions cmflib/cmf.py

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion cmflib/cmf_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def parse_json_to_mlmd(mlmd_json, path_to_store, cmd, exec_id):
)
elif artifact_type == "Metrics":
# print(props,'parse')
# cmf_class.log_execution_metrics_with_uuid(props, custom_props)
cmf_class.log_execution_metrics_from_client(event["artifact"]["name"], custom_props)
elif artifact_type == "Dataslice":
dataslice = cmf_class.create_dataslice(event["artifact"]["name"])
Expand Down
32 changes: 26 additions & 6 deletions cmflib/cmfquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ class CmfQuery(object):
by users via CMF API. When methods in this class accept `name` parameters, it is expected that values of these
parameters are fully-qualified names of respective entities.
TODO: (sergey) need to provide concrete examples and detailed description on how to actually use methods of this
class correctly, e.g., how to determine these fully-qualified names.
Args:
filepath: Path to the MLMD database file.
Expand Down Expand Up @@ -648,7 +646,12 @@ def get_all_child_artifacts(self, artifact_name: str) -> pd.DataFrame:
return df

def get_one_hop_parent_artifacts(self, artifact_name: str) -> pd.DataFrame:
"""Return input artifacts for the execution that produced the given artifact."""
"""Return input artifacts for the execution that produced the given artifact.
Args:
artifact_name: Artifact name.
Returns:
Data frame containing immediate parent artifacts of given artifact.
"""
artifact: t.Optional = self._get_artifact(artifact_name)
if not artifact:
return pd.DataFrame()
Expand All @@ -660,7 +663,12 @@ def get_one_hop_parent_artifacts(self, artifact_name: str) -> pd.DataFrame:
)

def get_all_parent_artifacts(self, artifact_name: str) -> pd.DataFrame:
"""Return all upstream artifacts."""
"""Return all upstream artifacts.
Args:
artifact_name: Artifact name.
Returns:
Data frame containing all parent artifacts.
"""
df = pd.DataFrame()
d1 = self.get_one_hop_parent_artifacts(artifact_name)
# df = df.append(d1, sort=True, ignore_index=True)
Expand All @@ -673,7 +681,12 @@ def get_all_parent_artifacts(self, artifact_name: str) -> pd.DataFrame:
return df

def get_all_parent_executions(self, artifact_name: str) -> pd.DataFrame:
"""Return all executions that produced upstream artifacts for the given artifact."""
"""Return all executions that produced upstream artifacts for the given artifact.
Args:
artifact_name: Artifact name.
Returns:
Data frame containing all parent executions.
"""
parent_artifacts: pd.DataFrame = self.get_all_parent_artifacts(artifact_name)
if parent_artifacts.shape[0] == 0:
# If it's empty, there's no `id` column and the code below raises an exception.
Expand Down Expand Up @@ -728,7 +741,12 @@ def find_producer_execution(self, artifact_name: str) -> t.Optional[mlpb.Executi
get_producer_execution = find_producer_execution

def get_metrics(self, metrics_name: str) -> t.Optional[pd.DataFrame]:
"""Return metric data frame."""
"""Return metric data frame.
Args:
metrics_name: Metrics name.
Returns:
Data frame containing all metrics.
"""
for metric in self.store.get_artifacts_by_type("Step_Metrics"):
if metric.name == metrics_name:
name: t.Optional[str] = metric.custom_properties.get("Name", None)
Expand All @@ -749,6 +767,8 @@ def dumptojson(self, pipeline_name: str, exec_id: t.Optional[int] = None) -> t.O
Args:
pipeline_name: Name of an AI pipeline.
exec_id: Optional stage execution ID - filter stages by this execution ID.
Returns:
Pipeline in JSON format.
"""
if exec_id is not None:
exec_id = int(exec_id)
Expand Down
3 changes: 2 additions & 1 deletion cmflib/commands/artifact/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def extract_repo_args(self, type: str, name: str, url: str, current_directory: s
# url_with_bucket = varkha-test/23/6d9502e0283d91f689d7038b8508a2
# Splitting the string using '/' as the delimiter
bucket_name, object_name = url_with_bucket.split('/', 1)
download_loc = current_directory + "/" + name
download_loc = current_directory + "/" + name if current_directory != "" else name
print(download_loc)
return bucket_name, object_name, download_loc
else:
# returning bucket_name, object_name and download_loc returning as empty
Expand Down
4 changes: 2 additions & 2 deletions cmflib/commands/init/amazonS3.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,12 @@ def add_parser(subparsers, parent_parser):
default=argparse.SUPPRESS,
)

required_arguments.add_argument(
parser.add_argument(
"--session-token",
required=True,
help="Specify Session Token.",
metavar="<session_token>",
default=argparse.SUPPRESS,
default="",
)

required_arguments.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion cmflib/dvc_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def dvc_push() -> str:
process = subprocess.Popen(['dvc', 'push'],
stdout=subprocess.PIPE,
universal_newlines=True)
output, errs = process.communicate(timeout=60)
output, errs = process.communicate()
commit = output.strip()

except Exception as err:
Expand Down
4 changes: 3 additions & 1 deletion cmflib/storage_backends/amazonS3_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def download_artifacts(
aws_session_token=session_token
)
s3.head_bucket(Bucket=bucket_name)
dir_path, _ = download_loc.rsplit("/", 1)
dir_path = ""
if "/" in download_loc:
dir_path, _ = download_loc.rsplit("/", 1)
if dir_path != "":
os.makedirs(dir_path, mode=0o777, exist_ok=True) # creating subfolders if needed
response = s3.download_file(bucket_name, object_name, download_loc)
Expand Down
24 changes: 23 additions & 1 deletion docs/api/public/cmf.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# cmflib.cmf.Cmf
# cmflib.cmf

::: cmflib.cmf
options:
show_root_toc_entry: false
merge_init_into_class: true
docstring_style: google
members:
- metadata_push
- metadata_pull
- artifact_pull
- artifact_pull_single
- artifact_push
- cmf_init_show
- cmf_init

::: cmflib.cmf.Cmf
options:
Expand All @@ -8,9 +22,17 @@
members:
- __init__
- create_context
- merge_created_context
- create_execution
- update_execution
- merge_created_execution
- log_dataset
- log_dataset_with_version
- log_model
- log_model_with_version
- log_execution_metrics_from_client
- log_execution_metrics
- log_metric
- commit_existing_metrics
- create_dataslice
- update_dataslice
29 changes: 29 additions & 0 deletions docs/api/public/cmfquery.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# cmflib.cmfquery.CmfQuery

::: cmflib.cmfquery.CmfQuery
options:
show_root_toc_entry: false
merge_init_into_class: true
docstring_style: google
members:
- get_pipeline_names
- get_pipeline_id
- get_pipeline_stages
- get_all_exe_in_stage
- get_all_executions_by_ids_list
- get_all_artifacts_by_context
- get_all_artifacts_by_ids_list
- get_all_executions_in_stage
- get_artifact_df
- get_all_artifacts
- get_artifact
- get_all_artifacts_for_execution
- get_all_artifact_types
- get_all_executions_for_artifact
- get_one_hop_child_artifacts
- get_all_child_artifacts
- get_one_hop_parent_artifacts
- get_all_parent_artifacts
- get_all_parent_executions
- get_metrics
- dumptojson
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ nav:
- Public API:
- CMF: api/public/cmf.md
- DataSlice: api/public/dataslice.md
- CmfQuery: api/public/cmfquery.md
- Ontology:
- Ontology: common-metadata-ontology/readme.md

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cmflib"
version = "0.0.7"
version = "0.0.8"
dependencies = [
"ml-metadata==1.11.0",
"dvc[ssh,s3]==2.27.0",
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

VERSION = '0.0.7'
VERSION = '0.0.8'
DESCRIPTION = 'Metadata Python Package'
LONG_DESCRIPTION = 'Metadata framework storing AI metadata into MLMD'

Expand Down

0 comments on commit 867c5fb

Please sign in to comment.