Skip to content

Commit

Permalink
Adds Schema class and modifies schema handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chalmerlowe committed Nov 27, 2024
1 parent 48c8cc6 commit 47474a9
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 31 deletions.
4 changes: 2 additions & 2 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3680,7 +3680,7 @@ def insert_rows(
if selected_fields is not None:
schema = selected_fields

if len(schema) == 0:
if not schema:
raise ValueError(
(
"Could not determine schema for table '{}'. Call client.get_table() "
Expand Down Expand Up @@ -4029,7 +4029,7 @@ def list_rows(

# No schema, but no selected_fields. Assume the developer wants all
# columns, so get the table resource for them rather than failing.
elif len(schema) == 0:
elif not schema:
table = self.get_table(table.reference, retry=retry, timeout=timeout)
schema = table.schema

Expand Down
121 changes: 101 additions & 20 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,40 +547,66 @@ def _build_schema_resource(fields):
"""
return [field.to_api_repr() for field in fields]


def _to_schema_fields(schema):
"""Coerce `schema` to a list of schema field instances.
Args:
schema(Sequence[Union[ \
:class:`~google.cloud.bigquery.schema.SchemaField`, \
Mapping[str, Any] \
]]):
Table schema to convert. If some items are passed as mappings,
their content must be compatible with
:meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
Returns:
Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
"""TODO docstring
QUESTION: do we want a flag to force the generation of a Schema object?
Raises:
Exception: If ``schema`` is not a sequence, or if any item in the
sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.
CAST a list of elements to either:
* a Schema object with SchemaFields and an attribute
* a list of SchemaFields but no attribute
"""

for field in schema:
if not isinstance(field, (SchemaField, collections.abc.Mapping)):
raise ValueError(
"Schema items must either be fields or compatible "
"mapping representations."
)


if isinstance(schema, Schema):
schema = Schema([
field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
for field in schema
], foreign_type_info=schema.foreign_type_info)
return schema
return [
field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
for field in schema
]

# OLD TO DELETE
# def _to_schema_fields(schema):
# """Coerce `schema` to a list of schema field instances.

# Args:
# schema(Sequence[Union[ \
# :class:`~google.cloud.bigquery.schema.SchemaField`, \
# Mapping[str, Any] \
# ]]):
# Table schema to convert. If some items are passed as mappings,
# their content must be compatible with
# :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

# Returns:
# Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]

# Raises:
# Exception: If ``schema`` is not a sequence, or if any item in the
# sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
# instance or a compatible mapping representation of the field.
# """

# for field in schema:
# if not isinstance(field, (SchemaField, collections.abc.Mapping)):
# raise ValueError(
# "Schema items must either be fields or compatible "
# "mapping representations."
# )

# return [
# field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
# for field in schema
# ]


class PolicyTagList(object):
"""Define Policy Tags for a column.
Expand Down Expand Up @@ -921,3 +947,58 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo:
config = cls("")
config._properties = copy.deepcopy(resource)
return config


class Schema:
    """List-like container of schema fields plus a ``foreign_type_info`` value.

    The fields and the foreign type info are both stored in the
    ``_properties`` dict (under the ``"_fields"`` and ``"foreignTypeInfo"``
    keys respectively).  The container supports ``len()``, indexing, item
    assignment and deletion, iteration, ``append`` and ``extend``, all of
    which operate directly on the internal list of fields.

    Args:
        fields: Optional iterable of schema fields.  It is copied into a new
            list; ``None`` yields an empty schema.
        foreign_type_info: Optional value stored under ``"foreignTypeInfo"``.
            It is validated by ``_isinstance_or_raise`` as a ``str`` with
            ``None`` allowed.
    """

    def __init__(self, fields=None, foreign_type_info=None):
        self._properties = {}
        # Copy into a fresh list so that mutating this Schema never mutates
        # the iterable supplied by the caller.
        self._fields = [] if fields is None else list(fields)
        self.foreign_type_info = foreign_type_info

    @property
    def foreign_type_info(self) -> Any:
        """The value stored under ``"foreignTypeInfo"``, or ``None`` if unset."""
        return self._properties.get("foreignTypeInfo")

    @foreign_type_info.setter
    def foreign_type_info(self, value: str) -> None:
        # NOTE(review): presumably raises when ``value`` is neither a str nor
        # None -- confirm against the ``_isinstance_or_raise`` helper.
        value = _isinstance_or_raise(value, str, none_allowed=True)
        self._properties["foreignTypeInfo"] = value

    @property
    def _fields(self) -> Any:
        """The internal list of fields stored under ``"_fields"``."""
        return self._properties.get("_fields")

    @_fields.setter
    def _fields(self, value: list) -> None:
        # NOTE(review): ``None`` is accepted here, but the container methods
        # below require a real list -- confirm whether None should be allowed.
        value = _isinstance_or_raise(value, list, none_allowed=True)
        self._properties["_fields"] = value

    def __len__(self):
        """Return the number of fields."""
        return len(self._properties["_fields"])

    def __getitem__(self, index):
        """Return the field (or slice of fields) at ``index``."""
        return self._properties["_fields"][index]

    def __setitem__(self, index, value):
        """Replace the field at ``index`` with ``value``."""
        self._properties["_fields"][index] = value

    def __delitem__(self, index):
        """Remove the field at ``index``."""
        del self._properties["_fields"][index]

    def __iter__(self):
        """Iterate over the fields in order."""
        return iter(self._properties["_fields"])

    def __str__(self):
        """Return the string form of the underlying list of fields."""
        return str(self._properties["_fields"])

    def __repr__(self):
        # Arguments are rendered in the same order as the constructor
        # signature (fields first, then foreign_type_info) so that the
        # representation reads as a valid ``Schema(...)`` call.
        return f"Schema({self._properties['_fields']!r}, {self.foreign_type_info!r})"

    def append(self, item):
        """Append ``item`` to the end of the fields."""
        self._properties["_fields"].append(item)

    def extend(self, iterable):
        """Append every item of ``iterable`` to the fields."""
        self._properties["_fields"].extend(iterable)
15 changes: 10 additions & 5 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.schema import Schema
from google.cloud.bigquery.external_config import ExternalCatalogTableOptions

if typing.TYPE_CHECKING: # pragma: NO COVER
Expand Down Expand Up @@ -451,10 +452,13 @@ def schema(self):
instance or a compatible mapping representation of the field.
"""
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
if not prop:
return []
else:
return _parse_schema_resource(prop)
if not prop: # if empty Schema, empty list, None
if prop is None:
return None
return prop
elif isinstance(prop, Schema):
return prop
return _parse_schema_resource(prop)

@schema.setter
def schema(self, value):
Expand Down Expand Up @@ -1336,7 +1340,8 @@ def _row_from_mapping(mapping, schema):
Raises:
ValueError: If schema is empty.
"""
if len(schema) == 0:

if not schema:
raise ValueError(_TABLE_HAS_NO_SCHEMA)

row = []
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from google.cloud.bigquery import ParquetOptions
import google.cloud.bigquery.retry
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from google.cloud.bigquery.schema import Schema
import google.cloud.bigquery.table

from test_utils.imports import maybe_fail_import
Expand Down Expand Up @@ -2608,7 +2609,8 @@ def test_update_table_w_schema_None(self):
sent = {"schema": None}
self.assertEqual(req[1]["data"], sent)
self.assertEqual(req[1]["path"], "/%s" % path)
self.assertEqual(len(updated_table.schema), 0)
valid_options = [Schema(), [], None]
self.assertIn(updated_table.schema, valid_options)

def test_update_table_delete_property(self):
from google.cloud.bigquery.table import Table
Expand Down
8 changes: 5 additions & 3 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from google.cloud.bigquery import exceptions
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor, Schema


def _mock_client():
Expand Down Expand Up @@ -499,7 +499,8 @@ def _verifyResourceProperties(self, table, resource):
if "schema" in resource:
self._verifySchema(table.schema, resource)
else:
self.assertEqual(table.schema, [])
valid_options = [Schema(), [], None]
self.assertIn(table.schema, valid_options)

if "externalDataConfiguration" in resource:
edc = table.external_data_configuration
Expand Down Expand Up @@ -536,7 +537,8 @@ def test_ctor(self):
"/projects/%s/datasets/%s/tables/%s"
% (self.PROJECT, self.DS_ID, self.TABLE_NAME),
)
self.assertEqual(table.schema, [])
valid_options = [Schema(), [], None]
self.assertIn(table.schema, valid_options)

self.assertIsNone(table.created)
self.assertIsNone(table.etag)
Expand Down

0 comments on commit 47474a9

Please sign in to comment.