Skip to content

Commit

Permalink
Allow a field's max-size to be specified in the entity (#452)
Browse files Browse the repository at this point in the history
* Plumb tags through everywhere

* foo

* add test

* fill in thrift -> entity tags handling

* foo

* fix tests

* add CHANGELOG entry

* clean up error message

* oops
  • Loading branch information
phliar authored Nov 3, 2020
1 parent 9a89d8e commit 7e37551
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 25 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Changelog

## v3.4.30 (unreleased)
- Nothing changed yet.

- Plumb through FieldTag into the YARPC connector.
- Add field tag "maxlen" to define the max. size of a field.

## v3.4.29 (2020-06-05)
- Make fallback cache Scan API call connector Scan API instead of Range

Expand Down
28 changes: 26 additions & 2 deletions connectors/yarpc/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,30 @@ func RPCTypeToClientType(t dosarpc.ElemType) dosa.Type {
panic("bad type")
}

// RPCTagsFromClientTags converts tags to the RPC version.
func RPCTagsFromClientTags(tags map[string]string) []*dosarpc.FieldTag {
rpcTags := make([]*dosarpc.FieldTag, 0, len(tags))
for k, v := range tags {
name := k
value := v
ft := dosarpc.FieldTag{Name: &name, Value: &value}
rpcTags = append(rpcTags, &ft)
}
return rpcTags
}

// RPCTagsToClientTags converts thrift tags to a map.
func RPCTagsToClientTags(rpcTags []*dosarpc.FieldTag) map[string]string {
if len(rpcTags) == 0 {
return nil
}
tags := map[string]string{}
for _, t := range rpcTags {
tags[*t.Name] = *t.Value
}
return tags
}

// PrimaryKeyToThrift converts the dosa primary key to the thrift primary key type
func PrimaryKeyToThrift(key *dosa.PrimaryKey) *dosarpc.PrimaryKey {
ck := make([]*dosarpc.ClusteringKey, len(key.ClusteringKeys))
Expand Down Expand Up @@ -234,7 +258,7 @@ func entityDefToThrift(ed *dosa.EntityDefinition) *dosarpc.EntityDefinition {
fd := make(map[string]*dosarpc.FieldDesc, len(ed.Columns))
for _, column := range ed.Columns {
rpcType := RPCTypeFromClientType(column.Type)
fd[column.Name] = &dosarpc.FieldDesc{Type: &rpcType}
fd[column.Name] = &dosarpc.FieldDesc{Type: &rpcType, Tags: RPCTagsFromClientTags(column.Tags)}
cols = append(cols, column.Name)
}

Expand Down Expand Up @@ -287,7 +311,7 @@ func FromThriftToEntityDefinition(ed *dosarpc.EntityDefinition) *dosa.EntityDefi
fields[i] = &dosa.ColumnDefinition{
Name: colName,
Type: RPCTypeToClientType(*v.Type),
// TODO Tag
Tags: RPCTagsToClientTags(v.Tags),
}
}

Expand Down
3 changes: 3 additions & 0 deletions connectors/yarpc/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ var testEntityDefinition = &dosa.EntityDefinition{
{
Name: stringKeyField,
Type: dosa.String,
Tags: map[string]string{"maxlen": "255"},
},
{
Name: int64KeyField,
Expand All @@ -160,6 +161,7 @@ var testEntityDefinition = &dosa.EntityDefinition{
{
Name: blobField,
Type: dosa.Blob,
Tags: map[string]string{"maxlen": "1000"},
},
{
Name: timestampField,
Expand All @@ -176,6 +178,7 @@ var testEntityDefinition = &dosa.EntityDefinition{
{
Name: stringField,
Type: dosa.String,
Tags: map[string]string{"maxlen": "100"},
},
{
Name: int64Field,
Expand Down
4 changes: 1 addition & 3 deletions entity.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ type ColumnDefinition struct {
Name string // normalized column name
Type Type
IsPointer bool // used by client only to indicate whether this field is pointer
// TODO: change as need to support tags like pii, etc
// currently it's in the form of a map from tag name to (optional) tag value
Tags map[string]string
Tags map[string]string
}

// Clone returns a deep copy of ColumnDefinition
Expand Down
87 changes: 73 additions & 14 deletions entity_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ var (
ttlPattern = regexp.MustCompile(`ttl\s*=\s*(\S*)`)

indexType = reflect.TypeOf((*Index)(nil)).Elem()

multSpacePat = regexp.MustCompile(`\s+`)
equalsPat = regexp.MustCompile(`\s*=\s*`)
)

// parseClusteringKeys func parses the clustering key of DOSA object
Expand Down Expand Up @@ -110,7 +113,7 @@ func parsePrimaryKey(tableName, pkStr string) (*PrimaryKey, error) {
if !parensBalanced(pkStr) {
return nil, fmt.Errorf("unmatched parentheses: %q", pkStr)
}
// filter out "trailing comma and space"
// filter out trailing commas and whitespace
pkStr = strings.TrimRight(pkStr, ", ")
pkStr = strings.TrimSpace(pkStr)

Expand Down Expand Up @@ -323,7 +326,7 @@ func parseIndexTag(indexName, dosaAnnotation string) (string, *PrimaryKey, []str
return name, key, indexColumns, nil
}

// parseNameTag functions parses DOSA "name" tag
// parseNameTag recognises "name = word," at the beginning of tag. The matched part and the tag are both returned.
func parseNameTag(tag, defaultName string) (string, string, error) {
fullNameTag := ""
name := defaultName
Expand All @@ -334,7 +337,7 @@ func parseNameTag(tag, defaultName string) (string, string, error) {
name = matches[1]
}

// filter out "trailing comma"
// filter out any trailing commas
name = strings.TrimRight(name, " ,")

var err error
Expand Down Expand Up @@ -384,7 +387,7 @@ func parseETLTag(tag string) (string, ETLState, error) {
return "", EtlOff, nil
}

// filter out "trailing comma" and convert to lowercase
// filter out trailing commas and convert to lowercase
etlTag = strings.ToLower(strings.TrimRight(etlTag, " ,"))

etlState, err := ToETLState(etlTag)
Expand All @@ -409,7 +412,7 @@ func parseTTLTag(tag string) (string, time.Duration, error) {
ttlTag = matches[1]
}

// filter out "trailing comma"
// filter out trailing commas
ttlTag = strings.TrimRight(ttlTag, " ,")
ttl, err := time.ParseDuration(ttlTag)
if err != nil {
Expand Down Expand Up @@ -472,7 +475,7 @@ func parseEntityTag(structName, dosaAnnotation string) (string, time.Duration, E
return name, ttl, etlState, key, nil
}

// parseFieldTag function parses DOSA tag on the fields in the DOSA struct except the "Entity" field
// parseFieldTag parses the tag on a field.
func parseFieldTag(structField reflect.StructField, dosaAnnotation string) (*ColumnDefinition, error) {
typ, isPointer, err := typify(structField.Type)
if err != nil {
Expand All @@ -481,20 +484,76 @@ func parseFieldTag(structField reflect.StructField, dosaAnnotation string) (*Col
return parseField(typ, isPointer, structField.Name, dosaAnnotation)
}

func parseField(typ Type, isPointer bool, name string, tag string) (*ColumnDefinition, error) {
// parse name tag
fullNameTag, name, err := parseNameTag(tag, name)
// The only accepted tags here are "name" and "maxlen". The value of "name" is normalized.
func parseField(typ Type, isPointer bool, name string, tagstr string) (*ColumnDefinition, error) {
origName := name
tags, err := getTags(tagstr)
if err != nil {
// parseNameTag returns a sane error.
return nil, err
}

tag = strings.Replace(tag, fullNameTag, "", 1)
if strings.TrimSpace(tag) != "" {
return nil, fmt.Errorf("field %s with an invalid dosa field tag: %s", name, tag)
// Handle each tag.
for tag, value := range tags {
switch tag {
case "name":
name = value
case "maxlen":
// No action needed.
default:
return nil, errors.Errorf("invalid dosa field tag '%s' on field '%s'", tag, origName)
}
}

// Normalize the name, and remove any "name" tag from the map.
name, err = NormalizeName(name)
if err != nil {
return nil, errors.Wrapf(err, "invalid dosa field tag '%s' on field '%s'", name, origName)
}
delete(tags, "name")

if len(tags) == 0 {
tags = nil
}
return &ColumnDefinition{Name: name, IsPointer: isPointer, Type: typ, Tags: tags}, nil
}

// Split a string that looks like " key1 = value1 key2=value2 , key3=value3 " into a key-value map. No
// special characters are allowed, alphanumeric only. Either commas or spaces may be used to separate
// tags.
func getTags(tagstr string) (map[string]string, error) {
// Convert commas to spaces and remove extra spaces.
tagstr = canonicalize(tagstr)
if tagstr == "" {
return nil, nil
}

tags := map[string]string{}
sections := strings.Split(tagstr, " ")

// Ensure each section is of the form name=value.
for _, s := range sections {
parts := strings.Split(s, "=")
if len(parts) != 2 {
return nil, errors.Errorf("invalid dosa field tag '%s'", s)
}

tags[parts[0]] = parts[1]
}

return tags, nil
}

func canonicalize(s string) string {
// Convert commas to space.
s = strings.Replace(s, ",", " ", -1)

// Remove spaces around the equals sign.
s = string(equalsPat.ReplaceAll([]byte(s), []byte("=")))

// Fold multiple spaces to singles.
s = string(multSpacePat.ReplaceAll([]byte(s), []byte(" ")))

return &ColumnDefinition{Name: name, IsPointer: isPointer, Type: typ}, nil
return strings.TrimSpace(s)
}

func parensBalanced(s string) bool {
Expand Down
2 changes: 1 addition & 1 deletion entity_parser_key_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ func TestEntityParse(t *testing.T) {
PartitionKeys: []string{"ok"},
ClusteringKeys: nil,
},
Error: "unknown unit d in duration",
Error: `unknown unit "d" in duration`,
ETL: EtlOff,
TTL: NoTTL(),
},
Expand Down
51 changes: 48 additions & 3 deletions entity_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@
package dosa

import (
"io"
"testing"

"time"

"io"

"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -607,6 +605,53 @@ func TestParensBalanced(t *testing.T) {
assert.False(t, parensBalanced("((()())"))
}

func TestGetTags(t *testing.T) {
testCases := []struct {
tag string
exp string
err string
}{
{
tag: "",
exp: "{}",
},
{
tag: "a=b",
exp: "{a: b}",
},
{
tag: "a=b,c=d",
exp: "{a: b, c: d}",
},
{
tag: " a=b , c=d",
exp: "{a: b, c: d}",
},
{
tag: "a = b,c = d ",
exp: "{a: b, c: d}",
},
{
tag: "c",
err: "invalid dosa field tag 'c'",
},
{
tag: "a=b=c",
err: "invalid dosa field tag 'a=b=c'",
},
}
for _, tc := range testCases {
tags, err := getTags(tc.tag)
if tc.err == "" {
assert.Nil(t, err)
assert.Equal(t, tc.exp, deterministicPrintMap(tags))
} else {
assert.NotNil(t, err)
assert.Contains(t, err.Error(), tc.err)
}
}
}

/*
These tests do not currently pass, but I think they should
*/
Expand Down

0 comments on commit 7e37551

Please sign in to comment.