From 7e37551b3364d0eb4c7fd06976100da4b14486c7 Mon Sep 17 00:00:00 2001 From: Shamim Mohamed Date: Tue, 3 Nov 2020 12:00:48 -0800 Subject: [PATCH] Allow a field's max-size to be specified in the entity (#452) * Plumb tags through everywhere * foo * add test * fill in thrift -> entity tags handling * foo * fix tests * add CHANGELOG entry * clean up error message * oops --- CHANGELOG.md | 5 +- connectors/yarpc/helpers.go | 28 +++++++++- connectors/yarpc/helpers_test.go | 3 ++ entity.go | 4 +- entity_parser.go | 87 +++++++++++++++++++++++++++----- entity_parser_key_parser_test.go | 2 +- entity_parser_test.go | 51 +++++++++++++++++-- 7 files changed, 155 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9cbbb09..a04c9942 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # Changelog ## v3.4.30 (unreleased) - - Nothing changed yet. - + - Plumb through FieldTag into the YARPC connector. + - Add field tag "maxlen" to define the max. size of a field. + ## v3.4.29 (2020-06-05) - Make fallback cache Scan API call connector Scan API instead of Range diff --git a/connectors/yarpc/helpers.go b/connectors/yarpc/helpers.go index 724008c9..fd538eb8 100644 --- a/connectors/yarpc/helpers.go +++ b/connectors/yarpc/helpers.go @@ -185,6 +185,30 @@ func RPCTypeToClientType(t dosarpc.ElemType) dosa.Type { panic("bad type") } +// RPCTagsFromClientTags converts tags to the RPC version. +func RPCTagsFromClientTags(tags map[string]string) []*dosarpc.FieldTag { + rpcTags := make([]*dosarpc.FieldTag, 0, len(tags)) + for k, v := range tags { + name := k + value := v + ft := dosarpc.FieldTag{Name: &name, Value: &value} + rpcTags = append(rpcTags, &ft) + } + return rpcTags +} + +// RPCTagsToClientTags converts thrift tags to a map. +func RPCTagsToClientTags(rpcTags []*dosarpc.FieldTag) map[string]string { + if len(rpcTags) == 0 { + return nil + } + tags := map[string]string{} + for _, t := range rpcTags { + tags[*t.Name] = *t.Value + } + return tags +} + // PrimaryKeyToThrift converts the dosa primary key to the thrift primary key type func PrimaryKeyToThrift(key *dosa.PrimaryKey) *dosarpc.PrimaryKey { ck := make([]*dosarpc.ClusteringKey, len(key.ClusteringKeys)) @@ -234,7 +258,7 @@ func entityDefToThrift(ed *dosa.EntityDefinition) *dosarpc.EntityDefinition { fd := make(map[string]*dosarpc.FieldDesc, len(ed.Columns)) for _, column := range ed.Columns { rpcType := RPCTypeFromClientType(column.Type) - fd[column.Name] = &dosarpc.FieldDesc{Type: &rpcType} + fd[column.Name] = &dosarpc.FieldDesc{Type: &rpcType, Tags: RPCTagsFromClientTags(column.Tags)} cols = append(cols, column.Name) } @@ -287,7 +311,7 @@ func FromThriftToEntityDefinition(ed *dosarpc.EntityDefinition) *dosa.EntityDefi fields[i] = &dosa.ColumnDefinition{ Name: colName, Type: RPCTypeToClientType(*v.Type), - // TODO Tag + Tags: RPCTagsToClientTags(v.Tags), } } diff --git a/connectors/yarpc/helpers_test.go b/connectors/yarpc/helpers_test.go index f9963c2a..6a658850 100644 --- a/connectors/yarpc/helpers_test.go +++ b/connectors/yarpc/helpers_test.go @@ -143,6 +143,7 @@ var testEntityDefinition = &dosa.EntityDefinition{ { Name: stringKeyField, Type: dosa.String, + Tags: map[string]string{"maxlen": "255"}, }, { Name: int64KeyField, @@ -160,6 +161,7 @@ var testEntityDefinition = &dosa.EntityDefinition{ { Name: blobField, Type: dosa.Blob, + Tags: map[string]string{"maxlen": "1000"}, }, { Name: timestampField, @@ -176,6 +178,7 @@ var testEntityDefinition = &dosa.EntityDefinition{ { Name: stringField, Type: dosa.String, + Tags: map[string]string{"maxlen": "100"}, }, { Name: int64Field, diff --git a/entity.go b/entity.go index fccdc69c..7b8d3f72 100644 --- a/entity.go +++ b/entity.go @@ -171,9 +171,7 @@ type ColumnDefinition struct { Name string // normalized column name Type Type IsPointer bool // used by client only to indicate whether this field is pointer - // TODO: change as need to support tags like pii, etc - // currently it's in the form of a map from tag name to (optional) tag value - Tags map[string]string + Tags map[string]string } // Clone returns a deep copy of ColumnDefinition diff --git a/entity_parser.go b/entity_parser.go index b3edf554..a63cafd9 100644 --- a/entity_parser.go +++ b/entity_parser.go @@ -56,6 +56,9 @@ var ( ttlPattern = regexp.MustCompile(`ttl\s*=\s*(\S*)`) indexType = reflect.TypeOf((*Index)(nil)).Elem() + + multSpacePat = regexp.MustCompile(`\s+`) + equalsPat = regexp.MustCompile(`\s*=\s*`) ) // parseClusteringKeys func parses the clustering key of DOSA object @@ -110,7 +113,7 @@ func parsePrimaryKey(tableName, pkStr string) (*PrimaryKey, error) { if !parensBalanced(pkStr) { return nil, fmt.Errorf("unmatched parentheses: %q", pkStr) } - // filter out "trailing comma and space" + // filter out trailing commas and whitespace pkStr = strings.TrimRight(pkStr, ", ") pkStr = strings.TrimSpace(pkStr) @@ -323,7 +326,7 @@ func parseIndexTag(indexName, dosaAnnotation string) (string, *PrimaryKey, []str return name, key, indexColumns, nil } -// parseNameTag functions parses DOSA "name" tag +// parseNameTag recognises "name = word," at the beginning of tag. The matched part and the tag are both returned. func parseNameTag(tag, defaultName string) (string, string, error) { fullNameTag := "" name := defaultName @@ -334,7 +337,7 @@ func parseNameTag(tag, defaultName string) (string, string, error) { name = matches[1] } - // filter out "trailing comma" + // filter out any trailing commas name = strings.TrimRight(name, " ,") var err error @@ -384,7 +387,7 @@ func parseETLTag(tag string) (string, ETLState, error) { return "", EtlOff, nil } - // filter out "trailing comma" and convert to lowercase + // filter out trailing commas and convert to lowercase etlTag = strings.ToLower(strings.TrimRight(etlTag, " ,")) etlState, err := ToETLState(etlTag) @@ -409,7 +412,7 @@ func parseTTLTag(tag string) (string, time.Duration, error) { ttlTag = matches[1] } - // filter out "trailing comma" + // filter out trailing commas ttlTag = strings.TrimRight(ttlTag, " ,") ttl, err := time.ParseDuration(ttlTag) if err != nil { @@ -472,7 +475,7 @@ func parseEntityTag(structName, dosaAnnotation string) (string, time.Duration, E return name, ttl, etlState, key, nil } -// parseFieldTag function parses DOSA tag on the fields in the DOSA struct except the "Entity" field +// parseFieldTag parses the tag on a field. func parseFieldTag(structField reflect.StructField, dosaAnnotation string) (*ColumnDefinition, error) { typ, isPointer, err := typify(structField.Type) if err != nil { @@ -481,20 +484,76 @@ func parseFieldTag(structField reflect.StructField, dosaAnnotation string) (*Col return parseField(typ, isPointer, structField.Name, dosaAnnotation) } -func parseField(typ Type, isPointer bool, name string, tag string) (*ColumnDefinition, error) { - // parse name tag - fullNameTag, name, err := parseNameTag(tag, name) +// The only accepted tags here are "name" and "maxlen". The value of "name" is normalized. +func parseField(typ Type, isPointer bool, name string, tagstr string) (*ColumnDefinition, error) { + origName := name + tags, err := getTags(tagstr) if err != nil { - // parseNameTag returns a sane error. return nil, err } - tag = strings.Replace(tag, fullNameTag, "", 1) - if strings.TrimSpace(tag) != "" { - return nil, fmt.Errorf("field %s with an invalid dosa field tag: %s", name, tag) + // Handle each tag. + for tag, value := range tags { + switch tag { + case "name": + name = value + case "maxlen": + // No action needed. + default: + return nil, errors.Errorf("invalid dosa field tag '%s' on field '%s'", tag, origName) + } + } + + // Normalize the name, and remove any "name" tag from the map. + name, err = NormalizeName(name) + if err != nil { + return nil, errors.Wrapf(err, "invalid dosa field tag '%s' on field '%s'", name, origName) + } + delete(tags, "name") + + if len(tags) == 0 { + tags = nil } + return &ColumnDefinition{Name: name, IsPointer: isPointer, Type: typ, Tags: tags}, nil +} + +// Split a string that looks like " key1 = value1 key2=value2 , key3=value3 " into a key-value map. No +// special characters are allowed, alphanumeric only. Either commas or spaces may be used to separate +// tags. +func getTags(tagstr string) (map[string]string, error) { + // Convert commas to spaces and remove extra spaces. + tagstr = canonicalize(tagstr) + if tagstr == "" { + return nil, nil + } + + tags := map[string]string{} + sections := strings.Split(tagstr, " ") + + // Ensure each section is of the form name=value. + for _, s := range sections { + parts := strings.Split(s, "=") + if len(parts) != 2 { + return nil, errors.Errorf("invalid dosa field tag '%s'", s) + } + + tags[parts[0]] = parts[1] + } + + return tags, nil +} + +func canonicalize(s string) string { + // Convert commas to space. + s = strings.Replace(s, ",", " ", -1) + + // Remove spaces around the equals sign. + s = string(equalsPat.ReplaceAll([]byte(s), []byte("="))) + + // Fold multiple spaces to singles. + s = string(multSpacePat.ReplaceAll([]byte(s), []byte(" "))) - return &ColumnDefinition{Name: name, IsPointer: isPointer, Type: typ}, nil + return strings.TrimSpace(s) } func parensBalanced(s string) bool { diff --git a/entity_parser_key_parser_test.go b/entity_parser_key_parser_test.go index eceaac5c..d82d3c21 100644 --- a/entity_parser_key_parser_test.go +++ b/entity_parser_key_parser_test.go @@ -612,7 +612,7 @@ func TestEntityParse(t *testing.T) { PartitionKeys: []string{"ok"}, ClusteringKeys: nil, }, - Error: "unknown unit d in duration", + Error: `unknown unit "d" in duration`, ETL: EtlOff, TTL: NoTTL(), }, diff --git a/entity_parser_test.go b/entity_parser_test.go index 4c6c808a..e10fb933 100644 --- a/entity_parser_test.go +++ b/entity_parser_test.go @@ -21,12 +21,10 @@ package dosa import ( + "io" "testing" - "time" - "io" - "github.com/stretchr/testify/assert" ) @@ -607,6 +605,53 @@ func TestParensBalanced(t *testing.T) { assert.False(t, parensBalanced("((()())")) } +func TestGetTags(t *testing.T) { + testCases := []struct { + tag string + exp string + err string + }{ + { + tag: "", + exp: "{}", + }, + { + tag: "a=b", + exp: "{a: b}", + }, + { + tag: "a=b,c=d", + exp: "{a: b, c: d}", + }, + { + tag: " a=b , c=d", + exp: "{a: b, c: d}", + }, + { + tag: "a = b,c = d ", + exp: "{a: b, c: d}", + }, + { + tag: "c", + err: "invalid dosa field tag 'c'", + }, + { + tag: "a=b=c", + err: "invalid dosa field tag 'a=b=c'", + }, + } + for _, tc := range testCases { + tags, err := getTags(tc.tag) + if tc.err == "" { + assert.Nil(t, err) + assert.Equal(t, tc.exp, deterministicPrintMap(tags)) + } else { + assert.NotNil(t, err) + assert.Contains(t, err.Error(), tc.err) + } + } +} + /* These tests do not currently pass, but I think they should */