Merge branch 'develop'

Grokzen · Jun 23, 2014 · 5fb2753 · 5fb2753
2 parents 28021b9 + 572a1b8
commit 5fb2753
Show file tree

Hide file tree

Showing 32 changed files with 356 additions and 71 deletions.
diff --git a/README.md b/README.md
@@ -69,6 +69,22 @@ Note: It is recomended allways to use a virtual-enviroment when using pyKwalify
 
 
 
+# How to run tests
+
+Install test requirements with
+
+```
+pip install -r test-requirements.txt
+```
+
+Run tests with
+
+```
+nosetests
+```
+
+
+
 # Implemented validation rules
 
 ```
@@ -85,6 +101,7 @@ type:
      - timestamp [NYI]
      - seq
      - map
+     - none
      - scalar (all but seq and map)
      - any (means any implemented type of data)
 
@@ -96,11 +113,10 @@ enum:
 
 pattern:
     Specifies regular expression pattern of value. (Uses re.match() )
-    pattern rule works in map to validate keys, it is usefull when allowempty is set to True.
     Pattern also works on all scalar types.
-    This will be matched against all keys in a map.
+    Pattern no longer works in map. Use regex;<regex-pattern> as keys in "mapping:" instead.
 
-regex;<regex pattern>:
+regex;<regex-pattern>:
     This is only implemented in map where a key inside the mapping keyword can implement this regex; pattern and all keys will be matched against the pattern.
     If a match is found then it will parsed the subrules on that key. A single key can be matched against multiple regex rules and the normal map rules.
 
@@ -137,6 +153,40 @@ matching-rule:
 
 
 
+## Partial schemas
+
+It is possible to create small partial schemas that can be included in other schemas. This feature does not use any built-in YAML or JSON linking.
+
+To define a partial schema, use the keyword "schema;<schema-id>:". <schema-id> must be globally unique among the loaded partial schemas. If a collision is detected, an error will be raised.
+
+To use a partial schema, use the keyword "include: <schema-id>". This works anywhere you can specify the keyword "type". The include directive does not currently work inside a partial schema.
+
+It is possible to define any number of partial schemas in any schema file as long as they are defined at the top level of the schema.
+
+For example, this schema contains one partial and the regular schema.
+
+```yaml
+schema;fooone:
+  type: map
+  mapping:
+    foo:
+      type: str
+
+
+type: seq
+sequence:
+  - include: fooone
+
+```
+
+And it can be used to validate the following data
+
+```yaml
+- foo: "opa"
+```
+
+
+
 ## License
 
 MIT [See LICENSE file]
diff --git a/ReleaseNotes.rst b/ReleaseNotes.rst
@@ -2,6 +2,16 @@
 Release Notes
 =============
 
+v14.06.1
+========
+
+ - New feature "partial schema". Define a small schema with an ID that can be reused at other places in the schema. See readme for details.
+ - New directive "include" that is used to include a partial schema at the specified location.
+ - Cli and Core() now can handle multiple schema files.
+ - Directive "pattern" can no longer be used with map to validate all keys against that regex. Use "regex;" inside "mapping:"
+ - 'none' can now be used as a type
+ - Many more tests added
+
 v14.06
 ======
 

diff --git a/pykwalify/__init__.py b/pykwalify/__init__.py
@@ -4,7 +4,7 @@
 
 __author__ = 'Grokzen <[email protected]>'
 #__version__ = '.'.join(map(str, __version_info__))
-__foobar__ = "0.1.2"
+__foobar__ = "14.06.1"
 
 # Set to True to have revision from Version Control System in version string
 __devel__ = True
@@ -30,3 +30,10 @@ def init_logging():
                     "formatters": {"simple": {"format": " {}".format(msg)}}}
 
     logging.config.dictConfig(logging_conf)
+
+
+partial_schemas = {}
+
+
+def add_partial_schema(schema_id, rule_object):
+    partial_schemas[schema_id] = rule_object
diff --git a/pykwalify/cli.py b/pykwalify/cli.py
@@ -31,7 +31,7 @@ def main():
     #####
 
     __docopt__ = """
-usage: pykwalify -d DATAFILE -s SCHEMAFILE [-q] [-v ...]
+usage: pykwalify -d DATAFILE -s SCHEMAFILE ... [-q] [-v ...]
        pykwalify -h | --help
        pykwalify -V | --version
 
@@ -92,5 +92,5 @@ def main():
     ##### 3. parse cli arguments
     #####
 
-    c = Core(source_file=args["--data-file"], schema_file=args["--schema-file"])
+    c = Core(source_file=args["--data-file"], schema_files=args["--schema-file"])
     c.validate()
diff --git a/pykwalify/core.py b/pykwalify/core.py
@@ -14,6 +14,7 @@
 Log = logging.getLogger(__name__)
 
 # pyKwalify imports
+import pykwalify
 from pykwalify.rule import Rule
 from pykwalify.types import isScalar, tt
 from pykwalify.errors import CoreError, SchemaError
@@ -25,9 +26,9 @@
 class Core(object):
     """ Core class of pyKwalify """
 
-    def __init__(self, source_file=None, schema_file=None, source_data=None, schema_data=None):
+    def __init__(self, source_file=None, schema_files=[], source_data=None, schema_data=None):
         Log.debug("source_file: {}".format(source_file))
-        Log.debug("schema_file: {}".format(schema_file))
+        Log.debug("schema_file: {}".format(schema_files))
         Log.debug("source_data: {}".format(source_data))
         Log.debug("schema_data: {}".format(schema_data))
 
@@ -48,17 +49,35 @@ def __init__(self, source_file=None, schema_file=None, source_data=None, schema_
                 else:
                     raise CoreError("Unable to load source_file. Unknown file format of specified file path: {}".format(source_file))
 
-        if schema_file is not None:
-            if not os.path.exists(schema_file):
-                raise CoreError("Provided source_file do not exists on disk")
+        if not isinstance(schema_files, list):
+            raise CoreError("schema_files must be of list type")
+
+        # Merge all schema files into one single schema for easy parsing
+        if len(schema_files) > 0:
+            schema_data = {}
+            for f in schema_files:
+                if not os.path.exists(f):
+                    raise CoreError("Provided source_file do not exists on disk")
+
+                with open(f, "r") as stream:
+                    if f.endswith(".json"):
+                        data = json.load(stream)
+                        if not data:
+                            raise CoreError("No data loaded from file : {}".format(f))
+                    elif f.endswith(".yaml") or f.endswith(".yml"):
+                        data = yaml.load(stream)
+                        if not data:
+                            raise CoreError("No data loaded from file : {}".format(f))
+                    else:
+                        raise CoreError("Unable to load file : {} : Unknown file format. Supported file endings is [.json, .yaml, .yml]")
+
+                    for key in data.keys():
+                        if key in schema_data.keys():
+                            raise CoreError("Parsed key : {} : two times in schema files...".format(key))
+
+                    schema_data = dict(schema_data, **data)
 
-            with open(schema_file, "r") as stream:
-                if schema_file.endswith(".json"):
-                    self.schema = json.load(stream)
-                elif schema_file.endswith(".yaml"):
-                    self.schema = yaml.load(stream)
-                else:
-                    raise CoreError("Unable to load source_file. Unknown file format of specified file path: {}".format(schema_file))
+            self.schema = schema_data
 
         # Nothing was loaded so try the source_data variable
         if self.source is None:
@@ -101,10 +120,26 @@ def _start_validate(self, value=None):
         errors = []
         done = []
 
+        s = {}
+
+        # Look for schema; tags so they can be parsed before the root rule is parsed
+        for k, v in self.schema.items():
+            if k.startswith("schema;"):
+                Log.debug("Found partial schema; : {}".format(v))
+                r = Rule(schema=v)
+                Log.debug(" Partial schema : {}".format(r))
+                pykwalify.partial_schemas[k.split(";", 1)[1]] = r
+            else:
+                # re-add all items that are not "schema;" partials so they can be parsed
+                s[k] = v
+
+        self.schema = s
+
         Log.debug("Building root rule object")
         root_rule = Rule(schema=self.schema)
         self.root_rule = root_rule
         Log.debug("Done building root rule")
+        Log.debug("Root rule: {}".format(self.root_rule))
 
         self._validate(value, root_rule, path, errors, done)
 
@@ -122,7 +157,9 @@ def _validate(self, value, rule, path, errors, done):
 
         Log.debug(" ? ValidateRule: {}".format(rule))
         n = len(errors)
-        if rule._sequence is not None:
+        if rule._include_name is not None:
+            self._validate_include(value, rule, path, errors, done=None)
+        elif rule._sequence is not None:
             self._validate_sequence(value, rule, path, errors, done=None)
         elif rule._mapping is not None or rule._allowempty_map:
             self._validate_mapping(value, rule, path, errors, done=None)
@@ -132,6 +169,19 @@ def _validate(self, value, rule, path, errors, done):
         if len(errors) != n:
             return
 
+    def _validate_include(self, value, rule, path, errors=[], done=None):
+        if rule._include_name is None:
+            errors.append("Include name not valid : {} : {}".format(path, value))
+            return
+
+        include_name = rule._include_name
+        partial_schema_rule = pykwalify.partial_schemas.get(include_name, None)
+        if not partial_schema_rule:
+            errors.append("No partial schema found for name : {} : Existing partial schemas: {}".format(include_name, ", ".join(sorted(pykwalify.partial_schemas.keys()))))
+            return
+
+        self._validate(value, partial_schema_rule, path, errors, done)
+
     def _validate_sequence(self, value, rule, path, errors=[], done=None):
         Log.debug("Core Validate sequence")
         Log.debug(" * Data: {}".format(value))
@@ -215,6 +265,10 @@ def _validate_mapping(self, value, rule, path, errors=[], done=None):
             Log.debug(" + Value is None, returning...")
             return
 
+        if not isinstance(value, dict):
+            errors.append("mapping.value.notdict : {} : {}".format(value, path))
+            return
+
         m = rule._mapping
         Log.debug(" + RuleMapping: {}".format(m))
 
@@ -233,13 +287,7 @@ def _validate_mapping(self, value, rule, path, errors=[], done=None):
             regex_mappings = [(regex_rule, re.match(regex_rule._map_regex_rule, str(k))) for regex_rule in rule._regex_mappings]
             Log.debug(" + Mapping Regex matches: {}".format(regex_mappings))
 
-            if rule._pattern:
-                # This is the global regex pattern specefied at the same level as mapping: and type: map keys
-                res = re.match(rule._pattern, str(k))
-                Log.debug("Matching regexPattern: {} with value: {}".format(rule._pattern, k))
-                if res is None:  # Not matching
-                    errors.append("pattern.unmatch : {} --> {} : {}".format(rule._pattern, k, path))
-            elif any(regex_mappings):
+            if any(regex_mappings):
                 # Found atleast one that matches a mapping regex
                 for mm in regex_mappings:
                     if mm[1]:
@@ -279,6 +327,8 @@ def _validate_scalar(self, value, rule, path, errors=[], done=None):
         if rule._default and value is None:
             value = rule._default
 
+        self._validate_scalar_type(value, rule._type, errors, path)
+
         if value is None:
             return
 
@@ -357,7 +407,6 @@ def _validate_scalar(self, value, rule, path, errors=[], done=None):
             if l.get("min-ex", None) is not None and l["min-ex"] >= L:
                 errors.append("length.tooshort-ex : {} >= {} : {}".format(l["min-ex"], L, path))
 
-        self._validate_scalar_type(value, rule._type, errors, path)
 
     def _validate_scalar_type(self, value, t, errors, path):
         Log.debug("Core scalar: validating scalar type")

diff --git a/pykwalify/rule.py b/pykwalify/rule.py
@@ -42,6 +42,7 @@ def __init__(self, schema=None, parent=None):
         self._matching_rule = None
         self._map_regex_rule = None
         self._regex_mappings = None
+        self._include_name = None
 
         self._parent = parent
         self._schema = schema
@@ -56,9 +57,15 @@ def __str__(self):
     def init(self, schema, path):
         Log.debug("Init schema: {}".format(schema))
 
-        if schema is not None:
-            # assert isinstance(schema, dict), "schema is not a dict : {}".format(path)
+        include = schema.get("include", None)
+
+        # Check if this item is an include; if so, record the include name and stop parsing this rule
+        if include:
+            Log.debug("Found include tag...")
+            self._include_name = include
+            return
 
+        if schema is not None:
             if "type" not in schema:
                 raise RuleError("key 'type' not found in schema rule : {}".format(path))
             else:
@@ -96,6 +103,9 @@ def init(self, schema, path):
         for k, v in schema.items():
             if k in func_mapping:
                 func_mapping[k](v, rule, path)
+            elif k.startswith("schema;"):
+                Log.debug("Found schema tag...")
+                raise RuleError("Schema is only allowed on top level of schema file...")
             else:
                 raise RuleError("Unknown key: {} found : {}".format(k, path))
 
@@ -160,6 +170,9 @@ def initPatternValue(self, v, rule, path):
 
         self._pattern = v
 
+        if self._schema_str["type"] == "map":
+            raise RuleError("map.pattern : pattern not allowed inside map : {} : {}".format(v, path))
+
         # TODO: Some form of validation of the regexp? it exists in the source
 
         try:

diff --git a/pykwalify/types.py b/pykwalify/types.py
@@ -24,7 +24,8 @@
           "scalar": None,
           "text": None,
           "any": object,
-          "enum": str
+          "enum": str,
+          "none": None
           }
 
 
@@ -88,6 +89,10 @@ def isEnum(obj):
     return isinstance(obj, str)
 
 
+def isNone(obj):
+    return obj is None
+
+
 tt = {"str": isString,
       "int": isInt,
       "bool": isBool,
@@ -96,4 +101,5 @@ def isEnum(obj):
       "text": isText,
       "any": isAny,
       "enum": isEnum,
+      "none": isNone
       }
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 settings.update(
     name="pykwalify",
-    version="14.06",
+    version="14.06.1",
     description='Python lib/cli for JSON/YAML schema validation',
     long_description='Python lib/cli for JSON/YAML schema validation',
     author="Grokzen",

diff --git a/tests/files/27a.yaml b/tests/files/27a.yaml
diff --git a/tests/files/27b.yaml b/tests/files/27b.yaml