[PATCH 2/4] Parse as YAML if not valid JSON

Sam Thursfield sam.thursfield at codethink.co.uk
Tue Jan 22 10:36:04 GMT 2013


On 01/21/2013 06:15 PM, Richard Maw wrote:
> Tests are currently broken, one because invalid JSON
> can be valid YAML, and coverage is incomplete.
> ---
>   morphlib/__init__.py  |    1 +
>   morphlib/morph2.py    |   24 ++++++++++---
>   morphlib/yamlparse.py |   91 +++++++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 111 insertions(+), 5 deletions(-)
>   create mode 100644 morphlib/yamlparse.py
>
> diff --git a/morphlib/__init__.py b/morphlib/__init__.py
> index 213241d..ad2e76d 100644
> --- a/morphlib/__init__.py
> +++ b/morphlib/__init__.py
> @@ -56,5 +56,6 @@ import stagingarea
>   import stopwatch
>   import tempdir
>   import util
> +import yamlparse
>
>   import app  # this needs to be last
> diff --git a/morphlib/morph2.py b/morphlib/morph2.py
> index 9e5be2e..0e92782 100644
> --- a/morphlib/morph2.py
> +++ b/morphlib/morph2.py
> @@ -17,6 +17,7 @@
>   import copy
>   import re
>
> +import morphlib
>   from morphlib.util import OrderedDict, json
>
>   class Morphology(object):
> @@ -52,8 +53,24 @@ class Morphology(object):
>           ]
>       }
>
> +    @staticmethod
> +    def _load_json(text):
> +        return json.loads(text, object_pairs_hook=OrderedDict)
> +
> +    @staticmethod
> +    def _dump_json(obj, f):
> +        text = json.dumps(obj, indent=4)
> +        text = re.sub(" \n", "\n", text)
> +        f.write(text)
> +        f.write('\n')
> +
>       def __init__(self, text):
> -        self._dict = json.loads(text, object_pairs_hook=OrderedDict)
> +        try:
> +            self._dict = self._load_json(text)
> +            self._dumper = self._dump_json
> +        except Exception, e:
> +            self._dict = morphlib.yamlparse.load(text)
> +            self._dumper = morphlib.yamlparse.dump

Why bother with this, if JSON is a subset of YAML? (I presume this is
because we're using YAML 1.1, but if so, a comment would be good, along
with a note that when we upgrade to a YAML 1.2 parser we can remove the
JSON loading code.)

>           self._set_defaults()
>           self._validate_children()
>
> @@ -156,7 +173,4 @@ class Morphology(object):
>                   value = self[key]
>               if value and key[0] != '_':
>                   as_dict[key] = value
> -        text = json.dumps(as_dict, indent=4)
> -        text = re.sub(" \n", "\n", text)
> -        f.write(text)
> -        f.write('\n')
> +        self._dumper(as_dict, f)
> diff --git a/morphlib/yamlparse.py b/morphlib/yamlparse.py
> new file mode 100644
> index 0000000..1567a3c
> --- /dev/null
> +++ b/morphlib/yamlparse.py
> @@ -0,0 +1,91 @@
> +# Copyright (C) 2012  Codethink Limited
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; version 2 of the License.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License along
> +# with this program; if not, write to the Free Software Foundation, Inc.,
> +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> +
> +import yaml
> +import yaml.constructor
> +
> +from morphlib.util import OrderedDict
> +
> +class OrderedDictYAMLLoader(yaml.Loader):
> +    """
> +    A YAML loader that loads mappings into ordered dictionaries.
> +    """

Add a bit on why this class is needed to the docstring.

> +    def __init__(self, *args, **kwargs):
> +        yaml.Loader.__init__(self, *args, **kwargs)
> +
> +        self.add_constructor(u'tag:yaml.org,2002:map', type(self).construct_yaml_map)

This line is a bit cryptic - a comment would be nice.

> +    def construct_yaml_map(self, node):
> +        data = OrderedDict()
> +        yield data
> +        value = self.construct_mapping(node)
> +        data.update(value)
> +
> +    def construct_mapping(self, node, deep=False):
> +        if isinstance(node, yaml.MappingNode):
> +            self.flatten_mapping(node)
> +        else:
> +            raise yaml.constructor.ConstructorError(None, None,
> +                'expected a mapping node, but found %s' % node.id, node.start_mark)

We don't use this style of indent anywhere else in Morph. Either line up 
subsequent lines with the opening bracket, or don't put anything after 
the opening bracket on the first line.

> +        mapping = OrderedDict()
> +        for key_node, value_node in node.value:
> +            key = self.construct_object(key_node, deep=deep)
> +            try:
> +                hash(key)
> +            except TypeError, exc:
> +                raise yaml.constructor.ConstructorError('while constructing a mapping',
> +                    node.start_mark, 'found unacceptable key (%s)' % exc, key_node.start_mark)

Same indentation issue here as in the previous comment.

> +            value = self.construct_object(value_node, deep=deep)
> +            mapping[key] = value
> +        return mapping
> +
> +class OrderedDictYAMLDumper(yaml.Dumper):
> +
> +    def __init__(self, *args, **kwargs):
> +        yaml.Dumper.__init__(self, *args, **kwargs)
> +
> +        self.add_representer(OrderedDict,
> +                             type(self).represent_ordered_dict)
> +
> +    def represent_ordered_dict(self, odict):
> +        return self.represent_ordered_mapping(u'tag:yaml.org,2002:map', odict)
> +
> +    def represent_ordered_mapping(self, tag, omap):
> +        value = []
> +        node = yaml.MappingNode(tag, value)
> +        if self.alias_key is not None:
> +            self.represented_objects[self.alias_key] = node
> +        best_style = True
> +        for item_key, item_value in omap.iteritems():
> +            node_key = self.represent_data(item_key)
> +            node_value = self.represent_data(item_value)
> +            if not (isinstance(node_key, yaml.ScalarNode) and not node_key.style):
> +                best_style = False
> +            if not (isinstance(node_value, yaml.ScalarNode) and not node_value.style):
> +                best_style = False
> +            value.append((node_key, node_value))
> +        if self.default_flow_style is not None:
> +            node.flow_style = self.default_flow_style
> +        else:
> +            node.flow_style = best_style
> +        return node
> +
> +def load(*args, **kwargs):
> +    return yaml.load(Loader=OrderedDictYAMLLoader, *args, **kwargs)
> +
> +def dump(*args, **kwargs):
> +    return yaml.dump(Dumper=OrderedDictYAMLDumper, default_flow_style=False, *args, **kwargs)
>





More information about the baserock-dev mailing list