Source code for pvl.encoder

# -*- coding: utf-8 -*-
"""Parameter Value Language encoder.

An encoder deals with converting Python objects into
string values that conform to a PVL specification.
"""

# Copyright 2015, 2019-2021, ``pvl`` library authors.
#
# Reuse is permitted under the terms of the license.
# The AUTHORS file and the LICENSE file are at the
# top level of this library.

import datetime
import re
import textwrap

from collections import abc, namedtuple
from decimal import Decimal
from typing import Tuple
from warnings import warn

from .collections import PVLObject, PVLGroup, Quantity
from .grammar import PVLGrammar, ODLGrammar, PDSGrammar, ISISGrammar
from .token import Token
from .decoder import PVLDecoder, ODLDecoder, PDSLabelDecoder


class QuantTup(namedtuple("QuantTup", "cls value_prop units_prop")):
    """
    Internal record describing one quantity class an encoder can handle.

    The fields are the quantity class itself (*cls*), the name of the
    attribute that holds its value (*value_prop*), and the name of the
    attribute that holds its units (*units_prop*).  Users should not
    normally create these directly; use the encoder's
    add_quantity_cls() function instead.
    """


[docs]class PVLEncoder(object):
    """An encoder based on the rules in the CCSDS-641.0-B-2 'Blue Book'
    which defines the PVL language.

    :param grammar: A pvl.grammar object.  If None or not specified, it
                    will be set to the grammar of *decoder* (if *decoder*
                    is not None), otherwise it defaults to
                    pvl.grammar.PVLGrammar().
    :param decoder: defaults to pvl.decoder.PVLDecoder().
    :param indent: specifies the number of spaces that will be used to
        indent each level of the PVL document, when Groups or Objects
        are encountered, defaults to 2.
    :param width: specifies the number of characters in width that each
        line should have, defaults to 80.
    :param aggregation_end: when True the encoder will print the value
        of the aggregation's Block Name in the End Aggregation Statement
        (e.g. END_GROUP = foo), and when false, it won't (e.g. END_GROUP).
        Defaults to True.
    :param end_delimiter: when True the encoder will print the grammar's
        delimiter (e.g. ';' for PVL) after each statement, when False
        it won't.  Defaults to True.
    :param newline: is the string that will be placed at the end of each
        'line' of output (and counts against *width*), defaults to '\\\\n'.
    :param group_class: must be a Mapping class; it will be tested against
        with isinstance() to determine if various elements of the dict-like
        passed to encode() should be encoded as a PVL Group or PVL Object,
        defaults to PVLGroup.
    :param object_class: must be a class that can take a *group_class*
        object in its constructor (essentially converting a *group_class*
        to an *object_class*), otherwise will raise TypeError.  Defaults
        to PVLObject.
    """

    def __init__(
        self,
        grammar=None,
        decoder=None,
        indent: int = 2,
        width: int = 80,
        aggregation_end: bool = True,
        end_delimiter: bool = True,
        newline: str = "\n",
        group_class=PVLGroup,
        object_class=PVLObject,
    ):
        """Validate and store this encoder's configuration.

        :raises TypeError: if *grammar* is not a PVLGrammar, if *decoder*
            is not a PVLDecoder, if *group_class* or *object_class* is
            not a Mapping type, or if *object_class* cannot be
            instantiated from a *group_class* object.
        """
        if grammar is None:
            # Prefer the decoder's grammar so the two stay consistent.
            if decoder is not None:
                self.grammar = decoder.grammar
            else:
                self.grammar = PVLGrammar()
        elif isinstance(grammar, PVLGrammar):
            self.grammar = grammar
        else:
            raise TypeError(
                "The grammar must be an instance of PVLGrammar, "
                f"not {type(grammar)}."
            )

        if decoder is None:
            self.decoder = PVLDecoder(self.grammar)
        elif isinstance(decoder, PVLDecoder):
            self.decoder = decoder
        else:
            raise TypeError(
                "The decoder must be an instance of PVLDecoder, "
                f"not {type(decoder)}."
            )

        self.indent = indent
        self.width = width
        self.end_delimiter = end_delimiter
        self.aggregation_end = aggregation_end
        self.newline = newline

        # This list of 3-tuples *always* has our own pvl quantity object,
        # and should *only* be added to with self.add_quantity_cls().
        self.quantities = [QuantTup(Quantity, "value", "units")]
        self._import_quantities()

        if issubclass(group_class, abc.Mapping):
            self.grpcls = group_class
        else:
            raise TypeError("The group_class must be a Mapping type.")

        if issubclass(object_class, abc.Mapping):
            self.objcls = object_class
        else:
            raise TypeError("The object_class must be a Mapping type.")

        # The object class must be constructible from a group object.
        try:
            self.objcls(self.grpcls())
        except TypeError as err:
            raise TypeError(
                f"The object_class type ({object_class}) cannot be "
                f"instantiated with an argument that is of type "
                f"group_class ({group_class})."
            ) from err

        # Finally, let's keep track of everything we consider "numerical":
        self.numeric_types = (int, float, self.decoder.real_cls, Decimal)

    def _import_quantities(self):
        warn_str = (
            "The {} library is not present, so {} objects will "
            "not be properly encoded."
        )
        try:
            from astropy import units as u

            self.add_quantity_cls(u.Quantity, "value", "unit")
        except ImportError:
            warn(
                warn_str.format("astropy", "astropy.units.Quantity"),
                ImportWarning,
            )

        try:
            from pint import Quantity as q

            self.add_quantity_cls(q, "magnitude", "units")
        except ImportError:
            warn(warn_str.format("pint", "pint.Quantity"), ImportWarning)

[docs]    def add_quantity_cls(self, cls, value_prop: str, units_prop: str):
        """Adds a quantity class to the list of possible
        quantities that this encoder can handle.

        :param cls: The name of a quantity class that can be tested
            with ``isinstance()``.
        :param value_prop: A string that is the property name of
            *cls* that contains the value or magnitude of the quantity
            object.
        :param units_prop: A string that is the property name of
            *cls* that contains the units element of the quantity
            object.
        """
        if not isinstance(cls, type):
            raise TypeError(f"The cls given ({cls}) is not a Python class.")

        # If a quantity object can't encode "one meter" its probably not
        # going to work for us.
        test_cls = cls(1, "m")
        for prop in (value_prop, units_prop):
            if not hasattr(test_cls, prop):
                raise AttributeError(
                    f"The class ({cls}) does not have an "
                    f" attribute named {prop}."
                )

        self.quantities.append(QuantTup(cls, value_prop, units_prop))

[docs]    def format(self, s: str, level: int = 0) -> str:
        """Returns a string derived from *s*, which
        has leading space characters equal to
        *level* times the number of spaces specified
        by this encoder's indent property.

        It uses the textwrap library to wrap long lines.
        """

        prefix = level * (self.indent * " ")

        if len(prefix + s + self.newline) > self.width and "=" in s:
            (preq, _, posteq) = s.partition("=")
            new_prefix = prefix + preq.strip() + " = "

            lines = textwrap.wrap(
                posteq.strip(),
                width=(self.width - len(self.newline)),
                replace_whitespace=False,
                initial_indent=new_prefix,
                subsequent_indent=(" " * len(new_prefix)),
                break_long_words=False,
                break_on_hyphens=False,
            )
            return self.newline.join(lines)
        else:
            return prefix + s

[docs]    def encode(self, module: abc.Mapping) -> str:
        """Returns a ``str`` formatted as a PVL document based
        on the dict-like *module* object
        according to the rules of this encoder.
        """
        lines = list()
        lines.append(self.encode_module(module, 0))

        end_line = self.grammar.end_statements[0]
        if self.end_delimiter:
            end_line += self.grammar.delimiters[0]

        lines.append(end_line)

        # Final check to ensure we're sending out the right character set:
        s = self.newline.join(lines)

        for i, c in enumerate(s):
            if not self.grammar.char_allowed(c):
                raise ValueError(
                    "Encountered a character that was not "
                    "a valid character according to the "
                    'grammar: "{}", it is in: '
                    '"{}"'.format(c, s[i - 5, i + 5])
                )

        return self.newline.join(lines)

[docs]    def encode_module(self, module: abc.Mapping, level: int = 0) -> str:
        """Returns a ``str`` formatted as a PVL module based
        on the dict-like *module* object according to the
        rules of this encoder, with an indentation level
        of *level*.
        """
        lines = list()

        # To align things on the equals sign, just need to normalize
        # the non-aggregation key length:

        non_agg_key_lengths = list()
        for k, v in module.items():
            if not isinstance(v, abc.Mapping):
                non_agg_key_lengths.append(len(k))
        longest_key_len = max(non_agg_key_lengths, default=0)

        for k, v in module.items():
            if isinstance(v, abc.Mapping):
                lines.append(self.encode_aggregation_block(k, v, level))
            else:
                lines.append(
                    self.encode_assignment(k, v, level, longest_key_len)
                )
        return self.newline.join(lines)

[docs]    def encode_aggregation_block(
        self, key: str, value: abc.Mapping, level: int = 0
    ) -> str:
        """Returns a ``str`` formatted as a PVL Aggregation Block with
        *key* as its name, and its contents based on the
        dict-like *value* object according to the
        rules of this encoder, with an indentation level
        of *level*.
        """
        lines = list()

        if isinstance(value, self.grpcls):
            agg_keywords = self.grammar.group_pref_keywords
        elif isinstance(value, abc.Mapping):
            agg_keywords = self.grammar.object_pref_keywords
        else:
            raise ValueError("The value {value} is not dict-like.")

        agg_begin = "{} = {}".format(agg_keywords[0], key)
        if self.end_delimiter:
            agg_begin += self.grammar.delimiters[0]
        lines.append(self.format(agg_begin, level))

        lines.append(self.encode_module(value, (level + 1)))

        agg_end = ""
        if self.aggregation_end:
            agg_end += "{} = {}".format(agg_keywords[1], key)
        else:
            agg_end += agg_keywords[1]
        if self.end_delimiter:
            agg_end += self.grammar.delimiters[0]
        lines.append(self.format(agg_end, level))

        return self.newline.join(lines)

[docs]    def encode_assignment(
        self, key: str, value, level: int = 0, key_len: int = None
    ) -> str:
        """Returns a ``str`` formatted as a PVL Assignment Statement
        with *key* as its Parameter Name, and its value based
        on *value* object according to the rules of this encoder,
        with an indentation level of *level*.  It also allows for
        an optional *key_len* which indicates the width in characters
        that the Assignment Statement should be set to, defaults to
        the width of *key*.
        """
        if key_len is None:
            key_len = len(key)

        s = ""
        s += "{} = ".format(key.ljust(key_len))

        enc_val = self.encode_value(value)

        if enc_val.startswith(self.grammar.quotes):
            # deal with quoted lines that need to preserve
            # newlines
            s = self.format(s, level)
            s += enc_val

            if self.end_delimiter:
                s += self.grammar.delimiters[0]

            return s
        else:
            s += enc_val

            if self.end_delimiter:
                s += self.grammar.delimiters[0]

            return self.format(s, level)

[docs]    def encode_value(self, value) -> str:
        """Returns a ``str`` formatted as a PVL Value based
        on the *value* object according to the rules of this encoder.
        """
        try:
            return self.encode_quantity(value)
        except ValueError:
            return self.encode_simple_value(value)

[docs]    def encode_quantity(self, value) -> str:
        """Returns a ``str`` formatted as a PVL Value followed by
        a PVL Units Expression if the *value* object can be
        encoded this way, otherwise raise ValueError."""
        for (cls, v_prop, u_prop) in self.quantities:
            if isinstance(value, cls):
                return self.encode_value_units(
                    getattr(value, v_prop), getattr(value, u_prop)
                )

        raise ValueError(
            f"The value object {value} could not be "
            "encoded as a PVL Value followed by a PVL "
            f"Units Expression, it is of type {type(value)}"
        )

[docs]    def encode_value_units(self, value, units) -> str:
        """Returns a ``str`` formatted as a PVL Value from *value*
        followed by a PVL Units Expressions from *units*."""
        value_str = self.encode_simple_value(value)
        units_str = self.encode_units(str(units))
        return f"{value_str} {units_str}"

[docs]    def encode_simple_value(self, value) -> str:
        """Returns a ``str`` formatted as a PVL Simple Value based
        on the *value* object according to the rules of this encoder.
        """
        if value is None:
            return self.grammar.none_keyword
        elif isinstance(value, (set, frozenset)):
            return self.encode_set(value)
        elif isinstance(value, list):
            return self.encode_sequence(value)
        elif isinstance(
            value, (datetime.datetime, datetime.date, datetime.time)
        ):
            return self.encode_datetype(value)
        elif isinstance(value, bool):
            if value:
                return self.grammar.true_keyword
            else:
                return self.grammar.false_keyword
        elif isinstance(value, self.numeric_types):
            return str(value)
        elif isinstance(value, str):
            return self.encode_string(value)
        else:
            raise TypeError(f"{value!r} is not serializable.")

[docs]    def encode_setseq(self, values: abc.Collection) -> str:
        """This function provides shared functionality for
        encode_sequence() and encode_set().
        """
        return ", ".join([self.encode_value(v) for v in values])

[docs]    def encode_sequence(self, value: abc.Sequence) -> str:
        """Returns a ``str`` formatted as a PVL Sequence based
        on the *value* object according to the rules of this encoder.
        """
        return "(" + self.encode_setseq(value) + ")"

[docs]    def encode_set(self, value: abc.Set) -> str:
        """Returns a ``str`` formatted as a PVL Set based
        on the *value* object according to the rules of this encoder.
        """
        return "{" + self.encode_setseq(value) + "}"

[docs]    def encode_datetype(self, value) -> str:
        """Returns a ``str`` formatted as a PVL Date/Time based
        on the *value* object according to the rules of this encoder.
        If *value* is not a datetime date, time, or datetime object,
        it will raise TypeError.
        """
        if isinstance(value, datetime.datetime):
            return self.encode_datetime(value)
        elif isinstance(value, datetime.date):
            return self.encode_date(value)
        elif isinstance(value, datetime.time):
            return self.encode_time(value)
        else:
            raise TypeError(f"{value!r} is not a datetime type.")

[docs]    @staticmethod
    def encode_date(value: datetime.date) -> str:
        """Returns a ``str`` formatted as a PVL Date based
        on the *value* object according to the rules of this encoder.
        """
        return f"{value:%Y-%m-%d}"

[docs]    @staticmethod
    def encode_time(value: datetime.time) -> str:
        """Returns a ``str`` formatted as a PVL Time based
        on the *value* object according to the rules of this encoder.
        """
        s = f"{value:%H:%M}"

        if value.microsecond:
            s += f":{value:%S.%f}"
        elif value.second:
            s += f":{value:%S}"

        return s

[docs]    def encode_datetime(self, value: datetime.datetime) -> str:
        """Returns a ``str`` formatted as a PVL Date/Time based
        on the *value* object according to the rules of this encoder.
        """
        date = self.encode_date(value)
        time = self.encode_time(value)
        return date + "T" + time

[docs]    def needs_quotes(self, s: str) -> bool:
        """Returns true if *s* must be quoted according to this
        encoder's grammar, false otherwise.
        """
        if any(c in self.grammar.whitespace for c in s):
            return True

        if s in self.grammar.reserved_keywords:
            return True

        tok = Token(s, grammar=self.grammar, decoder=self.decoder)
        return not tok.is_unquoted_string()

[docs]    def encode_string(self, value) -> str:
        """Returns a ``str`` formatted as a PVL String based
        on the *value* object according to the rules of this encoder.
        """
        s = str(value)

        if self.needs_quotes(s):
            for q in self.grammar.quotes:
                if q not in s:
                    return q + s + q
            else:
                raise ValueError(
                    "All of the quote characters, "
                    f"{self.grammar.quotes}, were in the "
                    f'string ("{s}"), so it could not be quoted.'
                )
        else:
            return s

[docs]    def encode_units(self, value: str) -> str:
        """Returns a ``str`` formatted as a PVL Units Value based
        on the *value* object according to the rules of this encoder.
        """
        return (
            self.grammar.units_delimiters[0]
            + value
            + self.grammar.units_delimiters[1]
        )


[docs]class ODLEncoder(PVLEncoder):
    """An encoder based on the rules in the PDS3 Standards Reference
    (version 3.8, 27 Feb 2009) Chapter 12: Object Description
    Language Specification and Usage for ODL only.  This is
    almost certainly not what you want.  There are very rarely
    cases where you'd want to use ODL that you wouldn't also want
    to use the PDS Label restrictions, so you probably really want
    the PDSLabelEncoder class, not this one.  Move along.

    It extends PVLEncoder.

    :param grammar: defaults to pvl.grammar.ODLGrammar().
    :param decoder: defaults to pvl.decoder.ODLDecoder().
    :param end_delimiter: defaults to False.
    :param newline: defaults to '\\\\r\\\\n'.
    """

    def __init__(
        self,
        grammar=None,
        decoder=None,
        indent=2,
        width=80,
        aggregation_end=True,
        end_delimiter=False,
        newline="\r\n",
        group_class=PVLGroup,
        object_class=PVLObject
    ):
        """Apply the ODL defaults and then defer to the parent class.

        A missing *grammar* becomes an ODLGrammar and a missing
        *decoder* becomes an ODLDecoder built on that grammar.  Raises
        TypeError if the decoder has no callable ``is_identifier``.
        """
        grammar = ODLGrammar() if grammar is None else grammar
        decoder = ODLDecoder(grammar) if decoder is None else decoder

        # Other methods of this class call decoder.is_identifier().
        identifier_check = getattr(decoder, "is_identifier", None)
        if not callable(identifier_check):
            raise TypeError(
                f"The decoder for an ODLEncoder() must have the "
                f"is_identifier() function, and this does not: {decoder}"
            )

        super().__init__(
            grammar=grammar,
            decoder=decoder,
            indent=indent,
            width=width,
            aggregation_end=aggregation_end,
            end_delimiter=end_delimiter,
            newline=newline,
            group_class=group_class,
            object_class=object_class,
        )

[docs]    def encode(self, module: abc.Mapping) -> str:
        """Extends parent function, but ODL requires that there must be
        a spacing or format character after the END statement and this
        adds the encoder's ``newline`` sequence.
        """
        s = super().encode(module)
        return s + self.newline

[docs]    def is_scalar(self, value) -> bool:
        """Returns a boolean indicating whether the *value* object
        qualifies as an ODL 'scalar_value'.

        ODL defines a 'scalar-value' as a numeric_value, a
        date_time_string, a text_string_value, or a symbol_value.

        For Python, these correspond to the following:

        * numeric_value: any of self.numeric_types, and Quantity whose value
          is one of the self.numeric_types.
        * date_time_string: datetime objects
        * text_string_value: str
        * symbol_value: str

        """
        for quant in self.quantities:
            if isinstance(value, quant.cls):
                if isinstance(
                    getattr(value, quant.value_prop), self.numeric_types
                ):
                    return True

        scalar_types = (
            *self.numeric_types,
            datetime.date,
            datetime.datetime,
            datetime.time,
            str
        )
        if isinstance(value, scalar_types):
            return True

        return False

[docs]    def is_symbol(self, value) -> bool:
        """Returns true if *value* is an ODL Symbol String, false otherwise.

        An ODL Symbol String is enclosed by single quotes
        and may not contain any of the following characters:

        1. The apostrophe, which is reserved as the symbol string delimiter.
        2. ODL Format Effectors
        3. Control characters

        This means that an ODL Symbol String is a subset of the PVL
        quoted string, and will be represented in Python as a ``str``.
        """
        if isinstance(value, str):
            if "'" in value:  # Item 1
                return False

            for fe in self.grammar.format_effectors:  # Item 2
                if fe in value:
                    return False

            if len(value) > self.width / 2:
                # This means that the string is long and it is very
                # likely to get wrapped and have carriage returns,
                # and thus "ODL Format Effectors" inserted later.
                # Unfortunately, without knowing the width of the
                # parameter term, and the current indent level, this
                # still may end up being incorrect threshhold.
                return False

            if value.isprintable() and len(value) > 0:  # Item 3
                return True
        else:
            return False

[docs]    def needs_quotes(self, s: str) -> bool:
        """Return true if *s* is an ODL Identifier, false otherwise.

        Overrides parent function.
        """
        return not self.decoder.is_identifier(s)

[docs]    def is_assignment_statement(self, s) -> bool:
        """Returns true if *s* is an ODL Assignment Statement, false otherwise.

        An ODL Assignment Statement is either an
        element_identifier or a namespace_identifier
        joined to an element_identifier with a colon.
        """
        if self.decoder.is_identifier(s):
            return True

        (ns, _, el) = s.partition(":")

        if self.decoder.is_identifier(ns) and self.decoder.is_identifier(el):
            return True

        return False

[docs]    def encode_assignment(self, key, value, level=0, key_len=None) -> str:
        """Overrides parent function by restricting the length of
        keywords and enforcing that they be ODL Identifiers
        and uppercasing their characters.
        """

        if key_len is None:
            key_len = len(key)

        if len(key) > 30:
            raise ValueError(
                "ODL keywords must be 30 characters or less "
                f"in length, this one is longer: {key}"
            )

        if (
            key.startswith("^") and self.is_assignment_statement(key[1:])
        ) or self.is_assignment_statement(key):
            ident = key.upper()
        else:
            raise ValueError(
                f'The keyword "{key}" is not a valid ODL ' "Identifier."
            )

        s = "{} = ".format(ident.ljust(key_len))
        s += self.encode_value(value)

        if self.end_delimiter:
            s += self.grammar.delimiters[0]

        return self.format(s, level)

[docs]    def encode_sequence(self, value) -> str:
        """Extends parent function, as ODL only allows one- and
        two-dimensional sequences of ODL scalar_values.
        """
        if len(value) == 0:
            raise ValueError("ODL does not allow empty Sequences.")

        for v in value:  # check the first dimension (list of elements)
            if isinstance(v, list):
                for i in v:  # check the second dimension (list of lists)
                    if isinstance(i, list):
                        # Shouldn't be lists of lists of lists.
                        raise ValueError(
                            "ODL only allows one- and two- "
                            "dimensional Sequences, but "
                            f"this has more: {value}"
                        )
                    elif not self.is_scalar(i):
                        raise ValueError(
                            "ODL only allows scalar_values "
                            f"within sequences: {v}"
                        )

            elif not self.is_scalar(v):
                raise ValueError(
                    "ODL only allows scalar_values within " f"sequences: {v}"
                )

        return super().encode_sequence(value)

[docs]    def encode_set(self, values) -> str:
        """Extends parent function, ODL only allows sets to contain
        scalar values.
        """

        if not all(map(self.is_scalar, values)):
            raise ValueError(
                f"ODL only allows scalar values in sets: {values}"
            )

        return super().encode_set(values)

[docs]    def encode_value(self, value):
        """Extends parent function by only allowing Units Expressions for
        numeric values.
        """
        for quant in self.quantities:
            if isinstance(value, quant.cls):
                if isinstance(
                    getattr(value, quant.value_prop),
                    self.numeric_types
                ):
                    return super().encode_value(value)
                else:
                    raise ValueError(
                        "Unit expressions are only allowed "
                        f"following numeric values: {value}"
                    )

        return super().encode_value(value)

[docs]    def encode_string(self, value):
        """Extends parent function by appropriately quoting Symbol
        Strings.
        """
        if self.decoder.is_identifier(value):
            return value
        elif self.is_symbol(value):
            return "'" + value + "'"
        else:
            return super().encode_string(value)

[docs]    def encode_time(self, value: datetime.time) -> str:
        """Extends parent function since ODL allows a time zone offset
        from UTC to be included, and otherwise recommends that times
        be suffixed with a 'Z' to clearly indicate that they are in UTC.
        """
        if value.tzinfo is None:
            raise ValueError(
                f"ODL cannot output local times, and this time does not "
                f"have a timezone offset: {value}"
            )

        t = super().encode_time(value)

        if value.utcoffset() == datetime.timedelta():
            return t + "Z"
        else:
            td_str = str(value.utcoffset())
            (h, m, s) = td_str.split(":")
            if s != "00":
                raise ValueError(
                    "The datetime value had a timezone offset "
                    f"with seconds values ({value}) which is "
                    "not allowed in ODL."
                )
            if m == "00":
                return t + f"+{h:0>2}"
            else:
                return t + f"+{h:0>2}:{m}"

        return t

[docs]    def encode_units(self, value) -> str:
        """Overrides parent function since ODL limits what characters
        and operators can be present in Units Expressions.
        """

        # if self.is_identifier(value.strip('*/()-')):
        if self.decoder.is_identifier(re.sub(r"[\s*/()-]", "", value)):

            if "**" in value:
                exponents = re.findall(r"\*\*.+?", value)
                for e in exponents:
                    if re.search(r"\*\*-?\d+", e) is None:
                        raise ValueError(
                            "The exponentiation operator (**) in "
                            f'this Units Expression "{value}" '
                            "is not a decimal integer."
                        )

            return (
                self.grammar.units_delimiters[0]
                + value
                + self.grammar.units_delimiters[1]
            )
        else:
            raise ValueError(
                f'The value, "{value}", does not conform to '
                "the specification for an ODL Units Expression."
            )


[docs]class PDSLabelEncoder(ODLEncoder):
    """An encoder based on the rules in the PDS3 Standards Reference
    (version 3.8, 27 Feb 2009) Chapter 12: Object Description
    Language Specification and Usage and writes out labels that
    conform to the PDS 3 standards.

    It extends ODLEncoder.

    You are not allowed to choose *end_delimiter* or *newline*
    as the parent class allows, because to be PDS-compliant,
    those are fixed choices.  However, in some cases, the PDS3
    Standards are asymmetric, allowing for a wider variety of
    PVL-text on "read" and a more narrow variety of PVL-text
    on "write".  The default values of the PDSLabelEncoder enforce
    those strict "write" rules, but if you wish to alter them,
    but still produce PVL-text that would validate against the PDS3
    standard, you may alter them.

    :param convert_group_to_object: Defaults to True, meaning that
        if a GROUP does not conform to the PDS definition of a
        GROUP, then it will be written out as an OBJECT.  If it is
        False, then an exception will be thrown if incompatible
        GROUPs are encountered. In PVL and ODL, the OBJECT and GROUP
        aggregations are interchangeable, but the PDS applies
        restrictions to what can appear in a GROUP.
    :param tab_replace: Defaults to 4 and indicates the number of
        space characters to replace horizontal tab characters with
        (since tabs aren't allowed in PDS labels).  If this is set
        to zero, tabs will not be replaced with spaces.
    :param symbol_single_quote: Defaults to True, and if a Python `str`
        object qualifies as a PVL Symbol String, it will be written to
        PVL-text as a single-quoted string.  If False, no special
        handling is done, and any PVL Symbol String will be treated
        as a PVL Text String, which is typically enclosed with double-quotes.
    :param time_trailing_z: defaults to True, and suffixes a "Z" to
        datetimes and times written to PVL-text as the PDS encoding
        standard requires.  If False, no trailing "Z" is written.

    """

    def __init__(
        self,
        grammar=None,
        decoder=None,
        indent=2,
        width=80,
        aggregation_end=True,
        group_class=PVLGroup,
        object_class=PVLObject,
        convert_group_to_object=True,
        tab_replace=4,
        symbol_single_quote=True,
        time_trailing_z=True,
    ):
        """Apply the PDS defaults and then defer to the parent class.

        A missing *grammar* becomes a PDSGrammar and a missing *decoder*
        becomes a PDSLabelDecoder built on that grammar.  The parent is
        always given ``end_delimiter=False`` and a carriage-return /
        line-feed *newline*.
        """
        grammar = PDSGrammar() if grammar is None else grammar
        decoder = PDSLabelDecoder(grammar) if decoder is None else decoder

        super().__init__(
            grammar=grammar,
            decoder=decoder,
            indent=indent,
            width=width,
            aggregation_end=aggregation_end,
            end_delimiter=False,
            newline="\r\n",
            group_class=group_class,
            object_class=object_class,
        )

        # PDS-specific switches.
        self.time_trailing_z = time_trailing_z
        self.symbol_single_quote = symbol_single_quote
        self.tab_replace = tab_replace
        self.convert_group_to_object = convert_group_to_object

[docs]    def count_aggs(
        self, module: abc.Mapping, obj_count: int = 0, grp_count: int = 0
    ) -> tuple((int, int)):
        """Returns the count of OBJECT and GROUP aggregations
        that are contained within the *module* as a two-tuple
        in that order.
        """
        # This currently just counts the values in the passed
        # in module, it does not 'recurse' if those aggregations also
        # may contain aggregations.

        for k, v in module.items():
            if isinstance(v, abc.Mapping):
                if isinstance(v, self.grpcls):
                    grp_count += 1
                elif isinstance(v, self.objcls):
                    obj_count += 1
                else:
                    # We treat other dict-like Python objects as
                    # PVL Objects for the purposes of this count,
                    # because that is how they will be encoded.
                    obj_count += 1

        return obj_count, grp_count

[docs]    def encode(self, module: abc.MutableMapping) -> str:
        """Extends the parent function, by adding a restriction.
        For PDS, if there are any GROUP elements, there must be at
        least one OBJECT element in the label.  Behavior here
        depends on the value of this encoder's convert_group_to_object
        property.
        """
        (obj_count, grp_count) = self.count_aggs(module)

        if grp_count > 0 and obj_count < 1:
            if self.convert_group_to_object:
                for k, v in module.items():
                    # First try to convert any GROUPs that would not
                    # be valid PDS GROUPs.
                    if isinstance(v, self.grpcls) and not self.is_PDSgroup(v):
                        module[k] = self.objcls(v)
                        break
                else:
                    # Then just convert the first GROUP
                    for k, v in module.items():
                        if isinstance(v, self.grpcls):
                            module[k] = self.objcls(v)
                            break
                    else:
                        raise ValueError(
                            "Couldn't convert any of the GROUPs " "to OBJECTs."
                        )
            else:
                raise ValueError(
                    "This module has a GROUP element, but no "
                    "OBJECT elements, which is not allowed by "
                    "the PDS.  You could set "
                    "*convert_group_to_object* to *True* on the "
                    "encoder to try and convert a GROUP "
                    "to an OBJECT."
                )

        s = super().encode(module)
        if self.tab_replace > 0:
            return s.replace("\t", (" " * self.tab_replace))
        else:
            return s

[docs]    def is_PDSgroup(self, group: abc.Mapping) -> bool:
        """Returns true if the dict-like *group* qualifies as a PDS Group,
        false otherwise.

        PDS applies the following restrictions to GROUPS:

        1. The GROUP structure may only be used in a data product
           label which also contains one or more data OBJECT definitions.
        2. The GROUP statement must contain only attribute assignment
           statements, include pointers, or related information pointers
           (i.e., no data location pointers). If there are multiple
           values, a single statement must be used with either sequence
           or set syntax; no attribute assignment statement or pointer
           may be repeated.
        3. GROUP statements may not be nested.
        4. GROUP statements may not contain OBJECT definitions.
        5. Only PSDD elements may appear within a GROUP statement.
           *PSDD is not defined anywhere in the PDS document, so don't
           know how to test for it.*
        6. The keyword contents associated with a specific GROUP
           identifier must be identical across all labels of a single data
           set (with the exception of the “PARAMETERS” GROUP, as
           explained).

        Use of the GROUP structure must be coordinated with the
        responsible PDS discipline Node.

        Items 1 & 6 and the final sentence above, can't really be tested
        by examining a single group, but must be dealt with in a larger
        context.  The ODLEncoder.encode_module() handles #1, at least.
        You're on your own for the other two issues.

        Item 5: *PSDD* is not defined anywhere in the ODL PDS document,
        so don't know how to test for it.
        """
        (obj_count, grp_count) = self.count_aggs(group)

        # Items 3 and 4:
        if obj_count != 0 or grp_count != 0:
            return False

        # Item 2, no data location pointers:
        for k, v in group.items():
            if k.startswith("^"):
                if isinstance(v, int):
                    return False
                else:
                    for quant in self.quantities:
                        if isinstance(v, quant.cls) and isinstance(
                            getattr(v, quant.value_prop), int
                        ):
                            return False

        # Item 2, no repeated keys:
        keys = list(group.keys())
        if len(keys) != len(set(keys)):
            return False

        return True

[docs]    def encode_aggregation_block(self, key, value, level=0):
        """Extends parent function because PDS has restrictions on
        what may be in a GROUP.

        If the encoder's *convert_group_to_object* parameter is True,
        and a GROUP does not conform to the PDS definition of a GROUP,
        then it will be written out as an OBJECT.  If it is False,
        then an exception will be thrown.
        """

        # print('value at top:')
        # print(value)

        if isinstance(value, self.grpcls) and not self.is_PDSgroup(value):
            if self.convert_group_to_object:
                value = self.objcls(value)
            else:
                raise ValueError(
                    "This GROUP element is not a valid PDS "
                    "GROUP.  You could set "
                    "*convert_group_to_object* to *True* on the "
                    "encoder to try and convert the GROUP"
                    "to an OBJECT."
                )

        # print('value at bottom:')
        # print(value)

        return super().encode_aggregation_block(key, value, level)

[docs]    def encode_set(self, values) -> str:
        """Extends parent function because PDS only allows symbol values
        and integers within sets.
        """
        for v in values:
            if not self.is_symbol(v) and not isinstance(v, int):
                raise ValueError(
                    "The PDS only allows integers and symbols "
                    f"in sets: {values}"
                )

        return super().encode_set(values)

[docs]    def encode_string(self, value):
        """Extends parent function to treat Symbol Strings as Text Strings,
        which typically means that they are double-quoted and not
        single-quoted.
        """
        if self.decoder.is_identifier(value):
            return value
        elif self.is_symbol(value) and self.symbol_single_quote:
            return "'" + value + "'"
        else:
            return super(ODLEncoder, self).encode_string(value)

[docs]    def encode_time(self, value: datetime.time) -> str:
        """Overrides parent's encode_time() function because
        even though ODL allows for timezones, PDS does not.

        Not in the section on times, but at the end of the PDS
        ODL document, in section 12.7.3, para 14, it indicates that
        alternate time zones may not be used in a PDS label, only
        these:
        1. YYYY-MM-DDTHH:MM:SS.SSS.
        2. YYYY-DDDTHH:MM:SS.SSS.

        """
        s = f"{value:%H:%M}"

        if value.microsecond:
            ms = round(value.microsecond / 1000)
            if value.microsecond != ms * 1000:
                raise ValueError(
                    f"PDS labels can only encode time values to the milisecond "
                    f"precision, and this time ({value}) has too much "
                    f"precision."
                )
            else:
                s += f":{value:%S}.{ms}"
        elif value.second:
            s += f":{value:%S}"

        if (
            value.tzinfo is None or
            value.tzinfo.utcoffset(None) == datetime.timedelta(0)
        ):
            if self.time_trailing_z:
                return s + "Z"
            else:
                return s
        else:
            raise ValueError(
                "PDS labels should only have UTC times, but "
                f"this time has a timezone: {value}"
            )


class ISISEncoder(PVLEncoder):
    """An encoder that writes PVL text meant to be readable by the
    ISIS PVL text parser.

    The PVL/ODL-like dialect implemented by ISIS3 (as of 3.9) does
    not strictly follow any published standard.  It started from
    the PDS3 ODL of the 1990s, and then picked up extensions from
    existing and prior data sets (ISIS2, PDS, JAXA, ISRO, ...) as
    well as extensions that are only used within ISIS files (cub,
    net).  This is one of the reasons that using ISIS cube files,
    or PVL text written by ISIS, as an archive format has been
    strongly discouraged.

    Because no specification exists, a strategy for handling this
    dialect could only come from a detailed analysis of the ISIS
    software that parses and writes its PVL text.  For that reason
    this encoder is most likely the least reliable one.  We welcome
    bug reports to help extend our coverage of this flavor of PVL
    text.

    :param grammar: defaults to pvl.grammar.ISISGrammar().
    :param decoder: defaults to pvl.decoder.PVLDecoder().
    :param end_delimiter: defaults to False.
    :param newline: defaults to '\\\\n'.
    """

    def __init__(
        self,
        grammar=None,
        decoder=None,
        indent=2,
        width=80,
        aggregation_end=True,
        end_delimiter=False,
        newline="\n",
        group_class=PVLGroup,
        object_class=PVLObject,
    ):
        # Fall back to the ISIS grammar, and to a PVLDecoder built on
        # whichever grammar ends up being used, when not supplied.
        grammar = ISISGrammar() if grammar is None else grammar
        decoder = PVLDecoder(grammar) if decoder is None else decoder

        super().__init__(
            grammar,
            decoder,
            indent,
            width,
            aggregation_end,
            end_delimiter,
            newline,
            group_class=group_class,
            object_class=object_class,
        )