Source code for foreshadow.optimizers.param_distribution

"""Classes to be configured by user for customizing parameter tuning."""

from collections import MutableMapping

import hyperopt.hp as hp

import foreshadow.serializers as ser

from .tuner import _replace_list, get


[docs]class ParamSpec(MutableMapping, ser.ConcreteSerializerMixin):
    """Holds the specification of the parameter search space.

    A search space is a dict or list of dicts. This search space should be
    viewed as one run of optimization on the foreshadow object. The
    algorithm for optimization is determined by the optimizer that is
    chosen. Hence, this specification is agnostic of the optimizer chosen.

    A dict represents the set of parameters to be applied in a single run.

    A list represents a set of choices that the algorithm (again, agnostic
    at this point) can pick from.

    For example, imagine s as our top level object, of structure:

        s (object)
            .transformer (object)
                .attr

    s has an attribute that may be optimized and in turn, that object has
    parameters that may be optimized. Below, we try two different
    transformers and try 2 different parameter specifications for each.
    Note that these parameters are specific to the type of transformer
    (StandardScaler does not have the parameter feature_range and vice versa).

    [
            {
                "s__transformer": "StandardScaler",
                "s__transformer__with_mean": [False, True],
            },
            {
                "s__transformer": "MinMaxScaler",
                "s__transformer__feature_range": [(0, 1), (0, 0.5)]
                ),
            },
        ],

    Here, the dicts are used to tell the optimizer where to values to set
    are. The lists showcase the different values that are possible.
    """

    def __init__(self, fs_pipeline=None, X_df=None, y_df=None):
        """Initialize, and if args are passed, auto create param distribution.

        Only pass the init arguments if automatic param spec determination
        is desired.

        Args:
            fs_pipeline: Foreshadow.pipeline
            X_df: input DataFrame of data points
            y_df: input DataFrame of labels

        Raises:
            ValueError: if either all kwargs are not passed or all aren't
                passed.

        """
        if not (fs_pipeline is None) == (X_df is None) == (y_df is None):
            raise ValueError(
                "Either all kwargs are None or all are set. To "
                "use automatic param determination, pass all "
                "kwargs. Otherwise, manual setting can be "
                "accomplished using set_params."
            )
        self._param_set = False
        self.param_distributions = []

        # automatic pipeline determination.
        if not (fs_pipeline is None) and (X_df is None) and (y_df) is None:
            self.param_distributions = [
                {
                    "X_preparer__feature_preprocessor___"
                    "parallel_process__group: 0__CategoricalEncoder__"
                    "transformer__ohe": get("OneHotEncoder"),
                    "X_preparer__feature_preprocessor"
                    "___parallel_process__group: 0__CategoricalEncoder__"
                    "transformer__ohe__drop_invariant": [True, False],
                },
                {
                    "X_preparer__feature_preprocessor___"
                    "parallel_process__group: 0__CategoricalEncoder__"
                    "transformer__ohe": get("HashingEncoder")
                },
            ]

[docs]    def convert(self, key, replace_val=hp.choice):
        """Convert internal self.param_distributions to valid distribution.

        Uses _replace_list to replace all lists with replace_val

        Args:
            key: key to use for top level hp.choice name
            replace_val: value to replace lists with.

        """
        self.param_distributions = _replace_list(
            key, self.param_distributions, replace_with=replace_val
        )

[docs]    def get_params(self, deep=True):
        """Get the params for this object. Used for serialization.

        Args:
            deep: Does nothing. Here for sklearn compatibility.

        Returns:
            Members that need to be set for this object.

        """
        return self.param_distributions

[docs]    def set_params(self, **params):
        """Set the params for this object. Used for serialization.

        Also used to init this object when automatic tuning is not used.

        Args:
            **params: Members to set from get_params.

        Returns:
              self.

        """
        self.param_distributions = params["param_distributions"]
        self._param_set = True
        return self

    def __call__(self):
        """Overridden for MutableMapping.

        Returns:
            self.param_distributions

        """
        return self.param_distributions

    def __iter__(self):
        """Iterate over self.param_distributions.

        Returns:
            iter(self.param_distributions)

        """
        return iter(self.param_distributions)

    def __getitem__(self, item):
        """Return value at index item from internal list of params.

        Args:
            item: index in list.

        Returns:
            item at index from self.param_distributions.

        """
        return self.param_distributions[item]

    def __setitem__(self, key, value):
        """Set value at index key from internal list of params.

        Args:
            key: index
            value: value

        """
        self.param_distributions[key] = value

    def __len__(self):
        """Length of self.param_distributions list.

        Returns:
            len(self.param_distributions)

        """
        return len(self.param_distributions)

    def __contains__(self, item):
        """Get if internal param distribution contains item.

        Args:
            item: item to check

        Returns:
            True if it contains the item. False else.

        """
        return self.param_distributions.__contains__(item)

    def __delitem__(self, key):  # overriding abstract method, not to be used.
        """Not implemented, only overrode because it is an abstract method.

        Args:
            key: not used.

        Raises:
            NotImplementedError: If called

        """
        raise NotImplementedError(
            "Abstract method not implemented. Should " "not be called.fl"
        )

    def __hash__(self):
        """Return unique hash from self.param_distributions.

        Returns:
            unique hash from internal param distribution

        """
        return self.param_distributions.__hash__()