Module dalex.dataset_level

Expand source code
from ._aggregated_profiles.object import AggregatedProfiles
from ._model_performance.object import ModelPerformance
from ._variable_importance.object import VariableImportance
from ._residual_diagnostics import ResidualDiagnostics

__all__ = [
    "ModelPerformance",
    "VariableImportance",
    "AggregatedProfiles",
    "ResidualDiagnostics"
]

Classes

class AggregatedProfiles (type='partial', variables=None, variable_type='numerical', groups=None, span=0.25, center=True, random_state=None)

Calculate dataset level variable profiles as Partial or Accumulated Dependence

Partial Dependence Profile (average across CP Profiles), Individual Conditional Expectation (local weighted average across CP Profiles), Accumulated Local Effects (cumulated average local changes in CP Profiles).

Parameters

type : {'partial', 'accumulated', 'conditional'}
Type of model profiles (default is 'partial' for Partial Dependence Profiles).
variables : str or array_like of str, optional
Variables for which the profiles will be calculated (default is None, which means all of the variables).
variable_type : {'numerical', 'categorical'}
Calculate the profiles for numerical or categorical variables (default is 'numerical').
groups : str or array_like of str, optional
Names of categorical variables that will be used for profile grouping (default is None, which means no grouping).
span : float, optional
Smoothing coefficient used as sd for gaussian kernel (default is 0.25).
center : bool, optional
Theoretically Accumulated Profiles start at 0, but are centered to compare them with Partial Dependence Profiles (default is True, which means center around the average y_hat calculated on the data sample).
random_state : int, optional
Set seed for random number generator (default is random seed).

Attributes

result : pd.DataFrame
Main result attribute of an explanation.
mean_prediction : float
Average prediction for sampled data (using N).
raw_profiles : pd.DataFrame or None
Saved CeterisParibus object. NOTE: None if more objects were passed to the fit method.
type : {'partial', 'accumulated', 'conditional'}
Type of model profiles.
variables : array_like of str or None
Variables for which the profiles will be calculated
variable_type : {'numerical', 'categorical'}
Calculate the profiles for numerical or categorical variables.
groups : str or array_like of str or None
Names of categorical variables that will be used for profile grouping.
span : float
Smoothing coefficient used as sd for gaussian kernel.
center : bool
Theoretically Accumulated Profiles start at 0, but are centered to compare them with Partial Dependence Profiles (default is True, which means center around the average y_hat calculated on the data sample).
random_state : int or None
Set seed for random number generator.

Notes

https://pbiecek.github.io/ema/partialDependenceProfiles.html https://pbiecek.github.io/ema/accumulatedLocalProfiles.html

Expand source code
class AggregatedProfiles:
    """Calculate dataset level variable profiles as Partial or Accumulated Dependence

    Partial Dependence Profile (average across CP Profiles),
    Individual Conditional Expectation (local weighted average across CP Profiles),
    Accumulated Local Effects (cumulated average local changes in CP Profiles).

    Parameters
    -----------
    type : {'partial', 'accumulated', 'conditional'}
        Type of model profiles (default is 'partial' for Partial Dependence Profiles).
    variables : str or array_like of str, optional
        Variables for which the profiles will be calculated
        (default is None, which means all of the variables).
    variable_type : {'numerical', 'categorical'}
        Calculate the profiles for numerical or categorical variables
        (default is 'numerical').
    groups : str or array_like of str, optional
        Names of categorical variables that will be used for profile grouping
        (default is None, which means no grouping).
    span : float, optional
        Smoothing coefficient used as sd for gaussian kernel (default is 0.25).
    center : bool, optional
        Theoretically Accumulated Profiles start at 0, but are centered to compare
        them with Partial Dependence Profiles (default is True, which means center
        around the average y_hat calculated on the data sample).
    random_state : int, optional
        Set seed for random number generator (default is random seed).

    Attributes
    -----------
    result : pd.DataFrame
        Main result attribute of an explanation.
    mean_prediction : float
        Average prediction for sampled `data` (using `N`).
    raw_profiles : pd.DataFrame or None
        Saved CeterisParibus object.
        NOTE: None if more objects were passed to the `fit` method.
    type : {'partial', 'accumulated', 'conditional'}
        Type of model profiles.
    variables : array_like of str or None
        Variables for which the profiles will be calculated.
    variable_type : {'numerical', 'categorical'}
        Calculate the profiles for numerical or categorical variables.
    groups : str or array_like of str or None
        Names of categorical variables that will be used for profile grouping.
    span : float
        Smoothing coefficient used as sd for gaussian kernel.
    center : bool
        Theoretically Accumulated Profiles start at 0, but are centered to compare
        them with Partial Dependence Profiles (default is True, which means center
        around the average y_hat calculated on the data sample).
    random_state : int or None
        Set seed for random number generator.

    Notes
    --------
    https://pbiecek.github.io/ema/partialDependenceProfiles.html
    https://pbiecek.github.io/ema/accumulatedLocalProfiles.html
    """

    def __init__(self,
                 type='partial',
                 variables=None,
                 variable_type='numerical',
                 groups=None,
                 span=0.25,
                 center=True,
                 random_state=None):

        # validate/normalize user input before storing it on the object
        check_variable_type(variable_type)
        variables_ = check_variables(variables)
        groups_ = check_groups(groups)

        self.variable_type = variable_type
        self.groups = groups_
        self.type = type
        self.variables = variables_
        self.span = span
        self.center = center
        self.result = None
        self.mean_prediction = None
        self.raw_profiles = None
        self.random_state = random_state

    def _repr_html_(self):
        # delegate rich notebook display to the result DataFrame
        return self.result._repr_html_()

    def fit(self,
            ceteris_paribus,
            verbose=True):
        """Calculate the result of explanation

        Fit method makes calculations in place and changes the attributes.

        Parameters
        -----------
        ceteris_paribus : CeterisParibus object or array_like of CeterisParibus objects
            Profile objects to aggregate.
        verbose : bool, optional
            Print tqdm progress bar (default is True).

        Returns
        -----------
        None
        """
        # local import avoids a circular dependency with dalex.instance_level
        from dalex.instance_level import CeterisParibus
        if isinstance(ceteris_paribus, CeterisParibus):  # allow for ceteris_paribus to be a single element
            all_profiles = ceteris_paribus.result.copy()
            all_observations = ceteris_paribus.new_observation.copy()
            # keep a copy of the single object so plot(geom='profiles') can use it
            self.raw_profiles = deepcopy(ceteris_paribus)
        elif isinstance(ceteris_paribus, (list, tuple)):  # ceteris_paribus as tuple or array
            # pd.concat silently drops the initial None accumulator
            all_profiles = None
            all_observations = None
            for cp in ceteris_paribus:
                _global_checks.global_check_class(cp, CeterisParibus)
                all_profiles = pd.concat([all_profiles, cp.result.copy()])
                all_observations = pd.concat([all_observations, cp.new_observation.copy()])
        else:
            _global_checks.global_raise_objects_class(ceteris_paribus, CeterisParibus)

        all_profiles, vnames = prepare_numerical_categorical(all_profiles, self.variables, self.variable_type)

        # select only suitable variables
        all_profiles = all_profiles.loc[all_profiles['_vname_'].isin(vnames), :]

        all_profiles = create_x(all_profiles, self.variable_type)

        self.mean_prediction = all_observations['_yhat_'].mean()

        self.result = aggregate_profiles(all_profiles, self.mean_prediction, self.type, self.groups, self.center,
                                         self.span, verbose)

    def plot(self,
             objects=None,
             geom='aggregates',
             variables=None,
             center=True,
             size=2,
             alpha=1,
             facet_ncol=2,
             title="Aggregated Profiles",
             y_title='prediction',
             horizontal_spacing=0.05,
             vertical_spacing=None,
             show=True):
        """Plot the Aggregated Profiles explanation

        Parameters
        -----------
        objects : AggregatedProfiles object or array_like of AggregatedProfiles objects
            Additional objects to plot in subplots (default is None).
        geom : {'aggregates', 'profiles'}
            If 'profiles' then raw profiles will be plotted in the background
            (default is 'aggregates', which means plot only aggregated profiles).
            NOTE: It is useful to use small values of the `N` parameter in object creation
            before using `profiles`, because of plot performance and clarity (e.g. 100).
        variables : str or array_like of str, optional
            Variables for which the profiles will be calculated
            (default is None, which means all of the variables).
        center : bool, optional
            Theoretically Accumulated Profiles start at 0, but are centered to compare
            them with Partial Dependence Profiles (default is True, which means center
            around the average y_hat calculated on the data sample).
        size : float, optional
            Width of lines in px (default is 2).
        alpha : float <0, 1>, optional
            Opacity of lines (default is 1).
        facet_ncol : int, optional
            Number of columns on the plot grid (default is 2).
        title : str, optional
            Title of the plot (default is "Aggregated Profiles").
        y_title : str, optional
            Title of the y axis (default is "prediction").
        horizontal_spacing : float <0, 1>, optional
            Ratio of horizontal space between the plots (default is 0.05).
        vertical_spacing : float <0, 1>, optional
            Ratio of vertical space between the plots (default is 0.3/number of rows).
        show : bool, optional
            True shows the plot; False returns the plotly Figure object that can be
            edited or saved using the `write_image()` method (default is True).

        Returns
        -----------
        None or plotly.graph_objects.Figure
            Return figure that can be edited or saved. See `show` parameter.
        """
        # TODO: numerical+categorical in one plot https://github.com/plotly/plotly.py/issues/2647

        if geom not in ("aggregates", "profiles"):
            raise TypeError("geom should be 'aggregates' or 'profiles'")
        if isinstance(variables, str):
            variables = (variables,)

        # are there any other objects to plot?
        if objects is None:
            _result_df = self.result.assign(_mp_=self.mean_prediction if center else 0)
        elif isinstance(objects, self.__class__):  # allow for objects to be a single element
            _result_df = pd.concat([self.result.assign(_mp_=self.mean_prediction if center else 0),
                                    objects.result.assign(_mp_=objects.mean_prediction if center else 0)])
        elif isinstance(objects, (list, tuple)):  # objects as tuple or array
            _result_df = self.result.assign(_mp_=self.mean_prediction if center else 0)
            for ob in objects:
                _global_checks.global_check_object_class(ob, self.__class__)
                _result_df = pd.concat([_result_df, ob.result.assign(_mp_=ob.mean_prediction if center else 0)])
        else:
            _global_checks.global_raise_objects_class(objects, self.__class__)

        # variables to use
        all_variables = _result_df['_vname_'].dropna().unique().tolist()

        if variables is not None:
            all_variables = _global_utils.intersect_unsorted(variables, all_variables)
            if len(all_variables) == 0:
                # ', '.join keeps the listed variable names readable in the message
                raise TypeError("variables do not overlap with " + ', '.join(variables))

            _result_df = _result_df.loc[_result_df['_vname_'].isin(all_variables), :]

        #  calculate y axis range to allow for fixedrange True
        dl = _result_df['_yhat_'].to_numpy()
        # np.ptp(dl) instead of dl.ptp() - the ndarray method was removed in NumPy 2.0
        min_max_margin = np.ptp(dl) * 0.10
        min_max = [dl.min() - min_max_margin, dl.max() + min_max_margin]

        is_x_numeric = pd.api.types.is_numeric_dtype(_result_df['_x_'])
        n = len(all_variables)

        facet_nrow = int(np.ceil(n / facet_ncol))
        if vertical_spacing is None:
            vertical_spacing = 0.3 / facet_nrow
        plot_height = 78 + 71 + facet_nrow * (280 + 60)
        hovermode, render_mode = 'x unified', 'svg'

        color = '_label_'  # _groups_ doesnt make much sense for multiple AP objects
        m = len(_result_df[color].dropna().unique())

        if is_x_numeric:
            if geom == 'profiles' and self.raw_profiles is not None:
                render_mode = 'webgl'

            fig = px.line(_result_df,
                          x="_x_", y="_yhat_", color=color, facet_col="_vname_",
                          category_orders={"_vname_": list(all_variables)},
                          labels={'_yhat_': 'prediction', '_mp_': 'mean_prediction'},  # , color: 'group'},
                          hover_name=color,
                          hover_data={'_yhat_': ':.3f', '_mp_': ':.3f',
                                      color: False, '_vname_': False, '_x_': False},
                          facet_col_wrap=facet_ncol,
                          facet_row_spacing=vertical_spacing,
                          facet_col_spacing=horizontal_spacing,
                          template="none",
                          render_mode=render_mode,
                          color_discrete_sequence=_theme.get_default_colors(m, 'line')) \
                    .update_traces(dict(line_width=size, opacity=alpha)) \
                    .update_xaxes({'matches': None, 'showticklabels': True,
                                   'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                                   'ticks': "outside", 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True}) \
                    .update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                                   'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True,
                                   'range': min_max})

            if geom == 'profiles' and self.raw_profiles is not None:
                # emphasize the aggregates, draw the raw profiles greyed-out behind them
                fig.update_traces(dict(line_width=2*size, opacity=1))
                fig_cp = self.raw_profiles.plot(variables=list(all_variables),
                                                facet_ncol=facet_ncol,
                                                show_observations=False, show=False) \
                    .update_traces(dict(line_width=1, opacity=0.5, line_color='#ceced9'))

                for value in fig.data:
                    fig_cp.add_trace(value)
                hovermode = False
                fig = fig_cp
        else:
            # bars are drawn as differences from the (possibly zero) mean prediction base
            _result_df = _result_df.assign(_diff_=lambda x: x['_yhat_'] - x['_mp_'])
            mp_format = ':.3f'
            if not center:
                min_max = [np.min([min_max[0], 0]), np.max([min_max[1], 0])]
                mp_format = False

            fig = px.bar(_result_df,
                         x="_x_", y="_diff_", color="_label_", facet_col="_vname_",
                         category_orders={"_vname_": list(all_variables)},
                         labels={'_yhat_': 'prediction', '_mp_': 'mean_prediction'},  # , color: 'group'},
                         hover_name=color,
                         base="_mp_",
                         hover_data={'_yhat_': ':.3f', '_mp_': mp_format, '_diff_': False,
                                     color: False, '_vname_': False, '_x_': False},
                         facet_col_wrap=facet_ncol,
                         facet_row_spacing=vertical_spacing,
                         facet_col_spacing=horizontal_spacing,
                         template="none",
                         color_discrete_sequence=_theme.get_default_colors(m, 'line'),  # bar was forgotten
                         barmode='group')  \
                    .update_xaxes({'matches': None, 'showticklabels': True,
                                   'type': 'category', 'gridwidth': 2, 'automargin': True,  # autorange="reversed"
                                   'ticks': "outside", 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True}) \
                    .update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                                   'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True,
                                   'range': min_max})

            # add hline https://github.com/plotly/plotly.py/issues/2141
            for bar in fig.data:
                fig.add_shape(type='line', y0=bar.base[0], y1=bar.base[0], x0=-1, x1=len(bar.x),
                              xref=bar.xaxis, yref=bar.yaxis, layer='below',
                              line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'})

        fig = _theme.fig_update_line_plot(fig, title, y_title, plot_height, hovermode)

        if show:
            fig.show(config=_theme.get_default_config())
        else:
            return fig

Methods

def fit(self, ceteris_paribus, verbose=True)

Calculate the result of explanation

Fit method makes calculations in place and changes the attributes.

Parameters

ceteris_paribus : CeterisParibus object or array_like of CeterisParibus objects
Profile objects to aggregate.
verbose : bool, optional
Print tqdm progress bar (default is True).

Returns

None
 
Expand source code
def fit(self,
        ceteris_paribus,
        verbose=True):
    """Calculate the result of explanation

    Fit method makes calculations in place and changes the attributes.

    Parameters
    -----------
    ceteris_paribus : CeterisParibus object or array_like of CeterisParibus objects
        Profile objects to aggregate.
    verbose : bool, optional
        Print tqdm progress bar (default is True).

    Returns
    -----------
    None
    """
    # imported here (not at module level) to break a circular import
    from dalex.instance_level import CeterisParibus

    if isinstance(ceteris_paribus, CeterisParibus):
        # single profile object - keep a deep copy for plot(geom='profiles')
        all_profiles = ceteris_paribus.result.copy()
        all_observations = ceteris_paribus.new_observation.copy()
        self.raw_profiles = deepcopy(ceteris_paribus)
    elif isinstance(ceteris_paribus, (list, tuple)):
        # several profile objects - stack their frames one by one
        # (pd.concat silently drops the initial None accumulator)
        all_profiles, all_observations = None, None
        for single_cp in ceteris_paribus:
            _global_checks.global_check_class(single_cp, CeterisParibus)
            all_profiles = pd.concat([all_profiles, single_cp.result.copy()])
            all_observations = pd.concat([all_observations, single_cp.new_observation.copy()])
    else:
        _global_checks.global_raise_objects_class(ceteris_paribus, CeterisParibus)

    all_profiles, vnames = prepare_numerical_categorical(all_profiles, self.variables, self.variable_type)

    # keep only the rows that describe the selected variables
    all_profiles = all_profiles[all_profiles['_vname_'].isin(vnames)]

    all_profiles = create_x(all_profiles, self.variable_type)

    self.mean_prediction = all_observations['_yhat_'].mean()

    self.result = aggregate_profiles(all_profiles, self.mean_prediction, self.type,
                                     self.groups, self.center, self.span, verbose)
def plot(self, objects=None, geom='aggregates', variables=None, center=True, size=2, alpha=1, facet_ncol=2, title='Aggregated Profiles', y_title='prediction', horizontal_spacing=0.05, vertical_spacing=None, show=True)

Plot the Aggregated Profiles explanation

Parameters

objects : AggregatedProfiles object or array_like of AggregatedProfiles objects
Additional objects to plot in subplots (default is None).
geom : {'aggregates', 'profiles'}
If 'profiles' then raw profiles will be plotted in the background (default is 'aggregates', which means plot only aggregated profiles). NOTE: It is useful to use small values of the N parameter in object creation before using profiles, because of plot performance and clarity (e.g. 100).
variables : str or array_like of str, optional
Variables for which the profiles will be calculated (default is None, which means all of the variables).
center : bool, optional
Theoretically Accumulated Profiles start at 0, but are centered to compare them with Partial Dependence Profiles (default is True, which means center around the average y_hat calculated on the data sample).
size : float, optional
Width of lines in px (default is 2).
alpha : float <0, 1>, optional
Opacity of lines (default is 1).
color : str, optional
Variable name used for grouping (default is '_label_', which groups by models).
facet_ncol : int, optional
Number of columns on the plot grid (default is 2).
title : str, optional
Title of the plot (default is "Aggregated Profiles").
y_title : str, optional
Title of the y axis (default is "prediction").
horizontal_spacing : float <0, 1>, optional
Ratio of horizontal space between the plots (default is 0.05).
vertical_spacing : float <0, 1>, optional
Ratio of vertical space between the plots (default is 0.3/number of rows).
show : bool, optional
True shows the plot; False returns the plotly Figure object that can be edited or saved using the write_image() method (default is True).

Returns

None or plotly.graph_objects.Figure
Return figure that can be edited or saved. See show parameter.
Expand source code
def plot(self,
         objects=None,
         geom='aggregates',
         variables=None,
         center=True,
         size=2,
         alpha=1,
         facet_ncol=2,
         title="Aggregated Profiles",
         y_title='prediction',
         horizontal_spacing=0.05,
         vertical_spacing=None,
         show=True):
    """Plot the Aggregated Profiles explanation

    Parameters
    -----------
    objects : AggregatedProfiles object or array_like of AggregatedProfiles objects
        Additional objects to plot in subplots (default is None).
    geom : {'aggregates', 'profiles'}
        If 'profiles' then raw profiles will be plotted in the background
        (default is 'aggregates', which means plot only aggregated profiles).
        NOTE: It is useful to use small values of the `N` parameter in object creation
        before using `profiles`, because of plot performance and clarity (e.g. 100).
    variables : str or array_like of str, optional
        Variables for which the profiles will be calculated
        (default is None, which means all of the variables).
    center : bool, optional
        Theoretically Accumulated Profiles start at 0, but are centered to compare
        them with Partial Dependence Profiles (default is True, which means center
        around the average y_hat calculated on the data sample).
    size : float, optional
        Width of lines in px (default is 2).
    alpha : float <0, 1>, optional
        Opacity of lines (default is 1).
    facet_ncol : int, optional
        Number of columns on the plot grid (default is 2).
    title : str, optional
        Title of the plot (default is "Aggregated Profiles").
    y_title : str, optional
        Title of the y axis (default is "prediction").
    horizontal_spacing : float <0, 1>, optional
        Ratio of horizontal space between the plots (default is 0.05).
    vertical_spacing : float <0, 1>, optional
        Ratio of vertical space between the plots (default is 0.3/number of rows).
    show : bool, optional
        True shows the plot; False returns the plotly Figure object that can be
        edited or saved using the `write_image()` method (default is True).

    Returns
    -----------
    None or plotly.graph_objects.Figure
        Return figure that can be edited or saved. See `show` parameter.
    """
    # TODO: numerical+categorical in one plot https://github.com/plotly/plotly.py/issues/2647

    if geom not in ("aggregates", "profiles"):
        raise TypeError("geom should be 'aggregates' or 'profiles'")
    if isinstance(variables, str):
        variables = (variables,)

    # are there any other objects to plot?
    if objects is None:
        _result_df = self.result.assign(_mp_=self.mean_prediction if center else 0)
    elif isinstance(objects, self.__class__):  # allow for objects to be a single element
        _result_df = pd.concat([self.result.assign(_mp_=self.mean_prediction if center else 0),
                                objects.result.assign(_mp_=objects.mean_prediction if center else 0)])
    elif isinstance(objects, (list, tuple)):  # objects as tuple or array
        _result_df = self.result.assign(_mp_=self.mean_prediction if center else 0)
        for ob in objects:
            _global_checks.global_check_object_class(ob, self.__class__)
            _result_df = pd.concat([_result_df, ob.result.assign(_mp_=ob.mean_prediction if center else 0)])
    else:
        _global_checks.global_raise_objects_class(objects, self.__class__)

    # variables to use
    all_variables = _result_df['_vname_'].dropna().unique().tolist()

    if variables is not None:
        all_variables = _global_utils.intersect_unsorted(variables, all_variables)
        if len(all_variables) == 0:
            # ', '.join keeps the listed variable names readable in the message
            raise TypeError("variables do not overlap with " + ', '.join(variables))

        _result_df = _result_df.loc[_result_df['_vname_'].isin(all_variables), :]

    #  calculate y axis range to allow for fixedrange True
    dl = _result_df['_yhat_'].to_numpy()
    # np.ptp(dl) instead of dl.ptp() - the ndarray method was removed in NumPy 2.0
    min_max_margin = np.ptp(dl) * 0.10
    min_max = [dl.min() - min_max_margin, dl.max() + min_max_margin]

    is_x_numeric = pd.api.types.is_numeric_dtype(_result_df['_x_'])
    n = len(all_variables)

    facet_nrow = int(np.ceil(n / facet_ncol))
    if vertical_spacing is None:
        vertical_spacing = 0.3 / facet_nrow
    plot_height = 78 + 71 + facet_nrow * (280 + 60)
    hovermode, render_mode = 'x unified', 'svg'

    color = '_label_'  # _groups_ doesnt make much sense for multiple AP objects
    m = len(_result_df[color].dropna().unique())

    if is_x_numeric:
        if geom == 'profiles' and self.raw_profiles is not None:
            render_mode = 'webgl'

        fig = px.line(_result_df,
                      x="_x_", y="_yhat_", color=color, facet_col="_vname_",
                      category_orders={"_vname_": list(all_variables)},
                      labels={'_yhat_': 'prediction', '_mp_': 'mean_prediction'},  # , color: 'group'},
                      hover_name=color,
                      hover_data={'_yhat_': ':.3f', '_mp_': ':.3f',
                                  color: False, '_vname_': False, '_x_': False},
                      facet_col_wrap=facet_ncol,
                      facet_row_spacing=vertical_spacing,
                      facet_col_spacing=horizontal_spacing,
                      template="none",
                      render_mode=render_mode,
                      color_discrete_sequence=_theme.get_default_colors(m, 'line')) \
                .update_traces(dict(line_width=size, opacity=alpha)) \
                .update_xaxes({'matches': None, 'showticklabels': True,
                               'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                               'ticks': "outside", 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True}) \
                .update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                               'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True,
                               'range': min_max})

        if geom == 'profiles' and self.raw_profiles is not None:
            # emphasize the aggregates, draw the raw profiles greyed-out behind them
            fig.update_traces(dict(line_width=2*size, opacity=1))
            fig_cp = self.raw_profiles.plot(variables=list(all_variables),
                                            facet_ncol=facet_ncol,
                                            show_observations=False, show=False) \
                .update_traces(dict(line_width=1, opacity=0.5, line_color='#ceced9'))

            for value in fig.data:
                fig_cp.add_trace(value)
            hovermode = False
            fig = fig_cp
    else:
        # bars are drawn as differences from the (possibly zero) mean prediction base
        _result_df = _result_df.assign(_diff_=lambda x: x['_yhat_'] - x['_mp_'])
        mp_format = ':.3f'
        if not center:
            min_max = [np.min([min_max[0], 0]), np.max([min_max[1], 0])]
            mp_format = False

        fig = px.bar(_result_df,
                     x="_x_", y="_diff_", color="_label_", facet_col="_vname_",
                     category_orders={"_vname_": list(all_variables)},
                     labels={'_yhat_': 'prediction', '_mp_': 'mean_prediction'},  # , color: 'group'},
                     hover_name=color,
                     base="_mp_",
                     hover_data={'_yhat_': ':.3f', '_mp_': mp_format, '_diff_': False,
                                 color: False, '_vname_': False, '_x_': False},
                     facet_col_wrap=facet_ncol,
                     facet_row_spacing=vertical_spacing,
                     facet_col_spacing=horizontal_spacing,
                     template="none",
                     color_discrete_sequence=_theme.get_default_colors(m, 'line'),  # bar was forgotten
                     barmode='group')  \
                .update_xaxes({'matches': None, 'showticklabels': True,
                               'type': 'category', 'gridwidth': 2, 'automargin': True,  # autorange="reversed"
                               'ticks': "outside", 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True}) \
                .update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True,
                               'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True,
                               'range': min_max})

        # add hline https://github.com/plotly/plotly.py/issues/2141
        for bar in fig.data:
            fig.add_shape(type='line', y0=bar.base[0], y1=bar.base[0], x0=-1, x1=len(bar.x),
                          xref=bar.xaxis, yref=bar.yaxis, layer='below',
                          line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'})

    fig = _theme.fig_update_line_plot(fig, title, y_title, plot_height, hovermode)

    if show:
        fig.show(config=_theme.get_default_config())
    else:
        return fig
class ModelPerformance (model_type, cutoff=0.5)

Calculate dataset level model performance measures

Parameters

model_type : {'regression', 'classification'}
Model task type that is used to choose the proper performance measures.
cutoff : float, optional
Cutoff for predictions in classification models. Needed for measures like recall, precision, acc, f1 (default is 0.5).

Attributes

result : pd.DataFrame
Main result attribute of an explanation.
residuals : pd.DataFrame
Residuals for data.
model_type : {'regression', 'classification'}
Model task type that is used to choose the proper performance measures.
cutoff : float
Cutoff for predictions in classification models.

Notes

https://pbiecek.github.io/ema/modelPerformance.html

Expand source code
class ModelPerformance:
    """Calculate dataset level model performance measures

    Parameters
    -----------
    model_type : {'regression', 'classification'}
        Model task type that is used to choose the proper performance measures.
    cutoff : float, optional
        Cutoff for predictions in classification models. Needed for measures like
        recall, precision, acc, f1 (default is 0.5).

    Attributes
    -----------
    result : pd.DataFrame
        Main result attribute of an explanation.
    residuals : pd.DataFrame
        Residuals for `data`.
    model_type : {'regression', 'classification'}
        Model task type that is used to choose the proper performance measures.
    cutoff : float
        Cutoff for predictions in classification models.

    Notes
    --------
    https://pbiecek.github.io/ema/modelPerformance.html
    """
    def __init__(self,
                 model_type,
                 cutoff=0.5):

        self.cutoff = cutoff
        self.model_type = model_type
        # both attributes are populated by fit()
        self.result = None
        self.residuals = None

    def _repr_html_(self):
        return self.result._repr_html_()

    def fit(self, explainer):
        """Calculate the result of explanation

        Fit method makes calculations in place and changes the attributes.

        Parameters
        -----------
        explainer : Explainer object
            Model wrapper created using the Explainer class.

        Returns
        -----------
        None

        Raises
        -----------
        ValueError
            If `model_type` is neither 'regression' nor 'classification'.
        """

        # reuse predictions cached on the explainer when available
        if explainer.y_hat is not None:
            y_pred = explainer.y_hat
        else:
            y_pred = explainer.predict(explainer.data)

        # reuse residuals cached on the explainer when available
        if explainer.residuals is not None:
            _residuals = explainer.residuals
        else:
            _residuals = explainer.residual(explainer.data, explainer.y)

        y_true = explainer.y

        if self.model_type == 'regression':
            mse_ = mse(y_pred, y_true)
            rmse_ = rmse(y_pred, y_true)
            r2_ = r2(y_pred, y_true)
            mae_ = mae(y_pred, y_true)
            mad_ = mad(y_pred, y_true)

            # one row of measures, indexed by the model label
            self.result = pd.DataFrame(
                {
                    'mse': [mse_],
                    'rmse': [rmse_],
                    'r2': [r2_],
                    'mae': [mae_],
                    'mad': [mad_]
                }, index=[explainer.label])
        elif self.model_type == 'classification':
            # confusion-matrix counts at the chosen probability cutoff
            tp = ((y_true == 1) * (y_pred >= self.cutoff)).sum()
            fp = ((y_true == 0) * (y_pred >= self.cutoff)).sum()
            tn = ((y_true == 0) * (y_pred < self.cutoff)).sum()
            fn = ((y_true == 1) * (y_pred < self.cutoff)).sum()

            recall_ = recall(tp, fp, tn, fn)
            precision_ = precision(tp, fp, tn, fn)
            f1_ = f1(tp, fp, tn, fn)
            accuracy_ = accuracy(tp, fp, tn, fn)
            auc_ = auc(y_pred, y_true)

            self.result = pd.DataFrame({
                'recall': [recall_],
                'precision': [precision_],
                'f1': [f1_],
                'accuracy': [accuracy_],
                'auc': [auc_]
            }, index=[explainer.label])
        else:
            raise ValueError("'model_type' must be 'regression' or 'classification'")

        # per-observation residuals, kept for the plot() method
        _residuals = pd.DataFrame({
            'y_hat': y_pred,
            'y': y_true,
            'residuals': _residuals,
            'label': explainer.label
        })

        self.residuals = _residuals

    def plot(self,
             objects=None,
             title="Reverse cumulative distribution of |residual|",
             show=False):
        """Plot the Model Performance explanation

        Parameters
        -----------
        objects : ModelPerformance object or array_like of ModelPerformance objects
            Additional objects to plot (default is None).
        title : str, optional
            Title of the plot
            (default is "Reverse cumulative distribution of |residual|").
        show : bool, optional
            True shows the plot; False returns the plotly Figure object that can be
            edited or saved using the `write_image()` method (default is False).

        Returns
        -----------
        None or plotly.graph_objects.Figure
            Return figure that can be edited or saved. See `show` parameter.
        """

        # are there any other objects to plot?
        if objects is None:
            _df_list = [self.residuals.copy()]
        elif isinstance(objects, self.__class__):  # allow for objects to be a single element
            _df_list = [self.residuals.copy(), objects.residuals.copy()]
        elif isinstance(objects, (list, tuple)):  # objects as tuple or array
            _df_list = [self.residuals.copy()]
            for ob in objects:
                _global_checks.global_check_object_class(ob, self.__class__)
                _df_list += [ob.residuals.copy()]
        else:
            _global_checks.global_raise_objects_class(objects, self.__class__)

        colors = _theme.get_default_colors(len(_df_list), 'line')
        fig = go.Figure()

        # one reverse-ECDF curve of |residual| per model
        for i, _df in enumerate(_df_list):
            _abs_residuals = np.abs(_df['residuals'])
            _unique_abs_residuals = np.unique(_abs_residuals)

            fig.add_scatter(
                x=_unique_abs_residuals,
                y=1 - ecdf(_abs_residuals)(_unique_abs_residuals),
                line_shape='hv',
                name=_df.iloc[0, _df.columns.get_loc('label')],
                marker=dict(color=colors[i])
            )

        fig.update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': 'outside',
                          'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'tickformat': ',.0%'})

        fig.update_xaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                          'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': '|residual|'})

        fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                          margin={'t': 78, 'b': 71, 'r': 30})

        if show:
            fig.show(config=_theme.get_default_config())
        else:
            return fig

Methods

def fit(self, explainer)

Calculate the result of explanation

Fit method makes calculations in place and changes the attributes.

Parameters

explainer : Explainer object
Model wrapper created using the Explainer class.

Returns

None
 
Expand source code
def fit(self, explainer):
    """Calculate the result of explanation

    Fit method makes calculations in place and changes the attributes.

    Parameters
    -----------
    explainer : Explainer object
        Model wrapper created using the Explainer class.

    Returns
    -----------
    None
    """

    # prefer predictions and residuals already cached on the explainer
    y_pred = explainer.y_hat if explainer.y_hat is not None else explainer.predict(explainer.data)

    if explainer.residuals is None:
        _residuals = explainer.residual(explainer.data, explainer.y)
    else:
        _residuals = explainer.residuals

    y_true = explainer.y

    if self.model_type == 'regression':
        # one row of regression error measures, indexed by the model label
        self.result = pd.DataFrame(
            {
                'mse': [mse(y_pred, y_true)],
                'rmse': [rmse(y_pred, y_true)],
                'r2': [r2(y_pred, y_true)],
                'mae': [mae(y_pred, y_true)],
                'mad': [mad(y_pred, y_true)]
            }, index=[explainer.label])
    elif self.model_type == 'classification':
        # confusion-matrix counts at the chosen probability cutoff
        above = y_pred >= self.cutoff
        below = y_pred < self.cutoff
        tp = ((y_true == 1) * above).sum()
        fp = ((y_true == 0) * above).sum()
        tn = ((y_true == 0) * below).sum()
        fn = ((y_true == 1) * below).sum()

        self.result = pd.DataFrame(
            {
                'recall': [recall(tp, fp, tn, fn)],
                'precision': [precision(tp, fp, tn, fn)],
                'f1': [f1(tp, fp, tn, fn)],
                'accuracy': [accuracy(tp, fp, tn, fn)],
                'auc': [auc(y_pred, y_true)]
            }, index=[explainer.label])
    else:
        raise ValueError("'model_type' must be 'regression' or 'classification'")

    # keep per-observation residuals for the plot method
    self.residuals = pd.DataFrame({
        'y_hat': y_pred,
        'y': y_true,
        'residuals': _residuals,
        'label': explainer.label
    })
def plot(self, objects=None, title='Reverse cumulative distribution of |residual|', show=False)

Plot the Model Performance explanation

Parameters

objects : ModelPerformance object or array_like of ModelPerformance objects
Additional objects to plot (default is None).
title : str, optional
Title of the plot (default is "Reverse cumulative distribution of |residual|").
show : bool, optional
True shows the plot; False returns the plotly Figure object that can be edited or saved using the write_image() method (default is False).

Returns

None or plotly.graph_objects.Figure
Return figure that can be edited or saved. See show parameter.
Expand source code
def plot(self,
         objects=None,
         title="Reverse cumulative distribution of |residual|",
         show=False):
    """Plot the Model Performance explanation

    Parameters
    -----------
    objects : ModelPerformance object or array_like of ModelPerformance objects
        Additional objects to plot (default is None).
    title : str, optional
        Title of the plot
        (default is "Reverse cumulative distribution of |residual|").
    show : bool, optional
        True shows the plot; False returns the plotly Figure object that can be
        edited or saved using the `write_image()` method (default is False).

    Returns
    -----------
    None or plotly.graph_objects.Figure
        Return figure that can be edited or saved. See `show` parameter.
    """

    # are there any other objects to plot?
    if objects is None:
        _df_list = [self.residuals.copy()]
    elif isinstance(objects, self.__class__):  # allow for objects to be a single element
        _df_list = [self.residuals.copy(), objects.residuals.copy()]
    elif isinstance(objects, (list, tuple)):  # objects as tuple or array
        _df_list = [self.residuals.copy()]
        for ob in objects:
            _global_checks.global_check_object_class(ob, self.__class__)
            _df_list += [ob.residuals.copy()]
    else:
        _global_checks.global_raise_objects_class(objects, self.__class__)

    colors = _theme.get_default_colors(len(_df_list), 'line')
    fig = go.Figure()

    # one reverse-ECDF curve of |residual| per model
    for i, _df in enumerate(_df_list):
        _abs_residuals = np.abs(_df['residuals'])
        _unique_abs_residuals = np.unique(_abs_residuals)

        fig.add_scatter(
            x=_unique_abs_residuals,
            y=1 - ecdf(_abs_residuals)(_unique_abs_residuals),
            line_shape='hv',
            name=_df.iloc[0, _df.columns.get_loc('label')],
            marker=dict(color=colors[i])
        )

    fig.update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': 'outside',
                      'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'tickformat': ',.0%'})

    fig.update_xaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                      'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': '|residual|'})

    fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                      margin={'t': 78, 'b': 71, 'r': 30})

    if show:
        fig.show(config=_theme.get_default_config())
    else:
        return fig
class ResidualDiagnostics (variables=None)

Calculate dataset level residuals diagnostics

Parameters

variables : str or array_like of str, optional
Variables for which the profiles will be calculated (default is None, which means all of the variables).

Attributes

result : pd.DataFrame
Main result attribute of an explanation.
variables : array_like of str or None
Variables for which the profiles will be calculated

Notes

https://pbiecek.github.io/ema/residualDiagnostic.html

Expand source code
class ResidualDiagnostics:
    """Calculate dataset level residuals diagnostics

    Parameters
    -----------
    variables : str or array_like of str, optional
        Variables for which the profiles will be calculated
        (default is None, which means all of the variables).

    Attributes
    -----------
    result : pd.DataFrame
        Main result attribute of an explanation.
    variables : array_like of str or None
        Variables for which the profiles will be calculated

    Notes
    --------
    https://pbiecek.github.io/ema/residualDiagnostic.html
    """
    def __init__(self,
                 variables=None):

        variables_ = check_variables(variables)

        # result is populated by fit()
        self.result = None
        self.variables = variables_

    def _repr_html_(self):
        return self.result._repr_html_()

    def fit(self, explainer):
        """Calculate the result of explanation

        Fit method makes calculations in place and changes the attributes.

        Parameters
        -----------
        explainer : Explainer object
            Model wrapper created using the Explainer class.

        Returns
        -----------
        None
        """
        result = explainer.data.copy()

        # if variables = NULL then all variables are added
        # otherwise only selected
        if self.variables is not None:
            result = result.loc[:, _global_utils.intersect_unsorted(self.variables, result.columns)]
        # is there target
        if explainer.y is not None:
            result = result.assign(y=explainer.y)
        # are there predictions - add y_hat to the Explainer for the future
        if explainer.y_hat is None:
            explainer.y_hat = explainer.predict(explainer.data)
        # are there residuals - add residuals to the Explainer for the future
        if explainer.residuals is None:
            explainer.residuals = explainer.residual(explainer.data, explainer.y)

        # ids are 1-based row identifiers used as hover labels in plot()
        self.result = result.assign(
            y_hat=explainer.y_hat,
            residuals=explainer.residuals,
            abs_residuals=np.abs(explainer.residuals),
            label=explainer.label,
            ids=np.arange(result.shape[0])+1
        )

    def plot(self,
             objects=None,
             variable="y_hat",
             yvariable="residuals",
             smooth=True,
             line_width=2,
             marker_size=3,
             title="Residual Diagnostics",
             N=50000,
             show=True):
        """Plot the Residual Diagnostics explanation

        Parameters
        ----------
        objects : ResidualDiagnostics object or array_like of ResidualDiagnostics objects
            Additional objects to plot (default is None).
        variable : str, optional
            Name of the variable from the `result` attribute to appear on the OX axis
            (default is 'y_hat').
        yvariable : str, optional
            Name of the variable from the `result` attribute to appear on the OY axis
            (default is 'residuals').
        smooth : bool, optional
            Add the smooth line (default is True).
        line_width : float, optional
            Width of lines in px (default is 2).
        marker_size : float, optional
            Size of points (default is 3).
        title : str, optional
            Title of the plot (default is "Residual Diagnostics").
        N : int, optional
            Number of observations that will be sampled from the `result` attribute before
            calculating the smooth line. This is for performance issues with large data.
            None means all `result` (default is 50 000).
        show : bool, optional
            True shows the plot; False returns the plotly Figure object that can be
            edited or saved using the `write_image()` method (default is True).

        Returns
        -----------
        None or plotly.graph_objects.Figure
            Return figure that can be edited or saved. See `show` parameter.
        """

        _global_checks.global_check_import('statsmodels', 'smoothing line')

        # are there any other objects to plot?
        if objects is None:
            _df_list = [self.result.copy()]
        elif isinstance(objects, self.__class__):  # allow for objects to be a single element
            _df_list = [self.result.copy(), objects.result.copy()]
        elif isinstance(objects, (list, tuple)):  # objects as tuple or array
            _df_list = [self.result.copy()]
            for ob in objects:
                _global_checks.global_check_object_class(ob, self.__class__)
                _df_list += [ob.result.copy()]
        else:
            _global_checks.global_raise_objects_class(objects, self.__class__)

        _df = pd.concat(_df_list)

        # subsample only when the (expensive) lowess smoothing will run
        if N and smooth and N < _df.shape[0]:
            _df = _df.sample(N, random_state=0, replace=False)

        fig = px.scatter(_df,
                         x=variable,
                         y=yvariable,
                         hover_name='ids',
                         color="label",
                         trendline="lowess" if smooth else None,
                         color_discrete_sequence=_theme.get_default_colors(len(_df_list), 'line')) \
                .update_traces(dict(marker_size=marker_size, line_width=line_width))

        # wait for https://github.com/plotly/plotly.py/pull/2558 to add hline to the plot

        fig.update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': 'outside',
                          'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': yvariable})

        fig.update_xaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                          'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': variable})

        fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                          margin={'t': 78, 'b': 71, 'r': 30})

        if show:
            fig.show(config=_theme.get_default_config())
        else:
            return fig

Methods

def fit(self, explainer)

Calculate the result of explanation

Fit method makes calculations in place and changes the attributes.

Parameters

explainer : Explainer object
Model wrapper created using the Explainer class.

Returns

None
 
Expand source code
def fit(self, explainer):
    """Calculate the result of explanation

    Fit method makes calculations in place and changes the attributes.

    Parameters
    -----------
    explainer : Explainer object
        Model wrapper created using the Explainer class.

    Returns
    -----------
    None
    """
    frame = explainer.data.copy()

    # keep only the requested variables (all of them when variables is None)
    if self.variables is not None:
        selected = _global_utils.intersect_unsorted(self.variables, frame.columns)
        frame = frame.loc[:, selected]
    # attach the target column when it is known
    if explainer.y is not None:
        frame = frame.assign(y=explainer.y)
    # lazily cache predictions on the explainer for future use
    if explainer.y_hat is None:
        explainer.y_hat = explainer.predict(explainer.data)
    # lazily cache residuals on the explainer for future use
    if explainer.residuals is None:
        explainer.residuals = explainer.residual(explainer.data, explainer.y)

    # ids are 1-based row identifiers used as hover labels in plot()
    self.result = frame.assign(
        y_hat=explainer.y_hat,
        residuals=explainer.residuals,
        abs_residuals=np.abs(explainer.residuals),
        label=explainer.label,
        ids=np.arange(frame.shape[0]) + 1
    )
def plot(self, objects=None, variable='y_hat', yvariable='residuals', smooth=True, line_width=2, marker_size=3, title='Residual Diagnostics', N=50000, show=True)

Plot the Residual Diagnostics explanation

Parameters

objects : ResidualDiagnostics object or array_like of ResidualDiagnostics objects
Additional objects to plot (default is None).
variable : str, optional
Name of the variable from the result attribute to appear on the OX axis (default is 'y_hat').
yvariable : str, optional
Name of the variable from the result attribute to appear on the OY axis (default is 'residuals').
smooth : bool, optional
Add the smooth line (default is True).
line_width : float, optional
Width of lines in px (default is 2).
marker_size : float, optional
Size of points (default is 3).
title : str, optional
Title of the plot (default is "Residual Diagnostics").
N : int, optional
Number of observations that will be sampled from the result attribute before calculating the smooth line. This is for performance issues with large data. None means all result (default is 50 000).
show : bool, optional
True shows the plot; False returns the plotly Figure object that can be edited or saved using the write_image() method (default is True).

Returns

None or plotly.graph_objects.Figure
Return figure that can be edited or saved. See show parameter.
Expand source code
def plot(self,
         objects=None,
         variable="y_hat",
         yvariable="residuals",
         smooth=True,
         line_width=2,
         marker_size=3,
         title="Residual Diagnostics",
         N=50000,
         show=True):
    """Plot the Residual Diagnostics explanation

    Parameters
    ----------
    objects : ResidualDiagnostics object or array_like of ResidualDiagnostics objects
        Additional objects to plot (default is None).
    variable : str, optional
        Name of the variable from the `result` attribute to appear on the OX axis
        (default is 'y_hat').
    yvariable : str, optional
        Name of the variable from the `result` attribute to appear on the OY axis
        (default is 'residuals').
    smooth : bool, optional
        Add the smooth line (default is True).
    line_width : float, optional
        Width of lines in px (default is 2).
    marker_size : float, optional
        Size of points (default is 3).
    title : str, optional
        Title of the plot (default is "Residual Diagnostics").
    N : int, optional
        Number of observations that will be sampled from the `result` attribute before
        calculating the smooth line. This is for performance issues with large data.
        None means all `result` (default is 50 000).
    show : bool, optional
        True shows the plot; False returns the plotly Figure object that can be
        edited or saved using the `write_image()` method (default is True).

    Returns
    -----------
    None or plotly.graph_objects.Figure
        Return figure that can be edited or saved. See `show` parameter.
    """

    _global_checks.global_check_import('statsmodels', 'smoothing line')

    # are there any other objects to plot?
    if objects is None:
        _df_list = [self.result.copy()]
    elif isinstance(objects, self.__class__):  # allow for objects to be a single element
        _df_list = [self.result.copy(), objects.result.copy()]
    elif isinstance(objects, (list, tuple)):  # objects as tuple or array
        _df_list = [self.result.copy()]
        for ob in objects:
            _global_checks.global_check_object_class(ob, self.__class__)
            _df_list += [ob.result.copy()]
    else:
        _global_checks.global_raise_objects_class(objects, self.__class__)

    _df = pd.concat(_df_list)

    # subsample only when the (expensive) lowess smoothing will run
    if N and smooth and N < _df.shape[0]:
        _df = _df.sample(N, random_state=0, replace=False)

    fig = px.scatter(_df,
                     x=variable,
                     y=yvariable,
                     hover_name='ids',
                     color="label",
                     trendline="lowess" if smooth else None,
                     color_discrete_sequence=_theme.get_default_colors(len(_df_list), 'line')) \
            .update_traces(dict(marker_size=marker_size, line_width=line_width))

    # wait for https://github.com/plotly/plotly.py/pull/2558 to add hline to the plot

    fig.update_yaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': 'outside',
                      'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': yvariable})

    fig.update_xaxes({'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                      'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True, 'title_text': variable})

    fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                      margin={'t': 78, 'b': 71, 'r': 30})

    if show:
        fig.show(config=_theme.get_default_config())
    else:
        return fig
class VariableImportance (loss_function='rmse', type='variable_importance', N=1000, B=10, variables=None, variable_groups=None, keep_raw_permutations=True, processes=1, random_state=None)

Calculate dataset level variable importance

Parameters

loss_function : {'rmse', '1-auc', 'mse', 'mae', 'mad'} or function, optional
If string, then such loss function will be used to assess variable importance (default is 'rmse' or 1-auc, depends on model_type attribute).
type : {'variable_importance', 'ratio', 'difference'}, optional
Type of transformation that will be applied to dropout loss.
N : int, optional
Number of observations that will be sampled from the data attribute before the calculation of variable importance. None means all data (default is 1000).
B : int, optional
Number of permutation rounds to perform on each variable (default is 10).
variables : array_like of str, optional
Variables for which the importance will be calculated (default is None, which means all of the variables). NOTE: Ignored if variable_groups is not None.
variable_groups : dict of lists, optional
Group the variables to calculate their joint variable importance e.g. {'X': ['x1', 'x2'], 'Y': ['y1', 'y2']} (default is None).
keep_raw_permutations : bool, optional
Save results for all permutation rounds (default is True).
processes : int, optional
Number of parallel processes to use in calculations. Iterated over B (default is 1, which means no parallel computation).
random_state : int, optional
Set seed for random number generator (default is random seed).

Attributes

result : pd.DataFrame
Main result attribute of an explanation.
loss_function : function
Loss function used to assess the variable importance.
type : {'variable_importance', 'ratio', 'difference'}
Type of transformation that will be applied to dropout loss.
N : int
Number of observations that will be sampled from the data attribute before the calculation of variable importance.
B : int
Number of permutation rounds to perform on each variable.
variables : array_like of str or None
Variables for which the importance will be calculated
variable_groups : dict of lists or None
Grouped variables to calculate their joint variable importance.
keep_raw_permutations : bool
Save the results for all permutation rounds.
permutation : pd.DataFrame or None
The results for all permutation rounds.
processes : int
Number of parallel processes to use in calculations. Iterated over B.
random_state : int or None
Set seed for random number generator.

Notes

https://pbiecek.github.io/ema/featureImportance.html

Expand source code
class VariableImportance:
    """Calculate dataset level variable importance

    Parameters
    -----------
    loss_function : {'rmse', '1-auc', 'mse', 'mae', 'mad'} or function, optional
        If string, then such loss function will be used to assess variable importance
        (default is 'rmse' or `1-auc`, depends on `model_type` attribute).
    type : {'variable_importance', 'ratio', 'difference'}, optional
        Type of transformation that will be applied to dropout loss.
    N : int, optional
        Number of observations that will be sampled from the `data` attribute before
        the calculation of variable importance. None means all `data` (default is 1000).
    B : int, optional
        Number of permutation rounds to perform on each variable (default is 10).
    variables : array_like of str, optional
        Variables for which the importance will be calculated
        (default is None, which means all of the variables).
        NOTE: Ignored if `variable_groups` is not None.
    variable_groups : dict of lists, optional
        Group the variables to calculate their joint variable importance
        e.g. {'X': ['x1', 'x2'], 'Y': ['y1', 'y2']} (default is None).
    keep_raw_permutations : bool, optional
        Save results for all permutation rounds (default is True).
    processes : int, optional
        Number of parallel processes to use in calculations. Iterated over `B`
        (default is 1, which means no parallel computation).
    random_state : int, optional
        Set seed for random number generator (default is random seed).

    Attributes
    -----------
    result : pd.DataFrame
        Main result attribute of an explanation.
    loss_function : function
        Loss function used to assess the variable importance.
    type : {'variable_importance', 'ratio', 'difference'}
        Type of transformation that will be applied to dropout loss.
    N : int
        Number of observations that will be sampled from the `data` attribute before
        the calculation of variable importance.
    B : int
        Number of permutation rounds to perform on each variable.
    variables : array_like of str or None
        Variables for which the importance will be calculated
    variable_groups : dict of lists or None
        Grouped variables to calculate their joint variable importance.
    keep_raw_permutations : bool
        Save the results for all permutation rounds.
    permutation : pd.DataFrame or None
        The results for all permutation rounds.
    processes : int
        Number of parallel processes to use in calculations. Iterated over `B`.
    random_state : int or None
        Set seed for random number generator.

    Notes
    --------
    https://pbiecek.github.io/ema/featureImportance.html
    """

    def __init__(self,
                 loss_function='rmse',
                 type='variable_importance',
                 N=1000,
                 B=10,
                 variables=None,
                 variable_groups=None,
                 keep_raw_permutations=True,
                 processes=1,
                 random_state=None):

        # validate/normalize user input before storing it on the object
        loss_function = check_loss_function(loss_function)
        B = check_B(B)
        type = check_type(type)
        random_state = check_random_state(random_state)
        keep_raw_permutations = check_keep_raw_permutations(keep_raw_permutations, B)

        processes_ = check_processes(processes)

        self.loss_function = loss_function
        self.type = type
        self.N = N
        self.B = B
        self.variables = variables
        self.variable_groups = variable_groups
        self.random_state = random_state
        self.keep_raw_permutations = keep_raw_permutations
        # `result` and `permutation` are populated by fit()
        self.result = None
        self.permutation = None
        self.processes = processes_

    def _repr_html_(self):
        """Render the `result` DataFrame as HTML (Jupyter display hook)."""
        return self.result._repr_html_()

    def fit(self, explainer):
        """Calculate the result of explanation

        Fit method makes calculations in place and changes the attributes.

        Parameters
        -----------
        explainer : Explainer object
            Model wrapper created using the Explainer class.

        Returns
        -----------
        None
        """

        # if `variable_groups` are not specified, then extract from `variables`
        self.variable_groups = check_variable_groups(self.variable_groups, explainer)
        self.variables = check_variables(self.variables, self.variable_groups, explainer)
        self.result, self.permutation = calculate_variable_importance(explainer,
                                                                      self.type,
                                                                      self.loss_function,
                                                                      self.variables,
                                                                      self.N,
                                                                      self.B,
                                                                      explainer.label,
                                                                      self.processes,
                                                                      self.keep_raw_permutations)

    def plot(self,
             objects=None,
             max_vars=10,
             digits=3,
             rounding_function=np.around,
             bar_width=16,
             split=("model", "variable"),
             title="Variable Importance",
             vertical_spacing=None,
             show=True):
        """Plot the Variable Importance explanation

        Parameters
        -----------
        objects : VariableImportance object or array_like of VariableImportance objects
            Additional objects to plot in subplots (default is None).
        max_vars : int, optional
            Maximum number of variables that will be presented for each subplot
            (default is 10).
        digits : int, optional
            Number of decimal places (np.around) to round contributions.
            See `rounding_function` parameter (default is 3).
        rounding_function : function, optional
            A function that will be used for rounding numbers (default is np.around).
        bar_width : float, optional
            Width of bars in px (default is 16).
        split : {'model', 'variable'}, optional
            Split the subplots by model or variable (default is 'model').
        title : str, optional
            Title of the plot (default is "Variable Importance").
        vertical_spacing : float <0, 1>, optional
            Ratio of vertical space between the plots (default is 0.2/number of rows).
        show : bool, optional
            True shows the plot; False returns the plotly Figure object that can be
            edited or saved using the `write_image()` method (default is True).

        Returns
        -----------
        None or plotly.graph_objects.Figure
            Return figure that can be edited or saved. See `show` parameter.
        """

        # the default is a tuple of allowed values - pick the first ('model')
        if isinstance(split, tuple):
            split = split[0]

        if split not in ("model", "variable"):
            raise TypeError("split should be 'model' or 'variable'")

        # are there any other objects to plot?
        if objects is None:
            n = 1
            _result_df = self.result.copy()
            if split == 'variable':  # force split by model if only one explainer
                split = 'model'
        elif isinstance(objects, self.__class__):  # allow for objects to be a single element
            n = 2
            _result_df = pd.concat([self.result.copy(), objects.result.copy()])
        elif isinstance(objects, (list, tuple)):  # objects as tuple or array
            n = len(objects) + 1
            _result_df = self.result.copy()
            for ob in objects:
                _global_checks.global_check_object_class(ob, self.__class__)
                _result_df = pd.concat([_result_df, ob.result.copy()])
        else:
            _global_checks.global_raise_objects_class(objects, self.__class__)

        # shared x-axis range across subplots, with a 15% margin (baseline rows excluded)
        dl = _result_df.loc[_result_df.variable != '_baseline_', 'dropout_loss'].to_numpy()
        # NOTE(review): `ndarray.ptp` was removed in NumPy 2.0 — prefer np.ptp(dl)
        min_max_margin = dl.ptp() * 0.15
        min_max = [dl.min() - min_max_margin, dl.max() + min_max_margin]

        # take out full model
        best_fits = _result_df[_result_df.variable == '_full_model_']

        # this produces dropout_loss_x and dropout_loss_y columns
        _result_df = _result_df.merge(best_fits[['label', 'dropout_loss']], how="left", on="label")
        _result_df = _result_df[['label', 'variable', 'dropout_loss_x', 'dropout_loss_y']].rename(
            columns={'dropout_loss_x': 'dropout_loss', 'dropout_loss_y': 'full_model'})

        # remove full_model and baseline
        _result_df = _result_df[(_result_df.variable != '_full_model_') & (_result_df.variable != '_baseline_')]

        # calculate order of bars or variable plots (split = 'variable')
        # get variable permutation
        perm = _result_df[['variable', 'dropout_loss']].groupby('variable').mean().reset_index(). \
            sort_values('dropout_loss', ascending=False).variable.values

        # base height = top + bottom margin (see update_layout below)
        plot_height = 78 + 71

        colors = _theme.get_default_colors(n, 'bar')

        if vertical_spacing is None:
            vertical_spacing = 0.2 / n

        model_names = _result_df['label'].unique().tolist()

        if len(model_names) != n:
            raise ValueError('label must be unique for each model')

        if split == "model":
            # init plot
            fig = make_subplots(rows=n, cols=1, shared_xaxes=True, vertical_spacing=vertical_spacing,
                                x_title='drop-out loss',
                                subplot_titles=model_names)

            # split df by model
            df_list = [v for k, v in _result_df.groupby('label', sort=False)]

            for i, df in enumerate(df_list):
                m = df.shape[0]
                if max_vars is not None and max_vars < m:
                    m = max_vars

                # take only m variables (for max_vars)
                # sort rows of df by variable permutation and drop unused variables
                df = df.sort_values('dropout_loss').tail(m) \
                    .set_index('variable').reindex(perm).dropna().reset_index()

                baseline = df.iloc[0, df.columns.get_loc('full_model')]

                df = df.assign(difference=lambda x: x['dropout_loss'] - baseline)

                # bar labels: signed, rounded dropout-loss difference
                lt = df.difference.apply(lambda val:
                                         "+"+str(rounding_function(np.abs(val), digits)) if val > 0
                                         else str(rounding_function(np.abs(val), digits)))
                tt = df.apply(lambda row: tooltip_text(row, rounding_function, digits), axis=1)
                df = df.assign(label_text=lt,
                               tooltip_text=tt)

                # dotted vertical line marking the full-model loss
                fig.add_shape(type='line', x0=baseline, x1=baseline, y0=-1, y1=m, yref="paper", xref="x",
                              line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'}, row=i + 1, col=1)

                fig.add_bar(
                    orientation="h",
                    y=df['variable'].tolist(),
                    x=df['difference'].tolist(),
                    textposition="outside",
                    text=df['label_text'].tolist(),
                    marker_color=colors[i],
                    base=baseline,
                    hovertext=df['tooltip_text'].tolist(),
                    hoverinfo='text',
                    hoverlabel={'bgcolor': 'rgba(0,0,0,0.8)'},
                    showlegend=False,
                    row=i + 1, col=1
                )

                fig.update_yaxes({'type': 'category', 'autorange': 'reversed', 'gridwidth': 2, 'automargin': True,
                                  'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True},
                                 row=i + 1, col=1)

                fig.update_xaxes(
                    {'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                     'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True},
                    row=i + 1, col=1)

                plot_height += m * bar_width + (m + 1) * bar_width / 4 + 30
        else:
            # split df by variable
            df_list = [v for k, v in _result_df.groupby('variable', sort=False)]

            # from here on, n is the number of variable subplots (was model count)
            n = len(df_list)
            if max_vars is not None and max_vars < n:
                n = max_vars

            # NOTE(review): dead code — vertical_spacing was already defaulted above,
            # so it is never None here and the per-variable-count default never applies
            if vertical_spacing is None:
                vertical_spacing = 0.2 / n

            # init plot
            variable_names = perm[0:n]
            fig = make_subplots(rows=n, cols=1, shared_xaxes=True, vertical_spacing=vertical_spacing, x_title='drop-out loss',
                                subplot_titles=variable_names)

            df_dict = {e.variable.array[0]: e for e in df_list}

            # take only n=max_vars elements from df_dict
            for i in range(n):
                df = df_dict[perm[i]]
                m = df.shape[0]

                baseline = 0

                df = df.assign(difference=lambda x: x['dropout_loss'] - x['full_model'])

                lt = df.difference.apply(lambda val:
                                         "+"+str(rounding_function(np.abs(val), digits)) if val > 0
                                         else str(rounding_function(np.abs(val), digits)))
                tt = df.apply(lambda row: tooltip_text(row, rounding_function, digits), axis=1)
                df = df.assign(label_text=lt,
                               tooltip_text=tt)

                fig.add_shape(type='line', x0=baseline, x1=baseline, y0=-1, y1=m, yref="paper", xref="x",
                              line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'}, row=i + 1, col=1)

                fig.add_bar(
                    orientation="h",
                    y=df['label'].tolist(),
                    x=df['dropout_loss'].tolist(),
                    # textposition="outside",
                    # text=df['label_text'].tolist(),
                    marker_color=colors,
                    base=baseline,
                    hovertext=df['tooltip_text'].tolist(),
                    hoverinfo='text',
                    hoverlabel={'bgcolor': 'rgba(0,0,0,0.8)'},
                    showlegend=False,
                    row=i + 1, col=1)

                fig.update_yaxes({'type': 'category', 'autorange': 'reversed', 'gridwidth': 2, 'automargin': True,
                                  'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True},
                                 row=i + 1, col=1)

                fig.update_xaxes(
                    {'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                     'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True},
                    row=i + 1, col=1)

                plot_height += m * bar_width + (m + 1) * bar_width / 4

        plot_height += (n - 1) * 70

        fig.update_xaxes({'range': min_max})
        fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                          height=plot_height, margin={'t': 78, 'b': 71, 'r': 30})

        if show:
            fig.show(config=_theme.get_default_config())
        else:
            return fig

Methods

def fit(self, explainer)

Calculate the result of explanation

Fit method makes calculations in place and changes the attributes.

Parameters

explainer : Explainer object
Model wrapper created using the Explainer class.

Returns

None
 
Expand source code
def fit(self, explainer):
    """Calculate the result of explanation

    Fit method makes calculations in place and changes the attributes.

    Parameters
    -----------
    explainer : Explainer object
        Model wrapper created using the Explainer class.

    Returns
    -----------
    None
    """

    # resolve groupings first (extracted from `variables` when not given),
    # then the concrete variable list, since the latter depends on the former
    groups = check_variable_groups(self.variable_groups, explainer)
    variables = check_variables(self.variables, groups, explainer)
    self.variable_groups = groups
    self.variables = variables

    result, permutation = calculate_variable_importance(
        explainer,
        self.type,
        self.loss_function,
        variables,
        self.N,
        self.B,
        explainer.label,
        self.processes,
        self.keep_raw_permutations,
    )
    self.result = result
    self.permutation = permutation
def plot(self, objects=None, max_vars=10, digits=3, rounding_function=<function around>, bar_width=16, split=('model', 'variable'), title='Variable Importance', vertical_spacing=None, show=True)

Plot the Variable Importance explanation

Parameters

objects : VariableImportance object or array_like of VariableImportance objects
Additional objects to plot in subplots (default is None).
max_vars : int, optional
Maximum number of variables that will be presented for each subplot (default is 10).
digits : int, optional
Number of decimal places (np.around) to round contributions. See rounding_function parameter (default is 3).
rounding_function : function, optional
A function that will be used for rounding numbers (default is np.around).
bar_width : float, optional
Width of bars in px (default is 16).
split : {'model', 'variable'}, optional
Split the subplots by model or variable (default is 'model').
title : str, optional
Title of the plot (default is "Variable Importance").
vertical_spacing : float <0, 1>, optional
Ratio of vertical space between the plots (default is 0.2/number of rows).
show : bool, optional
True shows the plot; False returns the plotly Figure object that can be edited or saved using the write_image() method (default is True).

Returns

None or plotly.graph_objects.Figure
Return figure that can be edited or saved. See show parameter.
Expand source code
def plot(self,
         objects=None,
         max_vars=10,
         digits=3,
         rounding_function=np.around,
         bar_width=16,
         split=("model", "variable"),
         title="Variable Importance",
         vertical_spacing=None,
         show=True):
    """Plot the Variable Importance explanation

    Parameters
    -----------
    objects : VariableImportance object or array_like of VariableImportance objects
        Additional objects to plot in subplots (default is None).
    max_vars : int, optional
        Maximum number of variables that will be presented for each subplot
        (default is 10).
    digits : int, optional
        Number of decimal places (np.around) to round contributions.
        See `rounding_function` parameter (default is 3).
    rounding_function : function, optional
        A function that will be used for rounding numbers (default is np.around).
    bar_width : float, optional
        Width of bars in px (default is 16).
    split : {'model', 'variable'}, optional
        Split the subplots by model or variable (default is 'model').
    title : str, optional
        Title of the plot (default is "Variable Importance").
    vertical_spacing : float <0, 1>, optional
        Ratio of vertical space between the plots (default is 0.2/number of rows).
    show : bool, optional
        True shows the plot; False returns the plotly Figure object that can be
        edited or saved using the `write_image()` method (default is True).

    Returns
    -----------
    None or plotly.graph_objects.Figure
        Return figure that can be edited or saved. See `show` parameter.
    """

    # the default is a tuple of allowed values - pick the first ('model')
    if isinstance(split, tuple):
        split = split[0]

    if split not in ("model", "variable"):
        raise TypeError("split should be 'model' or 'variable'")

    # are there any other objects to plot?
    if objects is None:
        n = 1
        _result_df = self.result.copy()
        if split == 'variable':  # force split by model if only one explainer
            split = 'model'
    elif isinstance(objects, self.__class__):  # allow for objects to be a single element
        n = 2
        _result_df = pd.concat([self.result.copy(), objects.result.copy()])
    elif isinstance(objects, (list, tuple)):  # objects as tuple or array
        n = len(objects) + 1
        _result_df = self.result.copy()
        for ob in objects:
            _global_checks.global_check_object_class(ob, self.__class__)
            _result_df = pd.concat([_result_df, ob.result.copy()])
    else:
        _global_checks.global_raise_objects_class(objects, self.__class__)

    # shared x-axis range across subplots, with a 15% margin (baseline rows excluded);
    # np.ptp instead of ndarray.ptp: the method was removed in NumPy 2.0
    dl = _result_df.loc[_result_df.variable != '_baseline_', 'dropout_loss'].to_numpy()
    min_max_margin = np.ptp(dl) * 0.15
    min_max = [dl.min() - min_max_margin, dl.max() + min_max_margin]

    # take out full model
    best_fits = _result_df[_result_df.variable == '_full_model_']

    # this produces dropout_loss_x and dropout_loss_y columns
    _result_df = _result_df.merge(best_fits[['label', 'dropout_loss']], how="left", on="label")
    _result_df = _result_df[['label', 'variable', 'dropout_loss_x', 'dropout_loss_y']].rename(
        columns={'dropout_loss_x': 'dropout_loss', 'dropout_loss_y': 'full_model'})

    # remove full_model and baseline
    _result_df = _result_df[(_result_df.variable != '_full_model_') & (_result_df.variable != '_baseline_')]

    # calculate order of bars or variable plots (split = 'variable')
    # get variable permutation
    perm = _result_df[['variable', 'dropout_loss']].groupby('variable').mean().reset_index(). \
        sort_values('dropout_loss', ascending=False).variable.values

    # base height = top + bottom margin (see update_layout below)
    plot_height = 78 + 71

    colors = _theme.get_default_colors(n, 'bar')

    # remember whether spacing was user-supplied: the 'variable' split changes
    # the number of subplot rows below and must then re-derive the default.
    # (Previously the re-derivation was dead code because vertical_spacing had
    # already been unconditionally defaulted here.)
    auto_vertical_spacing = vertical_spacing is None
    if auto_vertical_spacing:
        vertical_spacing = 0.2 / n

    model_names = _result_df['label'].unique().tolist()

    if len(model_names) != n:
        raise ValueError('label must be unique for each model')

    if split == "model":
        # init plot
        fig = make_subplots(rows=n, cols=1, shared_xaxes=True, vertical_spacing=vertical_spacing,
                            x_title='drop-out loss',
                            subplot_titles=model_names)

        # split df by model
        df_list = [v for k, v in _result_df.groupby('label', sort=False)]

        for i, df in enumerate(df_list):
            m = df.shape[0]
            if max_vars is not None and max_vars < m:
                m = max_vars

            # take only m variables (for max_vars)
            # sort rows of df by variable permutation and drop unused variables
            df = df.sort_values('dropout_loss').tail(m) \
                .set_index('variable').reindex(perm).dropna().reset_index()

            baseline = df.iloc[0, df.columns.get_loc('full_model')]

            df = df.assign(difference=lambda x: x['dropout_loss'] - baseline)

            # bar labels: signed, rounded dropout-loss difference
            lt = df.difference.apply(lambda val:
                                     "+"+str(rounding_function(np.abs(val), digits)) if val > 0
                                     else str(rounding_function(np.abs(val), digits)))
            tt = df.apply(lambda row: tooltip_text(row, rounding_function, digits), axis=1)
            df = df.assign(label_text=lt,
                           tooltip_text=tt)

            # dotted vertical line marking the full-model loss
            fig.add_shape(type='line', x0=baseline, x1=baseline, y0=-1, y1=m, yref="paper", xref="x",
                          line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'}, row=i + 1, col=1)

            fig.add_bar(
                orientation="h",
                y=df['variable'].tolist(),
                x=df['difference'].tolist(),
                textposition="outside",
                text=df['label_text'].tolist(),
                marker_color=colors[i],
                base=baseline,
                hovertext=df['tooltip_text'].tolist(),
                hoverinfo='text',
                hoverlabel={'bgcolor': 'rgba(0,0,0,0.8)'},
                showlegend=False,
                row=i + 1, col=1
            )

            fig.update_yaxes({'type': 'category', 'autorange': 'reversed', 'gridwidth': 2, 'automargin': True,
                              'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True},
                             row=i + 1, col=1)

            fig.update_xaxes(
                {'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True},
                row=i + 1, col=1)

            plot_height += m * bar_width + (m + 1) * bar_width / 4 + 30
    else:
        # split df by variable
        df_list = [v for k, v in _result_df.groupby('variable', sort=False)]

        # from here on, n is the number of variable subplots (was model count)
        n = len(df_list)
        if max_vars is not None and max_vars < n:
            n = max_vars

        if auto_vertical_spacing:
            # re-derive the default spacing for the new subplot row count
            vertical_spacing = 0.2 / n

        # init plot
        variable_names = perm[0:n]
        fig = make_subplots(rows=n, cols=1, shared_xaxes=True, vertical_spacing=vertical_spacing, x_title='drop-out loss',
                            subplot_titles=variable_names)

        df_dict = {e.variable.array[0]: e for e in df_list}

        # take only n=max_vars elements from df_dict
        for i in range(n):
            df = df_dict[perm[i]]
            m = df.shape[0]

            baseline = 0

            df = df.assign(difference=lambda x: x['dropout_loss'] - x['full_model'])

            lt = df.difference.apply(lambda val:
                                     "+"+str(rounding_function(np.abs(val), digits)) if val > 0
                                     else str(rounding_function(np.abs(val), digits)))
            tt = df.apply(lambda row: tooltip_text(row, rounding_function, digits), axis=1)
            df = df.assign(label_text=lt,
                           tooltip_text=tt)

            fig.add_shape(type='line', x0=baseline, x1=baseline, y0=-1, y1=m, yref="paper", xref="x",
                          line={'color': "#371ea3", 'width': 1.5, 'dash': 'dot'}, row=i + 1, col=1)

            fig.add_bar(
                orientation="h",
                y=df['label'].tolist(),
                x=df['dropout_loss'].tolist(),
                # textposition="outside",
                # text=df['label_text'].tolist(),
                marker_color=colors,
                base=baseline,
                hovertext=df['tooltip_text'].tolist(),
                hoverinfo='text',
                hoverlabel={'bgcolor': 'rgba(0,0,0,0.8)'},
                showlegend=False,
                row=i + 1, col=1)

            fig.update_yaxes({'type': 'category', 'autorange': 'reversed', 'gridwidth': 2, 'automargin': True,
                              'ticks': 'outside', 'tickcolor': 'white', 'ticklen': 10, 'fixedrange': True},
                             row=i + 1, col=1)

            fig.update_xaxes(
                {'type': 'linear', 'gridwidth': 2, 'zeroline': False, 'automargin': True, 'ticks': "outside",
                 'tickcolor': 'white', 'ticklen': 3, 'fixedrange': True},
                row=i + 1, col=1)

            plot_height += m * bar_width + (m + 1) * bar_width / 4

    plot_height += (n - 1) * 70

    fig.update_xaxes({'range': min_max})
    fig.update_layout(title_text=title, title_x=0.15, font={'color': "#371ea3"}, template="none",
                      height=plot_height, margin={'t': 78, 'b': 71, 'r': 30})

    if show:
        fig.show(config=_theme.get_default_config())
    else:
        return fig