Module `dalex.arena.plots`

Expand source code Browse git

from ._break_down_container import BreakDownContainer
from ._shapley_values_container import ShapleyValuesContainer
from ._feature_importance_container import FeatureImportanceContainer
from ._partial_dependence_container import PartialDependenceContainer
from ._accumulated_dependence_container import AccumulatedDependenceContainer
from ._ceteris_paribus_container import CeterisParibusContainer
from ._metrics_container import MetricsContainer
from ._roc_container import ROCContainer
from ._fairness_check_container import FairnessCheckContainer
from ._shapley_values_dependence_container import ShapleyValuesDependenceContainer
from ._shapley_values_variable_importance_container import ShapleyValuesVariableImportanceContainer

# Public API of this package: the plot container classes imported above.
# These are the names exported by `from dalex.arena.plots import *`.
__all__ = [
    'ShapleyValuesContainer',
    'FeatureImportanceContainer',
    'PartialDependenceContainer',
    'AccumulatedDependenceContainer',
    'CeterisParibusContainer',
    'BreakDownContainer',
    'MetricsContainer',
    'ROCContainer',
    'FairnessCheckContainer',
    'ShapleyValuesDependenceContainer',
    'ShapleyValuesVariableImportanceContainer'
]

Classes

class AccumulatedDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class AccumulatedDependenceContainer(PlotContainer):
    """Dataset-level chart with the accumulated-dependence profile of one variable.

    `_fit` asks the model's explainer for a profile of type 'accumulated'
    and stores its `_x_` and `_yhat_` columns in `self.data`.
    """
    info = {
        'name': "Accumulated Dependence",
        'plotType': 'AccumulatedDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'AccumulatedDependence'
    options = {
        'grid_type': { 'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': { 'default': 101, 'desc': 'Maximum number of points for profile' },
        'N': { 'default': 500, 'desc': 'Number of observations to use. None for all.' }
    }

    def _fit(self, model, variable):
        """Compute the accumulated profile and fill `self.data`.

        Parameters
        ----------
        model : object exposing `variables` and `explainer`
        variable : object whose `variable` attribute is the column name

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')
        explainer = model.explainer
        # The dtype of the explainer's column selects both the rendering
        # component and the variable type passed to the profile computation.
        numeric = is_numeric_dtype(explainer.data[column])
        self.plot_component = 'LinearDependence' if numeric else 'CategoricalDependence'
        profile = explainer.model_profile(
            type='accumulated',
            variables=column,
            variable_type='numerical' if numeric else 'categorical',
            center=False,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            N=self.get_option('N'),
            verbose=False
        )
        result = profile.result
        self.data = {
            'x': result['_x_'].tolist(),
            'y': result['_yhat_'].tolist(),
            'variable': column,
            'base': 0
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class BreakDownContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class BreakDownContainer(PlotContainer):
    """Observation-level chart with break-down contributions for one prediction.

    `_fit` runs `predict_parts(..., type='break_down')` on the model's
    explainer and splits the result into the intercept (first row), the
    per-variable contributions (middle rows) and the prediction (last row).
    """
    info = {
        'name': "Break Down",
        'plotType': "Breakdown",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'BreakDown'
    options = {
    }

    def _fit(self, model, observation):
        """Compute break-down contributions and fill `self.data`.

        Parameters
        ----------
        model : object exposing `explainer`
        observation : object exposing `get_row_for_model(model)`

        When the observation yields no row for the model, a message is set
        and `self.data` is left untouched.
        """
        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return
        bd = model.explainer.predict_parts(row, type='break_down').result
        # All rows are addressed by position so the three pieces stay
        # consistent with each other regardless of the frame's index labels
        # (`bd.contribution[0]` was a label lookup, unlike the slices).
        inner = bd.iloc[1:-1]
        self.data = {
            'variables': inner.variable_name.tolist(),
            'variables_value': inner.variable_value.tolist(),
            'contribution': inner.contribution.tolist(),
            'intercept': bd.contribution.iloc[0],
            'prediction': bd.cumulative.iloc[-1]
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class CeterisParibusContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class CeterisParibusContainer(PlotContainer):
    """Observation-level chart with a ceteris-paribus profile of one variable.

    `_fit` calls `predict_profile` on the model's explainer for a single
    observation row and stores the profile points in `self.data`.
    """
    info = {
        'name': 'Ceteris Paribus',
        'plotType': 'CeterisParibus',
        'plotCategory': 'Observation Level',
        'requiredParams': ['model', 'variable', 'observation']
    }
    options_category = 'CeterisParibus'
    options = {
        'grid_points': { 'default': 101, 'desc': 'Maximum number of points for profile' },
        'grid_type': { 'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'}
    }

    def _fit(self, model, variable, observation):
        """Compute the profile and fill `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        When the observation yields no row for the model, a message is set
        and the method returns without computing anything.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')
        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return
        profile = model.explainer.predict_profile(
            row,
            variables=column,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            variable_splits_with_obs=False,
            verbose=False
        )
        # The rendering component depends on the dtype of the observed value.
        numeric = is_numeric_dtype(row[column])
        self.plot_component = 'NumericalCeterisParibus' if numeric else 'CategoricalCeterisParibus'
        predictions = profile.result['_yhat_']
        self.data = {
            'x': profile.result[column].tolist(),
            'y': predictions.tolist(),
            'variable': column,
            'min': predictions.min(),
            'max': predictions.max(),
            'observation': profile.new_observation.iloc[0].to_dict()
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class FairnessCheckContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class FairnessCheckContainer(PlotContainer):
    """Dataset-level chart with per-subgroup fairness metrics over several cutoffs.

    For every cutoff in the `cutoffs` option, `_fit` builds a subgroup
    confusion matrix for the selected (protected) variable and stores the
    resulting metric scores as `{subgroup: {cutoff: {metric: score}}}`.
    """
    info = {
        'name': 'Fairness',
        'plotType': 'Fairness',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'Fairness'
    options = {
        'cutoffs': { 'default': [x / 100 for x in range(5, 100, 5)], 'desc': 'List of tested cutoff levels' },
    }

    def _fit(self, model, variable):
        """Compute fairness metrics and fill `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        A message is set (and nothing is computed) when the model is not a
        classification model or the selected column is not of object dtype.
        """
        # Imported locally so the block does not rely on a module-level alias.
        import pandas as pd

        if variable.variable not in model.variables:
            raise Exception('Variable is not a column of explainer')
        exp = model.explainer
        if exp.model_type != 'classification':
            self.set_message('Fairness plot is only available for classificators')
            return
        protected = exp.data[variable.variable]
        if not is_object_dtype(protected):
            self.set_message('Select categorical variable to check fairness')
            return
        # Predictions are only needed once the cheap validity checks passed.
        y_hat = exp.predict(exp.data) if exp.y_hat is None else exp.y_hat

        frames = []
        for cutoff in self.get_option('cutoffs'):
            cutoff_dict = checks.check_cutoff(protected, cutoff, False)
            sub_confusion_matrix = utils.SubgroupConfusionMatrix(exp.y, y_hat, protected, cutoff_dict)
            sub_confusion_matrix_metrics = utils.SubgroupConfusionMatrixMetrics(sub_confusion_matrix)
            df = sub_confusion_matrix_metrics.to_vertical_DataFrame()
            df['cutoff'] = cutoff
            frames.append(df)

        output = {}
        # `DataFrame.append` no longer exists in pandas >= 2.0; concatenate
        # once instead. An empty cutoff list yields an empty result rather
        # than failing.
        if frames:
            output_df = pd.concat(frames)
            for (subgroup, x) in output_df.set_index('metric').groupby('subgroup'):
                output[subgroup] = {}
                for (cutoff, y) in x.groupby('cutoff'):
                    output[subgroup][cutoff] = rm_nan(y['score'].to_dict())

        self.data = { 'subgroups': output }

    @staticmethod
    def test_arena(arena):
        """Return True if the arena holds at least one classification model.

        Declared static because it takes the arena, not an instance, as its
        only argument; calling it on the class keeps working unchanged.

        Raises
        ------
        Exception
            If `arena` is not an `Arena` from module `dalex.arena.object`.
        """
        if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
            raise Exception('Invalid Arena argument')
        return any(model.explainer.model_type == 'classification' for model in arena.get_params('model'))

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

Methods

def test_arena(arena)

Tests if plot can be created for at least one combination of params

This method searches for params that can produce a valid chart. A chart that only displays an error message is not counted as valid. One example of usage is charts for classification models: such charts should override this method and check whether there is at least one classification model in the arena.

Parameters

arena : Arena: Object of class dalex.Arena

Returns

bool

Expand source code Browse git

def test_arena(arena):
    if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
        raise Exception('Invalid Arena argument')
    return next((True for model in arena.get_params('model') if model.explainer.model_type == 'classification'), False)

class FeatureImportanceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class FeatureImportanceContainer(PlotContainer):
    """Dataset-level chart with permutation-based variable importance.

    `_fit` aggregates the explainer's per-round permutation results into
    mean / min / max / quartile statistics for every variable.
    """
    info = {
        'name': "Variable Importance",
        'plotType': "FeatureImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'VariableImportance'
    options = {
        'N': { 'default': None, 'desc': 'Number of observations to use. None for all.' },
        'B': { 'default': 10, 'desc': 'Number of permutation rounds to perform each variable' }
    }

    def _fit(self, model):
        """Compute importance statistics and fill `self.data`."""
        # The helper names matter: `agg` labels the resulting rows after the
        # functions, and those rows are read back below as 'q1' and 'q3'.
        def q1(column):
            return column.quantile(0.25)

        def q3(column):
            return column.quantile(0.75)

        importance = model.explainer.model_parts(
            N=self.get_option('N'),
            B=self.get_option('B')
        )
        summary = importance.permutation.agg(['mean', 'max', 'min', q1, q3])
        # Mean value of the '_full_model_' column serves as the chart base.
        base_loss = summary.loc['mean', '_full_model_']
        ranked = summary.drop(['_baseline_', '_full_model_'], axis=1)
        # Columns (variables) are ordered by decreasing mean.
        ranked = ranked.sort_values(by='mean', axis=1, ascending=False)
        self.data = {
            'base': base_loss,
            'variables': ranked.columns.tolist(),
            'dropout_loss': ranked.loc['mean'].tolist(),
            'min': ranked.loc['min'].tolist(),
            'max': ranked.loc['max'].tolist(),
            'q1': ranked.loc['q1'].tolist(),
            'q3': ranked.loc['q3'].tolist()
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class MetricsContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class MetricsContainer(PlotContainer):
    """Model-performance chart exposing the explainer's performance measures."""
    info = {
        'name': "Metrics",
        'plotType': "Metrics",
        'plotCategory': "Model Performance",
        'requiredParams': ["model"]
    }
    options_category = 'Metrics'
    options = {}

    def _fit(self, model):
        """Store the first row of the model-performance result as a dict."""
        performance = model.explainer.model_performance()
        first_row = performance.result.iloc[0]
        self.data = dict(first_row)

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class PartialDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class PartialDependenceContainer(PlotContainer):
    """Dataset-level chart with the partial-dependence profile of one variable.

    `_fit` asks the model's explainer for a profile of type 'partial' and
    stores its `_x_` and `_yhat_` columns plus the mean prediction.
    """
    info = {
        'name': "Partial Dependence",
        'plotType': 'PartialDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'PartialDependence'
    options = {
        'grid_type': { 'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': { 'default': 101, 'desc': 'Maximum number of points for profile' },
        'N': { 'default': 500, 'desc': 'Number of observations to use. None for all.' }
    }

    def _fit(self, model, variable):
        """Compute the partial profile and fill `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')
        explainer = model.explainer
        profile_kwargs = {'type': 'partial', 'variables': column, 'verbose': False}
        if is_numeric_dtype(explainer.data[column]):
            self.plot_component = 'LinearDependence'
            profile_kwargs['variable_type'] = 'numerical'
            # Grid settings are only passed for numerical variables.
            profile_kwargs['grid_points'] = self.get_option('grid_points')
            profile_kwargs['variable_splits_type'] = self.get_option('grid_type')
        else:
            self.plot_component = 'CategoricalDependence'
            profile_kwargs['variable_type'] = 'categorical'
        profile_kwargs['N'] = self.get_option('N')
        profile = explainer.model_profile(**profile_kwargs)
        result = profile.result
        self.data = {
            'x': result['_x_'].tolist(),
            'y': result['_yhat_'].tolist(),
            'variable': column,
            'base': profile.mean_prediction
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class ROCContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class ROCContainer(PlotContainer):
    """Model-performance chart with the ROC curve of a classification model.

    `_fit` derives true-positive and true-negative rates for every distinct
    predicted score (used as cutoff, in decreasing order) and stores them in
    `self.data`.
    """
    info = {
        'name': 'Receiver Operating Characterstic',
        'plotType': 'ROC',
        'plotCategory': 'Model Performance',
        'requiredParams': ['model']
    }
    options_category = 'ROC'
    options = {
        'grid_points': { 'default': 101, 'desc': 'Maximum number of points for ROC curve' },
    }

    def _fit(self, model):
        """Compute the ROC curve points and fill `self.data`.

        A message is set (and nothing is computed) when the model is not a
        classification model or when the target holds a single class only.
        """
        exp = model.explainer
        if exp.model_type != 'classification':
            self.set_message('ROC plot is only available for classificators')
            return

        y_hat = exp.predict(exp.data) if exp.y_hat is None else exp.y_hat
        df = pd.DataFrame({ 'y': exp.y.astype(bool), 'y_hat': y_hat })

        # Counts of positive and negative cases.
        P_n = df.y.sum()
        N_n = df.shape[0] - P_n
        if P_n == 0 or N_n == 0:
            self.set_message('Provided dataset contains only positive or only negative cases.', 'error')
            return

        # Positives / negatives per distinct score, highest score first, so
        # the cumulative sums below count cases at or above each cutoff.
        tpr_temp = df.groupby('y_hat').sum().reset_index().sort_values('y_hat', ascending=False)
        fpr_temp = df.assign(y=1-df.y).groupby('y_hat').sum().reset_index().sort_values('y_hat', ascending=False)

        _df = pd.DataFrame({
            'TPR': tpr_temp.y.cumsum() / P_n,
            'TNR': 1 - (fpr_temp.y.cumsum() / N_n),
            'cutoff': np.sort(df['y_hat'].unique())[::-1]
        })

        grid_points = self.get_option('grid_points')
        n_rows = _df.shape[0]
        if n_rows > grid_points:
            # Thin the curve deterministically: evenly spaced rows that
            # always include the first and the last one. Unseeded random
            # sampling produced a different chart on every fit and could
            # drop the end points of the curve.
            keep = np.linspace(0, n_rows - 1, grid_points).round().astype(int)
            _df = _df.iloc[keep].sort_values('cutoff', ascending=False)

        self.data = {
            'cutoff': _df['cutoff'].tolist(),
            # The rate lists are prefixed with the (specificity 1,
            # sensitivity 0) starting point, so they hold one element more
            # than 'cutoff'.
            'specifity': [1] + _df['TNR'].tolist(),
            'sensivity': [0] + _df['TPR'].tolist()
        }

    @staticmethod
    def test_arena(arena):
        """Return True if the arena holds at least one classification model.

        Declared static because it takes the arena, not an instance, as its
        only argument; calling it on the class keeps working unchanged.

        Raises
        ------
        Exception
            If `arena` is not an `Arena` from module `dalex.arena.object`.
        """
        if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
            raise Exception('Invalid Arena argument')
        return any(model.explainer.model_type == 'classification' for model in arena.get_params('model'))

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

Methods

def test_arena(arena)

Tests if plot can be created for at least one combination of params

Parameters

arena : Arena: Object of class dalex.Arena

Returns

bool

Expand source code Browse git

def test_arena(arena):
    if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
        raise Exception('Invalid Arena argument')
    return next((True for model in arena.get_params('model') if model.explainer.model_type == 'classification'), False)

class ShapleyValuesContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class ShapleyValuesContainer(PlotContainer):
    """Observation-level chart with Shapley value statistics per variable.

    The values come from the arena's 'ShapleyValues' resource; `_fit`
    reads whatever result is available at call time and records the
    resource's progress alongside it.
    """
    info = {
        'name': "Shapley Values",
        'plotType': "SHAPValues",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'ShapleyValues'
    options = {}

    def _fit(self, model, observation):
        """Fetch Shapley statistics from the resource and fill `self.data`.

        Any exception raised while reading the resource is turned into a
        chart message and the method returns early.
        """
        resource_params = {'model': model, 'observation': observation}
        resource = self.arena.resource_manager.get_resource(
            'ShapleyValues', resource_params, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No statistics yet: wait for one update and read again.
            if data.get('stats') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return
        self.is_done, self.progress = is_done, progress
        # Variables are ordered by the 'abs' column, largest first.
        ranked = data.get('stats').sort_values('abs', ascending=False).reset_index()
        self.data = {
            'variables': ranked.variable_name.tolist(),
            'variables_value': ranked.variable_value.tolist(),
            'mean': ranked['mean'].tolist(),
            'min': ranked['min'].tolist(),
            'max': ranked['max'].tolist(),
            'q1': ranked.q1.tolist(),
            'q3': ranked.q3.tolist(),
            # NOTE(review): the intercept is read from `resource.data`, not
            # from the `data` returned by get_result() - confirm intended.
            'intercept': resource.data.get('intercept').astype(float)
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class ShapleyValuesDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class ShapleyValuesDependenceContainer(PlotContainer):
    """Dataset-level chart relating a variable's values to its Shapley contributions.

    The contributions come from the arena's 'DatasetShapleyValues'
    resource; `_fit` keeps the rows of the selected variable and
    aggregates them per (variable_value, row) pair.
    """
    info = {
        'name': "Shapley Values Dependence",
        'plotType': "ShapleyValuesDependence",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model", "variable"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model, variable):
        """Aggregate contributions of one variable and fill `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        Any exception raised while reading the resource is turned into a
        chart message and the method returns early.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')
        # Linear rendering needs a numeric column and no levels on the variable.
        if is_numeric_dtype(model.explainer.data[column]) and variable.levels is None:
            self.plot_component = 'LinearShapleyDependence'
        else:
            self.plot_component = 'CategoricalShapleyDependence'
        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues', {'model': model}, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No result yet: wait for one update and read again.
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return
        self.is_done, self.progress = is_done, progress
        contributions = data.get('result')
        selected = contributions[contributions['variable_name'] == column]
        grouped = selected.groupby(['variable_value', 'row'])
        stats = grouped.agg({'contribution': ['mean', 'min', 'max']}).contribution
        is_linear = self.plot_component == 'LinearShapleyDependence'
        if is_linear:
            stats = stats.sort_index()
        # x values are floats for the linear chart and strings otherwise.
        convert = float if is_linear else str
        self.data = {
            # Index entries are (variable_value, row) pairs; keep the value.
            'x': [convert(key[0]) for key in stats.index],
            'mean': stats['mean'].values.tolist(),
            'min': stats['min'].values.tolist(),
            'max': stats['max'].values.tolist(),
            'variable': column
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

class ShapleyValuesVariableImportanceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena: Instance of Arena.
cache : bool: If this object is allowed to use cache when requesting resources

Attributes

arena : Arena: Instance of dalex.Arena
name : str: Display name of chart
plot_type : str: Identifier of chart type
plot_component : str: Identifier of Arena's component that should render this chart
plot_category : str: Name of category of chart
params : dict: Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict: Results of computations are placed there
progress : float: If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value will not be updated afterwards.
use_cache : bool: If this object is allowed to use cache when requesting resources

Expand source code Browse git

class ShapleyValuesVariableImportanceContainer(PlotContainer):
    """Dataset-level chart ranking variables by absolute Shapley contribution.

    The contributions come from the arena's 'DatasetShapleyValues'
    resource; `_fit` turns them into box-plot statistics per variable.
    """
    info = {
        'name': "Shapley Variable Importance",
        'plotType': "ShapleyValuesVariableImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model):
        """Compute box-plot statistics of contributions and fill `self.data`.

        Any exception raised while reading the resource is turned into a
        chart message and the method returns early.
        """
        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues', {'model': model}, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No result yet: wait for one update and read again.
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return
        self.is_done, self.progress = is_done, progress
        contributions = data.get('result')

        # Mean contribution per (variable, row), then its absolute value.
        per_row = contributions.groupby(['variable_name', 'row']) \
            .agg({'contribution': ['mean']}) \
            .contribution
        per_row['abs_contribution'] = per_row['mean'].abs()
        per_row = per_row.reset_index()

        # q1, q3, lf and uf are helpers defined outside this class; the
        # aggregate's columns are read back below under those same names.
        aggregations = ['mean', 'max', 'min', 'median', q1, q3, lf, uf]
        box_stats = per_row.groupby(['variable_name']) \
            .agg({'abs_contribution': aggregations}) \
            .abs_contribution.sort_values(by='mean', ascending=False)

        # Look up each row's variable-specific fence, then drop the
        # variable-name index so the comparison aligns on per_row's index.
        names = per_row['variable_name']
        upper_fence = box_stats.uf.loc[names].reset_index(drop=True)
        lower_fence = box_stats.lf.loc[names].reset_index(drop=True)
        above = per_row.loc[per_row['abs_contribution'] > upper_fence]
        below = per_row.loc[per_row['abs_contribution'] < lower_fence]
        outliers = pd.concat([above, below]) \
            .groupby('variable_name')['abs_contribution'] \
            .apply(list) \
            .to_dict()

        self.data = {
            'variables': list(box_stats.index),
            'mean': box_stats['mean'].values.tolist(),
            'median': box_stats['median'].values.tolist(),
            'min': box_stats['min'].values.tolist(),
            'max': box_stats['max'].values.tolist(),
            'q1': box_stats['q1'].values.tolist(),
            'q3': box_stats['q3'].values.tolist(),
            'lf': box_stats['lf'].values.tolist(),
            'uf': box_stats['uf'].values.tolist(),
            'outliers': outliers,
            'intercept': data.get('intercept')
        }

Ancestors

dalex.arena._plot_container.PlotContainer
dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category