Module dalex.arena.plots

Expand source code Browse git
from ._break_down_container import BreakDownContainer
from ._shapley_values_container import ShapleyValuesContainer
from ._feature_importance_container import FeatureImportanceContainer
from ._partial_dependence_container import PartialDependenceContainer
from ._accumulated_dependence_container import AccumulatedDependenceContainer
from ._ceteris_paribus_container import CeterisParibusContainer
from ._metrics_container import MetricsContainer
from ._roc_container import ROCContainer
from ._fairness_check_container import FairnessCheckContainer
from ._shapley_values_dependence_container import ShapleyValuesDependenceContainer
from ._shapley_values_variable_importance_container import ShapleyValuesVariableImportanceContainer

# Public API of this package: the plot container classes imported above.
__all__ = [
    'ShapleyValuesContainer',
    'FeatureImportanceContainer',
    'PartialDependenceContainer',
    'AccumulatedDependenceContainer',
    'CeterisParibusContainer',
    'BreakDownContainer',
    'MetricsContainer',
    'ROCContainer',
    'FairnessCheckContainer',
    'ShapleyValuesDependenceContainer',
    'ShapleyValuesVariableImportanceContainer'
]

Classes

class AccumulatedDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class AccumulatedDependenceContainer(PlotContainer):
    """Dataset-level chart showing the accumulated dependence profile of a variable."""

    info = {
        'name': "Accumulated Dependence",
        'plotType': 'AccumulatedDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'AccumulatedDependence'
    options = {
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'N': {'default': 500, 'desc': 'Number of observations to use. None for all.'}
    }

    def _fit(self, model, variable):
        """Compute the accumulated profile of `variable` for `model` and store it in `self.data`.

        Raises
        ------
        Exception
            If the variable is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        explainer = model.explainer
        # The column dtype decides both the rendering component and the profile type.
        numeric = is_numeric_dtype(explainer.data[column])
        self.plot_component = 'LinearDependence' if numeric else 'CategoricalDependence'

        profile = explainer.model_profile(
            type='accumulated',
            variables=column,
            variable_type='numerical' if numeric else 'categorical',
            center=False,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            N=self.get_option('N'),
            verbose=False
        )
        curve = profile.result
        self.data = {
            'x': curve['_x_'].tolist(),
            'y': curve['_yhat_'].tolist(),
            'variable': column,
            'base': 0
        }

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class BreakDownContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class BreakDownContainer(PlotContainer):
    """Observation-level chart built from the break down explanation of a prediction."""

    info = {
        'name': "Break Down",
        'plotType': "Breakdown",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'BreakDown'
    options = {}

    def _fit(self, model, observation):
        """Explain `observation` with `model` and store the contributions in `self.data`.

        When the observation cannot be turned into a row for this model, only a
        message is set and no data is produced.
        """
        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return

        breakdown = model.explainer.predict_parts(row, type='break_down').result
        # The first row is reported as the intercept and the last row only
        # supplies the final prediction, so per-variable lists skip both.
        inner = breakdown[1:-1]
        self.data = {
            'variables': inner.variable_name.tolist(),
            'variables_value': inner.variable_value.tolist(),
            'contribution': inner.contribution.tolist(),
            'intercept': breakdown.contribution[0],
            'prediction': breakdown.cumulative.iloc[-1]
        }

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class CeterisParibusContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class CeterisParibusContainer(PlotContainer):
    """Observation-level chart with the ceteris paribus profile of one variable."""

    info = {
        'name': 'Ceteris Paribus',
        'plotType': 'CeterisParibus',
        'plotCategory': 'Observation Level',
        'requiredParams': ['model', 'variable', 'observation']
    }
    options_category = 'CeterisParibus'
    options = {
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'}
    }

    def _fit(self, model, variable, observation):
        """Compute the profile of `variable` around `observation` and store it in `self.data`.

        Raises
        ------
        Exception
            If the variable is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return

        profile = model.explainer.predict_profile(
            row,
            variables=column,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            variable_splits_with_obs=False,
            verbose=False
        )

        # The rendering component follows the dtype of the observed value.
        numeric = is_numeric_dtype(row[column])
        self.plot_component = 'NumericalCeterisParibus' if numeric else 'CategoricalCeterisParibus'

        predictions = profile.result['_yhat_']
        self.data = {
            'x': profile.result[column].tolist(),
            'y': predictions.tolist(),
            'variable': column,
            'min': predictions.min(),
            'max': predictions.max(),
            'observation': profile.new_observation.iloc[0].to_dict()
        }

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class FairnessCheckContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class FairnessCheckContainer(PlotContainer):
    """Dataset-level chart with subgroup confusion-matrix metrics over a range of cutoffs."""

    info = {
        'name': 'Fairness',
        'plotType': 'Fairness',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'Fairness'
    options = {
        'cutoffs': { 'default': [x / 100 for x in range(5, 100, 5)], 'desc': 'List of tested cutoff levels' },
    }

    def _fit(self, model, variable):
        """Compute subgroup metrics for every cutoff and store them in `self.data`.

        `variable` is used as the protected attribute. For non-classification
        models, non-object (non-categorical) variables, or an empty list of
        cutoffs only a message is set and no data is produced.

        Raises
        ------
        Exception
            If the variable is not listed in `model.variables`.
        """
        # Imported locally so the block does not rely on a module-level
        # pandas alias being present in this file.
        import pandas as pd

        if variable.variable not in model.variables:
            raise Exception('Variable is not a column of explainer')
        exp = model.explainer

        # Cheap validity checks run before any prediction is computed.
        if exp.model_type != 'classification':
            self.set_message('Fairness plot is only available for classificators')
            return
        protected = exp.data[variable.variable]
        if not is_object_dtype(protected):
            self.set_message('Select categorical variable to check fairness')
            return

        y_hat = exp.predict(exp.data) if exp.y_hat is None else exp.y_hat

        # Collect one frame per cutoff and concatenate once; DataFrame.append
        # was removed in pandas 2.0 and copied the whole frame on every call.
        frames = []
        for cutoff in self.get_option('cutoffs'):
            cutoff_dict = checks.check_cutoff(protected, cutoff, False)
            sub_confusion_matrix = utils.SubgroupConfusionMatrix(exp.y, y_hat, protected, cutoff_dict)
            sub_confusion_matrix_metrics = utils.SubgroupConfusionMatrixMetrics(sub_confusion_matrix)
            df = sub_confusion_matrix_metrics.to_vertical_DataFrame()
            df['cutoff'] = cutoff
            frames.append(df)

        if not frames:
            self.set_message('No cutoff levels to test.', 'error')
            return
        output_df = pd.concat(frames)

        # Nested mapping: subgroup -> cutoff -> {metric: score}.
        output = {}
        for subgroup, subgroup_df in output_df.set_index('metric').groupby('subgroup'):
            output[subgroup] = {
                cutoff: rm_nan(cutoff_df['score'].to_dict())
                for cutoff, cutoff_df in subgroup_df.groupby('cutoff')
            }

        self.data = { 'subgroups': output }

    @staticmethod
    def test_arena(arena):
        """Return True if `arena` holds at least one classification model.

        Raises
        ------
        Exception
            If `arena` is not an instance of `dalex.arena.object.Arena`
            (checked by class and module name).
        """
        if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
            raise Exception('Invalid Arena argument')
        return next((True for model in arena.get_params('model') if model.explainer.model_type == 'classification'), False)

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

Methods

def test_arena(arena)

Tests if plot can be created for at least one combination of params

This method searches for params that can produce a valid chart. Displaying an error message is not counted as valid. One example of usage is charts for classification models: such charts should override this method and check whether there is at least one classification model in the arena.

Parameters

arena : Arena
Object of class dalex.Arena

Returns

bool
 
Expand source code Browse git
def test_arena(arena):
    if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
        raise Exception('Invalid Arena argument')
    return next((True for model in arena.get_params('model') if model.explainer.model_type == 'classification'), False)
class FeatureImportanceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class FeatureImportanceContainer(PlotContainer):
    """Dataset-level chart summarising permutation-based variable importance."""

    info = {
        'name': "Variable Importance",
        'plotType': "FeatureImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'VariableImportance'
    options = {
        'N': {'default': None, 'desc': 'Number of observations to use. None for all.'},
        'B': {'default': 10, 'desc': 'Number of permutation rounds to perform each variable'}
    }

    def _fit(self, model):
        """Aggregate the permutation results of `model` and store them in `self.data`."""
        permutations = model.explainer.model_parts(
            N=self.get_option('N'),
            B=self.get_option('B')
        ).permutation

        # The helper names matter: they become the row labels of the
        # aggregated frame that are looked up below.
        def q1(x):
            return x.quantile(0.25)

        def q3(x):
            return x.quantile(0.75)

        summary = permutations.agg(['mean', 'max', 'min', q1, q3])
        base = summary.loc['mean', '_full_model_']

        # Keep only real variables, ordered by decreasing mean loss.
        ranked = summary.drop(columns=['_baseline_', '_full_model_'])
        ranked = ranked.sort_values(by='mean', axis=1, ascending=False)

        payload = {
            'base': base,
            'variables': ranked.columns.tolist(),
            'dropout_loss': ranked.loc['mean'].tolist()
        }
        for label in ('min', 'max', 'q1', 'q3'):
            payload[label] = ranked.loc[label].tolist()
        self.data = payload

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class MetricsContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class MetricsContainer(PlotContainer):
    """Model-performance chart exposing the performance measures of a model."""

    info = {
        'name': "Metrics",
        'plotType': "Metrics",
        'plotCategory': "Model Performance",
        'requiredParams': ["model"]
    }
    options_category = 'Metrics'
    options = {}

    def _fit(self, model):
        """Store the first row of the model performance result as a dict in `self.data`."""
        performance = model.explainer.model_performance()
        first_row = performance.result.iloc[0]
        self.data = dict(first_row)

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class PartialDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class PartialDependenceContainer(PlotContainer):
    """Dataset-level chart showing the partial dependence profile of a variable."""

    info = {
        'name': "Partial Dependence",
        'plotType': 'PartialDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'PartialDependence'
    options = {
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'N': {'default': 500, 'desc': 'Number of observations to use. None for all.'}
    }

    def _fit(self, model, variable):
        """Compute the partial profile of `variable` for `model` and store it in `self.data`.

        Raises
        ------
        Exception
            If the variable is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        explainer = model.explainer
        if is_numeric_dtype(explainer.data[column]):
            self.plot_component = 'LinearDependence'
            # Grid settings are only forwarded for numerical variables.
            type_specific = {
                'variable_type': 'numerical',
                'grid_points': self.get_option('grid_points'),
                'variable_splits_type': self.get_option('grid_type')
            }
        else:
            self.plot_component = 'CategoricalDependence'
            type_specific = {'variable_type': 'categorical'}

        profile = explainer.model_profile(
            type='partial',
            variables=column,
            N=self.get_option('N'),
            verbose=False,
            **type_specific
        )
        curve = profile.result
        self.data = {
            'x': curve['_x_'].tolist(),
            'y': curve['_yhat_'].tolist(),
            'variable': column,
            'base': profile.mean_prediction
        }

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class ROCContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ROCContainer(PlotContainer):
    """Model-performance chart with the ROC curve of a classification model."""

    info = {
        'name': 'Receiver Operating Characterstic',
        'plotType': 'ROC',
        'plotCategory': 'Model Performance',
        'requiredParams': ['model']
    }
    options_category = 'ROC'
    options = {
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for ROC curve'},
    }

    def _fit(self, model):
        """Compute true positive / true negative rates per cutoff and store them in `self.data`.

        For non-classification models, or when the target contains a single
        class only, a message is set and no data is produced.
        """
        explainer = model.explainer
        if explainer.model_type != 'classification':
            self.set_message('ROC plot is only available for classificators')
            return

        if explainer.y_hat is None:
            predictions = explainer.predict(explainer.data)
        else:
            predictions = explainer.y_hat
        labelled = pd.DataFrame({'y': explainer.y.astype(bool), 'y_hat': predictions})

        positives = labelled.y.sum()
        negatives = labelled.shape[0] - positives
        if positives == 0 or negatives == 0:
            self.set_message('Provided dataset contains only positive or only negative cases.', 'error')
            return

        def counts_by_score(frame):
            # Sum of `y` per distinct predicted score, highest score first.
            return frame.groupby('y_hat').sum().reset_index().sort_values('y_hat', ascending=False)

        positive_counts = counts_by_score(labelled)
        negative_counts = counts_by_score(labelled.assign(y=1 - labelled.y))

        curve = pd.DataFrame({
            'TPR': positive_counts.y.cumsum() / positives,
            'TNR': 1 - (negative_counts.y.cumsum() / negatives),
            'cutoff': np.sort(labelled['y_hat'].unique())[::-1]
        })

        # Randomly thin the curve when it has more points than allowed.
        limit = self.get_option('grid_points')
        if curve.shape[0] > limit:
            curve = curve.sample(limit).sort_values('cutoff', ascending=False)

        self.data = {
            'cutoff': curve['cutoff'].tolist(),
            'specifity': [1] + curve['TNR'].tolist(),
            'sensivity': [0] + curve['TPR'].tolist()
        }

    def test_arena(arena):
        """Return True if `arena` holds at least one classification model.

        Raises an Exception when `arena` is not an `Arena` object from
        `dalex.arena.object` (checked by class and module name).
        """
        arena_class = type(arena)
        if arena_class.__name__ != 'Arena' or arena_class.__module__ != 'dalex.arena.object':
            raise Exception('Invalid Arena argument')
        return any(
            model.explainer.model_type == 'classification'
            for model in arena.get_params('model')
        )

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category

Methods

def test_arena(arena)

Tests if plot can be created for at least one combination of params

This method searches for params that can produce a valid chart. Displaying an error message is not counted as valid. One example of usage is charts for classification models: such charts should override this method and check whether there is at least one classification model in the arena.

Parameters

arena : Arena
Object of class dalex.Arena

Returns

bool
 
Expand source code Browse git
def test_arena(arena):
    if type(arena).__name__ != 'Arena' or type(arena).__module__ != 'dalex.arena.object':
        raise Exception('Invalid Arena argument')
    return next((True for model in arena.get_params('model') if model.explainer.model_type == 'classification'), False)
class ShapleyValuesContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesContainer(PlotContainer):
    """Observation-level chart built from the 'ShapleyValues' resource."""

    info = {
        'name': "Shapley Values",
        'plotType': "SHAPValues",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'ShapleyValues'
    options = {}

    def _fit(self, model, observation):
        """Read the Shapley values resource for the given params and store its stats in `self.data`.

        Any exception raised while reading the resource is turned into a
        message and no data is produced.
        """
        resource_params = {'model': model, 'observation': observation}
        resource = self.arena.resource_manager.get_resource(
            'ShapleyValues', resource_params, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No statistics yet: wait for the resource to report an update.
            if data.get('stats') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as error:
            self.set_message(str(error))
            return

        self.is_done = is_done
        self.progress = progress

        ordered = data.get('stats').sort_values('abs', ascending=False).reset_index()
        payload = {
            'variables': ordered.variable_name.tolist(),
            'variables_value': ordered.variable_value.tolist()
        }
        for column in ('mean', 'min', 'max', 'q1', 'q3'):
            payload[column] = ordered[column].tolist()
        payload['intercept'] = resource.data.get('intercept').astype(float)
        self.data = payload

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class ShapleyValuesDependenceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesDependenceContainer(PlotContainer):
    """Dataset-level chart relating values of one variable to their Shapley contributions."""

    info = {
        'name': "Shapley Values Dependence",
        'plotType': "ShapleyValuesDependence",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model", "variable"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model, variable):
        """Aggregate the contributions of `variable` per value and row into `self.data`.

        Raises
        ------
        Exception
            If the variable is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        # A numeric column without declared levels is drawn as a linear chart.
        linear = is_numeric_dtype(model.explainer.data[column]) and variable.levels is None
        self.plot_component = 'LinearShapleyDependence' if linear else 'CategoricalShapleyDependence'

        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues', {'model': model}, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No result yet: wait for the resource to report an update.
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as error:
            self.set_message(str(error))
            return

        self.is_done = is_done
        self.progress = progress

        contributions = data.get('result')
        contributions = contributions[contributions['variable_name'] == column]
        stats = contributions.groupby(['variable_value', 'row']) \
            .agg({'contribution': ['mean', 'min', 'max']}) \
            .contribution
        if linear:
            stats = stats.sort_index()

        # Index entries are (variable_value, row) pairs; only the value is plotted.
        convert = float if linear else str
        self.data = {
            'x': [convert(key[0]) for key in stats.index],
            'mean': stats['mean'].values.tolist(),
            'min': stats['min'].values.tolist(),
            'max': stats['max'].values.tolist(),
            'variable': column
        }

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category
class ShapleyValuesVariableImportanceContainer (arena, cache=True)

Class representing a chart.

Parameters

arena : Arena
Instance of Arena.
cache : bool
If this object is allowed to use cache when requesting resources

Attributes

arena : Arena
Instance of dalex.Arena
name : str
Display name of chart
plot_type : str
Identifier of chart type
plot_component : str
Identifier of Arena's component that should render this chart
plot_category : str
Name of category of chart
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data : dict
Results of computations are placed there
progress : float
If progress is supported, the value should be in the range [0, 1]. In all other situations the value must be set to -1. The progress of a plot container is based on the progress of the resources used at the moment the fit method is called. This value will not be updated.
use_cache : bool
If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesVariableImportanceContainer(PlotContainer):
    """Dataset-level chart with box-plot statistics of absolute Shapley contributions."""

    info = {
        'name': "Shapley Variable Importance",
        'plotType': "ShapleyValuesVariableImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model):
        """Summarise absolute contributions per variable and store them in `self.data`.

        Any exception raised while reading the resource is turned into a
        message and no data is produced.
        """
        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues', {'model': model}, cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            # No result yet: wait for the resource to report an update.
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as error:
            self.set_message(str(error))
            return

        self.is_done = is_done
        self.progress = progress
        result = data.get('result')

        # Mean contribution for every (variable, row) pair, then its magnitude.
        per_row = result.groupby(['variable_name', 'row']) \
            .agg({'contribution': ['mean']}) \
            .contribution
        per_row['abs_contribution'] = per_row['mean'].abs()
        per_row = per_row.reset_index()

        aggregations = ['mean', 'max', 'min', 'median', q1, q3, lf, uf]
        box_stats = per_row.groupby(['variable_name']) \
            .agg({'abs_contribution': aggregations}) \
            .abs_contribution.sort_values(by='mean', ascending=False)

        # Look up each row's fences by variable name and renumber them so
        # they line up with the reset index of `per_row`.
        names = per_row['variable_name']
        magnitudes = per_row['abs_contribution']
        above = per_row.loc[magnitudes > box_stats.uf.loc[names].reset_index(drop=True)]
        below = per_row.loc[magnitudes < box_stats.lf.loc[names].reset_index(drop=True)]
        outliers = pd.concat([above, below]) \
            .groupby('variable_name')['abs_contribution'] \
            .apply(list) \
            .to_dict()

        payload = {'variables': list(box_stats.index)}
        for column in ('mean', 'median', 'min', 'max', 'q1', 'q3', 'lf', 'uf'):
            payload[column] = box_stats[column].values.tolist()
        payload['outliers'] = outliers
        payload['intercept'] = data.get('intercept')
        self.data = payload

Ancestors

  • dalex.arena._plot_container.PlotContainer
  • dalex.arena._option_base.OptionBase

Class variables

var info
var options
var options_category