Module dalex.arena.plots
Expand source code Browse git
from ._break_down_container import BreakDownContainer
from ._shapley_values_container import ShapleyValuesContainer
from ._feature_importance_container import FeatureImportanceContainer
from ._partial_dependence_container import PartialDependenceContainer
from ._accumulated_dependence_container import AccumulatedDependenceContainer
from ._ceteris_paribus_container import CeterisParibusContainer
from ._metrics_container import MetricsContainer
from ._roc_container import ROCContainer
from ._fairness_check_container import FairnessCheckContainer
from ._shapley_values_dependence_container import ShapleyValuesDependenceContainer
from ._shapley_values_variable_importance_container import ShapleyValuesVariableImportanceContainer
# Names exported by `from dalex.arena.plots import *`: one entry per chart
# container imported above.
__all__ = [
    "ShapleyValuesContainer",
    "FeatureImportanceContainer",
    "PartialDependenceContainer",
    "AccumulatedDependenceContainer",
    "CeterisParibusContainer",
    "BreakDownContainer",
    "MetricsContainer",
    "ROCContainer",
    "FairnessCheckContainer",
    "ShapleyValuesDependenceContainer",
    "ShapleyValuesVariableImportanceContainer",
]
Classes
class AccumulatedDependenceContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class AccumulatedDependenceContainer(PlotContainer):
    """Dataset-level chart holding an accumulated dependence profile of one variable."""

    info = {
        'name': "Accumulated Dependence",
        'plotType': 'AccumulatedDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'AccumulatedDependence'
    options = {
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'N': {'default': 500, 'desc': 'Number of observations to use. None for all.'}
    }

    def _fit(self, model, variable):
        """Compute the accumulated profile of `variable` for `model` and store it in `self.data`.

        Parameters
        ----------
        model
            Model param; its `explainer` and `variables` attributes are used.
        variable
            Variable param; its `variable` attribute is the column name.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        # The dtype of the explainer's column selects both the rendering
        # component and the profile's variable type.
        numeric = is_numeric_dtype(model.explainer.data[column])
        self.plot_component = 'LinearDependence' if numeric else 'CategoricalDependence'
        variable_type = 'numerical' if numeric else 'categorical'

        profile = model.explainer.model_profile(
            type='accumulated',
            variables=column,
            variable_type=variable_type,
            center=False,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            N=self.get_option('N'),
            verbose=False
        )
        result = profile.result
        self.data = {
            'x': result['_x_'].tolist(),
            'y': result['_yhat_'].tolist(),
            'variable': column,
            'base': 0
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class BreakDownContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class BreakDownContainer(PlotContainer):
    """Observation-level chart holding a break down attribution of one prediction."""

    info = {
        'name': "Break Down",
        'plotType': "Breakdown",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'BreakDown'
    options = {}

    def _fit(self, model, observation):
        """Run `predict_parts(type='break_down')` for the observation and store the result.

        When `observation.get_row_for_model(model)` returns None, a message is
        set on the container and no data is produced.
        """
        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return

        breakdown = model.explainer.predict_parts(row, type='break_down').result
        # First and last rows are reported separately (as 'intercept' and
        # 'prediction'), so only the rows in between are listed as variables.
        contributions = breakdown[1:-1]
        self.data = {
            'variables': contributions.variable_name.tolist(),
            'variables_value': contributions.variable_value.tolist(),
            'contribution': contributions.contribution.tolist(),
            'intercept': breakdown.contribution[0],
            'prediction': breakdown.cumulative.tail(1).iloc[0]
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class CeterisParibusContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class CeterisParibusContainer(PlotContainer):
    """Observation-level chart holding a ceteris paribus profile of one variable."""

    info = {
        'name': 'Ceteris Paribus',
        'plotType': 'CeterisParibus',
        'plotCategory': 'Observation Level',
        'requiredParams': ['model', 'variable', 'observation']
    }
    options_category = 'CeterisParibus'
    options = {
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'}
    }

    def _fit(self, model, variable, observation):
        """Compute the profile of `variable` around `observation` and store it in `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        When the observation yields no row for the model, a message is set on
        the container and no data is produced.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        row = observation.get_row_for_model(model)
        if row is None:
            self.set_message('Observation is not valid for given model.')
            return

        cp = model.explainer.predict_profile(
            row,
            variables=column,
            grid_points=self.get_option('grid_points'),
            variable_splits_type=self.get_option('grid_type'),
            variable_splits_with_obs=False,
            verbose=False
        )

        # The rendering component follows the dtype of the observation's value.
        numeric = is_numeric_dtype(row[column])
        self.plot_component = 'NumericalCeterisParibus' if numeric else 'CategoricalCeterisParibus'

        predictions = cp.result['_yhat_']
        self.data = {
            'x': cp.result[column].tolist(),
            'y': predictions.tolist(),
            'variable': column,
            'min': predictions.min(),
            'max': predictions.max(),
            'observation': cp.new_observation.iloc[0].to_dict()
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class FairnessCheckContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class FairnessCheckContainer(PlotContainer):
    """Dataset-level chart with subgroup confusion-matrix metrics per cutoff.

    Only classification explainers and object-dtype (categorical) protected
    variables produce data; other combinations set a message instead.
    """

    info = {
        'name': 'Fairness',
        'plotType': 'Fairness',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'Fairness'
    options = {
        'cutoffs': {'default': [x / 100 for x in range(5, 100, 5)], 'desc': 'List of tested cutoff levels'},
    }

    def _fit(self, model, variable):
        """Compute subgroup metrics for every configured cutoff.

        Parameters
        ----------
        model
            Model param; its `explainer` and `variables` attributes are used.
        variable
            Variable param naming the protected column.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        The result is stored in `self.data['subgroups']` as
        `{subgroup: {cutoff: {metric: score}}}`, with each innermost dict
        passed through `rm_nan`.
        """
        # Imported locally so the method does not depend on a module-level
        # `pd` alias being present in this file.
        import pandas as pd

        if variable.variable not in model.variables:
            raise Exception('Variable is not a column of explainer')

        exp = model.explainer
        # Cheap validity checks come first so that predictions are not
        # computed for combinations that cannot produce a chart.
        if exp.model_type != 'classification':
            self.set_message('Fairness plot is only available for classificators')
            return
        protected = exp.data[variable.variable]
        if not is_object_dtype(protected):
            self.set_message('Select categorical variable to check fairness')
            return

        y_hat = exp.predict(exp.data) if exp.y_hat is None else exp.y_hat

        frames = []
        for cutoff in self.get_option('cutoffs'):
            cutoff_dict = checks.check_cutoff(protected, cutoff, False)
            sub_confusion_matrix = utils.SubgroupConfusionMatrix(exp.y, y_hat, protected, cutoff_dict)
            sub_confusion_matrix_metrics = utils.SubgroupConfusionMatrixMetrics(sub_confusion_matrix)
            df = sub_confusion_matrix_metrics.to_vertical_DataFrame()
            df['cutoff'] = cutoff
            frames.append(df)

        # An empty cutoff list has nothing to aggregate; return an empty
        # result of the usual shape instead of failing.
        if not frames:
            self.data = {'subgroups': {}}
            return

        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported replacement and avoids re-copying the frame per cutoff.
        output_df = pd.concat(frames)

        output = {}
        for subgroup, subgroup_rows in output_df.set_index('metric').groupby('subgroup'):
            output[subgroup] = {
                cutoff: rm_nan(cutoff_rows['score'].to_dict())
                for cutoff, cutoff_rows in subgroup_rows.groupby('cutoff')
            }
        self.data = {'subgroups': output}

    def test_arena(arena):
        """Return True if `arena` holds at least one classification model.

        Defined without `self` and called on the class.

        Raises
        ------
        Exception
            If `arena` is not an `Arena` from module `dalex.arena.object`.
        """
        arena_type = type(arena)
        if not (arena_type.__name__ == 'Arena' and arena_type.__module__ == 'dalex.arena.object'):
            raise Exception('Invalid Arena argument')
        return any(
            model.explainer.model_type == 'classification'
            for model in arena.get_params('model')
        )
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
Methods
def test_arena(arena)
-
Tests if plot can be created for at least one combination of params
This method searches for params that can produce a valid chart. A chart that only displays an error message is not counted as valid. One example of usage is charts for classification models: such charts should override this method and check whether there is at least one classification model in the arena.
Parameters
arena
:Arena
- Object of class dalex.Arena
Returns
bool
Expand source code Browse git
def test_arena(arena):
    """Return True if `arena` holds at least one classification model.

    Raises
    ------
    Exception
        If `arena` is not an `Arena` from module `dalex.arena.object`.
    """
    arena_type = type(arena)
    if not (arena_type.__name__ == 'Arena' and arena_type.__module__ == 'dalex.arena.object'):
        raise Exception('Invalid Arena argument')
    return any(
        model.explainer.model_type == 'classification'
        for model in arena.get_params('model')
    )
class FeatureImportanceContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class FeatureImportanceContainer(PlotContainer):
    """Dataset-level chart summarising permutation variable importance."""

    info = {
        'name': "Variable Importance",
        'plotType': "FeatureImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'VariableImportance'
    options = {
        'N': {'default': None, 'desc': 'Number of observations to use. None for all.'},
        'B': {'default': 10, 'desc': 'Number of permutation rounds to perform each variable'}
    }

    def _fit(self, model):
        """Aggregate the permutation results of `model_parts` into per-variable statistics.

        Variables are ordered by decreasing mean; the mean of the
        `_full_model_` column is reported as 'base'.
        """
        permutations = model.explainer.model_parts(
            N=self.get_option('N'),
            B=self.get_option('B')
        ).permutation

        # The function names become the row labels of the aggregate
        # ('q1' / 'q3'), which are looked up below, so they must not change.
        def q1(values):
            return values.quantile(0.25)

        def q3(values):
            return values.quantile(0.75)

        summary = permutations.agg(['mean', 'max', 'min', q1, q3])
        full_model = summary.loc['mean', '_full_model_']
        ranked = (
            summary
            .drop(['_baseline_', '_full_model_'], axis=1)
            .sort_values(by='mean', axis=1, ascending=False)
        )
        self.data = {
            'base': full_model,
            'variables': ranked.columns.tolist(),
            'dropout_loss': ranked.loc['mean'].tolist(),
            'min': ranked.loc['min'].tolist(),
            'max': ranked.loc['max'].tolist(),
            'q1': ranked.loc['q1'].tolist(),
            'q3': ranked.loc['q3'].tolist()
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class MetricsContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class MetricsContainer(PlotContainer):
    """Model-performance chart exposing the explainer's performance measures."""

    info = {
        'name': "Metrics",
        'plotType': "Metrics",
        'plotCategory': "Model Performance",
        'requiredParams': ["model"]
    }
    options_category = 'Metrics'
    options = {}

    def _fit(self, model):
        """Store the first row of `model_performance().result` as a dict in `self.data`."""
        performance = model.explainer.model_performance()
        first_row = performance.result.iloc[0]
        self.data = dict(first_row)
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class PartialDependenceContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class PartialDependenceContainer(PlotContainer):
    """Dataset-level chart holding a partial dependence profile of one variable."""

    info = {
        'name': "Partial Dependence",
        'plotType': 'PartialDependence',
        'plotCategory': 'Dataset Level',
        'requiredParams': ['model', 'variable']
    }
    options_category = 'PartialDependence'
    options = {
        'grid_type': {'default': 'quantile', 'desc': 'grid type "quantile" or "uniform"'},
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for profile'},
        'N': {'default': 500, 'desc': 'Number of observations to use. None for all.'}
    }

    def _fit(self, model, variable):
        """Compute the partial profile of `variable` for `model` and store it in `self.data`.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        profile_kwargs = {'type': 'partial', 'variables': column}
        if is_numeric_dtype(model.explainer.data[column]):
            self.plot_component = 'LinearDependence'
            # Grid options are passed for numerical variables only.
            profile_kwargs.update(
                variable_type='numerical',
                grid_points=self.get_option('grid_points'),
                variable_splits_type=self.get_option('grid_type')
            )
        else:
            self.plot_component = 'CategoricalDependence'
            profile_kwargs['variable_type'] = 'categorical'
        profile_kwargs.update(N=self.get_option('N'), verbose=False)

        profile = model.explainer.model_profile(**profile_kwargs)
        self.data = {
            'x': profile.result['_x_'].tolist(),
            'y': profile.result['_yhat_'].tolist(),
            'variable': column,
            'base': profile.mean_prediction
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class ROCContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ROCContainer(PlotContainer):
    """Model-performance chart with ROC curve points of a classification model."""

    info = {
        'name': 'Receiver Operating Characterstic',
        'plotType': 'ROC',
        'plotCategory': 'Model Performance',
        'requiredParams': ['model']
    }
    options_category = 'ROC'
    options = {
        'grid_points': {'default': 101, 'desc': 'Maximum number of points for ROC curve'},
    }

    def _fit(self, model):
        """Compute TPR/TNR per distinct predicted score and store them in `self.data`.

        A message is set, and no data produced, when the explainer is not a
        classifier or when the labels contain only one class. If there are
        more distinct scores than the `grid_points` option, a random sample of
        that size is kept.
        """
        exp = model.explainer
        if exp.model_type != 'classification':
            self.set_message('ROC plot is only available for classificators')
            return

        if exp.y_hat is None:
            scores = exp.predict(exp.data)
        else:
            scores = exp.y_hat
        labelled = pd.DataFrame({'y': exp.y.astype(bool), 'y_hat': scores})

        positives = labelled.y.sum()
        negatives = labelled.shape[0] - positives
        if positives == 0 or negatives == 0:
            self.set_message('Provided dataset contains only positive or only negative cases.', 'error')
            return

        def cumulative_hits(frame):
            # Number of rows with y set, accumulated from the highest score down.
            per_score = frame.groupby('y_hat').sum().reset_index()
            return per_score.sort_values('y_hat', ascending=False).y.cumsum()

        curve = pd.DataFrame({
            'TPR': cumulative_hits(labelled) / positives,
            'TNR': 1 - (cumulative_hits(labelled.assign(y=1 - labelled.y)) / negatives),
            'cutoff': np.sort(labelled['y_hat'].unique())[::-1]
        })

        max_points = self.get_option('grid_points')
        if curve.shape[0] > max_points:
            curve = curve.sample(max_points).sort_values('cutoff', ascending=False)

        # The rate lists get one extra leading point (TNR 1, TPR 0), so they
        # are one element longer than 'cutoff'.
        self.data = {
            'cutoff': curve['cutoff'].tolist(),
            'specifity': [1] + curve['TNR'].tolist(),
            'sensivity': [0] + curve['TPR'].tolist()
        }

    def test_arena(arena):
        """Return True if `arena` holds at least one classification model.

        Defined without `self` and called on the class.

        Raises
        ------
        Exception
            If `arena` is not an `Arena` from module `dalex.arena.object`.
        """
        arena_type = type(arena)
        if not (arena_type.__name__ == 'Arena' and arena_type.__module__ == 'dalex.arena.object'):
            raise Exception('Invalid Arena argument')
        return any(
            model.explainer.model_type == 'classification'
            for model in arena.get_params('model')
        )
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
Methods
def test_arena(arena)
-
Tests if plot can be created for at least one combination of params
This method searches for params that can produce a valid chart. A chart that only displays an error message is not counted as valid. One example of usage is charts for classification models: such charts should override this method and check whether there is at least one classification model in the arena.
Parameters
arena
:Arena
- Object of class dalex.Arena
Returns
bool
Expand source code Browse git
def test_arena(arena):
    """Return True if `arena` holds at least one classification model.

    Raises
    ------
    Exception
        If `arena` is not an `Arena` from module `dalex.arena.object`.
    """
    arena_type = type(arena)
    if not (arena_type.__name__ == 'Arena' and arena_type.__module__ == 'dalex.arena.object'):
        raise Exception('Invalid Arena argument')
    return any(
        model.explainer.model_type == 'classification'
        for model in arena.get_params('model')
    )
class ShapleyValuesContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesContainer(PlotContainer):
    """Observation-level chart built from the 'ShapleyValues' resource."""

    info = {
        'name': "Shapley Values",
        'plotType': "SHAPValues",
        'plotCategory': "Observation Level",
        'requiredParams': ["model", "observation"]
    }
    options_category = 'ShapleyValues'
    options = {}

    def _fit(self, model, observation):
        """Read the 'ShapleyValues' resource and publish its per-variable statistics.

        If the resource has no 'stats' yet, the method waits for one update
        and reads the result again. Any exception raised while reading is
        turned into a message on the container and no data is produced.
        """
        resource = self.arena.resource_manager.get_resource(
            'ShapleyValues',
            {'model': model, 'observation': observation},
            cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            if data.get('stats') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return

        self.is_done = is_done
        self.progress = progress

        # Variables are listed by decreasing 'abs' column.
        ordered = data.get('stats').sort_values('abs', ascending=False).reset_index()
        payload = {
            'variables': ordered['variable_name'].tolist(),
            'variables_value': ordered['variable_value'].tolist()
        }
        for column in ('mean', 'min', 'max', 'q1', 'q3'):
            payload[column] = ordered[column].tolist()
        # The intercept is read from the resource's current data, not from the
        # snapshot returned by get_result().
        payload['intercept'] = resource.data.get('intercept').astype(float)
        self.data = payload
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class ShapleyValuesDependenceContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesDependenceContainer(PlotContainer):
    """Dataset-level chart relating a variable's values to its Shapley contributions."""

    info = {
        'name': "Shapley Values Dependence",
        'plotType': "ShapleyValuesDependence",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model", "variable"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model, variable):
        """Summarise the 'DatasetShapleyValues' contributions of one variable.

        Raises
        ------
        Exception
            If the column is not listed in `model.variables`.

        If the resource has no 'result' yet, the method waits for one update
        and reads again. Any exception raised while reading is turned into a
        message on the container and no data is produced.
        """
        column = variable.variable
        if column not in model.variables:
            raise Exception('Variable is not a column of explainer')

        # The linear component is used only for a numeric column whose
        # variable param has no levels.
        numeric = is_numeric_dtype(model.explainer.data[column])
        if numeric and variable.levels is None:
            self.plot_component = 'LinearShapleyDependence'
        else:
            self.plot_component = 'CategoricalShapleyDependence'

        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues',
            {'model': model},
            cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return

        self.is_done = is_done
        self.progress = progress

        contributions = data.get('result')
        contributions = contributions[contributions['variable_name'] == column]
        stats = (
            contributions
            .groupby(['variable_value', 'row'])
            .agg({'contribution': ['mean', 'min', 'max']})
            .contribution
        )

        linear = self.plot_component == 'LinearShapleyDependence'
        if linear:
            stats = stats.sort_index()
        # x values are emitted as floats on the linear chart, strings otherwise.
        cast = float if linear else str
        self.data = {
            'x': [cast(key[0]) for key in stats.index],
            'mean': stats['mean'].values.tolist(),
            'min': stats['min'].values.tolist(),
            'max': stats['max'].values.tolist(),
            'variable': column
        }
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category
class ShapleyValuesVariableImportanceContainer (arena, cache=True)
-
Class representing a chart.
Parameters
arena
:Arena
- Instance of Arena.
cache
:bool
- If this object is allowed to use cache when requesting resources
Attributes
arena
:Arena
- Instance of dalex.Arena
name
:str
- Display name of chart
plot_type
:str
- Identifier of chart type
plot_component
:str
- Identifier of Arena's component that should render this chart
plot_category
:str
- Name of category of chart
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
data
:dict
- Results of computations are placed there
progress
:float
- If progress is supported, the value should be in the range [0, 1]; otherwise it must be set to -1. The progress of a plot container is based on the progress of the resources it uses at the moment the fit method is called. This value is not updated afterwards.
use_cache
:bool
- If this object is allowed to use cache when requesting resources
Expand source code Browse git
class ShapleyValuesVariableImportanceContainer(PlotContainer):
    """Dataset-level chart ranking variables by absolute Shapley contribution."""

    info = {
        'name': "Shapley Variable Importance",
        'plotType': "ShapleyValuesVariableImportance",
        'plotCategory': "Dataset Level",
        'requiredParams': ["model"]
    }
    options_category = 'DatasetShapleyValues'
    options = {}

    def _fit(self, model):
        """Build box-plot statistics of absolute contributions per variable.

        If the 'DatasetShapleyValues' resource has no 'result' yet, the method
        waits for one update and reads again. Any exception raised while
        reading is turned into a message on the container and no data is
        produced.
        """
        resource = self.arena.resource_manager.get_resource(
            'DatasetShapleyValues',
            {'model': model},
            cache=self.use_cache
        )
        try:
            data, progress, is_done = resource.get_result()
            if data.get('result') is None:
                resource.wait_for_update()
                data, progress, is_done = resource.get_result()
        except Exception as e:
            self.set_message(str(e))
            return

        self.is_done = is_done
        self.progress = progress

        # Mean contribution per (variable, row), then its absolute value.
        per_row = (
            data.get('result')
            .groupby(['variable_name', 'row'])
            .agg({'contribution': ['mean']})
            .contribution
        )
        per_row['abs_contribution'] = per_row['mean'].abs()
        per_row = per_row.reset_index()

        # q1, q3, lf and uf are aggregation helpers defined elsewhere in this
        # file (lf/uf are presumably lower/upper box-plot fences - confirm).
        box_stats = (
            per_row
            .groupby(['variable_name'])
            .agg({'abs_contribution': ['mean', 'max', 'min', 'median', q1, q3, lf, uf]})
            .abs_contribution
            .sort_values(by='mean', ascending=False)
        )

        # Look up each row's own variable bounds; the index is reset so the
        # bounds line up with the rows of `per_row`.
        upper_bound = box_stats.uf.loc[per_row['variable_name']].reset_index(drop=True)
        above = per_row.loc[per_row['abs_contribution'] > upper_bound]
        lower_bound = box_stats.lf.loc[per_row['variable_name']].reset_index(drop=True)
        below = per_row.loc[per_row['abs_contribution'] < lower_bound]
        outliers = (
            pd.concat([above, below])
            .groupby('variable_name')['abs_contribution']
            .apply(list)
            .to_dict()
        )

        payload = {'variables': list(box_stats.index)}
        for column in ('mean', 'median', 'min', 'max', 'q1', 'q3', 'lf', 'uf'):
            payload[column] = box_stats[column].values.tolist()
        payload['outliers'] = outliers
        payload['intercept'] = data.get('intercept')
        self.data = payload
Ancestors
- dalex.arena._plot_container.PlotContainer
- dalex.arena._option_base.OptionBase
Class variables
var info
var options
var options_category