Module dalex.arena.resources
Expand source code Browse git
from ._shapley_values_resource import ShapleyValuesResource
from ._dataset_shapley_values_resource import DatasetShapleyValuesResource
# Public API of this package: the names exported by `from ... import *`.
__all__ = [
'ShapleyValuesResource',
'DatasetShapleyValuesResource'
]
Classes
class DatasetShapleyValuesResource (arena)
-
Class representing a resource that needs to be calculated and will be used by one or more charts (PlotContainer). Computations will run in a separate thread and can support partial results.
Parameters
arena
:Arena
- Instance of Arena.
Attributes
arena
:Arena
- Instance of dalex.Arena
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
mutex
:_thread.lock
- Mutex used for data and progress attributes. See init code for details.
update_events
:threading.Event
- Object used to block execution until update of results.
cancel_signal
:bool
- This variable is set to signal the resource that it should cancel computations.
exception
:Exception
- When an exception occurs during computations, it is saved to this variable.
is_done
:bool
- Flag set by thread when computations are done.
progress
:float
- If progress is supported, then the value should be in the range [0, 1]. Otherwise it must be set to -1.
data
:dict
- Results of computations are placed there
thread
:threading.Thread
- Thread used for computations
Expand source code Browse git
class DatasetShapleyValuesResource(Resource):
    """Resource holding Shapley values computed for rows of a model's dataset.

    At most ``N`` randomly sampled rows of ``model.explainer.data`` are
    explained one by one with ``predict_parts(type='shap')``. Partial results
    are merged into ``self.data`` in growing batches so that consumers can
    read them before the whole computation finishes.
    """

    resource_type = 'DatasetShapleyValues'
    required_params = ["model"]
    options_category = 'DatasetShapleyValues'
    options = {
        'B': {'default': 4, 'desc': 'Number of random paths'},
        'N': {'default': 500, 'desc': 'Number of randomly sampled rows from dataset'},
        'cpus': {'default': 4, 'desc': 'Number of parallel processes'}
    }

    def add_to_data(self, shaps):
        """Merge a batch of Shapley explanations into ``self.data``.

        Parameters
        ----------
        shaps : list
            Objects exposing ``intercept`` and a ``result`` data frame with a
            ``B`` column. An empty list is a no-op.

        Notes
        -----
        ``self.data['intercept']`` is set once, from the first explanation
        ever added. Rows of ``result`` with ``B == 0`` are dropped
        (presumably the aggregated rows - confirm against ``predict_parts``)
        and the remaining rows are appended to ``self.data['result']``.
        """
        # Without this guard an empty batch fails on `shaps[0]`, or on
        # `pd.concat([])` when no result has been stored yet.
        if not shaps:
            return
        with self.mutex:
            if self.data.get('intercept') is None:
                self.data['intercept'] = shaps[0].intercept
            results = [shap.result[shap.result.B != 0] for shap in shaps]
            if self.data.get('result') is not None:
                result = pd.concat([self.data['result']] + results)
            else:
                result = pd.concat(results)
            self.data['result'] = result

    def _fit(self, model):
        """Compute Shapley values for sampled rows of the model's dataset.

        Reads the ``B``, ``N`` and ``cpus`` options, updates ``self.progress``
        and calls ``self._emit_update()`` after every flush of buffered
        results. Returns early, without marking the resource as done, when
        ``self.cancel_signal`` is set.

        Parameters
        ----------
        model
            Object whose ``explainer`` provides ``data`` and ``predict_parts``.
        """
        B = self.get_option('B')
        cpus = self.get_option('cpus')
        with self.mutex:
            self.progress = 0
        dataset = model.explainer.data
        rows = dataset.shape[0]
        # Sample N rows
        N = self.get_option('N')
        if N < rows:
            sampled_rows = np.random.choice(np.arange(rows), N, replace=False)
            dataset = dataset.iloc[sampled_rows, :]
            rows = N
        pending = []
        for i in range(rows):
            with self.mutex:
                if self.cancel_signal:
                    return
            shap = model.explainer.predict_parts(
                dataset.iloc[i],
                type='shap',
                B=B,
                processes=cpus
            )
            # `row` is the position within the (possibly sampled) dataset.
            shap.result['row'] = i
            pending.append(shap)
            # Flush once the buffer holds >= 10% of what was already merged,
            # so the number of concatenations stays small as results grow.
            if (len(pending) >= 0.1 * (i + 1 - len(pending))) or i == rows - 1:
                self.add_to_data(pending)
                pending = []
                with self.mutex:
                    self.progress = (i + 1) / rows
                self._emit_update()
        with self.mutex:
            self.progress = 1
            self.is_done = True
        self._emit_update()
Ancestors
- dalex.arena._resource.Resource
- dalex.arena._option_base.OptionBase
Class variables
var options
var options_category
var required_params
var resource_type
Methods
def add_to_data(self, shaps)
-
Expand source code Browse git
def add_to_data(self, shaps):
    """Merge a batch of Shapley explanations into ``self.data``.

    Parameters
    ----------
    shaps : list
        Objects exposing ``intercept`` and a ``result`` data frame with a
        ``B`` column. An empty list is a no-op.

    Notes
    -----
    ``self.data['intercept']`` is set once, from the first explanation ever
    added. Rows of ``result`` with ``B == 0`` are dropped and the remaining
    rows are appended to ``self.data['result']``.
    """
    # Without this guard an empty batch fails on `shaps[0]`, or on
    # `pd.concat([])` when no result has been stored yet.
    if not shaps:
        return
    with self.mutex:
        if self.data.get('intercept') is None:
            self.data['intercept'] = shaps[0].intercept
        results = [shap.result[shap.result.B != 0] for shap in shaps]
        if self.data.get('result') is not None:
            result = pd.concat([self.data['result']] + results)
        else:
            result = pd.concat(results)
        self.data['result'] = result
class ShapleyValuesResource (arena)
-
Class representing a resource that needs to be calculated and will be used by one or more charts (PlotContainer). Computations will run in a separate thread and can support partial results.
Parameters
arena
:Arena
- Instance of Arena.
Attributes
arena
:Arena
- Instance of dalex.Arena
params
:dict
- Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
mutex
:_thread.lock
- Mutex used for data and progress attributes. See init code for details.
update_events
:threading.Event
- Object used to block execution until update of results.
cancel_signal
:bool
- This variable is set to signal the resource that it should cancel computations.
exception
:Exception
- When an exception occurs during computations, it is saved to this variable.
is_done
:bool
- Flag set by thread when computations are done.
progress
:float
- If progress is supported, then the value should be in the range [0, 1]. Otherwise it must be set to -1.
data
:dict
- Results of computations are placed there
thread
:threading.Thread
- Thread used for computations
Expand source code Browse git
class ShapleyValuesResource(Resource):
    """Resource holding Shapley values of one observation for one model.

    The ``B`` random paths are computed in batches of ``cpus`` paths; after
    each batch the accumulated results and their per-variable statistics are
    stored in ``self.data`` and an update is emitted.
    """

    resource_type = 'ShapleyValues'
    required_params = ["model", "observation"]
    options_category = 'ShapleyValues'
    options = {
        'B': {'default': 20, 'desc': 'Number of random paths'},
        'cpus': {'default': 4, 'desc': 'Number of parallel processes'}
    }

    def add_to_data(self, shap):
        """Append one batch of paths to ``self.data`` and refresh statistics.

        ``shap`` must expose ``intercept`` and a ``result`` data frame with
        ``B``, ``variable_name``, ``variable_value`` and ``contribution``
        columns. Rows with ``B == 0`` are discarded before merging.
        """
        with self.mutex:
            if self.data.get('intercept') is None:
                self.data['intercept'] = shap.intercept
            batch = shap.result
            merged = batch[batch.B != 0]
            if self.data.get('result') is not None:
                merged = pd.concat([self.data['result'], merged])
            self.data['result'] = merged
            # Statistics are recomputed over everything gathered so far.
            per_variable = merged.groupby(['variable_name', 'variable_value'])
            summary = per_variable.agg(
                {'contribution': ['mean', 'max', 'min', q1, q3]}
            ).contribution
            summary['abs'] = summary['mean'].abs()
            self.data['stats'] = summary

    def _fit(self, model, observation):
        """Compute Shapley values of ``observation`` for ``model`` in batches.

        Raises ``Exception`` when ``observation.get_row_for_model(model)``
        returns ``None``. Returns early, without marking the resource as
        done, when ``self.cancel_signal`` is set.
        """
        row = observation.get_row_for_model(model)
        if row is None:
            raise Exception('Observation is not valid for given model.')
        total_paths = self.get_option('B')
        workers = self.get_option('cpus')
        with self.mutex:
            self.progress = 0
        full_batches, leftover = divmod(total_paths, workers)
        # Full batches: one path per worker process.
        for finished in range(1, full_batches + 1):
            with self.mutex:
                if self.cancel_signal:
                    return
            self.add_to_data(model.explainer.predict_parts(
                row,
                type='shap',
                B=workers,
                processes=workers
            ))
            with self.mutex:
                self.progress = finished * workers / total_paths
            self._emit_update()
        # Remaining paths that did not fill a whole batch.
        if leftover > 0:
            with self.mutex:
                if self.cancel_signal:
                    return
            self.add_to_data(model.explainer.predict_parts(
                row,
                type='shap',
                B=leftover,
                processes=leftover
            ))
        with self.mutex:
            self.progress = 1
            self.is_done = True
        self._emit_update()
Ancestors
- dalex.arena._resource.Resource
- dalex.arena._option_base.OptionBase
Class variables
var options
var options_category
var required_params
var resource_type
Methods
def add_to_data(self, shap)
-
Expand source code Browse git
def add_to_data(self, shap):
    """Append one batch of paths to ``self.data`` and refresh statistics.

    ``shap`` must expose ``intercept`` and a ``result`` data frame with
    ``B``, ``variable_name``, ``variable_value`` and ``contribution``
    columns. Rows with ``B == 0`` are discarded before merging.
    """
    with self.mutex:
        if self.data.get('intercept') is None:
            self.data['intercept'] = shap.intercept
        batch = shap.result
        merged = batch[batch.B != 0]
        if self.data.get('result') is not None:
            merged = pd.concat([self.data['result'], merged])
        self.data['result'] = merged
        # Statistics are recomputed over everything gathered so far.
        per_variable = merged.groupby(['variable_name', 'variable_value'])
        summary = per_variable.agg(
            {'contribution': ['mean', 'max', 'min', q1, q3]}
        ).contribution
        summary['abs'] = summary['mean'].abs()
        self.data['stats'] = summary