Module dalex.arena.resources

Source code:
from ._shapley_values_resource import ShapleyValuesResource
from ._dataset_shapley_values_resource import DatasetShapleyValuesResource

__all__ = [
    'ShapleyValuesResource',
    'DatasetShapleyValuesResource'
]
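
These resources are not created by hand; the Arena instantiates them when a chart (PlotContainer) requests Shapley values. A minimal sketch of the surrounding workflow, assuming a fitted model wrapped in a dalex Explainer (model, X, y are placeholders for your own objects):

import dalex as dx

exp = dx.Explainer(model, X, y, label="my model")   # model, X, y are user-supplied

arena = dx.Arena()
arena.push_model(exp)                 # provides the "model" param required by both resources
arena.push_observations(X.head(10))   # provides "observation" params for ShapleyValuesResource
arena.run_server()                    # charts trigger the resource computations on demand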

Classes

class DatasetShapleyValuesResource (arena)

Class representing a resource that needs to be calculated and will be used by one or more charts (PlotContainer). Computations run in a separate thread and can yield partial results.

Parameters

arena : Arena
Instance of Arena.

Attributes

arena : Arena
Instance of dalex.Arena.
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
mutex : _thread.lock
Mutex guarding the data and progress attributes. See the init code for details.
update_events : threading.Event
Event used to block execution until results are updated.
cancel_signal : bool
Flag set to signal the resource that it should cancel computations.
exception : Exception
If an exception occurs during computations, it is saved to this attribute.
is_done : bool
Flag set by the thread when computations are done.
progress : float
If progress reporting is supported, the value is in the range [0, 1]; otherwise it is set to -1.
data : dict
Results of computations are placed here.
thread : threading.Thread
Thread used for computations.
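
The attributes above imply the consumer-side pattern used by charts: take the mutex before reading data or progress, and wait on update_events between partial results. A rough sketch, where resource stands for a hypothetical instance of this class obtained from the Arena internals (the event-clearing step is an assumption):

while True:
    with resource.mutex:                  # mutex guards data and progress
        done = resource.is_done
        progress = resource.progress
        partial = resource.data.get('result')
    if partial is not None:
        print(f"progress={progress:.0%}, rows so far={len(partial)}")
    if done or resource.exception is not None:
        break
    resource.update_events.wait()         # block until the next partial result
    resource.update_events.clear()        # assumption: consumer resets the event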
Source code:
class DatasetShapleyValuesResource(Resource):
    resource_type = 'DatasetShapleyValues'
    required_params = ["model"]
    options_category = 'DatasetShapleyValues'
    options = {
        'B': {'default': 4, 'desc': 'Number of random paths'},
        'N': {'default': 500, 'desc': 'Number of randomly sampled rows from dataset'},
        'cpus': {'default': 4, 'desc': 'Number of parallel processes'}
    }
    def add_to_data(self, shaps):
        with self.mutex:
            if self.data.get('intercept') is None:
                self.data['intercept'] = shaps[0].intercept
            results = [shap.result[shap.result.B != 0] for shap in shaps]
            if self.data.get('result') is not None:
                result = pd.concat([self.data['result']] + results)
            else:
                result = pd.concat(results)
            self.data['result'] = result

    def _fit(self, model):
        B = self.get_option('B')
        cpus = self.get_option('cpus')
        with self.mutex:
            self.progress = 0
        dataset = model.explainer.data
        rows = dataset.shape[0]
        # Sample N rows
        N = self.get_option('N')
        if N < rows:
            sampled_rows = np.random.choice(np.arange(rows), N, replace=False)
            dataset = dataset.iloc[sampled_rows, :]
            rows = N
        buffor = []
        for i in range(rows):
            with self.mutex:
                if self.cancel_signal:
                    return
            shap = model.explainer.predict_parts(
                dataset.iloc[i],
                type='shap',
                B=B,
                processes=cpus
            )
            shap.result['row'] = i
            buffor.append(shap)
            # Append buffor to results if buffor size >= 10% of already appended
            if (len(buffor) >= 0.1 * (i + 1 - len(buffor))) or i == rows - 1:
                self.add_to_data(buffor)
                buffor = []
                with self.mutex:
                    self.progress = (i + 1) / rows
                self._emit_update()
        with self.mutex:
            self.progress = 1
            self.is_done = True
            self._emit_update()

Ancestors

  • dalex.arena._resource.Resource
  • dalex.arena._option_base.OptionBase

Class variables

var options
var options_category
var required_params
var resource_type
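
The B, N and cpus options trade accuracy for computation time: more random paths (B) stabilise the SHAP estimates, a larger sample (N) covers more of the dataset, and cpus controls how many processes predict_parts may use. Assuming the Arena forwards option setting to resources by their options_category via a set_option method (the exact setter name is an assumption here), tuning could look like:

arena.set_option('DatasetShapleyValues', 'N', 200)    # sample fewer rows: faster, noisier
arena.set_option('DatasetShapleyValues', 'B', 10)     # more random paths per observation
arena.set_option('DatasetShapleyValues', 'cpus', 8)   # parallel processes for predict_parts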

Methods

def add_to_data(self, shaps)
Source code:
def add_to_data(self, shaps):
    with self.mutex:
        if self.data.get('intercept') is None:
            self.data['intercept'] = shaps[0].intercept
        results = [shap.result[shap.result.B != 0] for shap in shaps]
        if self.data.get('result') is not None:
            result = pd.concat([self.data['result']] + results)
        else:
            result = pd.concat(results)
        self.data['result'] = result
class ShapleyValuesResource (arena)

Class representing a resource that needs to be calculated and will be used by one or more charts (PlotContainer). Computations run in a separate thread and can yield partial results.

Parameters

arena : Arena
Instance of Arena.

Attributes

arena : Arena
Instance of dalex.Arena.
params : dict
Dictionary with required param types as keys and param labels as values. This attribute is set when calling fit.
mutex : _thread.lock
Mutex guarding the data and progress attributes. See the init code for details.
update_events : threading.Event
Event used to block execution until results are updated.
cancel_signal : bool
Flag set to signal the resource that it should cancel computations.
exception : Exception
If an exception occurs during computations, it is saved to this attribute.
is_done : bool
Flag set by the thread when computations are done.
progress : float
If progress reporting is supported, the value is in the range [0, 1]; otherwise it is set to -1.
data : dict
Results of computations are placed here.
thread : threading.Thread
Thread used for computations.
Source code:
class ShapleyValuesResource(Resource):
    resource_type = 'ShapleyValues'
    required_params = ["model", "observation"]
    options_category = 'ShapleyValues'
    options = {
        'B': {'default': 20, 'desc': 'Number of random paths'},
        'cpus': {'default': 4, 'desc': 'Number of parallel processes'}
    }
    def add_to_data(self, shap):
        with self.mutex:
            if self.data.get('intercept') is None:
                self.data['intercept'] = shap.intercept
            result = shap.result
            result = result[result.B != 0]
            if self.data.get('result') is not None:
                result = pd.concat([self.data['result'], result])
            self.data['result'] = result
            stats = result.groupby(['variable_name', 'variable_value']) \
                .agg({'contribution': ['mean', 'max', 'min', q1, q3]}) \
                .contribution
            stats['abs'] = stats['mean'].abs()
            self.data['stats'] = stats

    def _fit(self, model, observation):
        row = observation.get_row_for_model(model)
        if row is None:
            raise Exception('Observation is not valid for given model.')
        B = self.get_option('B')
        cpus = self.get_option('cpus')
        with self.mutex:
            self.progress = 0
        for i in range(B // cpus):
            with self.mutex:
                if self.cancel_signal:
                    return
            shap = model.explainer.predict_parts(
                row,
                type='shap',
                B=cpus,
                processes=cpus
            )
            self.add_to_data(shap)
            with self.mutex:
                self.progress = (i + 1) * cpus / B
            self._emit_update()
        if B % cpus > 0:
            with self.mutex:
                if self.cancel_signal:
                    return
            shap = model.explainer.predict_parts(
                row,
                type='shap',
                B=B % cpus,
                processes=B % cpus
            )
            self.add_to_data(shap)
        with self.mutex:
            self.progress = 1
            self.is_done = True
            self._emit_update()
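
_fit splits the B random paths into batches of size cpus so that progress can be reported and cancellation checked between batches; any remainder runs as a final, smaller batch. For example, with B=10 and cpus=4 the loop runs two full batches and one remainder batch of two paths:

B, cpus = 10, 4                     # defaults are B=20, cpus=4
batches = [cpus] * (B // cpus)      # full batches computed inside the loop
if B % cpus:
    batches.append(B % cpus)        # remainder batch computed after the loop
print(batches)                      # [4, 4, 2]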

Ancestors

  • dalex.arena._resource.Resource
  • dalex.arena._option_base.OptionBase

Class variables

var options
var options_category
var required_params
var resource_type

Methods

def add_to_data(self, shap)
Source code:
def add_to_data(self, shap):
    with self.mutex:
        if self.data.get('intercept') is None:
            self.data['intercept'] = shap.intercept
        result = shap.result
        result = result[result.B != 0]
        if self.data.get('result') is not None:
            result = pd.concat([self.data['result'], result])
        self.data['result'] = result
        stats = result.groupby(['variable_name', 'variable_value']) \
            .agg({'contribution': ['mean', 'max', 'min', q1, q3]}) \
            .contribution
        stats['abs'] = stats['mean'].abs()
        self.data['stats'] = stats
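
The q1 and q3 aggregators used in the stats computation are not part of this excerpt; in the module source they are small quantile helpers, presumably along these lines (an assumption, not the verbatim definitions):

def q1(x):
    return x.quantile(0.25)   # first quartile of the contribution values

def q3(x):
    return x.quantile(0.75)   # third quartile of the contribution values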