import dalex as dx
import warnings
# Silence every warning so library notices do not clutter the output.
warnings.filterwarnings(action='ignore')

# Bare expression: displays the installed dalex version when run interactively.
dx.__version__

# Load the apartments data: a training split and a separate test split.
train, test = dx.datasets.load_apartments(), dx.datasets.load_apartments_test()

# Separate the `m2_price` target column from the remaining feature columns.
X_train, y_train = train.drop(columns='m2_price'), train['m2_price']
X_test, y_test = test.drop(columns='m2_price'), test['m2_price']
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
# Split the feature columns by dtype: object columns are handled as
# categorical, every other column as numerical.
numerical_features = X_train.select_dtypes(exclude=[object]).columns
categorical_features = X_train.select_dtypes(include=[object]).columns

# Numerical columns are standardised.
numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])

# Categorical columns are one-hot encoded; categories not seen while fitting
# are ignored instead of raising an error.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))],
)

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ],
)
from sklearn.base import clone
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor

# Each pipeline receives its own unfitted copy of the preprocessing step.
# Placing the very same `preprocessor` instance in both pipelines would make
# them share one transformer object, so fitting the second pipeline would
# refit (and overwrite) the preprocessing state used by the first one.
# NOTE: as a consequence `preprocessor` itself stays an unfitted template.

# Linear model: preprocessing followed by an ElasticNet regressor.
model_elastic_net = Pipeline(
    steps=[
        ('preprocessor', clone(preprocessor)),
        ('model', ElasticNet()),
    ]
)
model_elastic_net.fit(X=X_train, y=y_train)

# Tree model: preprocessing followed by a DecisionTreeRegressor.
model_decision_tree = Pipeline(
    steps=[
        ('preprocessor', clone(preprocessor)),
        ('model', DecisionTreeRegressor()),
    ]
)
model_decision_tree.fit(X=X_train, y=y_train)
# Wrap each fitted pipeline in an explainer that uses the test split.
exp_elastic_net = dx.Explainer(
    model_elastic_net,
    data=X_test,
    y=y_test,
)
exp_decision_tree = dx.Explainer(
    model_decision_tree,
    data=X_test,
    y=y_test,
)

# Start from an empty Arena.
arena = dx.Arena()
# Register the elastic-net explainer.
arena.push_model(exp_elastic_net)
# Register the whole test dataset as observations (target column included).
arena.push_observations(test)
# Serve the Arena on port 9294.
arena.run_server(port=9294)
The server updates automatically. You can add a second model while it is running.
# Add the decision-tree explainer to the Arena created above.
arena.push_model(exp_decision_tree)
You can stop the server using the stop_server method.
# Shut down the Arena server.
arena.stop_server()
You create the Arena in exactly the same way.
# Start from an empty Arena.
arena = dx.Arena()

# Register both explainers.
arena.push_model(exp_elastic_net)
arena.push_model(exp_decision_tree)

# Register only the first 3 rows of the test dataset as observations.
arena.push_observations(test.iloc[:3])

# Write the Arena data to a file.
arena.save("data.json")
You can automatically upload this data source to the GitHub Gist service. By default OAuth is used, but you can provide your Personal Access Token using the token
argument.
# Upload the Arena data. NOTE(review): open_browser=False presumably
# prevents a browser window from being opened afterwards; confirm against
# the dalex Arena documentation.
arena.upload(open_browser=False)
The options for each plot are described in the official Arena guide:
https://arena.drwhy.ai/docs/guide/observation-level
https://arena.drwhy.ai/docs/guide/dataset-level
https://arena.drwhy.ai/docs/guide/fairness
https://arena.drwhy.ai/docs/guide/model-performance
https://arena.drwhy.ai/docs/guide/eda-charts
Short descriptions are available using the print_options
method.
# Fresh Arena serving the decision-tree explainer with the full test dataset.
arena = dx.Arena()
arena.push_model(exp_decision_tree)
arena.push_observations(test)
arena.run_server(port=9294)

# Print the available options.
arena.print_options()
You can easily change the options for charts, and the dashboard will be refreshed automatically.
# Chart-specific: set the 'grid_type' option of the 'CeterisParibus' chart.
arena.set_option('CeterisParibus', 'grid_type', 'uniform')
# For all charts: passing None instead of a chart name applies the option
# to every chart.
arena.set_option(None, 'grid_points', 200)
# Print the options again to inspect the result of the changes.
arena.print_options()
The cache contains charts that have already been generated. In live mode, it holds the charts that the user has opened. In static mode, the cache contains all charts if precalculate=True or if the save method was called.
# Default mode (precalculate=False): report the cache size after pushing a
# model, and again after saving the Arena to a file.
arena = dx.Arena()
arena.push_model(exp_elastic_net)
print(len(arena.cache))
arena.save('data.json')
print(len(arena.cache))

# With precalculate=True: report the cache size after each pushed model.
arena = dx.Arena(precalculate=True)
arena.push_model(exp_elastic_net)
print(len(arena.cache))
arena.push_model(exp_decision_tree)
print(len(arena.cache))

# Report the cache size, clear the cache, then fill it again, printing the
# size after each step.
print(len(arena.cache))
arena.clear_cache()
print(len(arena.cache))
arena.fill_cache()
print(len(arena.cache))
Changing options removes the affected charts from the cache. If precalculate is True, those charts are then generated again.
# precalculate is enabled here: the current arena was created with
# precalculate=True.
print(len(arena.cache))
# Change a chart option while precalculate is enabled, then re-check the
# cache size.
arena.set_option('FeatureImportance', 'B', 5)
print(len(arena.cache))
# Turn precalculation off and repeat the same option change to compare how
# the cache size responds.
arena.precalculate = False
print(len(arena.cache))
arena.set_option('FeatureImportance', 'B', 5)
print(len(arena.cache))
dalex
package: Titanic: tutorial and examples