import dalex as dx
import warnings
# NOTE(review): blanket warning suppression — deliberate for clean notebook
# output, but it hides sklearn/lightgbm deprecation warnings too.
warnings.filterwarnings('ignore')
# Display the installed dalex version (notebook cell expression).
dx.__version__
# Load the apartments train/test datasets bundled with dalex and separate
# the features from the m2_price target column.
train = dx.datasets.load_apartments()
test = dx.datasets.load_apartments_test()

target = 'm2_price'
X_train, y_train = train.drop(columns=target), train[target]
X_test, y_test = test.drop(columns=target), test[target]
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

# Standard-scale the numeric columns; one-hot encode the categorical ones
# (unknown categories at predict time are ignored rather than raising).
numerical_features = X_train.select_dtypes(exclude=[object]).columns
numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])

categorical_features = X_train.select_dtypes(include=[object]).columns
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor

# Two baseline regressors, each sharing the same preprocessing step.
model_elastic_net = Pipeline(
    steps=[('preprocessor', preprocessor), ('model', ElasticNet(alpha=0.2))]
)
model_elastic_net.fit(X=X_train, y=y_train)

model_decision_tree = Pipeline(
    steps=[('preprocessor', preprocessor), ('model', DecisionTreeRegressor())]
)
model_decision_tree.fit(X=X_train, y=y_train)
# Wrap each fitted pipeline in a dalex Explainer evaluated on the hold-out set.
exp_elastic_net = dx.Explainer(model_elastic_net, data=X_test, y=y_test)
exp_decision_tree = dx.Explainer(model_decision_tree, data=X_test, y=y_test)
# Print hold-out performance metrics for both models (notebook cell output).
exp_elastic_net.model_performance()
exp_decision_tree.model_performance()
# create empty Arena
arena = dx.Arena()
# push created explainer
arena.push_model(exp_elastic_net)
# push whole test dataset (including target column)
arena.push_observations(test)
# run server on port 9294 — the dashboard updates live as models are pushed
arena.run_server(port=9294)
The server updates automatically, so a second model can be added while it is running.
# Add the decision-tree explainer to the already-running Arena server.
arena.push_model(exp_decision_tree)
And a third one!
from lightgbm import LGBMRegressor

# A third model: gradient boosting on top of the same preprocessing step.
model_gbm = Pipeline(
    steps=[('preprocessor', preprocessor), ('model', LGBMRegressor())]
)
model_gbm.fit(X=X_train, y=y_train)

# Explain it on the hold-out set and push it to the running Arena.
exp_gbm = dx.Explainer(model_gbm, data=X_test, y=y_test)
arena.push_model(exp_gbm)
Stop the server using this method:
# Shut down the live Arena dashboard server started by run_server().
arena.stop_server()
Create an Arena exactly the same way.
# Build a fresh Arena, this time saved to a static file instead of served.
arena = dx.Arena()
# Limit Shapley sampling — the default N takes too long to compute.
arena.set_option('DatasetShapleyValues', 'N', 10)
# Register the boosted and tree explainers.
arena.push_model(exp_gbm)
arena.push_model(exp_decision_tree)
# Push only the first 3 rows of the testing dataset.
arena.push_observations(test.head(3))
# Serialize the arena to a static JSON data source.
arena.save("data.json")
You can automatically upload this data source to the GitHub Gist service. By default, OAuth is used, but you can provide a Personal Access Token using the token argument.
# Upload the arena data source to GitHub Gist; suppress opening a browser tab.
arena.upload(open_browser=False)
Options are described for each plot in the official Arena's Guide.
Short descriptions are also available using the print_options
method.
# Start another live Arena to demonstrate the options API.
arena=dx.Arena()
arena.push_model(exp_decision_tree)
arena.push_observations(test)
arena.run_server(port=9294)
# Print a short description of every available chart option.
arena.print_options()
You can easily change options for charts and the dashboard will be automatically refreshed.
# Chart-specific: applies only to the CeterisParibus plot
arena.set_option('CeterisParibus', 'grid_type', 'uniform')
# For all charts: passing None as the plot name sets the option globally
arena.set_option(None, 'grid_points', 200)
# Show the updated option values.
arena.print_options()
This package uses plotly to render the plots. Useful resources:
- plotly in JupyterLab: Getting Started and Troubleshooting guides.
- Use the show=False parameter in the plot method to return a plotly Figure object.
- dalex package: Titanic tutorial and examples.