SHAP insights

SHAP is an open-source method for explaining the predictions of machine learning models. For more information about SHAP, see its repository on GitHub (https://github.com/slundberg/shap). DataRobot supports SHAP computations for all regression and binary classification blueprints. You can compute three different insights:

  • “SHAP matrix”: Raw SHAP values for each feature column and each row.

  • “SHAP impact”: Overall importance for each feature column across all rows, based on aggregated SHAP matrix values.

  • “SHAP preview”: SHAP values for the most important features in each row, presented with the values of the features in that row.

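The following example code assumes that you have a trained model object named `model`. In the example, lines that begin with `>>>` show the output printed by the preceding `print` call; they are not part of the code to run.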

import datarobot as dr
from datarobot.insights.shap_matrix import ShapMatrix
from datarobot.insights.shap_impact import ShapImpact
from datarobot.insights.shap_preview import ShapPreview
model_id = model.id  # or model_id = 'YOUR_MODEL_ID'
# request SHAP Matrix, and wait for it to complete
result = ShapMatrix.create(entity_id=model_id)  # default source is 'validation'
# view the properties of the SHAP Matrix
print(result.columns)
>>> ['AUCGUART', 'Color', 'Make', ...
print(result.matrix)
>>> [[ 1.22604372e-02  1.98424454e-01  2.23308013e-01  ...] ... ]
# request SHAP Matrix on a different partition, and return immediately with job reference
job = ShapMatrix.compute(entity_id=model_id, source='holdout')
# wait for the job to complete
result = job.get_result_when_complete()
print(result.columns)
>>> ['AUCGUART', 'Color', 'Make', ...
print(result.matrix)
>>> [[-0.11443075 -0.01130723  0.22330801 ... ] ... ]
# request SHAP Impact; currently this is only supported for source='training'
job = ShapImpact.compute(entity_id=model_id, source='training', row_count=100)
result = job.get_result_when_complete()
# Impacts are listed as [feature_name, normalized_impact, unnormalized_impact]
print(result.shap_impacts)
>>> [['AUCGUART', 0.07989059458051094, 0.022147886593333888], ...]
# list all matrices computed for this model, including each partition
matrix_list = ShapMatrix.list(entity_id=model_id)
print(matrix_list)
>>> [<datarobot.insights.shap_matrix.ShapMatrix object at 0x114e52090>, ...]
print([(matrix_obj, matrix_obj.source) for matrix_obj in matrix_list])
>>> [(<datarobot.insights.shap_matrix.ShapMatrix object at 0x114e52090>, 'validation'), ... ]
# upload a file to the AI Catalog
dataset = dr.Dataset.upload("./path/to/dataset.csv")
# request explanations for that file in the "preview" format
job = ShapPreview.compute(entity_id=model_id, source='externalTestSet', external_dataset_id=dataset.id)
result = job.get_result_when_complete()
print(result.previews[0])
>>> {'row_index': 0,
>>> 'prediction_value': 0.3024851286385187,
>>>  'preview_values': [{'feature_rank': 1,
>>>    'feature_name': 'BYRNO',
>>>    'feature_value': '21973',
>>>    'shap_value': 0.22025144078391848,
>>>    'has_text_explanations': False,
>>>    'text_explanations': []},
>>> ... }