Commit 0136ac6 · 1 Parent(s): c95b9af
LLH committed on 2024/02/14 12:17

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .idea/.gitignore +0 -8
  2. .idea/EasyMachineLearningDemo.iml +0 -12
  3. .idea/inspectionProfiles/Project_Default.xml +0 -12
  4. .idea/inspectionProfiles/profiles_settings.xml +0 -6
  5. .idea/modules.xml +0 -8
  6. .idea/vcs.xml +0 -6
  7. analysis/shap_model.py +4 -3
  8. app.py +1 -1
  9. lib/__init__.py +0 -0
  10. lib/shap/__init__.py +0 -144
  11. lib/shap/_cext.cp310-win_amd64.pyd +0 -0
  12. lib/shap/_explanation.py +0 -901
  13. lib/shap/_serializable.py +0 -204
  14. lib/shap/_version.py +0 -16
  15. lib/shap/actions/__init__.py +0 -3
  16. lib/shap/actions/_action.py +0 -8
  17. lib/shap/actions/_optimizer.py +0 -92
  18. lib/shap/benchmark/__init__.py +0 -9
  19. lib/shap/benchmark/_compute.py +0 -9
  20. lib/shap/benchmark/_explanation_error.py +0 -181
  21. lib/shap/benchmark/_result.py +0 -34
  22. lib/shap/benchmark/_sequential.py +0 -332
  23. lib/shap/benchmark/experiments.py +0 -414
  24. lib/shap/benchmark/framework.py +0 -113
  25. lib/shap/benchmark/measures.py +0 -424
  26. lib/shap/benchmark/methods.py +0 -148
  27. lib/shap/benchmark/metrics.py +0 -824
  28. lib/shap/benchmark/models.py +0 -230
  29. lib/shap/benchmark/plots.py +0 -566
  30. lib/shap/cext/_cext.cc +0 -560
  31. lib/shap/cext/_cext_gpu.cc +0 -187
  32. lib/shap/cext/_cext_gpu.cu +0 -353
  33. lib/shap/cext/gpu_treeshap.h +0 -1535
  34. lib/shap/cext/tree_shap.h +0 -1460
  35. lib/shap/datasets.py +0 -309
  36. lib/shap/explainers/__init__.py +0 -38
  37. lib/shap/explainers/_additive.py +0 -187
  38. lib/shap/explainers/_deep/__init__.py +0 -125
  39. lib/shap/explainers/_deep/deep_pytorch.py +0 -386
  40. lib/shap/explainers/_deep/deep_tf.py +0 -763
  41. lib/shap/explainers/_deep/deep_utils.py +0 -23
  42. lib/shap/explainers/_exact.py +0 -366
  43. lib/shap/explainers/_explainer.py +0 -457
  44. lib/shap/explainers/_gpu_tree.py +0 -179
  45. lib/shap/explainers/_gradient.py +0 -592
  46. lib/shap/explainers/_kernel.py +0 -696
  47. lib/shap/explainers/_linear.py +0 -406
  48. lib/shap/explainers/_partition.py +0 -681
  49. lib/shap/explainers/_permutation.py +0 -217
  50. lib/shap/explainers/_sampling.py +0 -199
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml

.idea/EasyMachineLearningDemo.iml DELETED
@@ -1,12 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="PyDocumentationSettings">
-    <option name="format" value="PLAIN" />
-    <option name="myDocStringFormat" value="Plain" />
-  </component>
-</module>

.idea/inspectionProfiles/Project_Default.xml DELETED
@@ -1,12 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <profile version="1.0">
-    <option name="myName" value="Project Default" />
-    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
-      <option name="ignoredIdentifiers">
-        <list>
-          <option value="object.pop" />
-        </list>
-      </option>
-    </inspection_tool>
-  </profile>
-</component>

.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>

.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/EasyMachineLearningDemo.iml" filepath="$PROJECT_DIR$/.idea/EasyMachineLearningDemo.iml" />
-    </modules>
-  </component>
-</project>

.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>

analysis/shap_model.py CHANGED
@@ -1,16 +1,17 @@
 import matplotlib.pyplot as plt

-import lib.shap as shap
+import shap


 def shap_calculate(model, x, feature_names):
     explainer = shap.Explainer(model.predict, x)
     shap_values = explainer(x)

-    return shap.summary_plot(shap_values, x, feature_names=feature_names)
+    shap.summary_plot(shap_values, x, feature_names=feature_names, show=False)
+
+    return plt

 # title = "shap"
-# cur_plt.savefig("./diagram/{}.png".format(title), dpi=300)

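For context, a minimal usage sketch of the updated shap_calculate (a hypothetical caller, not part of this commit; model, x_test, and feature_names are assumed to exist): with show=False the beeswarm figure stays open on the returned pyplot module, so the caller can decide whether to save or display it.

# Hypothetical caller: save the summary plot instead of showing it interactively.
from analysis.shap_model import shap_calculate

cur_plt = shap_calculate(model, x_test, feature_names)        # assumed inputs
cur_plt.savefig("./diagram/shap_beeswarm_plot.png", dpi=300)  # same pattern as FilePath below
cur_plt.close()
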
app.py CHANGED
@@ -69,7 +69,7 @@ class Container:


 class FilePath:
-    base = "../diagram/{}.png"
+    base = "./diagram/{}.png"
     shap_beeswarm_plot = "shap_beeswarm_plot"

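The only change here is the save location: plots are now written to a diagram folder resolved against the app's working directory rather than its parent directory. A hedged illustration of how the two constants combine (the surrounding save logic is assumed, not shown in this hunk):

# Illustration only (assumed usage of the constants shown above):
path = FilePath.base.format(FilePath.shap_beeswarm_plot)
# path == "./diagram/shap_beeswarm_plot.png"
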
lib/__init__.py DELETED
File without changes
lib/shap/__init__.py DELETED
@@ -1,144 +0,0 @@
1
- from ._explanation import Cohorts, Explanation
2
-
3
- # explainers
4
- from .explainers import other
5
- from .explainers._additive import AdditiveExplainer
6
- from .explainers._deep import DeepExplainer
7
- from .explainers._exact import ExactExplainer
8
- from .explainers._explainer import Explainer
9
- from .explainers._gpu_tree import GPUTreeExplainer
10
- from .explainers._gradient import GradientExplainer
11
- from .explainers._kernel import KernelExplainer
12
- from .explainers._linear import LinearExplainer
13
- from .explainers._partition import PartitionExplainer
14
- from .explainers._permutation import PermutationExplainer
15
- from .explainers._sampling import SamplingExplainer
16
- from .explainers._tree import TreeExplainer
17
-
18
- try:
19
- # Version from setuptools-scm
20
- from ._version import version as __version__
21
- except ImportError:
22
- # Expected when running locally without build
23
- __version__ = "0.0.0-not-built"
24
-
25
- _no_matplotlib_warning = "matplotlib is not installed so plotting is not available! Run `pip install matplotlib` " \
26
- "to fix this."
27
-
28
-
29
- # plotting (only loaded if matplotlib is present)
30
- def unsupported(*args, **kwargs):
31
- raise ImportError(_no_matplotlib_warning)
32
-
33
-
34
- class UnsupportedModule:
35
- def __getattribute__(self, item):
36
- raise ImportError(_no_matplotlib_warning)
37
-
38
-
39
- try:
40
- import matplotlib # noqa: F401
41
- have_matplotlib = True
42
- except ImportError:
43
- have_matplotlib = False
44
- if have_matplotlib:
45
- from . import plots
46
- from .plots._bar import bar_legacy as bar_plot
47
- from .plots._beeswarm import summary_legacy as summary_plot
48
- from .plots._decision import decision as decision_plot
49
- from .plots._decision import multioutput_decision as multioutput_decision_plot
50
- from .plots._embedding import embedding as embedding_plot
51
- from .plots._force import force as force_plot
52
- from .plots._force import getjs, initjs, save_html
53
- from .plots._group_difference import group_difference as group_difference_plot
54
- from .plots._heatmap import heatmap as heatmap_plot
55
- from .plots._image import image as image_plot
56
- from .plots._monitoring import monitoring as monitoring_plot
57
- from .plots._partial_dependence import partial_dependence as partial_dependence_plot
58
- from .plots._scatter import dependence_legacy as dependence_plot
59
- from .plots._text import text as text_plot
60
- from .plots._violin import violin as violin_plot
61
- from .plots._waterfall import waterfall as waterfall_plot
62
- else:
63
- bar_plot = unsupported
64
- summary_plot = unsupported
65
- decision_plot = unsupported
66
- multioutput_decision_plot = unsupported
67
- embedding_plot = unsupported
68
- force_plot = unsupported
69
- getjs = unsupported
70
- initjs = unsupported
71
- save_html = unsupported
72
- group_difference_plot = unsupported
73
- heatmap_plot = unsupported
74
- image_plot = unsupported
75
- monitoring_plot = unsupported
76
- partial_dependence_plot = unsupported
77
- dependence_plot = unsupported
78
- text_plot = unsupported
79
- violin_plot = unsupported
80
- waterfall_plot = unsupported
81
- # If matplotlib is available, then the plots submodule will be directly available.
82
- # If not, we need to define something that will issue a meaningful warning message
83
- # (rather than ModuleNotFound).
84
- plots = UnsupportedModule()
85
-
86
-
87
- # other stuff :)
88
- from . import datasets, links, utils # noqa: E402
89
- from .actions._optimizer import ActionOptimizer # noqa: E402
90
- from .utils import approximate_interactions, sample # noqa: E402
91
-
92
- #from . import benchmark
93
- from .utils._legacy import kmeans # noqa: E402
94
-
95
- # Use __all__ to let type checkers know what is part of the public API.
96
- __all__ = [
97
- "Cohorts",
98
- "Explanation",
99
-
100
- # Explainers
101
- "other",
102
- "AdditiveExplainer",
103
- "DeepExplainer",
104
- "ExactExplainer",
105
- "Explainer",
106
- "GPUTreeExplainer",
107
- "GradientExplainer",
108
- "KernelExplainer",
109
- "LinearExplainer",
110
- "PartitionExplainer",
111
- "PermutationExplainer",
112
- "SamplingExplainer",
113
- "TreeExplainer",
114
-
115
- # Plots
116
- "plots",
117
- "bar_plot",
118
- "summary_plot",
119
- "decision_plot",
120
- "multioutput_decision_plot",
121
- "embedding_plot",
122
- "force_plot",
123
- "getjs",
124
- "initjs",
125
- "save_html",
126
- "group_difference_plot",
127
- "heatmap_plot",
128
- "image_plot",
129
- "monitoring_plot",
130
- "partial_dependence_plot",
131
- "dependence_plot",
132
- "text_plot",
133
- "violin_plot",
134
- "waterfall_plot",
135
-
136
- # Other stuff
137
- "datasets",
138
- "links",
139
- "utils",
140
- "ActionOptimizer",
141
- "approximate_interactions",
142
- "sample",
143
- "kmeans",
144
- ]
 
lib/shap/_cext.cp310-win_amd64.pyd DELETED
Binary file (44 kB)
 
lib/shap/_explanation.py DELETED
@@ -1,901 +0,0 @@
1
-
2
- import copy
3
- import operator
4
-
5
- import numpy as np
6
- import pandas as pd
7
- import scipy.cluster
8
- import scipy.sparse
9
- import scipy.spatial
10
- import sklearn
11
- from slicer import Alias, Obj, Slicer
12
-
13
- from .utils._exceptions import DimensionError
14
- from .utils._general import OpChain
15
-
16
- op_chain_root = OpChain("shap.Explanation")
17
- class MetaExplanation(type):
18
- """ This metaclass exposes the Explanation object's methods for creating template op chains.
19
- """
20
-
21
- def __getitem__(cls, item):
22
- return op_chain_root.__getitem__(item)
23
-
24
- @property
25
- def abs(cls):
26
- """ Element-wise absolute value op.
27
- """
28
- return op_chain_root.abs
29
-
30
- @property
31
- def identity(cls):
32
- """ A no-op.
33
- """
34
- return op_chain_root.identity
35
-
36
- @property
37
- def argsort(cls):
38
- """ Numpy style argsort.
39
- """
40
- return op_chain_root.argsort
41
-
42
- @property
43
- def sum(cls):
44
- """ Numpy style sum.
45
- """
46
- return op_chain_root.sum
47
-
48
- @property
49
- def max(cls):
50
- """ Numpy style max.
51
- """
52
- return op_chain_root.max
53
-
54
- @property
55
- def min(cls):
56
- """ Numpy style min.
57
- """
58
- return op_chain_root.min
59
-
60
- @property
61
- def mean(cls):
62
- """ Numpy style mean.
63
- """
64
- return op_chain_root.mean
65
-
66
- @property
67
- def sample(cls):
68
- """ Numpy style sample.
69
- """
70
- return op_chain_root.sample
71
-
72
- @property
73
- def hclust(cls):
74
- """ Hierarchical clustering op.
75
- """
76
- return op_chain_root.hclust
77
-
78
-
79
- class Explanation(metaclass=MetaExplanation):
80
- """ A sliceable set of parallel arrays representing a SHAP explanation.
81
- """
82
- def __init__(
83
- self,
84
- values,
85
- base_values=None,
86
- data=None,
87
- display_data=None,
88
- instance_names=None,
89
- feature_names=None,
90
- output_names=None,
91
- output_indexes=None,
92
- lower_bounds=None,
93
- upper_bounds=None,
94
- error_std=None,
95
- main_effects=None,
96
- hierarchical_values=None,
97
- clustering=None,
98
- compute_time=None
99
- ):
100
- self.op_history = []
101
-
102
- self.compute_time = compute_time
103
-
104
- # cloning. TODOsomeday: better cloning :)
105
- if issubclass(type(values), Explanation):
106
- e = values
107
- values = e.values
108
- base_values = e.base_values
109
- data = e.data
110
-
111
- self.output_dims = compute_output_dims(values, base_values, data, output_names)
112
- values_shape = _compute_shape(values)
113
-
114
- if output_names is None and len(self.output_dims) == 1:
115
- output_names = [f"Output {i}" for i in range(values_shape[self.output_dims[0]])]
116
-
117
- if len(_compute_shape(feature_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases
118
- if len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
119
- feature_names = Alias(list(feature_names), 1)
120
- elif len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
121
- feature_names = Alias(list(feature_names), 0)
122
-
123
- if len(_compute_shape(output_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases
124
- output_names = Alias(list(output_names), self.output_dims[0])
125
- # if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
126
- # output_names = Alias(list(output_names), 0)
127
- # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
128
- # output_names = Alias(list(output_names), 1)
129
-
130
- if output_names is not None and not isinstance(output_names, Alias):
131
- output_names_order = len(_compute_shape(output_names))
132
- if output_names_order == 0:
133
- pass
134
- elif output_names_order == 1:
135
- output_names = Obj(output_names, self.output_dims)
136
- elif output_names_order == 2:
137
- output_names = Obj(output_names, [0] + list(self.output_dims))
138
- else:
139
- raise ValueError("shap.Explanation does not yet support output_names of order greater than 3!")
140
-
141
- if not hasattr(base_values, "__len__") or len(base_values) == 0:
142
- pass
143
- elif len(_compute_shape(base_values)) == len(self.output_dims):
144
- base_values = Obj(base_values, list(self.output_dims))
145
- else:
146
- base_values = Obj(base_values, [0] + list(self.output_dims))
147
-
148
- self._s = Slicer(
149
- values=values,
150
- base_values=base_values,
151
- data=list_wrap(data),
152
- display_data=list_wrap(display_data),
153
- instance_names=None if instance_names is None else Alias(instance_names, 0),
154
- feature_names=feature_names,
155
- output_names=output_names,
156
- output_indexes=None if output_indexes is None else (self.output_dims, output_indexes),
157
- lower_bounds=list_wrap(lower_bounds),
158
- upper_bounds=list_wrap(upper_bounds),
159
- error_std=list_wrap(error_std),
160
- main_effects=list_wrap(main_effects),
161
- hierarchical_values=list_wrap(hierarchical_values),
162
- clustering=None if clustering is None else Obj(clustering, [0])
163
- )
164
-
165
- @property
166
- def shape(self):
167
- """ Compute the shape over potentially complex data nesting.
168
- """
169
- return _compute_shape(self._s.values)
170
-
171
- @property
172
- def values(self):
173
- """ Pass-through from the underlying slicer object.
174
- """
175
- return self._s.values
176
- @values.setter
177
- def values(self, new_values):
178
- self._s.values = new_values
179
-
180
- @property
181
- def base_values(self):
182
- """ Pass-through from the underlying slicer object.
183
- """
184
- return self._s.base_values
185
- @base_values.setter
186
- def base_values(self, new_base_values):
187
- self._s.base_values = new_base_values
188
-
189
- @property
190
- def data(self):
191
- """ Pass-through from the underlying slicer object.
192
- """
193
- return self._s.data
194
- @data.setter
195
- def data(self, new_data):
196
- self._s.data = new_data
197
-
198
- @property
199
- def display_data(self):
200
- """ Pass-through from the underlying slicer object.
201
- """
202
- return self._s.display_data
203
- @display_data.setter
204
- def display_data(self, new_display_data):
205
- if issubclass(type(new_display_data), pd.DataFrame):
206
- new_display_data = new_display_data.values
207
- self._s.display_data = new_display_data
208
-
209
- @property
210
- def instance_names(self):
211
- """ Pass-through from the underlying slicer object.
212
- """
213
- return self._s.instance_names
214
-
215
- @property
216
- def output_names(self):
217
- """ Pass-through from the underlying slicer object.
218
- """
219
- return self._s.output_names
220
- @output_names.setter
221
- def output_names(self, new_output_names):
222
- self._s.output_names = new_output_names
223
-
224
- @property
225
- def output_indexes(self):
226
- """ Pass-through from the underlying slicer object.
227
- """
228
- return self._s.output_indexes
229
-
230
- @property
231
- def feature_names(self):
232
- """ Pass-through from the underlying slicer object.
233
- """
234
- return self._s.feature_names
235
- @feature_names.setter
236
- def feature_names(self, new_feature_names):
237
- self._s.feature_names = new_feature_names
238
-
239
- @property
240
- def lower_bounds(self):
241
- """ Pass-through from the underlying slicer object.
242
- """
243
- return self._s.lower_bounds
244
-
245
- @property
246
- def upper_bounds(self):
247
- """ Pass-through from the underlying slicer object.
248
- """
249
- return self._s.upper_bounds
250
-
251
- @property
252
- def error_std(self):
253
- """ Pass-through from the underlying slicer object.
254
- """
255
- return self._s.error_std
256
-
257
- @property
258
- def main_effects(self):
259
- """ Pass-through from the underlying slicer object.
260
- """
261
- return self._s.main_effects
262
- @main_effects.setter
263
- def main_effects(self, new_main_effects):
264
- self._s.main_effects = new_main_effects
265
-
266
- @property
267
- def hierarchical_values(self):
268
- """ Pass-through from the underlying slicer object.
269
- """
270
- return self._s.hierarchical_values
271
- @hierarchical_values.setter
272
- def hierarchical_values(self, new_hierarchical_values):
273
- self._s.hierarchical_values = new_hierarchical_values
274
-
275
- @property
276
- def clustering(self):
277
- """ Pass-through from the underlying slicer object.
278
- """
279
- return self._s.clustering
280
- @clustering.setter
281
- def clustering(self, new_clustering):
282
- self._s.clustering = new_clustering
283
-
284
- def cohorts(self, cohorts):
285
- """ Split this explanation into several cohorts.
286
-
287
- Parameters
288
- ----------
289
- cohorts : int or array
290
- If this is an integer then we auto build that many cohorts using a decision tree. If this is
291
- an array then we treat that as an array of cohort names/ids for each instance.
292
- """
293
-
294
- if isinstance(cohorts, int):
295
- return _auto_cohorts(self, max_cohorts=cohorts)
296
- if isinstance(cohorts, (list, tuple, np.ndarray)):
297
- cohorts = np.array(cohorts)
298
- return Cohorts(**{name: self[cohorts == name] for name in np.unique(cohorts)})
299
- raise TypeError("The given set of cohort indicators is not recognized! Please give an array or int.")
300
-
301
- def __repr__(self):
302
- """ Display some basic printable info, but not everything.
303
- """
304
- out = ".values =\n"+self.values.__repr__()
305
- if self.base_values is not None:
306
- out += "\n\n.base_values =\n"+self.base_values.__repr__()
307
- if self.data is not None:
308
- out += "\n\n.data =\n"+self.data.__repr__()
309
- return out
310
-
311
- def __getitem__(self, item):
312
- """ This adds support for OpChain indexing.
313
- """
314
- new_self = None
315
- if not isinstance(item, tuple):
316
- item = (item,)
317
-
318
- # convert any OpChains or magic strings
319
- pos = -1
320
- for t in item:
321
- pos += 1
322
-
323
- # skip over Ellipsis
324
- if t is Ellipsis:
325
- pos += len(self.shape) - len(item)
326
- continue
327
-
328
- orig_t = t
329
- if issubclass(type(t), OpChain):
330
- t = t.apply(self)
331
- if issubclass(type(t), (np.int64, np.int32)): # because slicer does not like numpy indexes
332
- t = int(t)
333
- elif issubclass(type(t), np.ndarray):
334
- t = [int(v) for v in t] # slicer wants lists not numpy arrays for indexing
335
- elif issubclass(type(t), Explanation):
336
- t = t.values
337
- elif isinstance(t, str):
338
-
339
- # work around for 2D output_names since they are not yet slicer supported
340
- output_names_dims = []
341
- if "output_names" in self._s._objects:
342
- output_names_dims = self._s._objects["output_names"].dim
343
- elif "output_names" in self._s._aliases:
344
- output_names_dims = self._s._aliases["output_names"].dim
345
- if pos != 0 and pos in output_names_dims:
346
- if len(output_names_dims) == 1:
347
- t = np.argwhere(np.array(self.output_names) == t)[0][0]
348
- elif len(output_names_dims) == 2:
349
- new_values = []
350
- new_base_values = []
351
- new_data = []
352
- new_self = copy.deepcopy(self)
353
- for i, v in enumerate(self.values):
354
- for j, s in enumerate(self.output_names[i]):
355
- if s == t:
356
- new_values.append(np.array(v[:,j]))
357
- new_data.append(np.array(self.data[i]))
358
- new_base_values.append(self.base_values[i][j])
359
-
360
- new_self = Explanation(
361
- np.array(new_values),
362
- np.array(new_base_values),
363
- np.array(new_data),
364
- self.display_data,
365
- self.instance_names,
366
- np.array(new_data),
367
- t, # output_names
368
- self.output_indexes,
369
- self.lower_bounds,
370
- self.upper_bounds,
371
- self.error_std,
372
- self.main_effects,
373
- self.hierarchical_values,
374
- self.clustering
375
- )
376
- new_self.op_history = copy.copy(self.op_history)
377
- # new_self = copy.deepcopy(self)
378
- # new_self.values = np.array(new_values)
379
- # new_self.base_values = np.array(new_base_values)
380
- # new_self.data = np.array(new_data)
381
- # new_self.output_names = t
382
- # new_self.feature_names = np.array(new_data)
383
- # new_self.clustering = None
384
-
385
- # work around for 2D feature_names since they are not yet slicer supported
386
- feature_names_dims = []
387
- if "feature_names" in self._s._objects:
388
- feature_names_dims = self._s._objects["feature_names"].dim
389
- if pos != 0 and pos in feature_names_dims and len(feature_names_dims) == 2:
390
- new_values = []
391
- new_data = []
392
- for i, val_i in enumerate(self.values):
393
- for s,v,d in zip(self.feature_names[i], val_i, self.data[i]):
394
- if s == t:
395
- new_values.append(v)
396
- new_data.append(d)
397
- new_self = copy.deepcopy(self)
398
- new_self.values = new_values
399
- new_self.data = new_data
400
- new_self.feature_names = t
401
- new_self.clustering = None
402
- # return new_self
403
-
404
- if issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)):
405
- t = int(t)
406
-
407
- if t is not orig_t:
408
- tmp = list(item)
409
- tmp[pos] = t
410
- item = tuple(tmp)
411
-
412
- # call slicer for the real work
413
- item = tuple(v for v in item) # SML I cut out: `if not isinstance(v, str)`
414
- if len(item) == 0:
415
- return new_self
416
- if new_self is None:
417
- new_self = copy.copy(self)
418
- new_self._s = new_self._s.__getitem__(item)
419
- new_self.op_history.append({
420
- "name": "__getitem__",
421
- "args": (item,),
422
- "prev_shape": self.shape
423
- })
424
-
425
- return new_self
426
-
427
- def __len__(self):
428
- return self.shape[0]
429
-
430
- def __copy__(self):
431
- new_exp = Explanation(
432
- self.values,
433
- self.base_values,
434
- self.data,
435
- self.display_data,
436
- self.instance_names,
437
- self.feature_names,
438
- self.output_names,
439
- self.output_indexes,
440
- self.lower_bounds,
441
- self.upper_bounds,
442
- self.error_std,
443
- self.main_effects,
444
- self.hierarchical_values,
445
- self.clustering
446
- )
447
- new_exp.op_history = copy.copy(self.op_history)
448
- return new_exp
449
-
450
- def _apply_binary_operator(self, other, binary_op, op_name):
451
- new_exp = self.__copy__()
452
- new_exp.op_history = copy.copy(self.op_history)
453
- new_exp.op_history.append({
454
- "name": op_name,
455
- "args": (other,),
456
- "prev_shape": self.shape
457
- })
458
- if isinstance(other, Explanation):
459
- new_exp.values = binary_op(new_exp.values, other.values)
460
- if new_exp.data is not None:
461
- new_exp.data = binary_op(new_exp.data, other.data)
462
- if new_exp.base_values is not None:
463
- new_exp.base_values = binary_op(new_exp.base_values, other.base_values)
464
- else:
465
- new_exp.values = binary_op(new_exp.values, other)
466
- if new_exp.data is not None:
467
- new_exp.data = binary_op(new_exp.data, other)
468
- if new_exp.base_values is not None:
469
- new_exp.base_values = binary_op(new_exp.base_values, other)
470
- return new_exp
471
-
472
- def __add__(self, other):
473
- return self._apply_binary_operator(other, operator.add, "__add__")
474
-
475
- def __radd__(self, other):
476
- return self._apply_binary_operator(other, operator.add, "__add__")
477
-
478
- def __sub__(self, other):
479
- return self._apply_binary_operator(other, operator.sub, "__sub__")
480
-
481
- def __rsub__(self, other):
482
- return self._apply_binary_operator(other, operator.sub, "__sub__")
483
-
484
- def __mul__(self, other):
485
- return self._apply_binary_operator(other, operator.mul, "__mul__")
486
-
487
- def __rmul__(self, other):
488
- return self._apply_binary_operator(other, operator.mul, "__mul__")
489
-
490
- def __truediv__(self, other):
491
- return self._apply_binary_operator(other, operator.truediv, "__truediv__")
492
-
493
- # @property
494
- # def abs(self):
495
- # """ Element-size absolute value operator.
496
- # """
497
- # new_self = copy.copy(self)
498
- # new_self.values = np.abs(new_self.values)
499
- # new_self.op_history.append({
500
- # "name": "abs",
501
- # "prev_shape": self.shape
502
- # })
503
- # return new_self
504
-
505
- def _numpy_func(self, fname, **kwargs):
506
- """ Apply a numpy-style function to this Explanation.
507
- """
508
- new_self = copy.copy(self)
509
- axis = kwargs.get("axis", None)
510
-
511
- # collapse the slicer to right shape
512
- if axis == 0:
513
- new_self = new_self[0]
514
- elif axis == 1:
515
- new_self = new_self[1]
516
- elif axis == 2:
517
- new_self = new_self[2]
518
- if axis in [0,1,2]:
519
- new_self.op_history = new_self.op_history[:-1] # pop off the slicing operation we just used
520
-
521
- if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
522
- new_values = self._flatten_feature_names()
523
- new_self.feature_names = np.array(list(new_values.keys()))
524
- new_self.values = np.array([getattr(np, fname)(v,0) for v in new_values.values()])
525
- new_self.clustering = None
526
- else:
527
- new_self.values = getattr(np, fname)(np.array(self.values), **kwargs)
528
- if new_self.data is not None:
529
- try:
530
- new_self.data = getattr(np, fname)(np.array(self.data), **kwargs)
531
- except Exception:
532
- new_self.data = None
533
- if new_self.base_values is not None and issubclass(type(axis), int) and len(self.base_values.shape) > axis:
534
- new_self.base_values = getattr(np, fname)(self.base_values, **kwargs)
535
- elif issubclass(type(axis), int):
536
- new_self.base_values = None
537
-
538
- if axis == 0 and self.clustering is not None and len(self.clustering.shape) == 3:
539
- if self.clustering.std(0).sum() < 1e-8:
540
- new_self.clustering = self.clustering[0]
541
- else:
542
- new_self.clustering = None
543
-
544
- new_self.op_history.append({
545
- "name": fname,
546
- "kwargs": kwargs,
547
- "prev_shape": self.shape,
548
- "collapsed_instances": axis == 0
549
- })
550
-
551
- return new_self
552
-
553
- def mean(self, axis):
554
- """ Numpy-style mean function.
555
- """
556
- return self._numpy_func("mean", axis=axis)
557
-
558
- def max(self, axis):
559
- """ Numpy-style mean function.
560
- """
561
- return self._numpy_func("max", axis=axis)
562
-
563
- def min(self, axis):
564
- """ Numpy-style mean function.
565
- """
566
- return self._numpy_func("min", axis=axis)
567
-
568
- def sum(self, axis=None, grouping=None):
569
- """ Numpy-style mean function.
570
- """
571
- if grouping is None:
572
- return self._numpy_func("sum", axis=axis)
573
- elif axis == 1 or len(self.shape) == 1:
574
- return group_features(self, grouping)
575
- else:
576
- raise DimensionError("Only axis = 1 is supported for grouping right now...")
577
-
578
- def hstack(self, other):
579
- """ Stack two explanations column-wise.
580
- """
581
- assert self.shape[0] == other.shape[0], "Can't hstack explanations with different numbers of rows!"
582
- assert np.max(np.abs(self.base_values - other.base_values)) < 1e-6, "Can't hstack explanations with different base values!"
583
-
584
- new_exp = Explanation(
585
- values=np.hstack([self.values, other.values]),
586
- base_values=self.base_values,
587
- data=self.data,
588
- display_data=self.display_data,
589
- instance_names=self.instance_names,
590
- feature_names=self.feature_names,
591
- output_names=self.output_names,
592
- output_indexes=self.output_indexes,
593
- lower_bounds=self.lower_bounds,
594
- upper_bounds=self.upper_bounds,
595
- error_std=self.error_std,
596
- main_effects=self.main_effects,
597
- hierarchical_values=self.hierarchical_values,
598
- clustering=self.clustering,
599
- )
600
- return new_exp
601
-
602
- # def reshape(self, *args):
603
- # return self._numpy_func("reshape", newshape=args)
604
-
605
- @property
606
- def abs(self):
607
- return self._numpy_func("abs")
608
-
609
- @property
610
- def identity(self):
611
- return self
612
-
613
- @property
614
- def argsort(self):
615
- return self._numpy_func("argsort")
616
-
617
- @property
618
- def flip(self):
619
- return self._numpy_func("flip")
620
-
621
-
622
- def hclust(self, metric="sqeuclidean", axis=0):
623
- """ Computes an optimal leaf ordering sort order using hclustering.
624
-
625
- hclust(metric="sqeuclidean")
626
-
627
- Parameters
628
- ----------
629
- metric : string
630
- A metric supported by scipy clustering.
631
-
632
- axis : int
633
- The axis to cluster along.
634
- """
635
- values = self.values
636
-
637
- if len(values.shape) != 2:
638
- raise DimensionError("The hclust order only supports 2D arrays right now!")
639
-
640
- if axis == 1:
641
- values = values.T
642
-
643
- # compute a hierarchical clustering and return the optimal leaf ordering
644
- D = scipy.spatial.distance.pdist(values, metric)
645
- cluster_matrix = scipy.cluster.hierarchy.complete(D)
646
- inds = scipy.cluster.hierarchy.leaves_list(scipy.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
647
- return inds
648
-
649
- def sample(self, max_samples, replace=False, random_state=0):
650
- """ Randomly samples the instances (rows) of the Explanation object.
651
-
652
- Parameters
653
- ----------
654
- max_samples : int
655
- The number of rows to sample. Note that if replace=False then less than
656
- fewer than max_samples will be drawn if explanation.shape[0] < max_samples.
657
-
658
- replace : bool
659
- Sample with or without replacement.
660
- """
661
- prev_seed = np.random.seed(random_state)
662
- inds = np.random.choice(self.shape[0], min(max_samples, self.shape[0]), replace=replace)
663
- np.random.seed(prev_seed)
664
- return self[list(inds)]
665
-
666
- def _flatten_feature_names(self):
667
- new_values = {}
668
- for i in range(len(self.values)):
669
- for s,v in zip(self.feature_names[i], self.values[i]):
670
- if s not in new_values:
671
- new_values[s] = []
672
- new_values[s].append(v)
673
- return new_values
674
-
675
- def _use_data_as_feature_names(self):
676
- new_values = {}
677
- for i in range(len(self.values)):
678
- for s,v in zip(self.data[i], self.values[i]):
679
- if s not in new_values:
680
- new_values[s] = []
681
- new_values[s].append(v)
682
- return new_values
683
-
684
- def percentile(self, q, axis=None):
685
- new_self = copy.deepcopy(self)
686
- if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
687
- new_values = self._flatten_feature_names()
688
- new_self.feature_names = np.array(list(new_values.keys()))
689
- new_self.values = np.array([np.percentile(v, q) for v in new_values.values()])
690
- new_self.clustering = None
691
- else:
692
- new_self.values = np.percentile(new_self.values, q, axis)
693
- new_self.data = np.percentile(new_self.data, q, axis)
694
- #new_self.data = None
695
- new_self.op_history.append({
696
- "name": "percentile",
697
- "args": (axis,),
698
- "prev_shape": self.shape,
699
- "collapsed_instances": axis == 0
700
- })
701
- return new_self
702
-
703
- def group_features(shap_values, feature_map):
704
- # TODOsomeday: support and deal with clusterings
705
- reverse_map = {}
706
- for name in feature_map:
707
- reverse_map[feature_map[name]] = reverse_map.get(feature_map[name], []) + [name]
708
-
709
- curr_names = shap_values.feature_names
710
- sv_new = copy.deepcopy(shap_values)
711
- found = {}
712
- i = 0
713
- rank1 = len(shap_values.shape) == 1
714
- for name in curr_names:
715
- new_name = feature_map.get(name, name)
716
- if new_name in found:
717
- continue
718
- found[new_name] = True
719
-
720
- new_name = feature_map.get(name, name)
721
- cols_to_sum = reverse_map.get(new_name, [new_name])
722
- old_inds = [curr_names.index(v) for v in cols_to_sum]
723
-
724
- if rank1:
725
- sv_new.values[i] = shap_values.values[old_inds].sum()
726
- sv_new.data[i] = shap_values.data[old_inds].sum()
727
- else:
728
- sv_new.values[:,i] = shap_values.values[:,old_inds].sum(1)
729
- sv_new.data[:,i] = shap_values.data[:,old_inds].sum(1)
730
- sv_new.feature_names[i] = new_name
731
- i += 1
732
-
733
- return Explanation(
734
- sv_new.values[:i] if rank1 else sv_new.values[:,:i],
735
- base_values = sv_new.base_values,
736
- data = sv_new.data[:i] if rank1 else sv_new.data[:,:i],
737
- display_data = None if sv_new.display_data is None else (sv_new.display_data[:,:i] if rank1 else sv_new.display_data[:,:i]),
738
- instance_names = None,
739
- feature_names = None if sv_new.feature_names is None else sv_new.feature_names[:i],
740
- output_names = None,
741
- output_indexes = None,
742
- lower_bounds = None,
743
- upper_bounds = None,
744
- error_std = None,
745
- main_effects = None,
746
- hierarchical_values = None,
747
- clustering = None
748
- )
749
-
750
- def compute_output_dims(values, base_values, data, output_names):
751
- """ Uses the passed data to infer which dimensions correspond to the model's output.
752
- """
753
- values_shape = _compute_shape(values)
754
-
755
- # input shape matches the data shape
756
- if data is not None:
757
- data_shape = _compute_shape(data)
758
-
759
- # if we are not given any data we assume it would be the same shape as the given values
760
- else:
761
- data_shape = values_shape
762
-
763
- # output shape is known from the base values or output names
764
- if output_names is not None:
765
- output_shape = _compute_shape(output_names)
766
-
767
- # if our output_names are per sample then we need to drop the sample dimension here
768
- if values_shape[-len(output_shape):] != output_shape and \
769
- values_shape[-len(output_shape)+1:] == output_shape[1:] and values_shape[0] == output_shape[0]:
770
- output_shape = output_shape[1:]
771
-
772
- elif base_values is not None:
773
- output_shape = _compute_shape(base_values)[1:]
774
- else:
775
- output_shape = tuple()
776
-
777
- interaction_order = len(values_shape) - len(data_shape) - len(output_shape)
778
- output_dims = range(len(data_shape) + interaction_order, len(values_shape))
779
- return tuple(output_dims)
780
-
781
- def is_1d(val):
782
- return not (isinstance(val[0], list) or isinstance(val[0], np.ndarray))
783
-
784
- class Op:
785
- pass
786
-
787
- class Percentile(Op):
788
- def __init__(self, percentile):
789
- self.percentile = percentile
790
-
791
- def add_repr(self, s, verbose=False):
792
- return "percentile("+s+", "+str(self.percentile)+")"
793
-
794
- def _first_item(x):
795
- for item in x:
796
- return item
797
- return None
798
-
799
- def _compute_shape(x):
800
- if not hasattr(x, "__len__") or isinstance(x, str):
801
- return tuple()
802
- elif not scipy.sparse.issparse(x) and len(x) > 0 and isinstance(_first_item(x), str):
803
- return (None,)
804
- else:
805
- if isinstance(x, dict):
806
- return (len(x),) + _compute_shape(x[next(iter(x))])
807
-
808
- # 2D arrays we just take their shape as-is
809
- if len(getattr(x, "shape", tuple())) > 1:
810
- return x.shape
811
-
812
- # 1D arrays we need to look inside
813
- if len(x) == 0:
814
- return (0,)
815
- elif len(x) == 1:
816
- return (1,) + _compute_shape(_first_item(x))
817
- else:
818
- first_shape = _compute_shape(_first_item(x))
819
- if first_shape == tuple():
820
- return (len(x),)
821
- else: # we have an array of arrays...
822
- matches = np.ones(len(first_shape), dtype=bool)
823
- for i in range(1, len(x)):
824
- shape = _compute_shape(x[i])
825
- assert len(shape) == len(first_shape), "Arrays in Explanation objects must have consistent inner dimensions!"
826
- for j in range(0, len(shape)):
827
- matches[j] &= shape[j] == first_shape[j]
828
- return (len(x),) + tuple(first_shape[j] if match else None for j, match in enumerate(matches))
829
-
830
- class Cohorts:
831
- def __init__(self, **kwargs):
832
- self.cohorts = kwargs
833
- for k in self.cohorts:
834
- assert isinstance(self.cohorts[k], Explanation), "All the arguments to a Cohorts set must be Explanation objects!"
835
-
836
- def __getitem__(self, item):
837
- new_cohorts = Cohorts()
838
- for k in self.cohorts:
839
- new_cohorts.cohorts[k] = self.cohorts[k].__getitem__(item)
840
- return new_cohorts
841
-
842
- def __getattr__(self, name):
843
- new_cohorts = Cohorts()
844
- for k in self.cohorts:
845
- new_cohorts.cohorts[k] = getattr(self.cohorts[k], name)
846
- return new_cohorts
847
-
848
- def __call__(self, *args, **kwargs):
849
- new_cohorts = Cohorts()
850
- for k in self.cohorts:
851
- new_cohorts.cohorts[k] = self.cohorts[k].__call__(*args, **kwargs)
852
- return new_cohorts
853
-
854
- def __repr__(self):
855
- return f"<shap._explanation.Cohorts object with {len(self.cohorts)} cohorts of sizes: {[v.shape for v in self.cohorts.values()]}>"
856
-
857
-
858
- def _auto_cohorts(shap_values, max_cohorts):
859
- """ This uses a DecisionTreeRegressor to build a group of cohorts with similar SHAP values.
860
- """
861
-
862
- # fit a decision tree that well separates the SHAP values
863
- m = sklearn.tree.DecisionTreeRegressor(max_leaf_nodes=max_cohorts)
864
- m.fit(shap_values.data, shap_values.values)
865
-
866
- # group instances by their decision paths
867
- paths = m.decision_path(shap_values.data).toarray()
868
- path_names = []
869
-
870
- # mark each instance with a path name
871
- for i in range(shap_values.shape[0]):
872
- name = ""
873
- for j in range(len(paths[i])):
874
- if paths[i,j] > 0:
875
- feature = m.tree_.feature[j]
876
- threshold = m.tree_.threshold[j]
877
- val = shap_values.data[i,feature]
878
- if feature >= 0:
879
- name += str(shap_values.feature_names[feature])
880
- if val < threshold:
881
- name += " < "
882
- else:
883
- name += " >= "
884
- name += str(threshold) + " & "
885
- path_names.append(name[:-3]) # the -3 strips off the last unneeded ' & '
886
- path_names = np.array(path_names)
887
-
888
- # split the instances into cohorts by their path names
889
- cohorts = {}
890
- for name in np.unique(path_names):
891
- cohorts[name] = shap_values[path_names == name]
892
-
893
- return Cohorts(**cohorts)
894
-
895
- def list_wrap(x):
896
- """ A helper to patch things since slicer doesn't handle arrays of arrays (it does handle lists of arrays)
897
- """
898
- if isinstance(x, np.ndarray) and len(x.shape) == 1 and isinstance(x[0], np.ndarray):
899
- return [v for v in x]
900
- else:
901
- return x
 
lib/shap/_serializable.py DELETED
@@ -1,204 +0,0 @@
1
-
2
- import inspect
3
- import logging
4
- import pickle
5
-
6
- import cloudpickle
7
- import numpy as np
8
-
9
- log = logging.getLogger('shap')
10
-
11
- class Serializable:
12
- """ This is the superclass of all serializable objects.
13
- """
14
-
15
- def save(self, out_file):
16
- """ Save the model to the given file stream.
17
- """
18
- pickle.dump(type(self), out_file)
19
-
20
- @classmethod
21
- def load(cls, in_file, instantiate=True):
22
- """ This is meant to be overridden by subclasses and called with super.
23
-
24
- We return constructor argument values when not being instantiated. Since there are no
25
- constructor arguments for the Serializable class we just return an empty dictionary.
26
- """
27
- if instantiate:
28
- return cls._instantiated_load(in_file)
29
- return {}
30
-
31
- @classmethod
32
- def _instantiated_load(cls, in_file, **kwargs):
33
- """ This is meant to be overridden by subclasses and called with super.
34
-
35
- We return constructor argument values (we have no values to load in this abstract class).
36
- """
37
- obj_type = pickle.load(in_file)
38
- if obj_type is None:
39
- return None
40
-
41
- if not inspect.isclass(obj_type) or (not issubclass(obj_type, cls) and (obj_type is not cls)):
42
- raise Exception(f"Invalid object type loaded from file. {obj_type} is not a subclass of {cls}.")
43
-
44
- # here we call the constructor with all the arguments we have loaded
45
- constructor_args = obj_type.load(in_file, instantiate=False, **kwargs)
46
- used_args = inspect.getfullargspec(obj_type.__init__)[0]
47
- return obj_type(**{k: constructor_args[k] for k in constructor_args if k in used_args})
48
-
49
-
50
- class Serializer:
51
- """ Save data items to an input stream.
52
- """
53
- def __init__(self, out_stream, block_name, version):
54
- self.out_stream = out_stream
55
- self.block_name = block_name
56
- self.block_version = version
57
- self.serializer_version = 0 # update this when the serializer changes
58
-
59
- def __enter__(self):
60
- log.debug("serializer_version = %d", self.serializer_version)
61
- pickle.dump(self.serializer_version, self.out_stream)
62
- log.debug("block_name = %s", self.block_name)
63
- pickle.dump(self.block_name, self.out_stream)
64
- log.debug("block_version = %d", self.block_version)
65
- pickle.dump(self.block_version, self.out_stream)
66
- return self
67
-
68
- def __exit__(self, exception_type, exception_value, traceback):
69
- log.debug("END_BLOCK___")
70
- pickle.dump("END_BLOCK___", self.out_stream)
71
-
72
- def save(self, name, value, encoder="auto"):
73
- """ Dump a data item to the current input stream.
74
- """
75
- log.debug("name = %s", name)
76
- pickle.dump(name, self.out_stream)
77
- if encoder is None or encoder is False:
78
- log.debug("encoder_name = %s", "no_encoder")
79
- pickle.dump("no_encoder", self.out_stream)
80
- elif callable(encoder):
81
- log.debug("encoder_name = %s", "custom_encoder")
82
- pickle.dump("custom_encoder", self.out_stream)
83
- encoder(value, self.out_stream)
84
- elif encoder == ".save" or (isinstance(value, Serializable) and encoder == "auto"):
85
- log.debug("encoder_name = %s", "serializable.save")
86
- pickle.dump("serializable.save", self.out_stream)
87
- if len(inspect.getfullargspec(value.save)[0]) == 3: # backward compat for MLflow, can remove 4/1/2021
88
- value.save(self.out_stream, value)
89
- else:
90
- value.save(self.out_stream)
91
- elif encoder == "auto":
92
- if isinstance(value, (int, float, str)):
93
- log.debug("encoder_name = %s", "pickle.dump")
94
- pickle.dump("pickle.dump", self.out_stream)
95
- pickle.dump(value, self.out_stream)
96
- else:
97
- log.debug("encoder_name = %s", "cloudpickle.dump")
98
- pickle.dump("cloudpickle.dump", self.out_stream)
99
- cloudpickle.dump(value, self.out_stream)
100
- else:
101
- raise ValueError(f"Unknown encoder type '{encoder}' given for serialization!")
102
- log.debug("value = %s", str(value))
103
-
104
- class Deserializer:
105
- """ Load data items from an input stream.
106
- """
107
-
108
- def __init__(self, in_stream, block_name, min_version, max_version):
109
- self.in_stream = in_stream
110
- self.block_name = block_name
111
- self.block_min_version = min_version
112
- self.block_max_version = max_version
113
-
114
- # update these when the serializer changes
115
- self.serializer_min_version = 0
116
- self.serializer_max_version = 0
117
-
118
- def __enter__(self):
119
-
120
- # confirm the serializer version
121
- serializer_version = pickle.load(self.in_stream)
122
- log.debug("serializer_version = %d", serializer_version)
123
- if serializer_version < self.serializer_min_version:
124
- raise ValueError(
125
- f"The file being loaded was saved with a serializer version of {serializer_version}, " + \
126
- f"but the current deserializer in SHAP requires at least version {self.serializer_min_version}."
127
- )
128
- if serializer_version > self.serializer_max_version:
129
- raise ValueError(
130
- f"The file being loaded was saved with a serializer version of {serializer_version}, " + \
131
- f"but the current deserializer in SHAP only support up to version {self.serializer_max_version}."
132
- )
133
-
134
- # confirm the block name
135
- block_name = pickle.load(self.in_stream)
136
- log.debug("block_name = %s", block_name)
137
- if block_name != self.block_name:
138
- raise ValueError(
139
- f"The next data block in the file being loaded was supposed to be {self.block_name}, " + \
140
- f"but the next block found was {block_name}."
141
- )
142
-
143
- # confirm the block version
144
- block_version = pickle.load(self.in_stream)
145
- log.debug("block_version = %d", block_version)
146
- if block_version < self.block_min_version:
147
- raise ValueError(
148
- f"The file being loaded was saved with a block version of {block_version}, " + \
149
- f"but the current deserializer in SHAP requires at least version {self.block_min_version}."
150
- )
151
- if block_version > self.block_max_version:
152
- raise ValueError(
153
- f"The file being loaded was saved with a block version of {block_version}, " + \
154
- f"but the current deserializer in SHAP only support up to version {self.block_max_version}."
155
- )
156
- return self
157
-
158
- def __exit__(self, exception_type, exception_value, traceback):
159
- # confirm the block end token
160
- for _ in range(100):
161
- end_token = pickle.load(self.in_stream)
162
- log.debug("end_token = %s", end_token)
163
- if end_token == "END_BLOCK___":
164
- return
165
- self._load_data_value()
166
- raise ValueError(
167
- f"The data block end token wsa not found for the block {self.block_name}."
168
- )
169
-
170
- def load(self, name, decoder=None):
171
- """ Load a data item from the current input stream.
172
- """
173
- # confirm the block name
174
- loaded_name = pickle.load(self.in_stream)
175
- log.debug("loaded_name = %s", loaded_name)
176
- print("loaded_name", loaded_name)
177
- if loaded_name != name:
178
- raise ValueError(
179
- f"The next data item in the file being loaded was supposed to be {name}, " + \
180
- f"but the next block found was {loaded_name}."
181
- ) # We should eventually add support for skipping over unused data items in old formats...
182
-
183
- value = self._load_data_value(decoder)
184
- log.debug("value = %s", str(value))
185
- return value
186
-
187
- def _load_data_value(self, decoder=None):
188
- encoder_name = pickle.load(self.in_stream)
189
- log.debug("encoder_name = %s", encoder_name)
190
- if encoder_name == "custom_encoder" or callable(decoder):
191
- assert callable(decoder), "You must provide a callable custom decoder for the data item {name}!"
192
- return decoder(self.in_stream)
193
- if encoder_name == "no_encoder":
194
- return None
195
- if encoder_name == "serializable.save":
196
- return Serializable.load(self.in_stream)
197
- if encoder_name == "numpy.save":
198
- return np.load(self.in_stream)
199
- if encoder_name == "pickle.dump":
200
- return pickle.load(self.in_stream)
201
- if encoder_name == "cloudpickle.dump":
202
- return cloudpickle.load(self.in_stream)
203
-
204
- raise ValueError(f"Unsupported encoder type found: {encoder_name}")
 
lib/shap/_version.py DELETED
@@ -1,16 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-TYPE_CHECKING = False
-if TYPE_CHECKING:
-    from typing import Tuple, Union
-    VERSION_TUPLE = Tuple[Union[int, str], ...]
-else:
-    VERSION_TUPLE = object
-
-version: str
-__version__: str
-__version_tuple__: VERSION_TUPLE
-version_tuple: VERSION_TUPLE
-
-__version__ = version = '0.44.1'
-__version_tuple__ = version_tuple = (0, 44, 1)

lib/shap/actions/__init__.py DELETED
@@ -1,3 +0,0 @@
-from ._action import Action
-
-__all__ = ["Action"]

lib/shap/actions/_action.py DELETED
@@ -1,8 +0,0 @@
-class Action:
-    """ Abstract action class.
-    """
-    def __lt__(self, other_action):
-        return self.cost < other_action.cost
-
-    def __repr__(self):
-        return f"<Action '{self.__str__()}'>"

lib/shap/actions/_optimizer.py DELETED
@@ -1,92 +0,0 @@
1
- import copy
2
- import queue
3
- import warnings
4
-
5
- from ..utils._exceptions import ConvergenceError, InvalidAction
6
- from ._action import Action
7
-
8
-
9
- class ActionOptimizer:
10
- def __init__(self, model, actions):
11
- self.model = model
12
- warnings.warn(
13
- "Note that ActionOptimizer is still in an alpha state and is subjust to API changes."
14
- )
15
- # actions go into mutually exclusive groups
16
- self.action_groups = []
17
- for group in actions:
18
-
19
- if issubclass(type(group), Action):
20
- group._group_index = len(self.action_groups)
21
- group._grouped_index = 0
22
- self.action_groups.append([copy.copy(group)])
23
- elif issubclass(type(group), list):
24
- group = sorted([copy.copy(v) for v in group], key=lambda a: a.cost)
25
- for i, v in enumerate(group):
26
- v._group_index = len(self.action_groups)
27
- v._grouped_index = i
28
- self.action_groups.append(group)
29
- else:
30
- raise InvalidAction(
31
- "A passed action was not an Action or list of actions!"
32
- )
33
-
34
- def __call__(self, *args, max_evals=10000):
35
-
36
- # init our queue with all the least costly actions
37
- q = queue.PriorityQueue()
38
- for i in range(len(self.action_groups)):
39
- group = self.action_groups[i]
40
- q.put((group[0].cost, [group[0]]))
41
-
42
- nevals = 0
43
- while not q.empty():
44
-
45
- # see if we have exceeded our runtime budget
46
- nevals += 1
47
- if nevals > max_evals:
48
- raise ConvergenceError(
49
- f"Failed to find a solution with max_evals={max_evals}! Try reducing the number of actions or increasing max_evals."
50
- )
51
-
52
- # get the next cheapest set of actions we can do
53
- cost, actions = q.get()
54
-
55
- # apply those actions
56
- args_tmp = copy.deepcopy(args)
57
- for a in actions:
58
- a(*args_tmp)
59
-
60
- # if the model is now satisfied we are done!!
61
- v = self.model(*args_tmp)
62
- if v:
63
- return actions
64
-
65
- # if not then we add all possible follow-on actions to our queue
66
- else:
67
- for i in range(len(self.action_groups)):
68
- group = self.action_groups[i]
69
-
70
- # look to to see if we already have a action from this group, if so we need to
71
- # move to a more expensive action in the same group
72
- next_ind = 0
73
- prev_in_group = -1
74
- for j, a in enumerate(actions):
75
- if a._group_index == i:
76
- next_ind = max(next_ind, a._grouped_index + 1)
77
- prev_in_group = j
78
-
79
- # we are adding a new action type
80
- if prev_in_group == -1:
81
- new_actions = actions + [group[next_ind]]
82
- # we are moving from one action to a more expensive one in the same group
83
- elif next_ind < len(group):
84
- new_actions = copy.copy(actions)
85
- new_actions[prev_in_group] = group[next_ind]
86
- # we don't have a more expensive action left in this group
87
- else:
88
- new_actions = None
89
-
90
- # add the new option to our queue
91
- if new_actions is not None:
92
- q.put((sum([a.cost for a in new_actions]), new_actions))
 
lib/shap/benchmark/__init__.py DELETED
@@ -1,9 +0,0 @@
-from ._compute import ComputeTime
-from ._explanation_error import ExplanationError
-from ._result import BenchmarkResult
-from ._sequential import SequentialMasker
-
-# from . import framework
-# from .. import datasets
-
-__all__ = ["ComputeTime", "ExplanationError", "BenchmarkResult", "SequentialMasker"]

lib/shap/benchmark/_compute.py DELETED
@@ -1,9 +0,0 @@
-from ._result import BenchmarkResult
-
-
-class ComputeTime:
-    """ Extracts a runtime benchmark result from the passed Explanation.
-    """
-
-    def __call__(self, explanation, name):
-        return BenchmarkResult("compute time", name, value=explanation.compute_time / explanation.shape[0])

lib/shap/benchmark/_explanation_error.py DELETED
@@ -1,181 +0,0 @@
1
- import time
2
-
3
- import numpy as np
4
- from tqdm.auto import tqdm
5
-
6
- from shap import Explanation, links
7
- from shap.maskers import FixedComposite, Image, Text
8
- from shap.utils import MaskedModel, partition_tree_shuffle
9
- from shap.utils._exceptions import DimensionError
10
-
11
- from ._result import BenchmarkResult
12
-
13
-
14
- class ExplanationError:
15
- """ A measure of the explanation error relative to a model's actual output.
16
-
17
- This benchmark metric measures the discrepancy between the output of the model predicted by an
18
- attribution explanation vs. the actual output of the model. This discrepancy is measured over
19
- many masking patterns drawn from permutations of the input features.
20
-
21
- For explanations (like Shapley values) that explain the difference between one alternative and another
22
- (for example a current sample and typical background feature values) there is possible explanation error
23
- for every pattern of mixing foreground and background, or other words every possible masking pattern.
24
- In this class we compute the standard deviation over these explanation errors where masking patterns
25
- are drawn from prefixes of random feature permutations. This seems natural, and aligns with Shapley value
26
- computations, but of course you could choose to summarize explanation errors in others ways as well.
27
- """
28
-
29
- def __init__(self, masker, model, *model_args, batch_size=500, num_permutations=10, link=links.identity, linearize_link=True, seed=38923):
30
- """ Build a new explanation error benchmarker with the given masker, model, and model args.
31
-
32
- Parameters
33
- ----------
34
- masker : function or shap.Masker
35
- The masker defines how we hide features during the perturbation process.
36
-
37
- model : function or shap.Model
38
- The model we want to evaluate explanations against.
39
-
40
- model_args : ...
41
- The list of arguments we will give to the model that we will have explained. When we later call this benchmark
42
- object we should pass explanations that have been computed on this same data.
43
-
44
- batch_size : int
45
- The maximum batch size we should use when calling the model. For some large NLP models this needs to be set
46
- lower (at say 1) to avoid running out of GPU memory.
47
-
48
- num_permutations : int
49
- How many permutations we will use to estimate the average explanation error for each sample. If you are running
50
- this benchmark on a large dataset with many samples then you can reduce this value since the final result is
51
- averaged over samples as well and the averages of both directly combine to reduce variance. So for 10k samples
52
- num_permutations=1 is appropreiate.
53
-
54
- link : function
55
- Allows for a non-linear link function to be used to bringe between the model output space and the explanation
56
- space.
57
-
58
- linearize_link : bool
59
- Non-linear links can destroy additive separation in generalized linear models, so by linearizing the link we can
60
- retain additive separation. See upcoming paper/doc for details.
61
- """
62
-
63
- self.masker = masker
64
- self.model = model
65
- self.model_args = model_args
66
- self.num_permutations = num_permutations
67
- self.link = link
68
- self.linearize_link = linearize_link
69
- self.model_args = model_args
70
- self.batch_size = batch_size
71
- self.seed = seed
72
-
73
- # user must give valid masker
74
- underlying_masker = masker.masker if isinstance(masker, FixedComposite) else masker
75
- if isinstance(underlying_masker, Text):
76
- self.data_type = "text"
77
- elif isinstance(underlying_masker, Image):
78
- self.data_type = "image"
79
- else:
80
- self.data_type = "tabular"
81
-
82
- def __call__(self, explanation, name, step_fraction=0.01, indices=[], silent=False):
83
- """ Run this benchmark on the given explanation.
84
- """
85
-
86
- if isinstance(explanation, np.ndarray):
87
- attributions = explanation
88
- elif isinstance(explanation, Explanation):
89
- attributions = explanation.values
90
- else:
91
- raise ValueError("The passed explanation must be either of type numpy.ndarray or shap.Explanation!")
92
-
93
- if len(attributions) != len(self.model_args[0]):
94
- emsg = (
95
- "The explanation passed must have the same number of rows as "
96
- "the self.model_args that were passed!"
97
- )
98
- raise DimensionError(emsg)
99
-
100
- # it is important that we choose the same permutations for the different explanations we are comparing
101
- # so as to avoid needless noise
102
- old_seed = np.random.seed()
103
- np.random.seed(self.seed)
104
-
105
- pbar = None
106
- start_time = time.time()
107
- svals = []
108
- mask_vals = []
109
-
110
- for i, args in enumerate(zip(*self.model_args)):
111
-
112
- if len(args[0].shape) != len(attributions[i].shape):
113
- raise ValueError("The passed explanation must have the same dim as the model_args and must not have a vector output!")
114
-
115
- feature_size = np.prod(attributions[i].shape)
116
- sample_attributions = attributions[i].flatten()
117
-
118
- # compute any custom clustering for this row
119
- row_clustering = None
120
- if getattr(self.masker, "clustering", None) is not None:
121
- if isinstance(self.masker.clustering, np.ndarray):
122
- row_clustering = self.masker.clustering
123
- elif callable(self.masker.clustering):
124
- row_clustering = self.masker.clustering(*args)
125
- else:
126
- raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the ExplanationError benchmark!")
127
-
128
- masked_model = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *args)
129
-
130
- total_values = None
131
- for _ in range(self.num_permutations):
132
- masks = []
133
- mask = np.zeros(feature_size, dtype=bool)
134
- masks.append(mask.copy())
135
- ordered_inds = np.arange(feature_size)
136
-
137
- # shuffle the indexes so we get a random permutation ordering
138
- if row_clustering is not None:
139
- inds_mask = np.ones(feature_size, dtype=bool)
140
- partition_tree_shuffle(ordered_inds, inds_mask, row_clustering)
141
- else:
142
- np.random.shuffle(ordered_inds)
143
-
144
- increment = max(1, int(feature_size * step_fraction))
145
- for j in range(0, feature_size, increment):
146
- mask[ordered_inds[np.arange(j, min(feature_size, j+increment))]] = True
147
- masks.append(mask.copy())
148
- mask_vals.append(masks)
149
-
150
- values = []
151
- masks_arr = np.array(masks)
152
- for j in range(0, len(masks_arr), self.batch_size):
153
- values.append(masked_model(masks_arr[j:j + self.batch_size]))
154
- values = np.concatenate(values)
155
- base_value = values[0]
156
- for j, v in enumerate(values):
157
- values[j] = (v - (base_value + np.sum(sample_attributions[masks_arr[j]])))**2
158
-
159
- if total_values is None:
160
- total_values = values
161
- else:
162
- total_values += values
163
- total_values /= self.num_permutations
164
-
165
- svals.append(total_values)
166
-
167
- if pbar is None and time.time() - start_time > 5:
168
- pbar = tqdm(total=len(self.model_args[0]), disable=silent, leave=False, desc=f"ExplanationError for {name}")
169
- pbar.update(i+1)
170
- if pbar is not None:
171
- pbar.update(1)
172
-
173
- if pbar is not None:
174
- pbar.close()
175
-
176
- svals = np.array(svals)
177
-
178
- # reset the random seed so we don't mess up the caller
179
- np.random.seed(old_seed)
180
-
181
- return BenchmarkResult("explanation error", name, value=np.sqrt(np.sum(total_values)/len(total_values)))
 
lib/shap/benchmark/_result.py DELETED
@@ -1,34 +0,0 @@
1
- import numpy as np
2
- import sklearn
3
-
4
- sign_defaults = {
5
- "keep positive": 1,
6
- "keep negative": -1,
7
- "remove positive": -1,
8
- "remove negative": 1,
9
- "compute time": -1,
10
- "keep absolute": -1, # the absolute signs are defaults that make sense when scoring losses
11
- "remove absolute": 1,
12
- "explanation error": -1
13
- }
14
-
15
- class BenchmarkResult:
16
- """ The result of a benchmark run.
17
- """
18
-
19
- def __init__(self, metric, method, value=None, curve_x=None, curve_y=None, curve_y_std=None, value_sign=None):
20
- self.metric = metric
21
- self.method = method
22
- self.value = value
23
- self.curve_x = curve_x
24
- self.curve_y = curve_y
25
- self.curve_y_std = curve_y_std
26
- self.value_sign = value_sign
27
- if self.value_sign is None and self.metric in sign_defaults:
28
- self.value_sign = sign_defaults[self.metric]
29
- if self.value is None:
30
- self.value = sklearn.metrics.auc(curve_x, (np.array(curve_y) - curve_y[0]))
31
-
32
- @property
33
- def full_name(self):
34
- return self.method + " " + self.metric
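
When value is not supplied, the constructor above collapses a curve to a single score by shifting it to start at zero and taking its area with sklearn.metrics.auc. A small sketch of that step with made-up curve values:

import numpy as np
from sklearn.metrics import auc

curve_x = np.linspace(0, 1, 5)
curve_y = np.array([0.50, 0.58, 0.66, 0.70, 0.71])  # e.g. model output as features are kept

value = auc(curve_x, curve_y - curve_y[0])           # area of the zero-shifted curve
print(value)                                         # ~0.136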
 
lib/shap/benchmark/_sequential.py DELETED
@@ -1,332 +0,0 @@
1
- import time
2
-
3
- import matplotlib.pyplot as pl
4
- import numpy as np
5
- import pandas as pd
6
- import sklearn
7
- from tqdm.auto import tqdm
8
-
9
- from shap import Explanation, links
10
- from shap.maskers import FixedComposite, Image, Text
11
- from shap.utils import MaskedModel
12
-
13
- from ._result import BenchmarkResult
14
-
15
-
16
- class SequentialMasker:
17
- def __init__(self, mask_type, sort_order, masker, model, *model_args, batch_size=500):
18
-
19
- for arg in model_args:
20
- if isinstance(arg, pd.DataFrame):
21
- raise TypeError("DataFrame arguments dont iterate correctly, pass numpy arrays instead!")
22
-
23
- # convert any DataFrames to numpy arrays
24
- # self.model_arg_cols = []
25
- # self.model_args = []
26
- # self.has_df = False
27
- # for arg in model_args:
28
- # if isinstance(arg, pd.DataFrame):
29
- # self.model_arg_cols.append(arg.columns)
30
- # self.model_args.append(arg.values)
31
- # self.has_df = True
32
- # else:
33
- # self.model_arg_cols.append(None)
34
- # self.model_args.append(arg)
35
-
36
- # if self.has_df:
37
- # given_model = model
38
- # def new_model(*args):
39
- # df_args = []
40
- # for i, arg in enumerate(args):
41
- # if self.model_arg_cols[i] is not None:
42
- # df_args.append(pd.DataFrame(arg, columns=self.model_arg_cols[i]))
43
- # else:
44
- # df_args.append(arg)
45
- # return given_model(*df_args)
46
- # model = new_model
47
-
48
- self.inner = SequentialPerturbation(
49
- model, masker, sort_order, mask_type
50
- )
51
- self.model_args = model_args
52
- self.batch_size = batch_size
53
-
54
- def __call__(self, explanation, name, **kwargs):
55
- return self.inner(name, explanation, *self.model_args, batch_size=self.batch_size, **kwargs)
56
-
57
- class SequentialPerturbation:
58
- def __init__(self, model, masker, sort_order, perturbation, linearize_link=False):
59
- # self.f = lambda masked, x, index: model.predict(masked)
60
- self.model = model if callable(model) else model.predict
61
- self.masker = masker
62
- self.sort_order = sort_order
63
- self.perturbation = perturbation
64
- self.linearize_link = linearize_link
65
-
66
- # define our sort order
67
- if self.sort_order == "positive":
68
- self.sort_order_map = lambda x: np.argsort(-x)
69
- elif self.sort_order == "negative":
70
- self.sort_order_map = lambda x: np.argsort(x)
71
- elif self.sort_order == "absolute":
72
- self.sort_order_map = lambda x: np.argsort(-abs(x))
73
- else:
74
- raise ValueError("sort_order must be either \"positive\", \"negative\", or \"absolute\"!")
75
-
76
- # user must give valid masker
77
- underlying_masker = masker.masker if isinstance(masker, FixedComposite) else masker
78
- if isinstance(underlying_masker, Text):
79
- self.data_type = "text"
80
- elif isinstance(underlying_masker, Image):
81
- self.data_type = "image"
82
- else:
83
- self.data_type = "tabular"
84
- #raise ValueError("masker must be for \"tabular\", \"text\", or \"image\"!")
85
-
86
- self.score_values = []
87
- self.score_aucs = []
88
- self.labels = []
89
-
90
- def __call__(self, name, explanation, *model_args, percent=0.01, indices=[], y=None, label=None, silent=False, debug_mode=False, batch_size=10):
91
- # if explainer is already the attributions
92
- if isinstance(explanation, np.ndarray):
93
- attributions = explanation
94
- elif isinstance(explanation, Explanation):
95
- attributions = explanation.values
96
- else:
97
- raise ValueError("The passed explanation must be either of type numpy.ndarray or shap.Explanation!")
98
-
99
- assert len(attributions) == len(model_args[0]), "The explanation passed must have the same number of rows as the model_args that were passed!"
100
-
101
- if label is None:
102
- label = "Score %d" % len(self.score_values)
103
-
104
- # convert dataframes
105
- # if isinstance(X, (pd.Series, pd.DataFrame)):
106
- # X = X.values
107
-
108
- # convert all single-sample vectors to matrices
109
- # if not hasattr(attributions[0], "__len__"):
110
- # attributions = np.array([attributions])
111
- # if not hasattr(X[0], "__len__") and self.data_type == "tabular":
112
- # X = np.array([X])
113
-
114
- pbar = None
115
- start_time = time.time()
116
- svals = []
117
- mask_vals = []
118
-
119
- for i, args in enumerate(zip(*model_args)):
120
- # if self.data_type == "image":
121
- # x_shape, y_shape = attributions[i].shape[0], attributions[i].shape[1]
122
- # feature_size = np.prod([x_shape, y_shape])
123
- # sample_attributions = attributions[i].mean(2).reshape(feature_size, -1)
124
- # data = X[i].flatten()
125
- # mask_shape = X[i].shape
126
- # else:
127
- feature_size = np.prod(attributions[i].shape)
128
- sample_attributions = attributions[i].flatten()
129
- # data = X[i]
130
- # mask_shape = feature_size
131
-
132
- self.masked_model = MaskedModel(self.model, self.masker, links.identity, self.linearize_link, *args)
133
-
134
- masks = []
135
-
136
- mask = np.ones(feature_size, dtype=bool) * (self.perturbation == "remove")
137
- masks.append(mask.copy())
138
-
139
- ordered_inds = self.sort_order_map(sample_attributions)
140
- increment = max(1,int(feature_size*percent))
141
- for j in range(0, feature_size, increment):
142
- oind_list = [ordered_inds[t] for t in range(j, min(feature_size, j+increment))]
143
-
144
- for oind in oind_list:
145
- if not ((self.sort_order == "positive" and sample_attributions[oind] <= 0) or \
146
- (self.sort_order == "negative" and sample_attributions[oind] >= 0)):
147
- mask[oind] = self.perturbation == "keep"
148
-
149
- masks.append(mask.copy())
150
-
151
- mask_vals.append(masks)
152
-
153
- # mask_size = len(range(0, feature_size, increment)) + 1
154
- values = []
155
- masks_arr = np.array(masks)
156
- for j in range(0, len(masks_arr), batch_size):
157
- values.append(self.masked_model(masks_arr[j:j + batch_size]))
158
- values = np.concatenate(values)
159
-
160
- svals.append(values)
161
-
162
- if pbar is None and time.time() - start_time > 5:
163
- pbar = tqdm(total=len(model_args[0]), disable=silent, leave=False, desc="SequentialMasker")
164
- pbar.update(i+1)
165
- if pbar is not None:
166
- pbar.update(1)
167
-
168
- if pbar is not None:
169
- pbar.close()
170
-
171
- self.score_values.append(np.array(svals))
172
-
173
- # if self.sort_order == "negative":
174
- # curve_sign = -1
175
- # else:
176
- curve_sign = 1
177
-
178
- self.labels.append(label)
179
-
180
- xs = np.linspace(0, 1, 100)
181
- curves = np.zeros((len(self.score_values[-1]), len(xs)))
182
- for j in range(len(self.score_values[-1])):
183
- xp = np.linspace(0, 1, len(self.score_values[-1][j]))
184
- yp = self.score_values[-1][j]
185
- curves[j,:] = np.interp(xs, xp, yp)
186
- ys = curves.mean(0)
187
- std = curves.std(0) / np.sqrt(curves.shape[0])
188
- auc = sklearn.metrics.auc(np.linspace(0, 1, len(ys)), curve_sign*(ys-ys[0]))
189
-
190
- if not debug_mode:
191
- return BenchmarkResult(self.perturbation + " " + self.sort_order, name, curve_x=xs, curve_y=ys, curve_y_std=std)
192
- else:
193
- aucs = []
194
- for j in range(len(self.score_values[-1])):
195
- curve = curves[j,:]
196
- auc = sklearn.metrics.auc(np.linspace(0, 1, len(curve)), curve_sign*(curve-curve[0]))
197
- aucs.append(auc)
198
- return mask_vals, curves, aucs
199
-
200
- def score(self, explanation, X, percent=0.01, y=None, label=None, silent=False, debug_mode=False):
201
- '''
202
- Will be deprecated once MaskedModel is fully supported
203
- '''
204
- # if explainer is already the attributions
205
- if isinstance(explanation, np.ndarray):
206
- attributions = explanation
207
- elif isinstance(explanation, Explanation):
208
- attributions = explanation.values
209
-
210
- if label is None:
211
- label = "Score %d" % len(self.score_values)
212
-
213
- # convert dataframes
214
- if isinstance(X, (pd.Series, pd.DataFrame)):
215
- X = X.values
216
-
217
- # convert all single-sample vectors to matrices
218
- if not hasattr(attributions[0], "__len__"):
219
- attributions = np.array([attributions])
220
- if not hasattr(X[0], "__len__") and self.data_type == "tabular":
221
- X = np.array([X])
222
-
223
- pbar = None
224
- start_time = time.time()
225
- svals = []
226
- mask_vals = []
227
-
228
- for i in range(len(X)):
229
- if self.data_type == "image":
230
- x_shape, y_shape = attributions[i].shape[0], attributions[i].shape[1]
231
- feature_size = np.prod([x_shape, y_shape])
232
- sample_attributions = attributions[i].mean(2).reshape(feature_size, -1)
233
- else:
234
- feature_size = attributions[i].shape[0]
235
- sample_attributions = attributions[i]
236
-
237
- if len(attributions[i].shape) == 1 or self.data_type == "tabular":
238
- output_size = 1
239
- else:
240
- output_size = attributions[i].shape[-1]
241
-
242
- for k in range(output_size):
243
- if self.data_type == "image":
244
- mask_shape = X[i].shape
245
- else:
246
- mask_shape = feature_size
247
-
248
- mask = np.ones(mask_shape, dtype=bool) * (self.perturbation == "remove")
249
- masks = [mask.copy()]
250
-
251
- values = np.zeros(feature_size+1)
252
- # masked, data = self.masker(mask, X[i])
253
- masked = self.masker(mask, X[i])
254
- data = None
255
- curr_val = self.f(masked, data, k).mean(0)
256
-
257
- values[0] = curr_val
258
-
259
- if output_size != 1:
260
- test_attributions = sample_attributions[:,k]
261
- else:
262
- test_attributions = sample_attributions
263
-
264
- ordered_inds = self.sort_order_map(test_attributions)
265
- increment = max(1,int(feature_size*percent))
266
- for j in range(0, feature_size, increment):
267
- oind_list = [ordered_inds[t] for t in range(j, min(feature_size, j+increment))]
268
-
269
- for oind in oind_list:
270
- if not ((self.sort_order == "positive" and test_attributions[oind] <= 0) or \
271
- (self.sort_order == "negative" and test_attributions[oind] >= 0)):
272
- if self.data_type == "image":
273
- xoind, yoind = oind // attributions[i].shape[1], oind % attributions[i].shape[1]
274
- mask[xoind][yoind] = self.perturbation == "keep"
275
- else:
276
- mask[oind] = self.perturbation == "keep"
277
-
278
- masks.append(mask.copy())
279
- # masked, data = self.masker(mask, X[i])
280
- masked = self.masker(mask, X[i])
281
- curr_val = self.f(masked, data, k).mean(0)
282
-
283
- for t in range(j, min(feature_size, j+increment)):
284
- values[t+1] = curr_val
285
-
286
- svals.append(values)
287
- mask_vals.append(masks)
288
-
289
- if pbar is None and time.time() - start_time > 5:
290
- pbar = tqdm(total=len(X), disable=silent, leave=False)
291
- pbar.update(i+1)
292
- if pbar is not None:
293
- pbar.update(1)
294
-
295
- if pbar is not None:
296
- pbar.close()
297
-
298
- self.score_values.append(np.array(svals))
299
-
300
- if self.sort_order == "negative":
301
- curve_sign = -1
302
- else:
303
- curve_sign = 1
304
-
305
- self.labels.append(label)
306
-
307
- xs = np.linspace(0, 1, 100)
308
- curves = np.zeros((len(self.score_values[-1]), len(xs)))
309
- for j in range(len(self.score_values[-1])):
310
- xp = np.linspace(0, 1, len(self.score_values[-1][j]))
311
- yp = self.score_values[-1][j]
312
- curves[j,:] = np.interp(xs, xp, yp)
313
- ys = curves.mean(0)
314
-
315
- if debug_mode:
316
- aucs = []
317
- for j in range(len(self.score_values[-1])):
318
- curve = curves[j,:]
319
- auc = sklearn.metrics.auc(np.linspace(0, 1, len(curve)), curve_sign*(curve-curve[0]))
320
- aucs.append(auc)
321
- return mask_vals, curves, aucs
322
- else:
323
- auc = sklearn.metrics.auc(np.linspace(0, 1, len(ys)), curve_sign*(ys-ys[0]))
324
- return xs, ys, auc
325
-
326
- def plot(self, xs, ys, auc):
327
- pl.plot(xs, ys, label="AUC %0.4f" % auc)
328
- pl.legend()
329
- xlabel = "Percent Unmasked" if self.perturbation == "keep" else "Percent Masked"
330
- pl.xlabel(xlabel)
331
- pl.ylabel("Model Output")
332
- pl.show()
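
The core loop above builds one curve per sample: rank features by attribution, hide them step by step, record the model output after each step, then average curves and take an AUC. A standalone sketch of a single "remove absolute" curve, using a hypothetical linear model and mean imputation rather than the shap masker classes:

import numpy as np
from sklearn.metrics import auc

rng = np.random.default_rng(0)
w = rng.normal(size=8)              # weights of a toy linear model
x = rng.normal(size=8)              # one sample to explain
mean_vals = np.zeros(8)             # hidden features are imputed with this baseline

f = lambda z: float(w @ z)
attributions = w * (x - mean_vals)

order = np.argsort(-np.abs(attributions))   # most important first ("absolute" order)
hidden = np.zeros(8, dtype=bool)            # nothing hidden yet ("remove" direction)
ys = [f(x)]
for j in order:
    hidden[j] = True                        # remove the next most important feature
    ys.append(f(np.where(hidden, mean_vals, x)))

xs = np.linspace(0, 1, len(ys))
print("curve:", np.round(ys, 3), "auc:", round(auc(xs, np.array(ys) - ys[0]), 3))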
 
lib/shap/benchmark/experiments.py DELETED
@@ -1,414 +0,0 @@
1
- import copy
2
- import itertools
3
- import os
4
- import pickle
5
- import random
6
- import subprocess
7
- import sys
8
- import time
9
- from multiprocessing import Pool
10
-
11
- from .. import __version__, datasets
12
- from . import metrics, models
13
-
14
- try:
15
- from queue import Queue
16
- except ImportError:
17
- from Queue import Queue
18
- from threading import Lock, Thread
19
-
20
- regression_metrics = [
21
- "local_accuracy",
22
- "consistency_guarantees",
23
- "keep_positive_mask",
24
- "keep_positive_resample",
25
- #"keep_positive_impute",
26
- "keep_negative_mask",
27
- "keep_negative_resample",
28
- #"keep_negative_impute",
29
- "keep_absolute_mask__r2",
30
- "keep_absolute_resample__r2",
31
- #"keep_absolute_impute__r2",
32
- "remove_positive_mask",
33
- "remove_positive_resample",
34
- #"remove_positive_impute",
35
- "remove_negative_mask",
36
- "remove_negative_resample",
37
- #"remove_negative_impute",
38
- "remove_absolute_mask__r2",
39
- "remove_absolute_resample__r2",
40
- #"remove_absolute_impute__r2"
41
- "runtime",
42
- ]
43
-
44
- binary_classification_metrics = [
45
- "local_accuracy",
46
- "consistency_guarantees",
47
- "keep_positive_mask",
48
- "keep_positive_resample",
49
- #"keep_positive_impute",
50
- "keep_negative_mask",
51
- "keep_negative_resample",
52
- #"keep_negative_impute",
53
- "keep_absolute_mask__roc_auc",
54
- "keep_absolute_resample__roc_auc",
55
- #"keep_absolute_impute__roc_auc",
56
- "remove_positive_mask",
57
- "remove_positive_resample",
58
- #"remove_positive_impute",
59
- "remove_negative_mask",
60
- "remove_negative_resample",
61
- #"remove_negative_impute",
62
- "remove_absolute_mask__roc_auc",
63
- "remove_absolute_resample__roc_auc",
64
- #"remove_absolute_impute__roc_auc"
65
- "runtime",
66
- ]
67
-
68
- human_metrics = [
69
- "human_and_00",
70
- "human_and_01",
71
- "human_and_11",
72
- "human_or_00",
73
- "human_or_01",
74
- "human_or_11",
75
- "human_xor_00",
76
- "human_xor_01",
77
- "human_xor_11",
78
- "human_sum_00",
79
- "human_sum_01",
80
- "human_sum_11"
81
- ]
82
-
83
- linear_regress_methods = [
84
- "linear_shap_corr",
85
- "linear_shap_ind",
86
- "coef",
87
- "random",
88
- "kernel_shap_1000_meanref",
89
- #"kernel_shap_100_meanref",
90
- #"sampling_shap_10000",
91
- "sampling_shap_1000",
92
- "lime_tabular_regression_1000"
93
- #"sampling_shap_100"
94
- ]
95
-
96
- linear_classify_methods = [
97
- # NEED LIME
98
- "linear_shap_corr",
99
- "linear_shap_ind",
100
- "coef",
101
- "random",
102
- "kernel_shap_1000_meanref",
103
- #"kernel_shap_100_meanref",
104
- #"sampling_shap_10000",
105
- "sampling_shap_1000",
106
- #"lime_tabular_regression_1000"
107
- #"sampling_shap_100"
108
- ]
109
-
110
- tree_regress_methods = [
111
- # NEED tree_shap_ind
112
- # NEED split_count?
113
- "tree_shap_tree_path_dependent",
114
- "tree_shap_independent_200",
115
- "saabas",
116
- "random",
117
- "tree_gain",
118
- "kernel_shap_1000_meanref",
119
- "mean_abs_tree_shap",
120
- #"kernel_shap_100_meanref",
121
- #"sampling_shap_10000",
122
- "sampling_shap_1000",
123
- "lime_tabular_regression_1000",
124
- "maple"
125
- #"sampling_shap_100"
126
- ]
127
-
128
- rf_regress_methods = [ # methods that only support random forest models
129
- "tree_maple"
130
- ]
131
-
132
- tree_classify_methods = [
133
- # NEED tree_shap_ind
134
- # NEED split_count?
135
- "tree_shap_tree_path_dependent",
136
- "tree_shap_independent_200",
137
- "saabas",
138
- "random",
139
- "tree_gain",
140
- "kernel_shap_1000_meanref",
141
- "mean_abs_tree_shap",
142
- #"kernel_shap_100_meanref",
143
- #"sampling_shap_10000",
144
- "sampling_shap_1000",
145
- "lime_tabular_classification_1000",
146
- "maple"
147
- #"sampling_shap_100"
148
- ]
149
-
150
- deep_regress_methods = [
151
- "deep_shap",
152
- "expected_gradients",
153
- "random",
154
- "kernel_shap_1000_meanref",
155
- "sampling_shap_1000",
156
- #"lime_tabular_regression_1000"
157
- ]
158
-
159
- deep_classify_methods = [
160
- "deep_shap",
161
- "expected_gradients",
162
- "random",
163
- "kernel_shap_1000_meanref",
164
- "sampling_shap_1000",
165
- #"lime_tabular_regression_1000"
166
- ]
167
-
168
- _experiments = []
169
- _experiments += [["corrgroups60", "lasso", m, s] for s in regression_metrics for m in linear_regress_methods]
170
- _experiments += [["corrgroups60", "ridge", m, s] for s in regression_metrics for m in linear_regress_methods]
171
- _experiments += [["corrgroups60", "decision_tree", m, s] for s in regression_metrics for m in tree_regress_methods]
172
- _experiments += [["corrgroups60", "random_forest", m, s] for s in regression_metrics for m in (tree_regress_methods + rf_regress_methods)]
173
- _experiments += [["corrgroups60", "gbm", m, s] for s in regression_metrics for m in tree_regress_methods]
174
- _experiments += [["corrgroups60", "ffnn", m, s] for s in regression_metrics for m in deep_regress_methods]
175
-
176
- _experiments += [["independentlinear60", "lasso", m, s] for s in regression_metrics for m in linear_regress_methods]
177
- _experiments += [["independentlinear60", "ridge", m, s] for s in regression_metrics for m in linear_regress_methods]
178
- _experiments += [["independentlinear60", "decision_tree", m, s] for s in regression_metrics for m in tree_regress_methods]
179
- _experiments += [["independentlinear60", "random_forest", m, s] for s in regression_metrics for m in (tree_regress_methods + rf_regress_methods)]
180
- _experiments += [["independentlinear60", "gbm", m, s] for s in regression_metrics for m in tree_regress_methods]
181
- _experiments += [["independentlinear60", "ffnn", m, s] for s in regression_metrics for m in deep_regress_methods]
182
-
183
- _experiments += [["cric", "lasso", m, s] for s in binary_classification_metrics for m in linear_classify_methods]
184
- _experiments += [["cric", "ridge", m, s] for s in binary_classification_metrics for m in linear_classify_methods]
185
- _experiments += [["cric", "decision_tree", m, s] for s in binary_classification_metrics for m in tree_classify_methods]
186
- _experiments += [["cric", "random_forest", m, s] for s in binary_classification_metrics for m in tree_classify_methods]
187
- _experiments += [["cric", "gbm", m, s] for s in binary_classification_metrics for m in tree_classify_methods]
188
- _experiments += [["cric", "ffnn", m, s] for s in binary_classification_metrics for m in deep_classify_methods]
189
-
190
- _experiments += [["human", "decision_tree", m, s] for s in human_metrics for m in tree_regress_methods]
191
-
192
-
193
- def experiments(dataset=None, model=None, method=None, metric=None):
194
- for experiment in _experiments:
195
- if dataset is not None and dataset != experiment[0]:
196
- continue
197
- if model is not None and model != experiment[1]:
198
- continue
199
- if method is not None and method != experiment[2]:
200
- continue
201
- if metric is not None and metric != experiment[3]:
202
- continue
203
- yield experiment
204
-
205
- def run_experiment(experiment, use_cache=True, cache_dir="/tmp"):
206
- dataset_name, model_name, method_name, metric_name = experiment
207
-
208
- # see if we have a cached version
209
- cache_id = __gen_cache_id(experiment)
210
- cache_file = os.path.join(cache_dir, cache_id + ".pickle")
211
- if use_cache and os.path.isfile(cache_file):
212
- with open(cache_file, "rb") as f:
213
- #print(cache_id.replace("__", " ") + " ...loaded from cache.")
214
- return pickle.load(f)
215
-
216
- # compute the scores
217
- print(cache_id.replace("__", " ", 4) + " ...")
218
- sys.stdout.flush()
219
- start = time.time()
220
- X,y = getattr(datasets, dataset_name)()
221
- score = getattr(metrics, metric_name)(
222
- X, y,
223
- getattr(models, dataset_name+"__"+model_name),
224
- method_name
225
- )
226
- print("...took %f seconds.\n" % (time.time() - start))
227
-
228
- # cache the scores
229
- with open(cache_file, "wb") as f:
230
- pickle.dump(score, f)
231
-
232
- return score
233
-
234
-
235
- def run_experiments_helper(args):
236
- experiment, cache_dir = args
237
- return run_experiment(experiment, cache_dir=cache_dir)
238
-
239
- def run_experiments(dataset=None, model=None, method=None, metric=None, cache_dir="/tmp", nworkers=1):
240
- experiments_arr = list(experiments(dataset=dataset, model=model, method=method, metric=metric))
241
- if nworkers == 1:
242
- out = list(map(run_experiments_helper, zip(experiments_arr, itertools.repeat(cache_dir))))
243
- else:
244
- with Pool(nworkers) as pool:
245
- out = pool.map(run_experiments_helper, zip(experiments_arr, itertools.repeat(cache_dir)))
246
- return list(zip(experiments_arr, out))
247
-
248
-
249
- nexperiments = 0
250
- total_sent = 0
251
- total_done = 0
252
- total_failed = 0
253
- host_records = {}
254
- worker_lock = Lock()
255
- ssh_conn_per_min_limit = 0 # set as an argument to run_remote_experiments
256
- def __thread_worker(q, host):
257
- global total_sent, total_done
258
- hostname, python_binary = host.split(":")
259
- while True:
260
-
261
- # make sure we are not sending too many ssh connections to the host
262
- # (if we send too many connections ssh throttling will lock us out)
263
- while True:
264
- all_clear = False
265
-
266
- worker_lock.acquire()
267
- try:
268
- if hostname not in host_records:
269
- host_records[hostname] = []
270
-
271
- if len(host_records[hostname]) < ssh_conn_per_min_limit:
272
- all_clear = True
273
- elif time.time() - host_records[hostname][-ssh_conn_per_min_limit] > 61:
274
- all_clear = True
275
- finally:
276
- worker_lock.release()
277
-
278
- # if we are clear to send a new ssh connection then break
279
- if all_clear:
280
- break
281
-
282
- # if we are not clear then we sleep and try again
283
- time.sleep(5)
284
-
285
- experiment = q.get()
286
-
287
- # if we are not loading from the cache then we note that we have called the host
288
- cache_dir = "/tmp"
289
- cache_file = os.path.join(cache_dir, __gen_cache_id(experiment) + ".pickle")
290
- if not os.path.isfile(cache_file):
291
- worker_lock.acquire()
292
- try:
293
- host_records[hostname].append(time.time())
294
- finally:
295
- worker_lock.release()
296
-
297
- # record how many we have sent off for execution
298
- worker_lock.acquire()
299
- try:
300
- total_sent += 1
301
- __print_status()
302
- finally:
303
- worker_lock.release()
304
-
305
- __run_remote_experiment(experiment, hostname, cache_dir=cache_dir, python_binary=python_binary)
306
-
307
- # record how many are finished
308
- worker_lock.acquire()
309
- try:
310
- total_done += 1
311
- __print_status()
312
- finally:
313
- worker_lock.release()
314
-
315
- q.task_done()
316
-
317
- def __print_status():
318
- print("Benchmark task %d of %d done (%d failed, %d running)" % (total_done, nexperiments, total_failed, total_sent - total_done), end="\r")
319
- sys.stdout.flush()
320
-
321
-
322
- def run_remote_experiments(experiments, thread_hosts, rate_limit=10):
323
- """ Use ssh to run the experiments on remote machines in parallel.
324
-
325
- Parameters
326
- ----------
327
- experiments : iterable
328
- Output of shap.benchmark.experiments(...).
329
-
330
- thread_hosts : list of strings
331
- Each host has the format "host_name:path_to_python_binary" and can appear multiple times
332
- in the list (one for each parallel execution you want on that machine).
333
-
334
- rate_limit : int
335
- How many ssh connections we make per minute to each host (to avoid throttling issues).
336
- """
337
-
338
- global ssh_conn_per_min_limit
339
- ssh_conn_per_min_limit = rate_limit
340
-
341
- # first we kill any remaining workers from previous runs
342
- # note we don't check_call because pkill kills our ssh call as well
343
- thread_hosts = copy.copy(thread_hosts)
344
- random.shuffle(thread_hosts)
345
- for host in set(thread_hosts):
346
- hostname,_ = host.split(":")
347
- try:
348
- subprocess.run(["ssh", hostname, "pkill -f shap.benchmark.run_experiment"], timeout=15)
349
- except subprocess.TimeoutExpired:
350
- print("Failed to connect to", hostname, "after 15 seconds! Exiting.")
351
- return
352
-
353
- experiments = copy.copy(list(experiments))
354
- random.shuffle(experiments) # this way all the hard experiments don't get put on one machine
355
- global nexperiments, total_sent, total_done, total_failed, host_records
356
- nexperiments = len(experiments)
357
- total_sent = 0
358
- total_done = 0
359
- total_failed = 0
360
- host_records = {}
361
-
362
- q = Queue()
363
-
364
- for host in thread_hosts:
365
- worker = Thread(target=__thread_worker, args=(q, host))
366
- worker.setDaemon(True)
367
- worker.start()
368
-
369
- for experiment in experiments:
370
- q.put(experiment)
371
-
372
- q.join()
373
-
374
- def __run_remote_experiment(experiment, remote, cache_dir="/tmp", python_binary="python"):
375
- global total_failed
376
- dataset_name, model_name, method_name, metric_name = experiment
377
-
378
- # see if we have a cached version
379
- cache_id = __gen_cache_id(experiment)
380
- cache_file = os.path.join(cache_dir, cache_id + ".pickle")
381
- if os.path.isfile(cache_file):
382
- with open(cache_file, "rb") as f:
383
- return pickle.load(f)
384
-
385
- # this is just so we don't dump everything at once on a machine
386
- time.sleep(random.uniform(0,5))
387
-
388
- # run the benchmark on the remote machine
389
- #start = time.time()
390
- cmd = "CUDA_VISIBLE_DEVICES=\"\" "+python_binary+" -c \"import shap; shap.benchmark.run_experiment(['{}', '{}', '{}', '{}'], cache_dir='{}')\" &> {}/{}.output".format(
391
- dataset_name, model_name, method_name, metric_name, cache_dir, cache_dir, cache_id
392
- )
393
- try:
394
- subprocess.check_output(["ssh", remote, cmd])
395
- except subprocess.CalledProcessError as e:
396
- print("The following command failed on %s:" % remote, file=sys.stderr)
397
- print(cmd, file=sys.stderr)
398
- total_failed += 1
399
- print(e)
400
- return
401
-
402
- # copy the results back
403
- subprocess.check_output(["scp", remote+":"+cache_file, cache_file])
404
-
405
- if os.path.isfile(cache_file):
406
- with open(cache_file, "rb") as f:
407
- #print(cache_id.replace("__", " ") + " ...loaded from remote after %f seconds" % (time.time() - start))
408
- return pickle.load(f)
409
- else:
410
- raise FileNotFoundError("Remote benchmark call finished but no local file was found!")
411
-
412
- def __gen_cache_id(experiment):
413
- dataset_name, model_name, method_name, metric_name = experiment
414
- return "v" + "__".join([__version__, dataset_name, model_name, method_name, metric_name])
 
lib/shap/benchmark/framework.py DELETED
@@ -1,113 +0,0 @@
1
- import itertools as it
2
-
3
- import matplotlib.pyplot as plt
4
- import numpy as np
5
- import pandas as pd
6
-
7
- from . import perturbation
8
-
9
-
10
- def update(model, attributions, X, y, masker, sort_order, perturbation_method, scores):
11
- metric = perturbation_method + ' ' + sort_order
12
- sp = perturbation.SequentialPerturbation(model, masker, sort_order, perturbation_method)
13
- xs, ys, auc = sp.model_score(attributions, X, y=y)
14
- scores['metrics'].append(metric)
15
- scores['values'][metric] = [xs, ys, auc]
16
-
17
- def get_benchmark(model, attributions, X, y, masker, metrics):
18
- # convert dataframes
19
- if isinstance(X, (pd.Series, pd.DataFrame)):
20
- X = X.values
21
- if isinstance(masker, (pd.Series, pd.DataFrame)):
22
- masker = masker.values
23
-
24
- # record scores per metric
25
- scores = {'metrics': list(), 'values': dict()}
26
- for sort_order, perturbation_method in list(it.product(metrics['sort_order'], metrics['perturbation'])):
27
- update(model, attributions, X, y, masker, sort_order, perturbation_method, scores)
28
-
29
- return scores
30
-
31
- def get_metrics(benchmarks, selection):
32
- # select metrics to plot using selection function
33
- explainer_metrics = set()
34
- for explainer in benchmarks:
35
- scores = benchmarks[explainer]
36
- if len(explainer_metrics) == 0:
37
- explainer_metrics = set(scores['metrics'])
38
- else:
39
- explainer_metrics = selection(explainer_metrics, set(scores['metrics']))
40
-
41
- return list(explainer_metrics)
42
-
43
- def trend_plot(benchmarks):
44
- explainer_metrics = get_metrics(benchmarks, lambda x, y: x.union(y))
45
-
46
- # plot all curves if metric exists
47
- for metric in explainer_metrics:
48
- plt.clf()
49
-
50
- for explainer in benchmarks:
51
- scores = benchmarks[explainer]
52
- if metric in scores['values']:
53
- x, y, auc = scores['values'][metric]
54
- plt.plot(x, y, label=f'{round(auc, 3)} - {explainer}')
55
-
56
- if 'keep' in metric:
57
- xlabel = 'Percent Unmasked'
58
- if 'remove' in metric:
59
- xlabel = 'Percent Masked'
60
-
61
- plt.ylabel('Model Output')
62
- plt.xlabel(xlabel)
63
- plt.title(metric)
64
- plt.legend()
65
- plt.show()
66
-
67
- def compare_plot(benchmarks):
68
- explainer_metrics = get_metrics(benchmarks, lambda x, y: x.intersection(y))
69
- explainers = list(benchmarks.keys())
70
- num_explainers = len(explainers)
71
- num_metrics = len(explainer_metrics)
72
-
73
- # dummy start to evenly distribute explainers on the left
74
- # can later be replaced by boolean metrics
75
- aucs = dict()
76
- for i in range(num_explainers):
77
- explainer = explainers[i]
78
- aucs[explainer] = [i/(num_explainers-1)]
79
-
80
- # normalize per metric
81
- for metric in explainer_metrics:
82
- max_auc, min_auc = -float('inf'), float('inf')
83
-
84
- for explainer in explainers:
85
- scores = benchmarks[explainer]
86
- _, _, auc = scores['values'][metric]
87
- min_auc = min(auc, min_auc)
88
- max_auc = max(auc, max_auc)
89
-
90
- for explainer in explainers:
91
- scores = benchmarks[explainer]
92
- _, _, auc = scores['values'][metric]
93
- aucs[explainer].append((auc-min_auc)/(max_auc-min_auc))
94
-
95
- # plot common curves
96
- ax = plt.gca()
97
- for explainer in explainers:
98
- plt.plot(np.linspace(0, 1, len(explainer_metrics)+1), aucs[explainer], '--o')
99
-
100
- ax.tick_params(which='major', axis='both', labelsize=8)
101
-
102
- ax.set_yticks([i/(num_explainers-1) for i in range(0, num_explainers)])
103
- ax.set_yticklabels(explainers, rotation=0)
104
-
105
- ax.set_xticks(np.linspace(0, 1, num_metrics+1))
106
- ax.set_xticklabels([' '] + explainer_metrics, rotation=45, ha='right')
107
-
108
- plt.grid(which='major', axis='x', linestyle='--')
109
- plt.tight_layout()
110
- plt.ylabel('Relative Performance of Each Explanation Method')
111
- plt.xlabel('Evaluation Metrics')
112
- plt.title('Explanation Method Performance Across Metrics')
113
- plt.show()
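
compare_plot rescales each metric's AUCs so that, per metric, the worst explainer maps to 0 and the best to 1 before plotting. A minimal sketch of that min-max normalization with made-up AUC numbers:

aucs = {
    "permutation": {"keep positive": 0.42, "remove positive": -0.31},
    "random":      {"keep positive": 0.05, "remove positive": -0.02},
}

metrics = ["keep positive", "remove positive"]
normalized = {name: [] for name in aucs}
for metric in metrics:
    vals = [aucs[name][metric] for name in aucs]
    lo, hi = min(vals), max(vals)
    for name in aucs:
        # worst explainer -> 0, best explainer -> 1 for this metric
        normalized[name].append((aucs[name][metric] - lo) / (hi - lo))

print(normalized)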
 
lib/shap/benchmark/measures.py DELETED
@@ -1,424 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
- import pandas as pd
5
- import sklearn.utils
6
- from tqdm.auto import tqdm
7
-
8
- _remove_cache = {}
9
- def remove_retrain(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
10
- """ The model is retrained for each test sample with the important features set to a constant.
11
-
12
- If you want to know how important a set of features is you can ask how the model would be
13
- different if those features had never existed. To determine this we can mask those features
14
- across the entire training and test datasets, then retrain the model. If we then compare the
15
- output of this retrained model to the original model we can see the effect produced by knowing
16
- the features we masked. Since for individualized explanation methods each test sample has a
17
- different set of most important features we need to retrain the model for every test sample
18
- to get the change in model performance when a specified fraction of the most important features
19
- are withheld.
20
- """
21
-
22
- warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!")
23
-
24
- # see if we match the last cached call
25
- global _remove_cache
26
- args = (X_train, y_train, X_test, y_test, model_generator, metric)
27
- cache_match = False
28
- if "args" in _remove_cache:
29
- if all(a is b for a,b in zip(_remove_cache["args"], args)) and np.all(_remove_cache["attr_test"] == attr_test):
30
- cache_match = True
31
-
32
- X_train, X_test = to_array(X_train, X_test)
33
-
34
- # how many features to mask
35
- assert X_train.shape[1] == X_test.shape[1]
36
-
37
- # this is the model we will retrain many times
38
- model_masked = model_generator()
39
-
40
- # mask nmask top features and re-train the model for each test explanation
41
- X_train_tmp = np.zeros(X_train.shape)
42
- X_test_tmp = np.zeros(X_test.shape)
43
- yp_masked_test = np.zeros(y_test.shape)
44
- tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6
45
- last_nmask = _remove_cache.get("nmask", None)
46
- last_yp_masked_test = _remove_cache.get("yp_masked_test", None)
47
- for i in tqdm(range(len(y_test)), "Retraining for the 'remove' metric"):
48
- if cache_match and last_nmask[i] == nmask[i]:
49
- yp_masked_test[i] = last_yp_masked_test[i]
50
- elif nmask[i] == 0:
51
- yp_masked_test[i] = trained_model.predict(X_test[i:i+1])[0]
52
- else:
53
- # mask out the most important features for this test instance
54
- X_train_tmp[:] = X_train
55
- X_test_tmp[:] = X_test
56
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
57
- X_train_tmp[:,ordering[:nmask[i]]] = X_train[:,ordering[:nmask[i]]].mean()
58
- X_test_tmp[i,ordering[:nmask[i]]] = X_train[:,ordering[:nmask[i]]].mean()
59
-
60
- # retrain the model and make a prediction
61
- model_masked.fit(X_train_tmp, y_train)
62
- yp_masked_test[i] = model_masked.predict(X_test_tmp[i:i+1])[0]
63
-
64
- # save our results so the next call to us can be faster when there is redundancy
65
- _remove_cache["nmask"] = nmask
66
- _remove_cache["yp_masked_test"] = yp_masked_test
67
- _remove_cache["attr_test"] = attr_test
68
- _remove_cache["args"] = args
69
-
70
- return metric(y_test, yp_masked_test)
71
-
72
- def remove_mask(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
73
- """ Each test sample is masked by setting the important features to a constant.
74
- """
75
-
76
- X_train, X_test = to_array(X_train, X_test)
77
-
78
- # how many features to mask
79
- assert X_train.shape[1] == X_test.shape[1]
80
-
81
- # mask nmask top features for each test explanation
82
- X_test_tmp = X_test.copy()
83
- tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6
84
- mean_vals = X_train.mean(0)
85
- for i in range(len(y_test)):
86
- if nmask[i] > 0:
87
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
88
- X_test_tmp[i,ordering[:nmask[i]]] = mean_vals[ordering[:nmask[i]]]
89
-
90
- yp_masked_test = trained_model.predict(X_test_tmp)
91
-
92
- return metric(y_test, yp_masked_test)
93
-
94
- def remove_impute(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
95
- """ The model is reevaluated for each test sample with the important features set to an imputed value.
96
-
97
- Note that the imputation is done using a multivariate normality assumption on the dataset. This depends on
98
- being able to estimate the full data covariance matrix (and inverse) accurately. So X_train.shape[0] should
99
- be significantly bigger than X_train.shape[1].
100
- """
101
-
102
- X_train, X_test = to_array(X_train, X_test)
103
-
104
- # how many features to mask
105
- assert X_train.shape[1] == X_test.shape[1]
106
-
107
- # keep nkeep top features for each test explanation
108
- C = np.cov(X_train.T)
109
- C += np.eye(C.shape[0]) * 1e-6
110
- X_test_tmp = X_test.copy()
111
- yp_masked_test = np.zeros(y_test.shape)
112
- tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6
113
- mean_vals = X_train.mean(0)
114
- for i in range(len(y_test)):
115
- if nmask[i] > 0:
116
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
117
- observe_inds = ordering[nmask[i]:]
118
- impute_inds = ordering[:nmask[i]]
119
-
120
- # impute missing data assuming it follows a multivariate normal distribution
121
- Coo_inv = np.linalg.inv(C[observe_inds,:][:,observe_inds])
122
- Cio = C[impute_inds,:][:,observe_inds]
123
- impute = mean_vals[impute_inds] + Cio @ Coo_inv @ (X_test[i, observe_inds] - mean_vals[observe_inds])
124
-
125
- X_test_tmp[i, impute_inds] = impute
126
-
127
- yp_masked_test = trained_model.predict(X_test_tmp)
128
-
129
- return metric(y_test, yp_masked_test)
130
-
131
- def remove_resample(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
132
- """ The model is reevaluated for each test sample with the important features set to resample background values.
133
- """
134
-
135
- X_train, X_test = to_array(X_train, X_test)
136
-
137
- # how many features to mask
138
- assert X_train.shape[1] == X_test.shape[1]
139
-
140
- # how many samples to take
141
- nsamples = 100
142
-
143
- # keep nkeep top features for each test explanation
144
- N,M = X_test.shape
145
- X_test_tmp = np.tile(X_test, [1, nsamples]).reshape(nsamples * N, M)
146
- tie_breaking_noise = const_rand(M) * 1e-6
147
- inds = sklearn.utils.resample(np.arange(N), n_samples=nsamples, random_state=random_state)
148
- for i in range(N):
149
- if nmask[i] > 0:
150
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
151
- X_test_tmp[i*nsamples:(i+1)*nsamples, ordering[:nmask[i]]] = X_train[inds, :][:, ordering[:nmask[i]]]
152
-
153
- yp_masked_test = trained_model.predict(X_test_tmp)
154
- yp_masked_test = np.reshape(yp_masked_test, (N, nsamples)).mean(1) # take the mean output over all samples
155
-
156
- return metric(y_test, yp_masked_test)
157
-
158
- def batch_remove_retrain(nmask_train, nmask_test, X_train, y_train, X_test, y_test, attr_train, attr_test, model_generator, metric):
159
- """ An approximation of holdout that only retraines the model once.
160
-
161
- This is also called ROAR (RemOve And Retrain) in work by Google. It is much more computationally
162
- efficient than the holdout method because it masks the most important features in every sample
163
- and then retrains the model once, instead of retraining the model for every test sample like
164
- the holdout metric.
165
- """
166
-
167
- warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!")
168
-
169
- X_train, X_test = to_array(X_train, X_test)
170
-
171
- # how many features to mask
172
- assert X_train.shape[1] == X_test.shape[1]
173
-
174
- # mask nmask top features for each explanation
175
- X_train_tmp = X_train.copy()
176
- X_train_mean = X_train.mean(0)
177
- tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6
178
- for i in range(len(y_train)):
179
- if nmask_train[i] > 0:
180
- ordering = np.argsort(-attr_train[i, :] + tie_breaking_noise)
181
- X_train_tmp[i, ordering[:nmask_train[i]]] = X_train_mean[ordering[:nmask_train[i]]]
182
- X_test_tmp = X_test.copy()
183
- for i in range(len(y_test)):
184
- if nmask_test[i] > 0:
185
- ordering = np.argsort(-attr_test[i, :] + tie_breaking_noise)
186
- X_test_tmp[i, ordering[:nmask_test[i]]] = X_train_mean[ordering[:nmask_test[i]]]
187
-
188
- # train the model with all the given features masked
189
- model_masked = model_generator()
190
- model_masked.fit(X_train_tmp, y_train)
191
- yp_test_masked = model_masked.predict(X_test_tmp)
192
-
193
- return metric(y_test, yp_test_masked)
194
-
195
- _keep_cache = {}
196
- def keep_retrain(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
197
- """ The model is retrained for each test sample with the non-important features set to a constant.
198
-
199
- If you want to know how important a set of features is you can ask how the model would be
200
- different if only those features had existed. To determine this we can mask the other features
201
- across the entire training and test datasets, then retrain the model. If we apply compare the
202
- output of this retrained model to the original model we can see the effect produced by only
203
- knowning the important features. Since for individualized explanation methods each test sample
204
- has a different set of most important features we need to retrain the model for every test sample
205
- to get the change in model performance when a specified fraction of the most important features
206
- are retained.
207
- """
208
-
209
- warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!")
210
-
211
- # see if we match the last cached call
212
- global _keep_cache
213
- args = (X_train, y_train, X_test, y_test, model_generator, metric)
214
- cache_match = False
215
- if "args" in _keep_cache:
216
- if all(a is b for a,b in zip(_keep_cache["args"], args)) and np.all(_keep_cache["attr_test"] == attr_test):
217
- cache_match = True
218
-
219
- X_train, X_test = to_array(X_train, X_test)
220
-
221
- # how many features to mask
222
- assert X_train.shape[1] == X_test.shape[1]
223
-
224
- # this is the model we will retrain many times
225
- model_masked = model_generator()
226
-
227
- # keep nkeep top features and re-train the model for each test explanation
228
- X_train_tmp = np.zeros(X_train.shape)
229
- X_test_tmp = np.zeros(X_test.shape)
230
- yp_masked_test = np.zeros(y_test.shape)
231
- tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6
232
- last_nkeep = _keep_cache.get("nkeep", None)
233
- last_yp_masked_test = _keep_cache.get("yp_masked_test", None)
234
- for i in tqdm(range(len(y_test)), "Retraining for the 'keep' metric"):
235
- if cache_match and last_nkeep[i] == nkeep[i]:
236
- yp_masked_test[i] = last_yp_masked_test[i]
237
- elif nkeep[i] == attr_test.shape[1]:
238
- yp_masked_test[i] = trained_model.predict(X_test[i:i+1])[0]
239
- else:
240
-
241
- # mask out the most important features for this test instance
242
- X_train_tmp[:] = X_train
243
- X_test_tmp[:] = X_test
244
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
245
- X_train_tmp[:,ordering[nkeep[i]:]] = X_train[:,ordering[nkeep[i]:]].mean()
246
- X_test_tmp[i,ordering[nkeep[i]:]] = X_train[:,ordering[nkeep[i]:]].mean()
247
-
248
- # retrain the model and make a prediction
249
- model_masked.fit(X_train_tmp, y_train)
250
- yp_masked_test[i] = model_masked.predict(X_test_tmp[i:i+1])[0]
251
-
252
- # save our results so the next call to us can be faster when there is redundancy
253
- _keep_cache["nkeep"] = nkeep
254
- _keep_cache["yp_masked_test"] = yp_masked_test
255
- _keep_cache["attr_test"] = attr_test
256
- _keep_cache["args"] = args
257
-
258
- return metric(y_test, yp_masked_test)
259
-
260
- def keep_mask(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
261
- """ The model is reevaluated for each test sample with the non-important features set to their mean.
262
- """
263
-
264
- X_train, X_test = to_array(X_train, X_test)
265
-
266
- # how many features to mask
267
- assert X_train.shape[1] == X_test.shape[1]
268
-
269
- # keep nkeep top features for each test explanation
270
- X_test_tmp = X_test.copy()
271
- yp_masked_test = np.zeros(y_test.shape)
272
- tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6
273
- mean_vals = X_train.mean(0)
274
- for i in range(len(y_test)):
275
- if nkeep[i] < X_test.shape[1]:
276
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
277
- X_test_tmp[i,ordering[nkeep[i]:]] = mean_vals[ordering[nkeep[i]:]]
278
-
279
- yp_masked_test = trained_model.predict(X_test_tmp)
280
-
281
- return metric(y_test, yp_masked_test)
282
-
283
- def keep_impute(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
284
- """ The model is reevaluated for each test sample with the non-important features set to an imputed value.
285
-
286
- Note that the imputation is done using a multivariate normality assumption on the dataset. This depends on
287
- being able to estimate the full data covariance matrix (and inverse) accurately. So X_train.shape[0] should
288
- be significantly bigger than X_train.shape[1].
289
- """
290
-
291
- X_train, X_test = to_array(X_train, X_test)
292
-
293
- # how many features to mask
294
- assert X_train.shape[1] == X_test.shape[1]
295
-
296
- # keep nkeep top features for each test explanation
297
- C = np.cov(X_train.T)
298
- C += np.eye(C.shape[0]) * 1e-6
299
- X_test_tmp = X_test.copy()
300
- yp_masked_test = np.zeros(y_test.shape)
301
- tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6
302
- mean_vals = X_train.mean(0)
303
- for i in range(len(y_test)):
304
- if nkeep[i] < X_test.shape[1]:
305
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
306
- observe_inds = ordering[:nkeep[i]]
307
- impute_inds = ordering[nkeep[i]:]
308
-
309
- # impute missing data assuming it follows a multivariate normal distribution
310
- Coo_inv = np.linalg.inv(C[observe_inds,:][:,observe_inds])
311
- Cio = C[impute_inds,:][:,observe_inds]
312
- impute = mean_vals[impute_inds] + Cio @ Coo_inv @ (X_test[i, observe_inds] - mean_vals[observe_inds])
313
-
314
- X_test_tmp[i, impute_inds] = impute
315
-
316
- yp_masked_test = trained_model.predict(X_test_tmp)
317
-
318
- return metric(y_test, yp_masked_test)
319
-
320
- def keep_resample(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state):
321
- """ The model is reevaluated for each test sample with the non-important features set to resample background values.
322
- """ # why broken? overwriting?
323
-
324
- X_train, X_test = to_array(X_train, X_test)
325
-
326
- # how many features to mask
327
- assert X_train.shape[1] == X_test.shape[1]
328
-
329
- # how many samples to take
330
- nsamples = 100
331
-
332
- # keep nkeep top features for each test explanation
333
- N,M = X_test.shape
334
- X_test_tmp = np.tile(X_test, [1, nsamples]).reshape(nsamples * N, M)
335
- tie_breaking_noise = const_rand(M) * 1e-6
336
- inds = sklearn.utils.resample(np.arange(N), n_samples=nsamples, random_state=random_state)
337
- for i in range(N):
338
- if nkeep[i] < M:
339
- ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise)
340
- X_test_tmp[i*nsamples:(i+1)*nsamples, ordering[nkeep[i]:]] = X_train[inds, :][:, ordering[nkeep[i]:]]
341
-
342
- yp_masked_test = trained_model.predict(X_test_tmp)
343
- yp_masked_test = np.reshape(yp_masked_test, (N, nsamples)).mean(1) # take the mean output over all samples
344
-
345
- return metric(y_test, yp_masked_test)
346
-
347
- def batch_keep_retrain(nkeep_train, nkeep_test, X_train, y_train, X_test, y_test, attr_train, attr_test, model_generator, metric):
348
- """ An approximation of keep that only retraines the model once.
349
-
350
- This is also called KAR (Keep And Retrain) in work by Google. It is much more computationally
351
- efficient than the keep method because it masks the unimportant features in every sample
352
- and then retrains the model once, instead of retraining the model for every test sample like
353
- the keep metric.
354
- """
355
-
356
- warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!")
357
-
358
- X_train, X_test = to_array(X_train, X_test)
359
-
360
- # how many features to mask
361
- assert X_train.shape[1] == X_test.shape[1]
362
-
363
- # mask nkeep top features for each explanation
364
- X_train_tmp = X_train.copy()
365
- X_train_mean = X_train.mean(0)
366
- tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6
367
- for i in range(len(y_train)):
368
- if nkeep_train[i] < X_train.shape[1]:
369
- ordering = np.argsort(-attr_train[i, :] + tie_breaking_noise)
370
- X_train_tmp[i, ordering[nkeep_train[i]:]] = X_train_mean[ordering[nkeep_train[i]:]]
371
- X_test_tmp = X_test.copy()
372
- for i in range(len(y_test)):
373
- if nkeep_test[i] < X_test.shape[1]:
374
- ordering = np.argsort(-attr_test[i, :] + tie_breaking_noise)
375
- X_test_tmp[i, ordering[nkeep_test[i]:]] = X_train_mean[ordering[nkeep_test[i]:]]
376
-
377
- # train the model with all the features not given masked
378
- model_masked = model_generator()
379
- model_masked.fit(X_train_tmp, y_train)
380
- yp_test_masked = model_masked.predict(X_test_tmp)
381
-
382
- return metric(y_test, yp_test_masked)
383
-
384
- def local_accuracy(X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model):
385
- """ The how well do the features plus a constant base rate sum up to the model output.
386
- """
387
-
388
- X_train, X_test = to_array(X_train, X_test)
389
-
390
- # how many features to mask
391
- assert X_train.shape[1] == X_test.shape[1]
392
-
393
- # keep nkeep top features and re-train the model for each test explanation
394
- yp_test = trained_model.predict(X_test)
395
-
396
- return metric(yp_test, strip_list(attr_test).sum(1))
397
-
398
- def to_array(*args):
399
- return [a.values if isinstance(a, pd.DataFrame) else a for a in args]
400
-
401
- def const_rand(size, seed=23980):
402
- """ Generate a random array with a fixed seed.
403
- """
404
- old_seed = np.random.seed()
405
- np.random.seed(seed)
406
- out = np.random.rand(size)
407
- np.random.seed(old_seed)
408
- return out
409
-
410
- def const_shuffle(arr, seed=23980):
411
- """ Shuffle an array in-place with a fixed seed.
412
- """
413
- old_seed = np.random.seed()
414
- np.random.seed(seed)
415
- np.random.shuffle(arr)
416
- np.random.seed(old_seed)
417
-
418
- def strip_list(attrs):
419
- """ This assumes that if you have a list of outputs you just want the second one (the second class is the '1' class).
420
- """
421
- if isinstance(attrs, list):
422
- return attrs[1]
423
- else:
424
- return attrs
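
Note: the keep_impute measure above fills the masked features with their conditional expectation under a multivariate normal fitted to X_train, which is why its docstring asks for X_train.shape[0] to be much larger than X_train.shape[1]. A minimal, self-contained sketch of that conditional-Gaussian step on synthetic data (the arrays, indices, and sizes below are illustrative, not part of the benchmark code):

import numpy as np

# illustrative synthetic training data; the benchmark uses the dataset being explained
rng = np.random.default_rng(0)
cov = np.array([[1.0, 0.6, 0.2],
                [0.6, 1.0, 0.3],
                [0.2, 0.3, 1.0]])
X_train = rng.multivariate_normal(mean=np.zeros(3), cov=cov, size=5000)

x = np.array([1.5, -0.7, 0.4])    # one test sample
observe_inds = np.array([0])      # features we keep (the "important" ones)
impute_inds = np.array([1, 2])    # features we mask and impute

mean_vals = X_train.mean(0)
C = np.cov(X_train.T) + np.eye(3) * 1e-6   # same diagonal regularization as keep_impute

# conditional mean E[x_impute | x_observe] for a multivariate normal
Coo_inv = np.linalg.inv(C[np.ix_(observe_inds, observe_inds)])
Cio = C[np.ix_(impute_inds, observe_inds)]
imputed = mean_vals[impute_inds] + Cio @ Coo_inv @ (x[observe_inds] - mean_vals[observe_inds])

x_masked = x.copy()
x_masked[impute_inds] = imputed   # this row is what trained_model.predict would then see
print(x_masked)

The 1e-6 added to the diagonal of C keeps the observed-block inverse well conditioned when features are strongly correlated, which is the same reason the covariance estimate needs many more rows than columns.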
lib/shap/benchmark/methods.py DELETED
@@ -1,148 +0,0 @@
1
- import numpy as np
2
- import sklearn
3
-
4
- from .. import (
5
- DeepExplainer,
6
- GradientExplainer,
7
- KernelExplainer,
8
- LinearExplainer,
9
- SamplingExplainer,
10
- TreeExplainer,
11
- kmeans,
12
- )
13
- from ..explainers import other
14
- from .models import KerasWrap
15
-
16
-
17
- def linear_shap_corr(model, data):
18
- """ Linear SHAP (corr 1000)
19
- """
20
- return LinearExplainer(model, data, feature_dependence="correlation", nsamples=1000).shap_values
21
-
22
- def linear_shap_ind(model, data):
23
- """ Linear SHAP (ind)
24
- """
25
- return LinearExplainer(model, data, feature_dependence="independent").shap_values
26
-
27
- def coef(model, data):
28
- """ Coefficients
29
- """
30
- return other.CoefficentExplainer(model).attributions
31
-
32
- def random(model, data):
33
- """ Random
34
- color = #777777
35
- linestyle = solid
36
- """
37
- return other.RandomExplainer().attributions
38
-
39
- def kernel_shap_1000_meanref(model, data):
40
- """ Kernel SHAP 1000 mean ref.
41
- color = red_blue_circle(0.5)
42
- linestyle = solid
43
- """
44
- return lambda X: KernelExplainer(model.predict, kmeans(data, 1)).shap_values(X, nsamples=1000, l1_reg=0)
45
-
46
- def sampling_shap_1000(model, data):
47
- """ IME 1000
48
- color = red_blue_circle(0.5)
49
- linestyle = dashed
50
- """
51
- return lambda X: SamplingExplainer(model.predict, data).shap_values(X, nsamples=1000)
52
-
53
- def tree_shap_tree_path_dependent(model, data):
54
- """ TreeExplainer
55
- color = red_blue_circle(0)
56
- linestyle = solid
57
- """
58
- return TreeExplainer(model, feature_dependence="tree_path_dependent").shap_values
59
-
60
- def tree_shap_independent_200(model, data):
61
- """ TreeExplainer (independent)
62
- color = red_blue_circle(0)
63
- linestyle = dashed
64
- """
65
- data_subsample = sklearn.utils.resample(data, replace=False, n_samples=min(200, data.shape[0]), random_state=0)
66
- return TreeExplainer(model, data_subsample, feature_dependence="independent").shap_values
67
-
68
- def mean_abs_tree_shap(model, data):
69
- """ mean(|TreeExplainer|)
70
- color = red_blue_circle(0.25)
71
- linestyle = solid
72
- """
73
- def f(X):
74
- v = TreeExplainer(model).shap_values(X)
75
- if isinstance(v, list):
76
- return [np.tile(np.abs(sv).mean(0), (X.shape[0], 1)) for sv in v]
77
- else:
78
- return np.tile(np.abs(v).mean(0), (X.shape[0], 1))
79
- return f
80
-
81
- def saabas(model, data):
82
- """ Saabas
83
- color = red_blue_circle(0)
84
- linestyle = dotted
85
- """
86
- return lambda X: TreeExplainer(model).shap_values(X, approximate=True)
87
-
88
- def tree_gain(model, data):
89
- """ Gain/Gini Importance
90
- color = red_blue_circle(0.25)
91
- linestyle = dotted
92
- """
93
- return other.TreeGainExplainer(model).attributions
94
-
95
- def lime_tabular_regression_1000(model, data):
96
- """ LIME Tabular 1000
97
- color = red_blue_circle(0.75)
98
- """
99
- return lambda X: other.LimeTabularExplainer(model.predict, data, mode="regression").attributions(X, nsamples=1000)
100
-
101
- def lime_tabular_classification_1000(model, data):
102
- """ LIME Tabular 1000
103
- color = red_blue_circle(0.75)
104
- """
105
- return lambda X: other.LimeTabularExplainer(model.predict_proba, data, mode="classification").attributions(X, nsamples=1000)[1]
106
-
107
- def maple(model, data):
108
- """ MAPLE
109
- color = red_blue_circle(0.6)
110
- """
111
- return lambda X: other.MapleExplainer(model.predict, data).attributions(X, multiply_by_input=False)
112
-
113
- def tree_maple(model, data):
114
- """ Tree MAPLE
115
- color = red_blue_circle(0.6)
116
- linestyle = dashed
117
- """
118
- return lambda X: other.TreeMapleExplainer(model, data).attributions(X, multiply_by_input=False)
119
-
120
- def deep_shap(model, data):
121
- """ Deep SHAP (DeepLIFT)
122
- """
123
- if isinstance(model, KerasWrap):
124
- model = model.model
125
- explainer = DeepExplainer(model, kmeans(data, 1).data)
126
- def f(X):
127
- phi = explainer.shap_values(X)
128
- if isinstance(phi, list) and len(phi) == 1:
129
- return phi[0]
130
- else:
131
- return phi
132
-
133
- return f
134
-
135
- def expected_gradients(model, data):
136
- """ Expected Gradients
137
- """
138
- if isinstance(model, KerasWrap):
139
- model = model.model
140
- explainer = GradientExplainer(model, data)
141
- def f(X):
142
- phi = explainer.shap_values(X)
143
- if isinstance(phi, list) and len(phi) == 1:
144
- return phi[0]
145
- else:
146
- return phi
147
-
148
- return f
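
Each factory in methods.py maps a trained model plus background data to a callable that takes X and returns an attribution matrix, which the benchmark then evaluates on the test split. A rough usage sketch on a synthetic regression problem (the model, data, and shapes here are made up, and the LinearExplainer call is a simplified stand-in for linear_shap_ind above, omitting its feature_dependence argument); it assumes shap and scikit-learn are installed:

import numpy as np
import sklearn.linear_model
import shap

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 0.0]) + 0.1 * rng.normal(size=200)

model = sklearn.linear_model.Ridge().fit(X, y)

# roughly what linear_shap_ind(model, X) hands back: a function X -> attributions
attr_function = shap.LinearExplainer(model, X).shap_values

A = attr_function(X[:10])
print(A.shape)   # (10, 5): one attribution per feature for each explained sample

The color = and linestyle = lines embedded in the docstrings above are not just notes for the reader: plots.py further down parses them (get_method_color, get_method_linestyle) to style each method's benchmark curve.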
lib/shap/benchmark/metrics.py DELETED
@@ -1,824 +0,0 @@
1
- import hashlib
2
- import os
3
- import time
4
-
5
- import numpy as np
6
- import sklearn
7
-
8
- from .. import __version__
9
- from . import measures, methods
10
-
11
- try:
12
- import dill as pickle
13
- except Exception:
14
- pass
15
-
16
- try:
17
- from sklearn.model_selection import train_test_split
18
- except Exception:
19
- from sklearn.cross_validation import train_test_split
20
-
21
-
22
- def runtime(X, y, model_generator, method_name):
23
- """ Runtime (sec / 1k samples)
24
- transform = "negate_log"
25
- sort_order = 2
26
- """
27
-
28
- old_seed = np.random.seed()
29
- np.random.seed(3293)
30
-
31
- # average the method scores over several train/test splits
32
- method_reps = []
33
- for i in range(3):
34
- X_train, X_test, y_train, _ = train_test_split(__toarray(X), y, test_size=100, random_state=i)
35
-
36
- # define the model we are going to explain
37
- model = model_generator()
38
- model.fit(X_train, y_train)
39
-
40
- # evaluate each method
41
- start = time.time()
42
- explainer = getattr(methods, method_name)(model, X_train)
43
- build_time = time.time() - start
44
-
45
- start = time.time()
46
- explainer(X_test)
47
- explain_time = time.time() - start
48
-
49
- # we always normalize the explain time as though we were explaining 1000 samples
50
- # even if we actually explain fewer (like just 100) to reduce the benchmark's runtime
51
- method_reps.append(build_time + explain_time * 1000.0 / X_test.shape[0])
52
- np.random.seed(old_seed)
53
-
54
- return None, np.mean(method_reps)
55
-
56
- def local_accuracy(X, y, model_generator, method_name):
57
- """ Local Accuracy
58
- transform = "identity"
59
- sort_order = 0
60
- """
61
-
62
- def score_map(true, pred):
63
- """ Computes local accuracy as the normalized standard deviation of numerical scores.
64
- """
65
- return np.std(pred - true) / (np.std(true) + 1e-6)
66
-
67
- def score_function(X_train, X_test, y_train, y_test, attr_function, trained_model, random_state):
68
- return measures.local_accuracy(
69
- X_train, y_train, X_test, y_test, attr_function(X_test),
70
- model_generator, score_map, trained_model
71
- )
72
- return None, __score_method(X, y, None, model_generator, score_function, method_name)
73
-
74
- def consistency_guarantees(X, y, model_generator, method_name):
75
- """ Consistency Guarantees
76
- transform = "identity"
77
- sort_order = 1
78
- """
79
-
80
- # 1.0 - perfect consistency
81
- # 0.8 - guarantees depend on sampling
82
- # 0.6 - guarantees depend on approximation
83
- # 0.0 - no guarantees
84
- guarantees = {
85
- "linear_shap_corr": 1.0,
86
- "linear_shap_ind": 1.0,
87
- "coef": 0.0,
88
- "kernel_shap_1000_meanref": 0.8,
89
- "sampling_shap_1000": 0.8,
90
- "random": 0.0,
91
- "saabas": 0.0,
92
- "tree_gain": 0.0,
93
- "tree_shap_tree_path_dependent": 1.0,
94
- "tree_shap_independent_200": 1.0,
95
- "mean_abs_tree_shap": 1.0,
96
- "lime_tabular_regression_1000": 0.8,
97
- "lime_tabular_classification_1000": 0.8,
98
- "maple": 0.8,
99
- "tree_maple": 0.8,
100
- "deep_shap": 0.6,
101
- "expected_gradients": 0.6
102
- }
103
-
104
- return None, guarantees[method_name]
105
-
106
- def __mean_pred(true, pred):
107
- """ A trivial metric that is just is the output of the model.
108
- """
109
- return np.mean(pred)
110
-
111
- def keep_positive_mask(X, y, model_generator, method_name, num_fcounts=11):
112
- """ Keep Positive (mask)
113
- xlabel = "Max fraction of features kept"
114
- ylabel = "Mean model output"
115
- transform = "identity"
116
- sort_order = 4
117
- """
118
- return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
119
-
120
- def keep_negative_mask(X, y, model_generator, method_name, num_fcounts=11):
121
- """ Keep Negative (mask)
122
- xlabel = "Max fraction of features kept"
123
- ylabel = "Negative mean model output"
124
- transform = "negate"
125
- sort_order = 5
126
- """
127
- return __run_measure(measures.keep_mask, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
128
-
129
- def keep_absolute_mask__r2(X, y, model_generator, method_name, num_fcounts=11):
130
- """ Keep Absolute (mask)
131
- xlabel = "Max fraction of features kept"
132
- ylabel = "R^2"
133
- transform = "identity"
134
- sort_order = 6
135
- """
136
- return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
137
-
138
- def keep_absolute_mask__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
139
- """ Keep Absolute (mask)
140
- xlabel = "Max fraction of features kept"
141
- ylabel = "ROC AUC"
142
- transform = "identity"
143
- sort_order = 6
144
- """
145
- return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
146
-
147
- def remove_positive_mask(X, y, model_generator, method_name, num_fcounts=11):
148
- """ Remove Positive (mask)
149
- xlabel = "Max fraction of features removed"
150
- ylabel = "Negative mean model output"
151
- transform = "negate"
152
- sort_order = 7
153
- """
154
- return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
155
-
156
- def remove_negative_mask(X, y, model_generator, method_name, num_fcounts=11):
157
- """ Remove Negative (mask)
158
- xlabel = "Max fraction of features removed"
159
- ylabel = "Mean model output"
160
- transform = "identity"
161
- sort_order = 8
162
- """
163
- return __run_measure(measures.remove_mask, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
164
-
165
- def remove_absolute_mask__r2(X, y, model_generator, method_name, num_fcounts=11):
166
- """ Remove Absolute (mask)
167
- xlabel = "Max fraction of features removed"
168
- ylabel = "1 - R^2"
169
- transform = "one_minus"
170
- sort_order = 9
171
- """
172
- return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
173
-
174
- def remove_absolute_mask__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
175
- """ Remove Absolute (mask)
176
- xlabel = "Max fraction of features removed"
177
- ylabel = "1 - ROC AUC"
178
- transform = "one_minus"
179
- sort_order = 9
180
- """
181
- return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
182
-
183
- def keep_positive_resample(X, y, model_generator, method_name, num_fcounts=11):
184
- """ Keep Positive (resample)
185
- xlabel = "Max fraction of features kept"
186
- ylabel = "Mean model output"
187
- transform = "identity"
188
- sort_order = 10
189
- """
190
- return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
191
-
192
- def keep_negative_resample(X, y, model_generator, method_name, num_fcounts=11):
193
- """ Keep Negative (resample)
194
- xlabel = "Max fraction of features kept"
195
- ylabel = "Negative mean model output"
196
- transform = "negate"
197
- sort_order = 11
198
- """
199
- return __run_measure(measures.keep_resample, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
200
-
201
- def keep_absolute_resample__r2(X, y, model_generator, method_name, num_fcounts=11):
202
- """ Keep Absolute (resample)
203
- xlabel = "Max fraction of features kept"
204
- ylabel = "R^2"
205
- transform = "identity"
206
- sort_order = 12
207
- """
208
- return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
209
-
210
- def keep_absolute_resample__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
211
- """ Keep Absolute (resample)
212
- xlabel = "Max fraction of features kept"
213
- ylabel = "ROC AUC"
214
- transform = "identity"
215
- sort_order = 12
216
- """
217
- return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
218
-
219
- def remove_positive_resample(X, y, model_generator, method_name, num_fcounts=11):
220
- """ Remove Positive (resample)
221
- xlabel = "Max fraction of features removed"
222
- ylabel = "Negative mean model output"
223
- transform = "negate"
224
- sort_order = 13
225
- """
226
- return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
227
-
228
- def remove_negative_resample(X, y, model_generator, method_name, num_fcounts=11):
229
- """ Remove Negative (resample)
230
- xlabel = "Max fraction of features removed"
231
- ylabel = "Mean model output"
232
- transform = "identity"
233
- sort_order = 14
234
- """
235
- return __run_measure(measures.remove_resample, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
236
-
237
- def remove_absolute_resample__r2(X, y, model_generator, method_name, num_fcounts=11):
238
- """ Remove Absolute (resample)
239
- xlabel = "Max fraction of features removed"
240
- ylabel = "1 - R^2"
241
- transform = "one_minus"
242
- sort_order = 15
243
- """
244
- return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
245
-
246
- def remove_absolute_resample__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
247
- """ Remove Absolute (resample)
248
- xlabel = "Max fraction of features removed"
249
- ylabel = "1 - ROC AUC"
250
- transform = "one_minus"
251
- sort_order = 15
252
- """
253
- return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
254
-
255
- def keep_positive_impute(X, y, model_generator, method_name, num_fcounts=11):
256
- """ Keep Positive (impute)
257
- xlabel = "Max fraction of features kept"
258
- ylabel = "Mean model output"
259
- transform = "identity"
260
- sort_order = 16
261
- """
262
- return __run_measure(measures.keep_impute, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
263
-
264
- def keep_negative_impute(X, y, model_generator, method_name, num_fcounts=11):
265
- """ Keep Negative (impute)
266
- xlabel = "Max fraction of features kept"
267
- ylabel = "Negative mean model output"
268
- transform = "negate"
269
- sort_order = 17
270
- """
271
- return __run_measure(measures.keep_impute, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
272
-
273
- def keep_absolute_impute__r2(X, y, model_generator, method_name, num_fcounts=11):
274
- """ Keep Absolute (impute)
275
- xlabel = "Max fraction of features kept"
276
- ylabel = "R^2"
277
- transform = "identity"
278
- sort_order = 18
279
- """
280
- return __run_measure(measures.keep_impute, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
281
-
282
- def keep_absolute_impute__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
283
- """ Keep Absolute (impute)
284
- xlabel = "Max fraction of features kept"
285
- ylabel = "ROC AUC"
286
- transform = "identity"
287
- sort_order = 19
288
- """
289
- return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
290
-
291
- def remove_positive_impute(X, y, model_generator, method_name, num_fcounts=11):
292
- """ Remove Positive (impute)
293
- xlabel = "Max fraction of features removed"
294
- ylabel = "Negative mean model output"
295
- transform = "negate"
296
- sort_order = 7
297
- """
298
- return __run_measure(measures.remove_impute, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
299
-
300
- def remove_negative_impute(X, y, model_generator, method_name, num_fcounts=11):
301
- """ Remove Negative (impute)
302
- xlabel = "Max fraction of features removed"
303
- ylabel = "Mean model output"
304
- transform = "identity"
305
- sort_order = 8
306
- """
307
- return __run_measure(measures.remove_impute, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
308
-
309
- def remove_absolute_impute__r2(X, y, model_generator, method_name, num_fcounts=11):
310
- """ Remove Absolute (impute)
311
- xlabel = "Max fraction of features removed"
312
- ylabel = "1 - R^2"
313
- transform = "one_minus"
314
- sort_order = 9
315
- """
316
- return __run_measure(measures.remove_impute, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score)
317
-
318
- def remove_absolute_impute__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
319
- """ Remove Absolute (impute)
320
- xlabel = "Max fraction of features removed"
321
- ylabel = "1 - ROC AUC"
322
- transform = "one_minus"
323
- sort_order = 9
324
- """
325
- return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score)
326
-
327
- def keep_positive_retrain(X, y, model_generator, method_name, num_fcounts=11):
328
- """ Keep Positive (retrain)
329
- xlabel = "Max fraction of features kept"
330
- ylabel = "Mean model output"
331
- transform = "identity"
332
- sort_order = 6
333
- """
334
- return __run_measure(measures.keep_retrain, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
335
-
336
- def keep_negative_retrain(X, y, model_generator, method_name, num_fcounts=11):
337
- """ Keep Negative (retrain)
338
- xlabel = "Max fraction of features kept"
339
- ylabel = "Negative mean model output"
340
- transform = "negate"
341
- sort_order = 7
342
- """
343
- return __run_measure(measures.keep_retrain, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
344
-
345
- def remove_positive_retrain(X, y, model_generator, method_name, num_fcounts=11):
346
- """ Remove Positive (retrain)
347
- xlabel = "Max fraction of features removed"
348
- ylabel = "Negative mean model output"
349
- transform = "negate"
350
- sort_order = 11
351
- """
352
- return __run_measure(measures.remove_retrain, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred)
353
-
354
- def remove_negative_retrain(X, y, model_generator, method_name, num_fcounts=11):
355
- """ Remove Negative (retrain)
356
- xlabel = "Max fraction of features removed"
357
- ylabel = "Mean model output"
358
- transform = "identity"
359
- sort_order = 12
360
- """
361
- return __run_measure(measures.remove_retrain, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred)
362
-
363
- def __run_measure(measure, X, y, model_generator, method_name, attribution_sign, num_fcounts, summary_function):
364
-
365
- def score_function(fcount, X_train, X_test, y_train, y_test, attr_function, trained_model, random_state):
366
- if attribution_sign == 0:
367
- A = np.abs(__strip_list(attr_function(X_test)))
368
- else:
369
- A = attribution_sign * __strip_list(attr_function(X_test))
370
- nmask = np.ones(len(y_test)) * fcount
371
- nmask = np.minimum(nmask, np.array(A >= 0).sum(1)).astype(int)
372
- return measure(
373
- nmask, X_train, y_train, X_test, y_test, A,
374
- model_generator, summary_function, trained_model, random_state
375
- )
376
- fcounts = __intlogspace(0, X.shape[1], num_fcounts)
377
- return fcounts, __score_method(X, y, fcounts, model_generator, score_function, method_name)
378
-
379
- def batch_remove_absolute_retrain__r2(X, y, model_generator, method_name, num_fcounts=11):
380
- """ Batch Remove Absolute (retrain)
381
- xlabel = "Fraction of features removed"
382
- ylabel = "1 - R^2"
383
- transform = "one_minus"
384
- sort_order = 13
385
- """
386
- return __run_batch_abs_metric(measures.batch_remove_retrain, X, y, model_generator, method_name, sklearn.metrics.r2_score, num_fcounts)
387
-
388
- def batch_keep_absolute_retrain__r2(X, y, model_generator, method_name, num_fcounts=11):
389
- """ Batch Keep Absolute (retrain)
390
- xlabel = "Fraction of features kept"
391
- ylabel = "R^2"
392
- transform = "identity"
393
- sort_order = 13
394
- """
395
- return __run_batch_abs_metric(measures.batch_keep_retrain, X, y, model_generator, method_name, sklearn.metrics.r2_score, num_fcounts)
396
-
397
- def batch_remove_absolute_retrain__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
398
- """ Batch Remove Absolute (retrain)
399
- xlabel = "Fraction of features removed"
400
- ylabel = "1 - ROC AUC"
401
- transform = "one_minus"
402
- sort_order = 13
403
- """
404
- return __run_batch_abs_metric(measures.batch_remove_retrain, X, y, model_generator, method_name, sklearn.metrics.roc_auc_score, num_fcounts)
405
-
406
- def batch_keep_absolute_retrain__roc_auc(X, y, model_generator, method_name, num_fcounts=11):
407
- """ Batch Keep Absolute (retrain)
408
- xlabel = "Fraction of features kept"
409
- ylabel = "ROC AUC"
410
- transform = "identity"
411
- sort_order = 13
412
- """
413
- return __run_batch_abs_metric(measures.batch_keep_retrain, X, y, model_generator, method_name, sklearn.metrics.roc_auc_score, num_fcounts)
414
-
415
- def __run_batch_abs_metric(metric, X, y, model_generator, method_name, loss, num_fcounts):
416
- def score_function(fcount, X_train, X_test, y_train, y_test, attr_function, trained_model):
417
- A_train = np.abs(__strip_list(attr_function(X_train)))
418
- nkeep_train = (np.ones(len(y_train)) * fcount).astype(int)
419
- #nkeep_train = np.minimum(nkeep_train, np.array(A_train > 0).sum(1)).astype(int)
420
- A_test = np.abs(__strip_list(attr_function(X_test)))
421
- nkeep_test = (np.ones(len(y_test)) * fcount).astype(int)
422
- #nkeep_test = np.minimum(nkeep_test, np.array(A_test >= 0).sum(1)).astype(int)
423
- return metric(
424
- nkeep_train, nkeep_test, X_train, y_train, X_test, y_test, A_train, A_test,
425
- model_generator, loss
426
- )
427
- fcounts = __intlogspace(0, X.shape[1], num_fcounts)
428
- return fcounts, __score_method(X, y, fcounts, model_generator, score_function, method_name)
429
-
430
- _attribution_cache = {}
431
- def __score_method(X, y, fcounts, model_generator, score_function, method_name, nreps=10, test_size=100, cache_dir="/tmp"):
432
- """ Test an explanation method.
433
- """
434
-
435
- try:
436
- pickle
437
- except NameError:
438
- raise ImportError("The 'dill' package could not be loaded and is needed for the benchmark!")
439
-
440
- old_seed = np.random.seed()
441
- np.random.seed(3293)
442
-
443
- # average the method scores over several train/test splits
444
- method_reps = []
445
-
446
- data_hash = hashlib.sha256(__toarray(X).flatten()).hexdigest() + hashlib.sha256(__toarray(y)).hexdigest()
447
- for i in range(nreps):
448
- X_train, X_test, y_train, y_test = train_test_split(__toarray(X), y, test_size=test_size, random_state=i)
449
-
450
- # define the model we are going to explain, caching it so we only build it once
451
- model_id = "model_cache__v" + "__".join([__version__, data_hash, model_generator.__name__])+".pickle"
452
- cache_file = os.path.join(cache_dir, model_id + ".pickle")
453
- if os.path.isfile(cache_file):
454
- with open(cache_file, "rb") as f:
455
- model = pickle.load(f)
456
- else:
457
- model = model_generator()
458
- model.fit(X_train, y_train)
459
- with open(cache_file, "wb") as f:
460
- pickle.dump(model, f)
461
-
462
- attr_key = "_".join([model_generator.__name__, method_name, str(test_size), str(nreps), str(i), data_hash])
463
- def score(attr_function):
464
- def cached_attr_function(X_inner):
465
- if attr_key not in _attribution_cache:
466
- _attribution_cache[attr_key] = attr_function(X_inner)
467
- return _attribution_cache[attr_key]
468
-
469
- #cached_attr_function = lambda X: __check_cache(attr_function, X)
470
- if fcounts is None:
471
- return score_function(X_train, X_test, y_train, y_test, cached_attr_function, model, i)
472
- else:
473
- scores = []
474
- for f in fcounts:
475
- scores.append(score_function(f, X_train, X_test, y_train, y_test, cached_attr_function, model, i))
476
- return np.array(scores)
477
-
478
- # evaluate the method (only building the attribution function if we need to)
479
- if attr_key not in _attribution_cache:
480
- method_reps.append(score(getattr(methods, method_name)(model, X_train)))
481
- else:
482
- method_reps.append(score(None))
483
-
484
- np.random.seed(old_seed)
485
- return np.array(method_reps).mean(0)
486
-
487
-
488
- # used to memoize explainer functions so we don't waste time re-explaining the same object
489
- __cache0 = None
490
- __cache_X0 = None
491
- __cache_f0 = None
492
- __cache1 = None
493
- __cache_X1 = None
494
- __cache_f1 = None
495
- def __check_cache(f, X):
496
- global __cache0, __cache_X0, __cache_f0
497
- global __cache1, __cache_X1, __cache_f1
498
- if X is __cache_X0 and f is __cache_f0:
499
- return __cache0
500
- elif X is __cache_X1 and f is __cache_f1:
501
- return __cache1
502
- else:
503
- __cache_f1 = __cache_f0
504
- __cache_X1 = __cache_X0
505
- __cache1 = __cache0
506
- __cache_f0 = f
507
- __cache_X0 = X
508
- __cache0 = f(X)
509
- return __cache0
510
-
511
- def __intlogspace(start, end, count):
512
- return np.unique(np.round(start + (end-start) * (np.logspace(0, 1, count, endpoint=True) - 1) / 9).astype(int))
513
-
514
- def __toarray(X):
515
- """ Converts DataFrames to numpy arrays.
516
- """
517
- if hasattr(X, "values"):
518
- X = X.values
519
- return X
520
-
521
- def __strip_list(attrs):
522
- """ This assumes that if you have a list of outputs you just want the second one (the second class).
523
- """
524
- if isinstance(attrs, list):
525
- return attrs[1]
526
- else:
527
- return attrs
528
-
529
- def _fit_human(model_generator, val00, val01, val11):
530
- # force the model to fit a function with almost entirely zero background
531
- N = 1000000
532
- M = 3
533
- X = np.zeros((N,M))
534
- X.shape
535
- y = np.ones(N) * val00
536
- X[0:1000, 0] = 1
537
- y[0:1000] = val01
538
- for i in range(0,1000000,1000):
539
- X[i, 1] = 1
540
- y[i] = val01
541
- y[0] = val11
542
- model = model_generator()
543
- model.fit(X, y)
544
- return model
545
-
546
- def _human_and(X, model_generator, method_name, fever, cough):
547
- assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
548
-
549
- # these are from the sickness_score mturk user study experiment
550
- X_test = np.zeros((100,3))
551
- if not fever and not cough:
552
- human_consensus = np.array([0., 0., 0.])
553
- X_test[0,:] = np.array([[0., 0., 1.]])
554
- elif not fever and cough:
555
- human_consensus = np.array([0., 2., 0.])
556
- X_test[0,:] = np.array([[0., 1., 1.]])
557
- elif fever and cough:
558
- human_consensus = np.array([5., 5., 0.])
559
- X_test[0,:] = np.array([[1., 1., 1.]])
560
-
561
- # force the model to fit an AND function with almost entirely zero background
562
- model = _fit_human(model_generator, 0, 2, 10)
563
-
564
- attr_function = getattr(methods, method_name)(model, X)
565
- methods_attrs = attr_function(X_test)
566
- return "human", (human_consensus, methods_attrs[0,:])
567
-
568
- def human_and_00(X, y, model_generator, method_name):
569
- """ AND (false/false)
570
-
571
- This tests how well a feature attribution method agrees with human intuition
572
- for an AND operation combined with linear effects. This metric deals
573
- specifically with the question of credit allocation for the following function
574
- when all three inputs are true:
575
- if fever: +2 points
576
- if cough: +2 points
577
- if fever and cough: +6 points
578
-
579
- transform = "identity"
580
- sort_order = 0
581
- """
582
- return _human_and(X, model_generator, method_name, False, False)
583
-
584
- def human_and_01(X, y, model_generator, method_name):
585
- """ AND (false/true)
586
-
587
- This tests how well a feature attribution method agrees with human intuition
588
- for an AND operation combined with linear effects. This metric deals
589
- specifically with the question of credit allocation for the following function
590
- when all three inputs are true:
591
- if fever: +2 points
592
- if cough: +2 points
593
- if fever and cough: +6 points
594
-
595
- transform = "identity"
596
- sort_order = 1
597
- """
598
- return _human_and(X, model_generator, method_name, False, True)
599
-
600
- def human_and_11(X, y, model_generator, method_name):
601
- """ AND (true/true)
602
-
603
- This tests how well a feature attribution method agrees with human intuition
604
- for an AND operation combined with linear effects. This metric deals
605
- specifically with the question of credit allocation for the following function
606
- when all three inputs are true:
607
- if fever: +2 points
608
- if cough: +2 points
609
- if fever and cough: +6 points
610
-
611
- transform = "identity"
612
- sort_order = 2
613
- """
614
- return _human_and(X, model_generator, method_name, True, True)
615
-
616
-
617
- def _human_or(X, model_generator, method_name, fever, cough):
618
- assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
619
-
620
- # these are from the sickness_score mturk user study experiment
621
- X_test = np.zeros((100,3))
622
- if not fever and not cough:
623
- human_consensus = np.array([0., 0., 0.])
624
- X_test[0,:] = np.array([[0., 0., 1.]])
625
- elif not fever and cough:
626
- human_consensus = np.array([0., 8., 0.])
627
- X_test[0,:] = np.array([[0., 1., 1.]])
628
- elif fever and cough:
629
- human_consensus = np.array([5., 5., 0.])
630
- X_test[0,:] = np.array([[1., 1., 1.]])
631
-
632
- # force the model to fit an OR function with almost entirely zero background
633
- model = _fit_human(model_generator, 0, 8, 10)
634
-
635
- attr_function = getattr(methods, method_name)(model, X)
636
- methods_attrs = attr_function(X_test)
637
- return "human", (human_consensus, methods_attrs[0,:])
638
-
639
- def human_or_00(X, y, model_generator, method_name):
640
- """ OR (false/false)
641
-
642
- This tests how well a feature attribution method agrees with human intuition
643
- for an OR operation combined with linear effects. This metric deals
644
- specifically with the question of credit allocation for the following function
645
- when all three inputs are true:
646
- if fever: +2 points
647
- if cough: +2 points
648
- if fever or cough: +6 points
649
-
650
- transform = "identity"
651
- sort_order = 0
652
- """
653
- return _human_or(X, model_generator, method_name, False, False)
654
-
655
- def human_or_01(X, y, model_generator, method_name):
656
- """ OR (false/true)
657
-
658
- This tests how well a feature attribution method agrees with human intuition
659
- for an OR operation combined with linear effects. This metric deals
660
- specifically with the question of credit allocation for the following function
661
- when all three inputs are true:
662
- if fever: +2 points
663
- if cough: +2 points
664
- if fever or cough: +6 points
665
-
666
- transform = "identity"
667
- sort_order = 1
668
- """
669
- return _human_or(X, model_generator, method_name, False, True)
670
-
671
- def human_or_11(X, y, model_generator, method_name):
672
- """ OR (true/true)
673
-
674
- This tests how well a feature attribution method agrees with human intuition
675
- for an OR operation combined with linear effects. This metric deals
676
- specifically with the question of credit allocation for the following function
677
- when all three inputs are true:
678
- if fever: +2 points
679
- if cough: +2 points
680
- if fever or cough: +6 points
681
-
682
- transform = "identity"
683
- sort_order = 2
684
- """
685
- return _human_or(X, model_generator, method_name, True, True)
686
-
687
-
688
- def _human_xor(X, model_generator, method_name, fever, cough):
689
- assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
690
-
691
- # these are from the sickness_score mturk user study experiment
692
- X_test = np.zeros((100,3))
693
- if not fever and not cough:
694
- human_consensus = np.array([0., 0., 0.])
695
- X_test[0,:] = np.array([[0., 0., 1.]])
696
- elif not fever and cough:
697
- human_consensus = np.array([0., 8., 0.])
698
- X_test[0,:] = np.array([[0., 1., 1.]])
699
- elif fever and cough:
700
- human_consensus = np.array([2., 2., 0.])
701
- X_test[0,:] = np.array([[1., 1., 1.]])
702
-
703
- # force the model to fit an XOR function with almost entirely zero background
704
- model = _fit_human(model_generator, 0, 8, 4)
705
-
706
- attr_function = getattr(methods, method_name)(model, X)
707
- methods_attrs = attr_function(X_test)
708
- return "human", (human_consensus, methods_attrs[0,:])
709
-
710
- def human_xor_00(X, y, model_generator, method_name):
711
- """ XOR (false/false)
712
-
713
- This tests how well a feature attribution method agrees with human intuition
714
- for an eXclusive OR operation combined with linear effects. This metric deals
715
- specifically with the question of credit allocation for the following function
716
- when all three inputs are true:
717
- if fever: +2 points
718
- if cough: +2 points
719
- if fever or cough but not both: +6 points
720
-
721
- transform = "identity"
722
- sort_order = 3
723
- """
724
- return _human_xor(X, model_generator, method_name, False, False)
725
-
726
- def human_xor_01(X, y, model_generator, method_name):
727
- """ XOR (false/true)
728
-
729
- This tests how well a feature attribution method agrees with human intuition
730
- for an eXclusive OR operation combined with linear effects. This metric deals
731
- specifically with the question of credit allocation for the following function
732
- when all three inputs are true:
733
- if fever: +2 points
734
- if cough: +2 points
735
- if fever or cough but not both: +6 points
736
-
737
- transform = "identity"
738
- sort_order = 4
739
- """
740
- return _human_xor(X, model_generator, method_name, False, True)
741
-
742
- def human_xor_11(X, y, model_generator, method_name):
743
- """ XOR (true/true)
744
-
745
- This tests how well a feature attribution method agrees with human intuition
746
- for an eXclusive OR operation combined with linear effects. This metric deals
747
- specifically with the question of credit allocation for the following function
748
- when all three inputs are true:
749
- if fever: +2 points
750
- if cough: +2 points
751
- if fever or cough but not both: +6 points
752
-
753
- transform = "identity"
754
- sort_order = 5
755
- """
756
- return _human_xor(X, model_generator, method_name, True, True)
757
-
758
-
759
- def _human_sum(X, model_generator, method_name, fever, cough):
760
- assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
761
-
762
- # these are from the sickness_score mturk user study experiment
763
- X_test = np.zeros((100,3))
764
- if not fever and not cough:
765
- human_consensus = np.array([0., 0., 0.])
766
- X_test[0,:] = np.array([[0., 0., 1.]])
767
- elif not fever and cough:
768
- human_consensus = np.array([0., 2., 0.])
769
- X_test[0,:] = np.array([[0., 1., 1.]])
770
- elif fever and cough:
771
- human_consensus = np.array([2., 2., 0.])
772
- X_test[0,:] = np.array([[1., 1., 1.]])
773
-
774
- # force the model to fit a SUM function with almost entirely zero background
775
- model = _fit_human(model_generator, 0, 2, 4)
776
-
777
- attr_function = getattr(methods, method_name)(model, X)
778
- methods_attrs = attr_function(X_test)
779
- return "human", (human_consensus, methods_attrs[0,:])
780
-
781
- def human_sum_00(X, y, model_generator, method_name):
782
- """ SUM (false/false)
783
-
784
- This tests how well a feature attribution method agrees with human intuition
785
- for a SUM operation. This metric deals
786
- specifically with the question of credit allocation for the following function
787
- when all three inputs are true:
788
- if fever: +2 points
789
- if cough: +2 points
790
-
791
- transform = "identity"
792
- sort_order = 0
793
- """
794
- return _human_sum(X, model_generator, method_name, False, False)
795
-
796
- def human_sum_01(X, y, model_generator, method_name):
797
- """ SUM (false/true)
798
-
799
- This tests how well a feature attribution method agrees with human intuition
800
- for a SUM operation. This metric deals
801
- specifically with the question of credit allocation for the following function
802
- when all three inputs are true:
803
- if fever: +2 points
804
- if cough: +2 points
805
-
806
- transform = "identity"
807
- sort_order = 1
808
- """
809
- return _human_sum(X, model_generator, method_name, False, True)
810
-
811
- def human_sum_11(X, y, model_generator, method_name):
812
- """ SUM (true/true)
813
-
814
- This tests how well a feature attribution method agrees with human intuition
815
- for a SUM operation. This metric deals
816
- specifically with the question of credit allocation for the following function
817
- when all three inputs are true:
818
- if fever: +2 points
819
- if cough: +2 points
820
-
821
- transform = "identity"
822
- sort_order = 2
823
- """
824
- return _human_sum(X, model_generator, method_name, True, True)
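
The keep/remove metrics above sweep a grid of feature counts produced by __intlogspace, which spaces the counts logarithmically so that small feature budgets are sampled more densely than large ones. A quick standalone check of what that grid looks like for a 60-feature dataset (the helper is copied here so the snippet runs on its own):

import numpy as np

def intlogspace(start, end, count):
    # same formula as __intlogspace above
    return np.unique(np.round(
        start + (end - start) * (np.logspace(0, 1, count, endpoint=True) - 1) / 9
    ).astype(int))

print(intlogspace(0, 60, 11))
# [ 0  2  4  7 10 14 20 27 35 46 60]  -- denser near zero, sparser near the full feature count

np.unique also collapses any duplicate counts, so for datasets with only a handful of features the grid can come back shorter than num_fcounts.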
lib/shap/benchmark/models.py DELETED
@@ -1,230 +0,0 @@
1
- import numpy as np
2
- import sklearn
3
- import sklearn.ensemble
4
- from sklearn.preprocessing import StandardScaler
5
-
6
-
7
- class KerasWrap:
8
- """ A wrapper that allows us to set parameters in the constructor and do a reset before fitting.
9
- """
10
- def __init__(self, model, epochs, flatten_output=False):
11
- self.model = model
12
- self.epochs = epochs
13
- self.flatten_output = flatten_output
14
- self.init_weights = None
15
- self.scaler = StandardScaler()
16
-
17
- def fit(self, X, y, verbose=0):
18
- if self.init_weights is None:
19
- self.init_weights = self.model.get_weights()
20
- else:
21
- self.model.set_weights(self.init_weights)
22
- self.scaler.fit(X)
23
- return self.model.fit(X, y, epochs=self.epochs, verbose=verbose)
24
-
25
- def predict(self, X):
26
- X = self.scaler.transform(X)
27
- if self.flatten_output:
28
- return self.model.predict(X).flatten()
29
- else:
30
- return self.model.predict(X)
31
-
32
-
33
- # This models are all tuned for the corrgroups60 dataset
34
-
35
- def corrgroups60__lasso():
36
- """ Lasso Regression
37
- """
38
- return sklearn.linear_model.Lasso(alpha=0.1)
39
-
40
- def corrgroups60__ridge():
41
- """ Ridge Regression
42
- """
43
- return sklearn.linear_model.Ridge(alpha=1.0)
44
-
45
- def corrgroups60__decision_tree():
46
- """ Decision Tree
47
- """
48
-
49
- # max_depth was chosen to minimise test error
50
- return sklearn.tree.DecisionTreeRegressor(random_state=0, max_depth=6)
51
-
52
- def corrgroups60__random_forest():
53
- """ Random Forest
54
- """
55
- return sklearn.ensemble.RandomForestRegressor(100, random_state=0)
56
-
57
- def corrgroups60__gbm():
58
- """ Gradient Boosted Trees
59
- """
60
- import xgboost
61
-
62
- # max_depth and learning_rate were fixed then n_estimators was chosen using a train/test split
63
- return xgboost.XGBRegressor(max_depth=6, n_estimators=50, learning_rate=0.1, n_jobs=8, random_state=0)
64
-
65
- def corrgroups60__ffnn():
66
- """ 4-Layer Neural Network
67
- """
68
- from tensorflow.keras.layers import Dense
69
- from tensorflow.keras.models import Sequential
70
-
71
- model = Sequential()
72
- model.add(Dense(32, activation='relu', input_dim=60))
73
- model.add(Dense(20, activation='relu'))
74
- model.add(Dense(20, activation='relu'))
75
- model.add(Dense(1))
76
-
77
- model.compile(optimizer='adam',
78
- loss='mean_squared_error',
79
- metrics=['mean_squared_error'])
80
-
81
- return KerasWrap(model, 30, flatten_output=True)
82
-
83
-
84
- def independentlinear60__lasso():
85
- """ Lasso Regression
86
- """
87
- return sklearn.linear_model.Lasso(alpha=0.1)
88
-
89
- def independentlinear60__ridge():
90
- """ Ridge Regression
91
- """
92
- return sklearn.linear_model.Ridge(alpha=1.0)
93
-
94
- def independentlinear60__decision_tree():
95
- """ Decision Tree
96
- """
97
-
98
- # max_depth was chosen to minimise test error
99
- return sklearn.tree.DecisionTreeRegressor(random_state=0, max_depth=4)
100
-
101
- def independentlinear60__random_forest():
102
- """ Random Forest
103
- """
104
- return sklearn.ensemble.RandomForestRegressor(100, random_state=0)
105
-
106
- def independentlinear60__gbm():
107
- """ Gradient Boosted Trees
108
- """
109
- import xgboost
110
-
111
- # max_depth and learning_rate were fixed then n_estimators was chosen using a train/test split
112
- return xgboost.XGBRegressor(max_depth=6, n_estimators=100, learning_rate=0.1, n_jobs=8, random_state=0)
113
-
114
- def independentlinear60__ffnn():
115
- """ 4-Layer Neural Network
116
- """
117
- from tensorflow.keras.layers import Dense
118
- from tensorflow.keras.models import Sequential
119
-
120
- model = Sequential()
121
- model.add(Dense(32, activation='relu', input_dim=60))
122
- model.add(Dense(20, activation='relu'))
123
- model.add(Dense(20, activation='relu'))
124
- model.add(Dense(1))
125
-
126
- model.compile(optimizer='adam',
127
- loss='mean_squared_error',
128
- metrics=['mean_squared_error'])
129
-
130
- return KerasWrap(model, 30, flatten_output=True)
131
-
132
-
133
- def cric__lasso():
134
- """ Lasso Regression
135
- """
136
- model = sklearn.linear_model.LogisticRegression(penalty="l1", C=0.002)
137
-
138
- # we want to explain the raw probability outputs of the trees
139
- model.predict = lambda X: model.predict_proba(X)[:,1]
140
-
141
- return model
142
-
143
- def cric__ridge():
144
- """ Ridge Regression
145
- """
146
- model = sklearn.linear_model.LogisticRegression(penalty="l2")
147
-
148
- # we want to explain the raw probability outputs of the trees
149
- model.predict = lambda X: model.predict_proba(X)[:,1]
150
-
151
- return model
152
-
153
- def cric__decision_tree():
154
- """ Decision Tree
155
- """
156
- model = sklearn.tree.DecisionTreeClassifier(random_state=0, max_depth=4)
157
-
158
- # we want to explain the raw probability outputs of the trees
159
- model.predict = lambda X: model.predict_proba(X)[:,1]
160
-
161
- return model
162
-
163
- def cric__random_forest():
164
- """ Random Forest
165
- """
166
- model = sklearn.ensemble.RandomForestClassifier(100, random_state=0)
167
-
168
- # we want to explain the raw probability outputs of the trees
169
- model.predict = lambda X: model.predict_proba(X)[:,1]
170
-
171
- return model
172
-
173
- def cric__gbm():
174
- """ Gradient Boosted Trees
175
- """
176
- import xgboost
177
-
178
- # max_depth and subsample match the params used for the full cric data in the paper
179
- # learning_rate was set a bit higher to allow for faster runtimes
180
- # n_estimators was chosen based on a train/test split of the data
181
- model = xgboost.XGBClassifier(max_depth=5, n_estimators=400, learning_rate=0.01, subsample=0.2, n_jobs=8, random_state=0)
182
-
183
- # we want to explain the margin, not the transformed probability outputs
184
- model.__orig_predict = model.predict
185
- model.predict = lambda X: model.__orig_predict(X, output_margin=True)
186
-
187
- return model
188
-
189
- def cric__ffnn():
190
- """ 4-Layer Neural Network
191
- """
192
- from tensorflow.keras.layers import Dense, Dropout
193
- from tensorflow.keras.models import Sequential
194
-
195
- model = Sequential()
196
- model.add(Dense(10, activation='relu', input_dim=336))
197
- model.add(Dropout(0.5))
198
- model.add(Dense(10, activation='relu'))
199
- model.add(Dropout(0.5))
200
- model.add(Dense(1, activation='sigmoid'))
201
-
202
- model.compile(optimizer='adam',
203
- loss='binary_crossentropy',
204
- metrics=['accuracy'])
205
-
206
- return KerasWrap(model, 30, flatten_output=True)
207
-
208
-
209
- def human__decision_tree():
210
- """ Decision Tree
211
- """
212
-
213
- # build data
214
- N = 1000000
215
- M = 3
216
- X = np.zeros((N,M))
217
- X.shape
218
- y = np.zeros(N)
219
- X[0, 0] = 1
220
- y[0] = 8
221
- X[1, 1] = 1
222
- y[1] = 8
223
- X[2, 0:2] = 1
224
- y[2] = 4
225
-
226
- # fit model
227
- xor_model = sklearn.tree.DecisionTreeRegressor(max_depth=2)
228
- xor_model.fit(X, y)
229
-
230
- return xor_model
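
Several of the cric__* factories above attach a lambda to model.predict so that the benchmark explains the positive-class probability (or, for cric__gbm, the raw margin) instead of the hard 0/1 label. A minimal sketch of the same pattern on a toy classifier (the data and model are stand-ins, not the CRIC setup):

import numpy as np
import sklearn.linear_model

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 4))
y = (X[:, 0] + X[:, 1] > 0).astype(int)

model = sklearn.linear_model.LogisticRegression().fit(X, y)

# explain P(y = 1) rather than the 0/1 prediction, as the cric__* factories do
model.predict = lambda X: model.predict_proba(X)[:, 1]

print(model.predict(X[:3]))   # three probabilities in [0, 1]

Because the override lives on the instance it also survives a later fit() call, which is why the factories above can attach the lambda before the benchmark trains the model: anything that calls model.predict afterwards sees the probability output.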
lib/shap/benchmark/plots.py DELETED
@@ -1,566 +0,0 @@
1
- import base64
2
- import io
3
- import os
4
-
5
- import numpy as np
6
- import sklearn
7
- from matplotlib.colors import LinearSegmentedColormap
8
-
9
- from .. import __version__
10
- from ..plots import colors
11
- from . import methods, metrics, models
12
- from .experiments import run_experiments
13
-
14
- try:
15
- import matplotlib
16
- import matplotlib.pyplot as pl
17
- from IPython.display import HTML
18
- except ImportError:
19
- pass
20
-
21
-
22
- metadata = {
23
- # "runtime": {
24
- # "title": "Runtime",
25
- # "sort_order": 1
26
- # },
27
- # "local_accuracy": {
28
- # "title": "Local Accuracy",
29
- # "sort_order": 2
30
- # },
31
- # "consistency_guarantees": {
32
- # "title": "Consistency Guarantees",
33
- # "sort_order": 3
34
- # },
35
- # "keep_positive_mask": {
36
- # "title": "Keep Positive (mask)",
37
- # "xlabel": "Max fraction of features kept",
38
- # "ylabel": "Mean model output",
39
- # "sort_order": 4
40
- # },
41
- # "keep_negative_mask": {
42
- # "title": "Keep Negative (mask)",
43
- # "xlabel": "Max fraction of features kept",
44
- # "ylabel": "Negative mean model output",
45
- # "sort_order": 5
46
- # },
47
- # "keep_absolute_mask__r2": {
48
- # "title": "Keep Absolute (mask)",
49
- # "xlabel": "Max fraction of features kept",
50
- # "ylabel": "R^2",
51
- # "sort_order": 6
52
- # },
53
- # "keep_absolute_mask__roc_auc": {
54
- # "title": "Keep Absolute (mask)",
55
- # "xlabel": "Max fraction of features kept",
56
- # "ylabel": "ROC AUC",
57
- # "sort_order": 6
58
- # },
59
- # "remove_positive_mask": {
60
- # "title": "Remove Positive (mask)",
61
- # "xlabel": "Max fraction of features removed",
62
- # "ylabel": "Negative mean model output",
63
- # "sort_order": 7
64
- # },
65
- # "remove_negative_mask": {
66
- # "title": "Remove Negative (mask)",
67
- # "xlabel": "Max fraction of features removed",
68
- # "ylabel": "Mean model output",
69
- # "sort_order": 8
70
- # },
71
- # "remove_absolute_mask__r2": {
72
- # "title": "Remove Absolute (mask)",
73
- # "xlabel": "Max fraction of features removed",
74
- # "ylabel": "1 - R^2",
75
- # "sort_order": 9
76
- # },
77
- # "remove_absolute_mask__roc_auc": {
78
- # "title": "Remove Absolute (mask)",
79
- # "xlabel": "Max fraction of features removed",
80
- # "ylabel": "1 - ROC AUC",
81
- # "sort_order": 9
82
- # },
83
- # "keep_positive_resample": {
84
- # "title": "Keep Positive (resample)",
85
- # "xlabel": "Max fraction of features kept",
86
- # "ylabel": "Mean model output",
87
- # "sort_order": 10
88
- # },
89
- # "keep_negative_resample": {
90
- # "title": "Keep Negative (resample)",
91
- # "xlabel": "Max fraction of features kept",
92
- # "ylabel": "Negative mean model output",
93
- # "sort_order": 11
94
- # },
95
- # "keep_absolute_resample__r2": {
96
- # "title": "Keep Absolute (resample)",
97
- # "xlabel": "Max fraction of features kept",
98
- # "ylabel": "R^2",
99
- # "sort_order": 12
100
- # },
101
- # "keep_absolute_resample__roc_auc": {
102
- # "title": "Keep Absolute (resample)",
103
- # "xlabel": "Max fraction of features kept",
104
- # "ylabel": "ROC AUC",
105
- # "sort_order": 12
106
- # },
107
- # "remove_positive_resample": {
108
- # "title": "Remove Positive (resample)",
109
- # "xlabel": "Max fraction of features removed",
110
- # "ylabel": "Negative mean model output",
111
- # "sort_order": 13
112
- # },
113
- # "remove_negative_resample": {
114
- # "title": "Remove Negative (resample)",
115
- # "xlabel": "Max fraction of features removed",
116
- # "ylabel": "Mean model output",
117
- # "sort_order": 14
118
- # },
119
- # "remove_absolute_resample__r2": {
120
- # "title": "Remove Absolute (resample)",
121
- # "xlabel": "Max fraction of features removed",
122
- # "ylabel": "1 - R^2",
123
- # "sort_order": 15
124
- # },
125
- # "remove_absolute_resample__roc_auc": {
126
- # "title": "Remove Absolute (resample)",
127
- # "xlabel": "Max fraction of features removed",
128
- # "ylabel": "1 - ROC AUC",
129
- # "sort_order": 15
130
- # },
131
- # "remove_positive_retrain": {
132
- # "title": "Remove Positive (retrain)",
133
- # "xlabel": "Max fraction of features removed",
134
- # "ylabel": "Negative mean model output",
135
- # "sort_order": 11
136
- # },
137
- # "remove_negative_retrain": {
138
- # "title": "Remove Negative (retrain)",
139
- # "xlabel": "Max fraction of features removed",
140
- # "ylabel": "Mean model output",
141
- # "sort_order": 12
142
- # },
143
- # "keep_positive_retrain": {
144
- # "title": "Keep Positive (retrain)",
145
- # "xlabel": "Max fraction of features kept",
146
- # "ylabel": "Mean model output",
147
- # "sort_order": 6
148
- # },
149
- # "keep_negative_retrain": {
150
- # "title": "Keep Negative (retrain)",
151
- # "xlabel": "Max fraction of features kept",
152
- # "ylabel": "Negative mean model output",
153
- # "sort_order": 7
154
- # },
155
- # "batch_remove_absolute__r2": {
156
- # "title": "Batch Remove Absolute",
157
- # "xlabel": "Fraction of features removed",
158
- # "ylabel": "1 - R^2",
159
- # "sort_order": 13
160
- # },
161
- # "batch_keep_absolute__r2": {
162
- # "title": "Batch Keep Absolute",
163
- # "xlabel": "Fraction of features kept",
164
- # "ylabel": "R^2",
165
- # "sort_order": 8
166
- # },
167
- # "batch_remove_absolute__roc_auc": {
168
- # "title": "Batch Remove Absolute",
169
- # "xlabel": "Fraction of features removed",
170
- # "ylabel": "1 - ROC AUC",
171
- # "sort_order": 13
172
- # },
173
- # "batch_keep_absolute__roc_auc": {
174
- # "title": "Batch Keep Absolute",
175
- # "xlabel": "Fraction of features kept",
176
- # "ylabel": "ROC AUC",
177
- # "sort_order": 8
178
- # },
179
-
180
- # "linear_shap_corr": {
181
- # "title": "Linear SHAP (corr)"
182
- # },
183
- # "linear_shap_ind": {
184
- # "title": "Linear SHAP (ind)"
185
- # },
186
- # "coef": {
187
- # "title": "Coefficients"
188
- # },
189
- # "random": {
190
- # "title": "Random"
191
- # },
192
- # "kernel_shap_1000_meanref": {
193
- # "title": "Kernel SHAP 1000 mean ref."
194
- # },
195
- # "sampling_shap_1000": {
196
- # "title": "Sampling SHAP 1000"
197
- # },
198
- # "tree_shap_tree_path_dependent": {
199
- # "title": "Tree SHAP"
200
- # },
201
- # "saabas": {
202
- # "title": "Saabas"
203
- # },
204
- # "tree_gain": {
205
- # "title": "Gain/Gini Importance"
206
- # },
207
- # "mean_abs_tree_shap": {
208
- # "title": "mean(|Tree SHAP|)"
209
- # },
210
- # "lasso_regression": {
211
- # "title": "Lasso Regression"
212
- # },
213
- # "ridge_regression": {
214
- # "title": "Ridge Regression"
215
- # },
216
- # "gbm_regression": {
217
- # "title": "Gradient Boosting Regression"
218
- # }
219
- }
220
-
221
- benchmark_color_map = {
222
- "tree_shap": "#1E88E5",
223
- "deep_shap": "#1E88E5",
224
- "linear_shap_corr": "#1E88E5",
225
- "linear_shap_ind": "#ff0d57",
226
- "coef": "#13B755",
227
- "random": "#999999",
228
- "const_random": "#666666",
229
- "kernel_shap_1000_meanref": "#7C52FF"
230
- }
231
-
232
- # negated_metrics = [
233
- # "runtime",
234
- # "remove_positive_retrain",
235
- # "remove_positive_mask",
236
- # "remove_positive_resample",
237
- # "keep_negative_retrain",
238
- # "keep_negative_mask",
239
- # "keep_negative_resample"
240
- # ]
241
-
242
- # one_minus_metrics = [
243
- # "remove_absolute_mask__r2",
244
- # "remove_absolute_mask__roc_auc",
245
- # "remove_absolute_resample__r2",
246
- # "remove_absolute_resample__roc_auc"
247
- # ]
248
-
249
- def get_method_color(method):
250
- for line in getattr(methods, method).__doc__.split("\n"):
251
- line = line.strip()
252
- if line.startswith("color = "):
253
- v = line.split("=")[1].strip()
254
- if v.startswith("red_blue_circle("):
255
- return colors.red_blue_circle(float(v[16:-1]))
256
- else:
257
- return v
258
- return "#000000"
259
-
260
- def get_method_linestyle(method):
261
- for line in getattr(methods, method).__doc__.split("\n"):
262
- line = line.strip()
263
- if line.startswith("linestyle = "):
264
- return line.split("=")[1].strip()
265
- return "solid"
266
-
267
- def get_metric_attr(metric, attr):
268
- for line in getattr(metrics, metric).__doc__.split("\n"):
269
- line = line.strip()
270
-
271
- # string
272
- prefix = attr+" = \""
273
- suffix = "\""
274
- if line.startswith(prefix) and line.endswith(suffix):
275
- return line[len(prefix):-len(suffix)]
276
-
277
- # number
278
- prefix = attr+" = "
279
- if line.startswith(prefix):
280
- return float(line[len(prefix):])
281
- return ""
282
-
283
- def plot_curve(dataset, model, metric, cmap=benchmark_color_map):
284
- experiments = run_experiments(dataset=dataset, model=model, metric=metric)
285
- pl.figure()
286
- method_arr = []
287
- for (name,(fcounts,scores)) in experiments:
288
- _,_,method,_ = name
289
- transform = get_metric_attr(metric, "transform")
290
- if transform == "negate":
291
- scores = -scores
292
- elif transform == "one_minus":
293
- scores = 1 - scores
294
- auc = sklearn.metrics.auc(fcounts, scores) / fcounts[-1]
295
- method_arr.append((auc, method, scores))
296
- for (auc,method,scores) in sorted(method_arr):
297
- method_title = getattr(methods, method).__doc__.split("\n")[0].strip()
298
- label = f"{auc:6.3f} - " + method_title
299
- pl.plot(
300
- fcounts / fcounts[-1], scores, label=label,
301
- color=get_method_color(method), linewidth=2,
302
- linestyle=get_method_linestyle(method)
303
- )
304
- metric_title = getattr(metrics, metric).__doc__.split("\n")[0].strip()
305
- pl.xlabel(get_metric_attr(metric, "xlabel"))
306
- pl.ylabel(get_metric_attr(metric, "ylabel"))
307
- model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip()
308
- pl.title(metric_title + " - " + model_title)
309
- pl.gca().xaxis.set_ticks_position('bottom')
310
- pl.gca().yaxis.set_ticks_position('left')
311
- pl.gca().spines['right'].set_visible(False)
312
- pl.gca().spines['top'].set_visible(False)
313
- ahandles, alabels = pl.gca().get_legend_handles_labels()
314
- pl.legend(reversed(ahandles), reversed(alabels))
315
- return pl.gcf()
316
-
317
- def plot_human(dataset, model, metric, cmap=benchmark_color_map):
318
- experiments = run_experiments(dataset=dataset, model=model, metric=metric)
319
- pl.figure()
320
- method_arr = []
321
- for (name,(fcounts,scores)) in experiments:
322
- _,_,method,_ = name
323
- diff_sum = np.sum(np.abs(scores[1] - scores[0]))
324
- method_arr.append((diff_sum, method, scores[0], scores[1]))
325
-
326
- inds = np.arange(3) # the x locations for the groups
327
- inc_width = (1.0 / len(method_arr)) * 0.8
328
- width = inc_width * 0.9
329
- pl.bar(inds, method_arr[0][2], width, label="Human Consensus", color="black", edgecolor="white")
330
- i = 1
331
- line_style_to_hatch = {
332
- "dashed": "///",
333
- "dotted": "..."
334
- }
335
- for (diff_sum, method, _, methods_attrs) in sorted(method_arr):
336
- method_title = getattr(methods, method).__doc__.split("\n")[0].strip()
337
- label = f"{diff_sum:.2f} - " + method_title
338
- pl.bar(
339
- inds + inc_width * i, methods_attrs.flatten(), width, label=label, edgecolor="white",
340
- color=get_method_color(method), hatch=line_style_to_hatch.get(get_method_linestyle(method), None)
341
- )
342
- i += 1
343
- metric_title = getattr(metrics, metric).__doc__.split("\n")[0].strip()
344
- pl.xlabel("Features in the model")
345
- pl.ylabel("Feature attribution value")
346
- model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip()
347
- pl.title(metric_title + " - " + model_title)
348
- pl.gca().xaxis.set_ticks_position('bottom')
349
- pl.gca().yaxis.set_ticks_position('left')
350
- pl.gca().spines['right'].set_visible(False)
351
- pl.gca().spines['top'].set_visible(False)
352
- ahandles, alabels = pl.gca().get_legend_handles_labels()
353
- #pl.legend(ahandles, alabels)
354
- pl.xticks(np.array([0, 1, 2, 3]) - (inc_width + width)/2, ["", "", "", ""])
355
-
356
- pl.gca().xaxis.set_minor_locator(matplotlib.ticker.FixedLocator([0.4, 1.4, 2.4]))
357
- pl.gca().xaxis.set_minor_formatter(matplotlib.ticker.FixedFormatter(["Fever", "Cough", "Headache"]))
358
- pl.gca().tick_params(which='minor', length=0)
359
-
360
- pl.axhline(0, color="#aaaaaa", linewidth=0.5)
361
-
362
- box = pl.gca().get_position()
363
- pl.gca().set_position([
364
- box.x0, box.y0 + box.height * 0.3,
365
- box.width, box.height * 0.7
366
- ])
367
-
368
- # Put a legend below current axis
369
- pl.gca().legend(ahandles, alabels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)
370
-
371
- return pl.gcf()
372
-
373
- def _human_score_map(human_consensus, methods_attrs):
374
- """ Converts human agreement differences to numerical scores for coloring.
375
- """
376
-
377
- v = 1 - min(np.sum(np.abs(methods_attrs - human_consensus)) / (np.abs(human_consensus).sum() + 1), 1.0)
378
- return v
379
-
380
- def make_grid(scores, dataset, model, normalize=True, transform=True):
381
- color_vals = {}
382
- metric_sort_order = {}
383
- for (_,_,method,metric),(fcounts,score) in filter(lambda x: x[0][0] == dataset and x[0][1] == model, scores):
384
- metric_sort_order[metric] = get_metric_attr(metric, "sort_order")
385
- if metric not in color_vals:
386
- color_vals[metric] = {}
387
-
388
- if transform:
389
- transform_type = get_metric_attr(metric, "transform")
390
- if transform_type == "negate":
391
- score = -score
392
- elif transform_type == "one_minus":
393
- score = 1 - score
394
- elif transform_type == "negate_log":
395
- score = -np.log10(score)
396
-
397
- if fcounts is None:
398
- color_vals[metric][method] = score
399
- elif fcounts == "human":
400
- color_vals[metric][method] = _human_score_map(*score)
401
- else:
402
- auc = sklearn.metrics.auc(fcounts, score) / fcounts[-1]
403
- color_vals[metric][method] = auc
404
- # print(metric_sort_order)
405
- # col_keys = sorted(list(color_vals.keys()), key=lambda v: metric_sort_order[v])
406
- # print(col_keys)
407
- col_keys = list(color_vals.keys())
408
- row_keys = list({v for k in col_keys for v in color_vals[k].keys()})
409
-
410
- data = -28567 * np.ones((len(row_keys), len(col_keys)))
411
-
412
- for i in range(len(row_keys)):
413
- for j in range(len(col_keys)):
414
- data[i,j] = color_vals[col_keys[j]][row_keys[i]]
415
-
416
- assert np.sum(data == -28567) == 0, "There are missing data values!"
417
-
418
- if normalize:
419
- data = (data - data.min(0)) / (data.max(0) - data.min(0) + 1e-8)
420
-
421
- # sort by performance
422
- inds = np.argsort(-data.mean(1))
423
- row_keys = [row_keys[i] for i in inds]
424
- data = data[inds,:]
425
-
426
- return row_keys, col_keys, data
427
-
428
-
429
-
430
- red_blue_solid = LinearSegmentedColormap('red_blue_solid', {
431
- 'red': ((0.0, 198./255, 198./255),
432
- (1.0, 5./255, 5./255)),
433
-
434
- 'green': ((0.0, 34./255, 34./255),
435
- (1.0, 198./255, 198./255)),
436
-
437
- 'blue': ((0.0, 5./255, 5./255),
438
- (1.0, 24./255, 24./255)),
439
-
440
- 'alpha': ((0.0, 1, 1),
441
- (1.0, 1, 1))
442
- })
443
- def plot_grids(dataset, model_names, out_dir=None):
444
-
445
- if out_dir is not None:
446
- os.mkdir(out_dir)
447
-
448
- scores = []
449
- for model in model_names:
450
- scores.extend(run_experiments(dataset=dataset, model=model))
451
-
452
- prefix = "<style type='text/css'> .shap_benchmark__select:focus { outline-width: 0 }</style>"
453
- out = "" # background: rgb(30, 136, 229)
454
-
455
- # out += "<div style='font-weight: regular; font-size: 24px; text-align: center; background: #f8f8f8; color: #000; padding: 20px;'>SHAP Benchmark</div>\n"
456
- # out += "<div style='height: 1px; background: #ddd;'></div>\n"
457
- #out += "<div style='height: 7px; background-image: linear-gradient(to right, rgb(30, 136, 229), rgb(255, 13, 87));'></div>"
458
-
459
- out += "<div style='position: fixed; left: 0px; top: 0px; right: 0px; height: 230px; background: #fff;'>\n" # box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19);
460
- out += "<div style='position: absolute; bottom: 0px; left: 0px; right: 0px;' align='center'><table style='border-width: 1px; margin-right: 100px'>\n"
461
- for ind,model in enumerate(model_names):
462
- row_keys, col_keys, data = make_grid(scores, dataset, model)
463
- # print(data)
464
- # print(colors.red_blue_solid(0.))
465
- # print(colors.red_blue_solid(1.))
466
- # return
467
- for metric in col_keys:
468
- save_plot = False
469
- if metric.startswith("human_"):
470
- plot_human(dataset, model, metric)
471
- save_plot = True
472
- elif metric not in ["local_accuracy", "runtime", "consistency_guarantees"]:
473
- plot_curve(dataset, model, metric)
474
- save_plot = True
475
-
476
- if save_plot:
477
- buf = io.BytesIO()
478
- pl.gcf().set_size_inches(1200.0/175,1000.0/175)
479
- pl.savefig(buf, format='png', dpi=175)
480
- if out_dir is not None:
481
- pl.savefig(f"{out_dir}/plot_{dataset}_{model}_{metric}.pdf", format='pdf')
482
- pl.close()
483
- buf.seek(0)
484
- data_uri = base64.b64encode(buf.read()).decode('utf-8').replace('\n', '')
485
- plot_id = "plot__"+dataset+"__"+model+"__"+metric
486
- prefix += f"<div onclick='document.getElementById(\"{plot_id}\").style.display = \"none\"' style='display: none; position: fixed; z-index: 10000; left: 0px; right: 0px; top: 0px; bottom: 0px; background: rgba(255,255,255,0.9);' id='{plot_id}'>"
487
- prefix += "<img width='600' height='500' style='margin-left: auto; margin-right: auto; margin-top: 230px; box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19);' src='data:image/png;base64,%s'>" % data_uri
488
- prefix += "</div>"
489
-
490
- model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip()
491
-
492
- if ind == 0:
493
- out += "<tr><td style='background: #fff; width: 250px'></td></td>"
494
- for j in range(data.shape[1]):
495
- metric_title = getattr(metrics, col_keys[j]).__doc__.split("\n")[0].strip()
496
- out += "<td style='width: 40px; min-width: 40px; background: #fff; text-align: right;'><div style='margin-left: 10px; margin-bottom: -5px; white-space: nowrap; transform: rotate(-45deg); transform-origin: left top 0; width: 1.5em; margin-top: 8em'>" + metric_title + "</div></td>"
497
- out += "</tr>\n"
498
- out += "</table></div></div>\n"
499
- out += "<table style='border-width: 1px; margin-right: 100px; margin-top: 230px;'>\n"
500
- out += "<tr><td style='background: #fff'></td><td colspan='%d' style='background: #fff; font-weight: bold; text-align: center; margin-top: 10px;'>%s</td></tr>\n" % (data.shape[1], model_title)
501
- for i in range(data.shape[0]):
502
- out += "<tr>"
503
- # if i == 0:
504
- # out += "<td rowspan='%d' style='background: #fff; text-align: center; white-space: nowrap; vertical-align: middle; '><div style='font-weight: bold; transform: rotate(-90deg); transform-origin: left top 0; width: 1.5em; margin-top: 8em'>%s</div></td>" % (data.shape[0], model_name)
505
- method_title = getattr(methods, row_keys[i]).__doc__.split("\n")[0].strip()
506
- out += "<td style='background: #ffffff; text-align: right; width: 250px' title='shap.LinearExplainer(model)'>" + method_title + "</td>\n"
507
- for j in range(data.shape[1]):
508
- plot_id = "plot__"+dataset+"__"+model+"__"+col_keys[j]
509
- out += "<td onclick='document.getElementById(\"%s\").style.display = \"block\"' style='padding: 0px; padding-left: 0px; padding-right: 0px; border-left: 0px solid #999; width: 42px; min-width: 42px; height: 34px; background-color: #fff'>" % plot_id
510
- #out += "<div style='opacity: "+str(2*(max(1-data[i,j], data[i,j])-0.5))+"; background-color: rgb" + str(tuple(v*255 for v in colors.red_blue_solid(0. if data[i,j] < 0.5 else 1.)[:-1])) + "; height: "+str((30*max(1-data[i,j], data[i,j])))+"px; margin-left: auto; margin-right: auto; width:"+str((30*max(1-data[i,j], data[i,j])))+"px'></div>"
511
- out += "<div style='opacity: "+str(1)+"; background-color: rgb" + str(tuple(int(v*255) for v in colors.red_blue_no_bounds(5*(data[i,j]-0.8))[:-1])) + "; height: "+str(30*data[i,j])+"px; margin-left: auto; margin-right: auto; width:"+str(30*data[i,j])+"px'></div>"
512
- #out += "<div style='float: left; background-color: #eee; height: 10px; width: "+str((40*(1-data[i,j])))+"px'></div>"
513
- out += "</td>\n"
514
- out += "</tr>\n" #
515
-
516
- out += "<tr><td colspan='%d' style='background: #fff'></td></tr>" % (data.shape[1] + 1)
517
- out += "</table>"
518
-
519
- out += "<div style='position: fixed; left: 0px; top: 0px; right: 0px; text-align: left; padding: 20px; text-align: right'>\n"
520
- out += "<div style='float: left; font-weight: regular; font-size: 24px; color: #000;'>SHAP Benchmark <span style='font-size: 14px; color: #777777;'>v"+__version__+"</span></div>\n"
521
- # select {
522
- # margin: 50px;
523
- # width: 150px;
524
- # padding: 5px 35px 5px 5px;
525
- # font-size: 16px;
526
- # border: 1px solid #ccc;
527
- # height: 34px;
528
- # -webkit-appearance: none;
529
- # -moz-appearance: none;
530
- # appearance: none;
531
- # background: url(http://www.stackoverflow.com/favicon.ico) 96% / 15% no-repeat #eee;
532
- # }
533
- #out += "<div style='display: inline-block; margin-right: 20px; font-weight: normal; text-decoration: none; font-size: 18px; color: #000;'>Dataset:</div>\n"
534
-
535
- out += "<select id='shap_benchmark__select' onchange=\"document.location = '../' + this.value + '/index.html'\"dir='rtl' class='shap_benchmark__select' style='font-weight: normal; font-size: 20px; color: #000; padding: 10px; background: #fff; border: 1px solid #fff; -webkit-appearance: none; appearance: none;'>\n"
536
- out += "<option value='human' "+("selected" if dataset == "human" else "")+">Agreement with Human Intuition</option>\n"
537
- out += "<option value='corrgroups60' "+("selected" if dataset == "corrgroups60" else "")+">Correlated Groups 60 Dataset</option>\n"
538
- out += "<option value='independentlinear60' "+("selected" if dataset == "independentlinear60" else "")+">Independent Linear 60 Dataset</option>\n"
539
- #out += "<option>CRIC</option>\n"
540
- out += "</select>\n"
541
- #out += "<script> document.onload = function() { document.getElementById('shap_benchmark__select').value = '"+dataset+"'; }</script>"
542
- #out += "<div style='display: inline-block; margin-left: 20px; font-weight: normal; text-decoration: none; font-size: 18px; color: #000;'>CRIC</div>\n"
543
- out += "</div>\n"
544
-
545
- # output the legend
546
- out += "<table style='border-width: 0px; width: 100px; position: fixed; right: 50px; top: 200px; background: rgba(255, 255, 255, 0.9)'>\n"
547
- out += "<tr><td style='background: #fff; font-weight: normal; text-align: center'>Higher score</td></tr>\n"
548
- legend_size = 21
549
- for i in range(legend_size-9):
550
- out += "<tr>"
551
- out += "<td style='padding: 0px; padding-left: 0px; padding-right: 0px; border-left: 0px solid #999; height: 34px'>"
552
- val = (legend_size-i-1) / (legend_size-1)
553
- out += "<div style='opacity: 1; background-color: rgb" + str(tuple(int(v*255) for v in colors.red_blue_no_bounds(5*(val-0.8)))[:-1]) + "; height: "+str(30*val)+"px; margin-left: auto; margin-right: auto; width:"+str(30*val)+"px'></div>"
554
- out += "</td>"
555
- out += "</tr>\n" #
556
- out += "<tr><td style='background: #fff; font-weight: normal; text-align: center'>Lower score</td></tr>\n"
557
- out += "</table>\n"
558
-
559
- if out_dir is not None:
560
- with open(out_dir + "/index.html", "w") as f:
561
- f.write("<html><body style='margin: 0px; font-size: 16px; font-family: \"Myriad Pro\", Arial, sans-serif;'><center>")
562
- f.write(prefix)
563
- f.write(out)
564
- f.write("</center></body></html>")
565
- else:
566
- return HTML(prefix + out)
 
 
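Note on the removed plotting helpers above: get_method_color, get_method_linestyle and get_metric_attr all pull their display metadata (title, axis labels, color, sort order) out of the docstrings of the benchmark method and metric functions rather than from a config dict. A minimal sketch of that docstring-parsing pattern follows; the keep_positive_mask metric function here is hypothetical, and only the parsing logic mirrors the removed code.

def keep_positive_mask():
    """ Keep Positive (mask)
    xlabel = "Max fraction of features kept"
    ylabel = "Mean model output"
    sort_order = 4
    """

def get_attr(func, attr):
    # scan the docstring for lines of the form  attr = "text"  or  attr = number
    for line in (func.__doc__ or "").split("\n"):
        line = line.strip()
        str_prefix = attr + ' = "'
        if line.startswith(str_prefix) and line.endswith('"'):
            return line[len(str_prefix):-1]          # string attribute
        num_prefix = attr + " = "
        if line.startswith(num_prefix):
            return float(line[len(num_prefix):])     # numeric attribute
    return ""

print(get_attr(keep_positive_mask, "xlabel"))      # -> Max fraction of features kept
print(get_attr(keep_positive_mask, "sort_order"))  # -> 4.0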
lib/shap/cext/_cext.cc DELETED
@@ -1,560 +0,0 @@
1
- #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2
-
3
- #include <Python.h>
4
- #include <numpy/arrayobject.h>
5
- #include "tree_shap.h"
6
- #include <iostream>
7
-
8
- static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args);
9
- static PyObject *_cext_dense_tree_predict(PyObject *self, PyObject *args);
10
- static PyObject *_cext_dense_tree_update_weights(PyObject *self, PyObject *args);
11
- static PyObject *_cext_dense_tree_saabas(PyObject *self, PyObject *args);
12
- static PyObject *_cext_compute_expectations(PyObject *self, PyObject *args);
13
-
14
- static PyMethodDef module_methods[] = {
15
- {"dense_tree_shap", _cext_dense_tree_shap, METH_VARARGS, "C implementation of Tree SHAP for dense."},
16
- {"dense_tree_predict", _cext_dense_tree_predict, METH_VARARGS, "C implementation of tree predictions."},
17
- {"dense_tree_update_weights", _cext_dense_tree_update_weights, METH_VARARGS, "C implementation of tree node weight computations."},
18
- {"dense_tree_saabas", _cext_dense_tree_saabas, METH_VARARGS, "C implementation of Saabas (rough fast approximation to Tree SHAP)."},
19
- {"compute_expectations", _cext_compute_expectations, METH_VARARGS, "Compute expectations of internal nodes."},
20
- {NULL, NULL, 0, NULL}
21
- };
22
-
23
- #if PY_MAJOR_VERSION >= 3
24
- static struct PyModuleDef moduledef = {
25
- PyModuleDef_HEAD_INIT,
26
- "_cext",
27
- "This module provides an interface for a fast Tree SHAP implementation.",
28
- -1,
29
- module_methods,
30
- NULL,
31
- NULL,
32
- NULL,
33
- NULL
34
- };
35
- #endif
36
-
37
- #if PY_MAJOR_VERSION >= 3
38
- PyMODINIT_FUNC PyInit__cext(void)
39
- #else
40
- PyMODINIT_FUNC init_cext(void)
41
- #endif
42
- {
43
- #if PY_MAJOR_VERSION >= 3
44
- PyObject *module = PyModule_Create(&moduledef);
45
- if (!module) return NULL;
46
- #else
47
- PyObject *module = Py_InitModule("_cext", module_methods);
48
- if (!module) return;
49
- #endif
50
-
51
- /* Load `numpy` functionality. */
52
- import_array();
53
-
54
- #if PY_MAJOR_VERSION >= 3
55
- return module;
56
- #endif
57
- }
58
-
59
- static PyObject *_cext_compute_expectations(PyObject *self, PyObject *args)
60
- {
61
- PyObject *children_left_obj;
62
- PyObject *children_right_obj;
63
- PyObject *node_sample_weight_obj;
64
- PyObject *values_obj;
65
-
66
- /* Parse the input tuple */
67
- if (!PyArg_ParseTuple(
68
- args, "OOOO", &children_left_obj, &children_right_obj, &node_sample_weight_obj, &values_obj
69
- )) return NULL;
70
-
71
- /* Interpret the input objects as numpy arrays. */
72
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
73
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
74
- PyArrayObject *node_sample_weight_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weight_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
75
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
76
-
77
- /* If that didn't work, throw an exception. */
78
- if (children_left_array == NULL || children_right_array == NULL ||
79
- values_array == NULL || node_sample_weight_array == NULL) {
80
- Py_XDECREF(children_left_array);
81
- Py_XDECREF(children_right_array);
82
- //PyArray_ResolveWritebackIfCopy(values_array);
83
- Py_XDECREF(values_array);
84
- Py_XDECREF(node_sample_weight_array);
85
- return NULL;
86
- }
87
-
88
- TreeEnsemble tree;
89
-
90
- // number of outputs
91
- tree.num_outputs = PyArray_DIM(values_array, 1);
92
-
93
- /* Get pointers to the data as C-types. */
94
- tree.children_left = (int*)PyArray_DATA(children_left_array);
95
- tree.children_right = (int*)PyArray_DATA(children_right_array);
96
- tree.values = (tfloat*)PyArray_DATA(values_array);
97
- tree.node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weight_array);
98
-
99
- const int max_depth = compute_expectations(tree);
100
-
101
- // clean up the created python objects
102
- Py_XDECREF(children_left_array);
103
- Py_XDECREF(children_right_array);
104
- //PyArray_ResolveWritebackIfCopy(values_array);
105
- Py_XDECREF(values_array);
106
- Py_XDECREF(node_sample_weight_array);
107
-
108
- PyObject *ret = Py_BuildValue("i", max_depth);
109
- return ret;
110
- }
111
-
112
-
113
- static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args)
114
- {
115
- PyObject *children_left_obj;
116
- PyObject *children_right_obj;
117
- PyObject *children_default_obj;
118
- PyObject *features_obj;
119
- PyObject *thresholds_obj;
120
- PyObject *values_obj;
121
- PyObject *node_sample_weights_obj;
122
- int max_depth;
123
- PyObject *X_obj;
124
- PyObject *X_missing_obj;
125
- PyObject *y_obj;
126
- PyObject *R_obj;
127
- PyObject *R_missing_obj;
128
- int tree_limit;
129
- PyObject *out_contribs_obj;
130
- int feature_dependence;
131
- int model_output;
132
- PyObject *base_offset_obj;
133
- bool interactions;
134
-
135
- /* Parse the input tuple */
136
- if (!PyArg_ParseTuple(
137
- args, "OOOOOOOiOOOOOiOOiib", &children_left_obj, &children_right_obj, &children_default_obj,
138
- &features_obj, &thresholds_obj, &values_obj, &node_sample_weights_obj,
139
- &max_depth, &X_obj, &X_missing_obj, &y_obj, &R_obj, &R_missing_obj, &tree_limit, &base_offset_obj,
140
- &out_contribs_obj, &feature_dependence, &model_output, &interactions
141
- )) return NULL;
142
-
143
- /* Interpret the input objects as numpy arrays. */
144
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
145
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
146
- PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
147
- PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
148
- PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
149
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
150
- PyArrayObject *node_sample_weights_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weights_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
151
- PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
152
- PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
153
- PyArrayObject *y_array = NULL;
154
- if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
155
- PyArrayObject *R_array = NULL;
156
- if (R_obj != Py_None) R_array = (PyArrayObject*)PyArray_FROM_OTF(R_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
157
- PyArrayObject *R_missing_array = NULL;
158
- if (R_missing_obj != Py_None) R_missing_array = (PyArrayObject*)PyArray_FROM_OTF(R_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
159
- PyArrayObject *out_contribs_array = (PyArrayObject*)PyArray_FROM_OTF(out_contribs_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
160
- PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
161
-
162
- /* If that didn't work, throw an exception. Note that R and y are optional. */
163
- if (children_left_array == NULL || children_right_array == NULL ||
164
- children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
165
- values_array == NULL || node_sample_weights_array == NULL || X_array == NULL ||
166
- X_missing_array == NULL || out_contribs_array == NULL) {
167
- Py_XDECREF(children_left_array);
168
- Py_XDECREF(children_right_array);
169
- Py_XDECREF(children_default_array);
170
- Py_XDECREF(features_array);
171
- Py_XDECREF(thresholds_array);
172
- Py_XDECREF(values_array);
173
- Py_XDECREF(node_sample_weights_array);
174
- Py_XDECREF(X_array);
175
- Py_XDECREF(X_missing_array);
176
- if (y_array != NULL) Py_XDECREF(y_array);
177
- if (R_array != NULL) Py_XDECREF(R_array);
178
- if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
179
- //PyArray_ResolveWritebackIfCopy(out_contribs_array);
180
- Py_XDECREF(out_contribs_array);
181
- Py_XDECREF(base_offset_array);
182
- return NULL;
183
- }
184
-
185
- const unsigned num_X = PyArray_DIM(X_array, 0);
186
- const unsigned M = PyArray_DIM(X_array, 1);
187
- const unsigned max_nodes = PyArray_DIM(values_array, 1);
188
- const unsigned num_outputs = PyArray_DIM(values_array, 2);
189
- unsigned num_R = 0;
190
- if (R_array != NULL) num_R = PyArray_DIM(R_array, 0);
191
-
192
- // Get pointers to the data as C-types
193
- int *children_left = (int*)PyArray_DATA(children_left_array);
194
- int *children_right = (int*)PyArray_DATA(children_right_array);
195
- int *children_default = (int*)PyArray_DATA(children_default_array);
196
- int *features = (int*)PyArray_DATA(features_array);
197
- tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
198
- tfloat *values = (tfloat*)PyArray_DATA(values_array);
199
- tfloat *node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weights_array);
200
- tfloat *X = (tfloat*)PyArray_DATA(X_array);
201
- bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
202
- tfloat *y = NULL;
203
- if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
204
- tfloat *R = NULL;
205
- if (R_array != NULL) R = (tfloat*)PyArray_DATA(R_array);
206
- bool *R_missing = NULL;
207
- if (R_missing_array != NULL) R_missing = (bool*)PyArray_DATA(R_missing_array);
208
- tfloat *out_contribs = (tfloat*)PyArray_DATA(out_contribs_array);
209
- tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
210
-
211
- // these are just wrapper objects for all the pointers and numbers associated with
212
- // the ensemble tree model and the dataset we are explaining
213
- TreeEnsemble trees = TreeEnsemble(
214
- children_left, children_right, children_default, features, thresholds, values,
215
- node_sample_weights, max_depth, tree_limit, base_offset,
216
- max_nodes, num_outputs
217
- );
218
- ExplanationDataset data = ExplanationDataset(X, X_missing, y, R, R_missing, num_X, M, num_R);
219
-
220
- dense_tree_shap(trees, data, out_contribs, feature_dependence, model_output, interactions);
221
-
222
- // retrieve return value before python cleanup of objects
223
- tfloat ret_value = (double)values[0];
224
-
225
- // clean up the created python objects
226
- Py_XDECREF(children_left_array);
227
- Py_XDECREF(children_right_array);
228
- Py_XDECREF(children_default_array);
229
- Py_XDECREF(features_array);
230
- Py_XDECREF(thresholds_array);
231
- Py_XDECREF(values_array);
232
- Py_XDECREF(node_sample_weights_array);
233
- Py_XDECREF(X_array);
234
- Py_XDECREF(X_missing_array);
235
- if (y_array != NULL) Py_XDECREF(y_array);
236
- if (R_array != NULL) Py_XDECREF(R_array);
237
- if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
238
- //PyArray_ResolveWritebackIfCopy(out_contribs_array);
239
- Py_XDECREF(out_contribs_array);
240
- Py_XDECREF(base_offset_array);
241
-
242
- /* Build the output tuple */
243
- PyObject *ret = Py_BuildValue("d", ret_value);
244
- return ret;
245
- }
246
-
247
-
248
- static PyObject *_cext_dense_tree_predict(PyObject *self, PyObject *args)
249
- {
250
- PyObject *children_left_obj;
251
- PyObject *children_right_obj;
252
- PyObject *children_default_obj;
253
- PyObject *features_obj;
254
- PyObject *thresholds_obj;
255
- PyObject *values_obj;
256
- int max_depth;
257
- int tree_limit;
258
- PyObject *base_offset_obj;
259
- int model_output;
260
- PyObject *X_obj;
261
- PyObject *X_missing_obj;
262
- PyObject *y_obj;
263
- PyObject *out_pred_obj;
264
-
265
- /* Parse the input tuple */
266
- if (!PyArg_ParseTuple(
267
- args, "OOOOOOiiOiOOOO", &children_left_obj, &children_right_obj, &children_default_obj,
268
- &features_obj, &thresholds_obj, &values_obj, &max_depth, &tree_limit, &base_offset_obj, &model_output,
269
- &X_obj, &X_missing_obj, &y_obj, &out_pred_obj
270
- )) return NULL;
271
-
272
- /* Interpret the input objects as numpy arrays. */
273
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
274
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
275
- PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
276
- PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
277
- PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
278
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
279
- PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
280
- PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
281
- PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
282
- PyArrayObject *y_array = NULL;
283
- if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
284
- PyArrayObject *out_pred_array = (PyArrayObject*)PyArray_FROM_OTF(out_pred_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
285
-
286
- /* If that didn't work, throw an exception. Note that R and y are optional. */
287
- if (children_left_array == NULL || children_right_array == NULL ||
288
- children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
289
- values_array == NULL || X_array == NULL ||
290
- X_missing_array == NULL || out_pred_array == NULL) {
291
- Py_XDECREF(children_left_array);
292
- Py_XDECREF(children_right_array);
293
- Py_XDECREF(children_default_array);
294
- Py_XDECREF(features_array);
295
- Py_XDECREF(thresholds_array);
296
- Py_XDECREF(values_array);
297
- Py_XDECREF(base_offset_array);
298
- Py_XDECREF(X_array);
299
- Py_XDECREF(X_missing_array);
300
- if (y_array != NULL) Py_XDECREF(y_array);
301
- //PyArray_ResolveWritebackIfCopy(out_pred_array);
302
- Py_XDECREF(out_pred_array);
303
- return NULL;
304
- }
305
-
306
- const unsigned num_X = PyArray_DIM(X_array, 0);
307
- const unsigned M = PyArray_DIM(X_array, 1);
308
- const unsigned max_nodes = PyArray_DIM(values_array, 1);
309
- const unsigned num_outputs = PyArray_DIM(values_array, 2);
310
-
311
- const unsigned num_offsets = PyArray_DIM(base_offset_array, 0);
312
- if (num_offsets != num_outputs) {
313
- std::cerr << "The passed base_offset array does not have the same number of outputs as the values array: " << num_offsets << " vs. " << num_outputs << std::endl;
314
- return NULL;
315
- }
316
-
317
- // Get pointers to the data as C-types
318
- int *children_left = (int*)PyArray_DATA(children_left_array);
319
- int *children_right = (int*)PyArray_DATA(children_right_array);
320
- int *children_default = (int*)PyArray_DATA(children_default_array);
321
- int *features = (int*)PyArray_DATA(features_array);
322
- tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
323
- tfloat *values = (tfloat*)PyArray_DATA(values_array);
324
- tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
325
- tfloat *X = (tfloat*)PyArray_DATA(X_array);
326
- bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
327
- tfloat *y = NULL;
328
- if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
329
- tfloat *out_pred = (tfloat*)PyArray_DATA(out_pred_array);
330
-
331
- // these are just wrapper objects for all the pointers and numbers associated with
332
- // the ensemble tree model and the dataset we are explaining
333
- TreeEnsemble trees = TreeEnsemble(
334
- children_left, children_right, children_default, features, thresholds, values,
335
- NULL, max_depth, tree_limit, base_offset,
336
- max_nodes, num_outputs
337
- );
338
- ExplanationDataset data = ExplanationDataset(X, X_missing, y, NULL, NULL, num_X, M, 0);
339
-
340
- dense_tree_predict(out_pred, trees, data, model_output);
341
-
342
- // clean up the created python objects
343
- Py_XDECREF(children_left_array);
344
- Py_XDECREF(children_right_array);
345
- Py_XDECREF(children_default_array);
346
- Py_XDECREF(features_array);
347
- Py_XDECREF(thresholds_array);
348
- Py_XDECREF(values_array);
349
- Py_XDECREF(base_offset_array);
350
- Py_XDECREF(X_array);
351
- Py_XDECREF(X_missing_array);
352
- if (y_array != NULL) Py_XDECREF(y_array);
353
- //PyArray_ResolveWritebackIfCopy(out_pred_array);
354
- Py_XDECREF(out_pred_array);
355
-
356
- /* Build the output tuple */
357
- PyObject *ret = Py_BuildValue("d", (double)values[0]);
358
- return ret;
359
- }
360
-
361
-
362
- static PyObject *_cext_dense_tree_update_weights(PyObject *self, PyObject *args)
363
- {
364
- PyObject *children_left_obj;
365
- PyObject *children_right_obj;
366
- PyObject *children_default_obj;
367
- PyObject *features_obj;
368
- PyObject *thresholds_obj;
369
- PyObject *values_obj;
370
- int tree_limit;
371
- PyObject *node_sample_weight_obj;
372
- PyObject *X_obj;
373
- PyObject *X_missing_obj;
374
-
375
- /* Parse the input tuple */
376
- if (!PyArg_ParseTuple(
377
- args, "OOOOOOiOOO", &children_left_obj, &children_right_obj, &children_default_obj,
378
- &features_obj, &thresholds_obj, &values_obj, &tree_limit, &node_sample_weight_obj, &X_obj, &X_missing_obj
379
- )) return NULL;
380
-
381
- /* Interpret the input objects as numpy arrays. */
382
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
383
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
384
- PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
385
- PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
386
- PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
387
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
388
- PyArrayObject *node_sample_weight_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weight_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
389
- PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
390
- PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
391
-
392
- /* If that didn't work, throw an exception. */
393
- if (children_left_array == NULL || children_right_array == NULL ||
394
- children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
395
- values_array == NULL || node_sample_weight_array == NULL || X_array == NULL ||
396
- X_missing_array == NULL) {
397
- Py_XDECREF(children_left_array);
398
- Py_XDECREF(children_right_array);
399
- Py_XDECREF(children_default_array);
400
- Py_XDECREF(features_array);
401
- Py_XDECREF(thresholds_array);
402
- Py_XDECREF(values_array);
403
- //PyArray_ResolveWritebackIfCopy(node_sample_weight_array);
404
- Py_XDECREF(node_sample_weight_array);
405
- Py_XDECREF(X_array);
406
- Py_XDECREF(X_missing_array);
407
- std::cerr << "Found a NULL input array in _cext_dense_tree_update_weights!\n";
408
- return NULL;
409
- }
410
-
411
- const unsigned num_X = PyArray_DIM(X_array, 0);
412
- const unsigned M = PyArray_DIM(X_array, 1);
413
- const unsigned max_nodes = PyArray_DIM(values_array, 1);
414
-
415
- // Get pointers to the data as C-types
416
- int *children_left = (int*)PyArray_DATA(children_left_array);
417
- int *children_right = (int*)PyArray_DATA(children_right_array);
418
- int *children_default = (int*)PyArray_DATA(children_default_array);
419
- int *features = (int*)PyArray_DATA(features_array);
420
- tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
421
- tfloat *values = (tfloat*)PyArray_DATA(values_array);
422
- tfloat *node_sample_weight = (tfloat*)PyArray_DATA(node_sample_weight_array);
423
- tfloat *X = (tfloat*)PyArray_DATA(X_array);
424
- bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
425
-
426
- // these are just wrapper objects for all the pointers and numbers associated with
427
- // the ensemble tree model and the dataset we are explaining
428
- TreeEnsemble trees = TreeEnsemble(
429
- children_left, children_right, children_default, features, thresholds, values,
430
- node_sample_weight, 0, tree_limit, 0, max_nodes, 0
431
- );
432
- ExplanationDataset data = ExplanationDataset(X, X_missing, NULL, NULL, NULL, num_X, M, 0);
433
-
434
- dense_tree_update_weights(trees, data);
435
-
436
- // clean up the created python objects
437
- Py_XDECREF(children_left_array);
438
- Py_XDECREF(children_right_array);
439
- Py_XDECREF(children_default_array);
440
- Py_XDECREF(features_array);
441
- Py_XDECREF(thresholds_array);
442
- Py_XDECREF(values_array);
443
- // PyArray_ResolveWritebackIfCopy(node_sample_weight_array);
444
- Py_XDECREF(node_sample_weight_array);
445
- Py_XDECREF(X_array);
446
- Py_XDECREF(X_missing_array);
447
-
448
- /* Build the output tuple */
449
- PyObject *ret = Py_BuildValue("d", 1);
450
- return ret;
451
- }
452
-
453
-
454
- static PyObject *_cext_dense_tree_saabas(PyObject *self, PyObject *args)
455
- {
456
- PyObject *children_left_obj;
457
- PyObject *children_right_obj;
458
- PyObject *children_default_obj;
459
- PyObject *features_obj;
460
- PyObject *thresholds_obj;
461
- PyObject *values_obj;
462
- int max_depth;
463
- int tree_limit;
464
- PyObject *base_offset_obj;
465
- int model_output;
466
- PyObject *X_obj;
467
- PyObject *X_missing_obj;
468
- PyObject *y_obj;
469
- PyObject *out_pred_obj;
470
-
471
-
472
- /* Parse the input tuple */
473
- if (!PyArg_ParseTuple(
474
- args, "OOOOOOiiOiOOOO", &children_left_obj, &children_right_obj, &children_default_obj,
475
- &features_obj, &thresholds_obj, &values_obj, &max_depth, &tree_limit, &base_offset_obj, &model_output,
476
- &X_obj, &X_missing_obj, &y_obj, &out_pred_obj
477
- )) return NULL;
478
-
479
- /* Interpret the input objects as numpy arrays. */
480
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
481
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
482
- PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
483
- PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
484
- PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
485
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
486
- PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
487
- PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
488
- PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
489
- PyArrayObject *y_array = NULL;
490
- if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
491
- PyArrayObject *out_pred_array = (PyArrayObject*)PyArray_FROM_OTF(out_pred_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
492
-
493
- /* If that didn't work, throw an exception. Note that R and y are optional. */
494
- if (children_left_array == NULL || children_right_array == NULL ||
495
- children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
496
- values_array == NULL || X_array == NULL ||
497
- X_missing_array == NULL || out_pred_array == NULL) {
498
- Py_XDECREF(children_left_array);
499
- Py_XDECREF(children_right_array);
500
- Py_XDECREF(children_default_array);
501
- Py_XDECREF(features_array);
502
- Py_XDECREF(thresholds_array);
503
- Py_XDECREF(values_array);
504
- Py_XDECREF(base_offset_array);
505
- Py_XDECREF(X_array);
506
- Py_XDECREF(X_missing_array);
507
- if (y_array != NULL) Py_XDECREF(y_array);
508
- //PyArray_ResolveWritebackIfCopy(out_pred_array);
509
- Py_XDECREF(out_pred_array);
510
- return NULL;
511
- }
512
-
513
- const unsigned num_X = PyArray_DIM(X_array, 0);
514
- const unsigned M = PyArray_DIM(X_array, 1);
515
- const unsigned max_nodes = PyArray_DIM(values_array, 1);
516
- const unsigned num_outputs = PyArray_DIM(values_array, 2);
517
-
518
- // Get pointers to the data as C-types
519
- int *children_left = (int*)PyArray_DATA(children_left_array);
520
- int *children_right = (int*)PyArray_DATA(children_right_array);
521
- int *children_default = (int*)PyArray_DATA(children_default_array);
522
- int *features = (int*)PyArray_DATA(features_array);
523
- tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
524
- tfloat *values = (tfloat*)PyArray_DATA(values_array);
525
- tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
526
- tfloat *X = (tfloat*)PyArray_DATA(X_array);
527
- bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
528
- tfloat *y = NULL;
529
- if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
530
- tfloat *out_pred = (tfloat*)PyArray_DATA(out_pred_array);
531
-
532
- // these are just wrapper objects for all the pointers and numbers associated with
533
- // the ensemble tree model and the dataset we are explaining
534
- TreeEnsemble trees = TreeEnsemble(
535
- children_left, children_right, children_default, features, thresholds, values,
536
- NULL, max_depth, tree_limit, base_offset,
537
- max_nodes, num_outputs
538
- );
539
- ExplanationDataset data = ExplanationDataset(X, X_missing, y, NULL, NULL, num_X, M, 0);
540
-
541
- dense_tree_saabas(out_pred, trees, data);
542
-
543
- // clean up the created python objects
544
- Py_XDECREF(children_left_array);
545
- Py_XDECREF(children_right_array);
546
- Py_XDECREF(children_default_array);
547
- Py_XDECREF(features_array);
548
- Py_XDECREF(thresholds_array);
549
- Py_XDECREF(values_array);
550
- Py_XDECREF(base_offset_array);
551
- Py_XDECREF(X_array);
552
- Py_XDECREF(X_missing_array);
553
- if (y_array != NULL) Py_XDECREF(y_array);
554
- //PyArray_ResolveWritebackIfCopy(out_pred_array);
555
- Py_XDECREF(out_pred_array);
556
-
557
- /* Build the output tuple */
558
- PyObject *ret = Py_BuildValue("d", (double)values[0]);
559
- return ret;
560
- }
 
 
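The deleted _cext.cc above registers dense_tree_saabas as a "rough fast approximation to Tree SHAP". For reference, the Saabas idea it implements in C can be sketched in pure Python roughly as follows; the dict-based tree layout is illustrative only, not the array layout the extension actually consumes.

def saabas_contributions(tree, x):
    """Walk x's decision path and credit each split feature with the change in
    expected node value; returns (root expectation, {feature: contribution})."""
    contribs = {}
    node = 0
    while tree["children_left"][node] != -1:              # -1 marks a leaf
        feat = tree["features"][node]
        child = (tree["children_left"][node]
                 if x[feat] <= tree["thresholds"][node]
                 else tree["children_right"][node])
        contribs[feat] = contribs.get(feat, 0.0) + tree["values"][child] - tree["values"][node]
        node = child
    return tree["values"][0], contribs

# toy tree: one split on feature 0 at threshold 0.5, equally weighted leaves
tree = {
    "children_left":  [1, -1, -1],
    "children_right": [2, -1, -1],
    "features":       [0,  0,  0],
    "thresholds":     [0.5, 0.0, 0.0],
    "values":         [0.5, 0.25, 0.75],   # expected model output at each node
}
print(saabas_contributions(tree, [0.9]))   # (0.5, {0: 0.25}); 0.5 + 0.25 equals the leaf value 0.75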
lib/shap/cext/_cext_gpu.cc DELETED
@@ -1,187 +0,0 @@
1
- #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2
-
3
- #include <Python.h>
4
- #include <numpy/arrayobject.h>
5
- #include "tree_shap.h"
6
- #include <iostream>
7
-
8
- static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args);
9
-
10
- static PyMethodDef module_methods[] = {
11
- {"dense_tree_shap", _cext_dense_tree_shap, METH_VARARGS, "C implementation of Tree SHAP for dense."},
12
- {NULL, NULL, 0, NULL}
13
- };
14
-
15
- #if PY_MAJOR_VERSION >= 3
16
- static struct PyModuleDef moduledef = {
17
- PyModuleDef_HEAD_INIT,
18
- "_cext_gpu",
19
- "This module provides an interface for a fast Tree SHAP implementation.",
20
- -1,
21
- module_methods,
22
- NULL,
23
- NULL,
24
- NULL,
25
- NULL
26
- };
27
- #endif
28
-
29
- #if PY_MAJOR_VERSION >= 3
30
- PyMODINIT_FUNC PyInit__cext_gpu(void)
31
- #else
32
- PyMODINIT_FUNC init_cext(void)
33
- #endif
34
- {
35
- #if PY_MAJOR_VERSION >= 3
36
- PyObject *module = PyModule_Create(&moduledef);
37
- if (!module) return NULL;
38
- #else
39
- PyObject *module = Py_InitModule("_cext", module_methods);
40
- if (!module) return;
41
- #endif
42
-
43
- /* Load `numpy` functionality. */
44
- import_array();
45
-
46
- #if PY_MAJOR_VERSION >= 3
47
- return module;
48
- #endif
49
- }
50
-
51
- void dense_tree_shap_gpu(const TreeEnsemble& trees, const ExplanationDataset &data, tfloat *out_contribs,
52
- const int feature_dependence, unsigned model_transform, bool interactions);
53
-
54
- static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args)
55
- {
56
- PyObject *children_left_obj;
57
- PyObject *children_right_obj;
58
- PyObject *children_default_obj;
59
- PyObject *features_obj;
60
- PyObject *thresholds_obj;
61
- PyObject *values_obj;
62
- PyObject *node_sample_weights_obj;
63
- int max_depth;
64
- PyObject *X_obj;
65
- PyObject *X_missing_obj;
66
- PyObject *y_obj;
67
- PyObject *R_obj;
68
- PyObject *R_missing_obj;
69
- int tree_limit;
70
- PyObject *out_contribs_obj;
71
- int feature_dependence;
72
- int model_output;
73
- PyObject *base_offset_obj;
74
- bool interactions;
75
-
76
- /* Parse the input tuple */
77
- if (!PyArg_ParseTuple(
78
- args, "OOOOOOOiOOOOOiOOiib", &children_left_obj, &children_right_obj, &children_default_obj,
79
- &features_obj, &thresholds_obj, &values_obj, &node_sample_weights_obj,
80
- &max_depth, &X_obj, &X_missing_obj, &y_obj, &R_obj, &R_missing_obj, &tree_limit, &base_offset_obj,
81
- &out_contribs_obj, &feature_dependence, &model_output, &interactions
82
- )) return NULL;
83
-
84
- /* Interpret the input objects as numpy arrays. */
85
- PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
86
- PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
87
- PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
88
- PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
89
- PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
90
- PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
91
- PyArrayObject *node_sample_weights_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weights_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
92
- PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
93
- PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
94
- PyArrayObject *y_array = NULL;
95
- if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
96
- PyArrayObject *R_array = NULL;
97
- if (R_obj != Py_None) R_array = (PyArrayObject*)PyArray_FROM_OTF(R_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
98
- PyArrayObject *R_missing_array = NULL;
99
- if (R_missing_obj != Py_None) R_missing_array = (PyArrayObject*)PyArray_FROM_OTF(R_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
100
- PyArrayObject *out_contribs_array = (PyArrayObject*)PyArray_FROM_OTF(out_contribs_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
101
- PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
102
-
103
- /* If that didn't work, throw an exception. Note that R and y are optional. */
104
- if (children_left_array == NULL || children_right_array == NULL ||
105
- children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
106
- values_array == NULL || node_sample_weights_array == NULL || X_array == NULL ||
107
- X_missing_array == NULL || out_contribs_array == NULL) {
108
- Py_XDECREF(children_left_array);
109
- Py_XDECREF(children_right_array);
110
- Py_XDECREF(children_default_array);
111
- Py_XDECREF(features_array);
112
- Py_XDECREF(thresholds_array);
113
- Py_XDECREF(values_array);
114
- Py_XDECREF(node_sample_weights_array);
115
- Py_XDECREF(X_array);
116
- Py_XDECREF(X_missing_array);
117
- if (y_array != NULL) Py_XDECREF(y_array);
118
- if (R_array != NULL) Py_XDECREF(R_array);
119
- if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
120
- //PyArray_ResolveWritebackIfCopy(out_contribs_array);
121
- Py_XDECREF(out_contribs_array);
122
- Py_XDECREF(base_offset_array);
123
- return NULL;
124
- }
125
-
126
- const unsigned num_X = PyArray_DIM(X_array, 0);
127
- const unsigned M = PyArray_DIM(X_array, 1);
128
- const unsigned max_nodes = PyArray_DIM(values_array, 1);
129
- const unsigned num_outputs = PyArray_DIM(values_array, 2);
130
- unsigned num_R = 0;
131
- if (R_array != NULL) num_R = PyArray_DIM(R_array, 0);
132
-
133
- // Get pointers to the data as C-types
134
- int *children_left = (int*)PyArray_DATA(children_left_array);
135
- int *children_right = (int*)PyArray_DATA(children_right_array);
136
- int *children_default = (int*)PyArray_DATA(children_default_array);
137
- int *features = (int*)PyArray_DATA(features_array);
138
- tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
139
- tfloat *values = (tfloat*)PyArray_DATA(values_array);
140
- tfloat *node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weights_array);
141
- tfloat *X = (tfloat*)PyArray_DATA(X_array);
142
- bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
143
- tfloat *y = NULL;
144
- if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
145
- tfloat *R = NULL;
146
- if (R_array != NULL) R = (tfloat*)PyArray_DATA(R_array);
147
- bool *R_missing = NULL;
148
- if (R_missing_array != NULL) R_missing = (bool*)PyArray_DATA(R_missing_array);
149
- tfloat *out_contribs = (tfloat*)PyArray_DATA(out_contribs_array);
150
- tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
151
-
152
- // these are just wrapper objects for all the pointers and numbers associated with
153
- // the ensemble tree model and the dataset we are explaining
154
- TreeEnsemble trees = TreeEnsemble(
155
- children_left, children_right, children_default, features, thresholds, values,
156
- node_sample_weights, max_depth, tree_limit, base_offset,
157
- max_nodes, num_outputs
158
- );
159
- ExplanationDataset data = ExplanationDataset(X, X_missing, y, R, R_missing, num_X, M, num_R);
160
-
161
- dense_tree_shap_gpu(trees, data, out_contribs, feature_dependence, model_output, interactions);
162
-
163
-
164
- // retrieve return value before python cleanup of objects
165
- tfloat ret_value = (double)values[0];
166
-
167
- // clean up the created python objects
168
- Py_XDECREF(children_left_array);
169
- Py_XDECREF(children_right_array);
170
- Py_XDECREF(children_default_array);
171
- Py_XDECREF(features_array);
172
- Py_XDECREF(thresholds_array);
173
- Py_XDECREF(values_array);
174
- Py_XDECREF(node_sample_weights_array);
175
- Py_XDECREF(X_array);
176
- Py_XDECREF(X_missing_array);
177
- if (y_array != NULL) Py_XDECREF(y_array);
178
- if (R_array != NULL) Py_XDECREF(R_array);
179
- if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
180
- //PyArray_ResolveWritebackIfCopy(out_contribs_array);
181
- Py_XDECREF(out_contribs_array);
182
- Py_XDECREF(base_offset_array);
183
-
184
- /* Build the output tuple */
185
- PyObject *ret = Py_BuildValue("d", ret_value);
186
- return ret;
187
- }
 
 
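The GPU wrapper above hands the same tree arrays to dense_tree_shap_gpu; the CUDA side (the _cext_gpu.cu diff below) first flattens every tree into root-to-leaf paths via RecurseTree, where each path element records the split feature, the half-open interval of feature values that follows the branch, and the fraction of training samples taking it. A small Python sketch of that path extraction, using an illustrative dict-based tree (hypothetical layout, not the extension's):

import math

def extract_paths(tree, node=0, prefix=None, out=None):
    prefix = [] if prefix is None else prefix
    out = [] if out is None else out
    left, right = tree["children_left"][node], tree["children_right"][node]
    if left == -1:                                    # leaf: emit the finished path
        out.append((tree["values"][node], list(prefix)))
        return out
    feat, thr = tree["features"][node], tree["thresholds"][node]
    frac_left = tree["weights"][left] / tree["weights"][node]
    # left branch covers feature values in (-inf, thr], right branch covers (thr, +inf)
    extract_paths(tree, left,  prefix + [(feat, -math.inf, thr, frac_left)], out)
    extract_paths(tree, right, prefix + [(feat, thr, math.inf, 1.0 - frac_left)], out)
    return out

tree = {
    "children_left":  [1, -1, -1],
    "children_right": [2, -1, -1],
    "features":       [0, 0, 0],
    "thresholds":     [0.5, 0.0, 0.0],
    "values":         [0.5, 0.25, 0.75],
    "weights":        [10, 6, 4],          # training samples reaching each node
}
print(extract_paths(tree))
# [(0.25, [(0, -inf, 0.5, 0.6)]), (0.75, [(0, 0.5, inf, 0.4)])]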
lib/shap/cext/_cext_gpu.cu DELETED
@@ -1,353 +0,0 @@
1
- #include <Python.h>
2
-
3
- #include "gpu_treeshap.h"
4
- #include "tree_shap.h"
5
-
6
- const float inf = std::numeric_limits<tfloat>::infinity();
7
-
8
- struct ShapSplitCondition {
9
- ShapSplitCondition() = default;
10
- ShapSplitCondition(tfloat feature_lower_bound, tfloat feature_upper_bound,
11
- bool is_missing_branch)
12
- : feature_lower_bound(feature_lower_bound),
13
- feature_upper_bound(feature_upper_bound),
14
- is_missing_branch(is_missing_branch) {
15
- assert(feature_lower_bound <= feature_upper_bound);
16
- }
17
-
18
- /*! Feature values > lower and <= upper flow down this path. */
19
- tfloat feature_lower_bound;
20
- tfloat feature_upper_bound;
21
- /*! Do missing values flow down this path? */
22
- bool is_missing_branch;
23
-
24
- // Does this instance flow down this path?
25
- __host__ __device__ bool EvaluateSplit(float x) const {
26
- // is nan
27
- if (isnan(x)) {
28
- return is_missing_branch;
29
- }
30
- return x > feature_lower_bound && x <= feature_upper_bound;
31
- }
32
-
33
- // Combine two split conditions on the same feature
34
- __host__ __device__ void
35
- Merge(const ShapSplitCondition &other) { // Combine duplicate features
36
- feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
37
- feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
38
- is_missing_branch = is_missing_branch && other.is_missing_branch;
39
- }
40
- };
41
-
42
-
43
- // Inspired by: https://en.cppreference.com/w/cpp/iterator/size
44
- // Limited implementation of std::size for arrays
45
- template <class T, size_t N>
46
- constexpr size_t array_size(const T (&array)[N]) noexcept
47
- {
48
- return N;
49
- }
50
-
51
- void RecurseTree(
52
- unsigned pos, const TreeEnsemble &tree,
53
- std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *tmp_path,
54
- std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *paths,
55
- size_t *path_idx, int num_outputs) {
56
- if (tree.is_leaf(pos)) {
57
- for (auto j = 0ull; j < num_outputs; j++) {
58
- auto v = tree.values[pos * num_outputs + j];
59
- if (v == 0.0) {
60
- // The tree has no output for this class, don't bother adding the path
61
- continue;
62
- }
63
- // Go back over path, setting v, path_idx
64
- for (auto &e : *tmp_path) {
65
- e.v = v;
66
- e.group = j;
67
- e.path_idx = *path_idx;
68
- }
69
-
70
- paths->insert(paths->end(), tmp_path->begin(), tmp_path->end());
71
- // Increment path index
72
- (*path_idx)++;
73
- }
74
- return;
75
- }
76
-
77
- // Add left split to the path
78
- unsigned left_child = tree.children_left[pos];
79
- double left_zero_fraction =
80
- tree.node_sample_weights[left_child] / tree.node_sample_weights[pos];
81
- // Encode the range of feature values that flow down this path
82
- tmp_path->emplace_back(0, tree.features[pos], 0,
83
- ShapSplitCondition{-inf, tree.thresholds[pos], false},
84
- left_zero_fraction, 0.0f);
85
-
86
- RecurseTree(left_child, tree, tmp_path, paths, path_idx, num_outputs);
87
-
88
- // Add right split to the path
89
- tmp_path->back() = gpu_treeshap::PathElement<ShapSplitCondition>(
90
- 0, tree.features[pos], 0,
91
- ShapSplitCondition{tree.thresholds[pos], inf, false},
92
- 1.0 - left_zero_fraction, 0.0f);
93
-
94
- RecurseTree(tree.children_right[pos], tree, tmp_path, paths, path_idx,
95
- num_outputs);
96
-
97
- tmp_path->pop_back();
98
- }
99
-
100
- std::vector<gpu_treeshap::PathElement<ShapSplitCondition>>
101
- ExtractPaths(const TreeEnsemble &trees) {
102
- std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> paths;
103
- size_t path_idx = 0;
104
- for (auto i = 0; i < trees.tree_limit; i++) {
105
- TreeEnsemble tree;
106
- trees.get_tree(tree, i);
107
- std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> tmp_path;
108
- tmp_path.reserve(tree.max_depth);
109
- tmp_path.emplace_back(0, -1, 0, ShapSplitCondition{-inf, inf, false}, 1.0,
110
- 0.0f);
111
- RecurseTree(0, tree, &tmp_path, &paths, &path_idx, tree.num_outputs);
112
- }
113
- return paths;
114
- }
115
-
116
- class DeviceExplanationDataset {
117
- thrust::device_vector<tfloat> data;
118
- thrust::device_vector<bool> missing;
119
- size_t num_features;
120
- size_t num_rows;
121
-
122
- public:
123
- DeviceExplanationDataset(const ExplanationDataset &host_data,
124
- bool background_dataset = false) {
125
- num_features = host_data.M;
126
- if (background_dataset) {
127
- num_rows = host_data.num_R;
128
- data = thrust::device_vector<tfloat>(
129
- host_data.R, host_data.R + host_data.num_R * host_data.M);
130
- missing = thrust::device_vector<bool>(host_data.R_missing,
131
- host_data.R_missing +
132
- host_data.num_R * host_data.M);
133
-
134
- } else {
135
- num_rows = host_data.num_X;
136
- data = thrust::device_vector<tfloat>(
137
- host_data.X, host_data.X + host_data.num_X * host_data.M);
138
- missing = thrust::device_vector<bool>(host_data.X_missing,
139
- host_data.X_missing +
140
- host_data.num_X * host_data.M);
141
- }
142
- }
143
-
144
- class DenseDatasetWrapper {
145
- const tfloat *data;
146
- const bool *missing;
147
- int num_rows;
148
- int num_cols;
149
-
150
- public:
151
- DenseDatasetWrapper() = default;
152
- DenseDatasetWrapper(const tfloat *data, const bool *missing, int num_rows,
153
- int num_cols)
154
- : data(data), missing(missing), num_rows(num_rows), num_cols(num_cols) {
155
- }
156
- __device__ tfloat GetElement(size_t row_idx, size_t col_idx) const {
157
- auto idx = row_idx * num_cols + col_idx;
158
- if (missing[idx]) {
159
- return std::numeric_limits<tfloat>::quiet_NaN();
160
- }
161
- return data[idx];
162
- }
163
- __host__ __device__ size_t NumRows() const { return num_rows; }
164
- __host__ __device__ size_t NumCols() const { return num_cols; }
165
- };
166
-
167
- DenseDatasetWrapper GetDeviceAccessor() {
168
- return DenseDatasetWrapper(data.data().get(), missing.data().get(),
169
- num_rows, num_features);
170
- }
171
- };
172
-
173
- inline void dense_tree_path_dependent_gpu(
174
- const TreeEnsemble &trees, const ExplanationDataset &data,
175
- tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
176
- auto paths = ExtractPaths(trees);
177
- DeviceExplanationDataset device_data(data);
178
- DeviceExplanationDataset::DenseDatasetWrapper X =
179
- device_data.GetDeviceAccessor();
180
-
181
- thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
182
- trees.num_outputs);
183
- gpu_treeshap::GPUTreeShap(X, paths.begin(), paths.end(), trees.num_outputs,
184
- phis.begin(), phis.end());
185
- // Add the base offset term to bias
186
- thrust::device_vector<double> base_offset(
187
- trees.base_offset, trees.base_offset + trees.num_outputs);
188
- auto counting = thrust::make_counting_iterator(size_t(0));
189
- auto d_phis = phis.data().get();
190
- auto d_base_offset = base_offset.data().get();
191
- size_t num_groups = trees.num_outputs;
192
- thrust::for_each(counting, counting + X.NumRows() * trees.num_outputs,
193
- [=] __device__(size_t idx) {
194
- size_t row_idx = idx / num_groups;
195
- size_t group = idx % num_groups;
196
- auto phi_idx = gpu_treeshap::IndexPhi(
197
- row_idx, num_groups, group, X.NumCols(), X.NumCols());
198
- d_phis[phi_idx] += d_base_offset[group];
199
- });
200
-
201
- // Shap uses a slightly different layout for multiclass
202
- thrust::device_vector<float> transposed_phis(phis.size());
203
- auto d_transposed_phis = transposed_phis.data();
204
- thrust::for_each(
205
- counting, counting + phis.size(), [=] __device__(size_t idx) {
206
- size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
207
- size_t old_idx[array_size(old_shape)];
208
- gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
209
- // Define new tensor format, switch num_groups axis to end
210
- size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
211
- size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
212
- size_t transposed_idx =
213
- gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
214
- d_transposed_phis[transposed_idx] = d_phis[idx];
215
- });
216
- thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
217
- }
218
-
219
- inline void
220
- dense_tree_independent_gpu(const TreeEnsemble &trees,
221
- const ExplanationDataset &data, tfloat *out_contribs,
222
- tfloat transform(const tfloat, const tfloat)) {
223
- auto paths = ExtractPaths(trees);
224
- DeviceExplanationDataset device_data(data);
225
- DeviceExplanationDataset::DenseDatasetWrapper X =
226
- device_data.GetDeviceAccessor();
227
- DeviceExplanationDataset background_device_data(data, true);
228
- DeviceExplanationDataset::DenseDatasetWrapper R =
229
- background_device_data.GetDeviceAccessor();
230
-
231
- thrust::device_vector<float> phis((X.NumCols() + 1) * X.NumRows() *
232
- trees.num_outputs);
233
- gpu_treeshap::GPUTreeShapInterventional(X, R, paths.begin(), paths.end(),
234
- trees.num_outputs, phis.begin(),
235
- phis.end());
236
- // Add the base offset term to bias
237
- thrust::device_vector<double> base_offset(
238
- trees.base_offset, trees.base_offset + trees.num_outputs);
239
- auto counting = thrust::make_counting_iterator(size_t(0));
240
- auto d_phis = phis.data().get();
241
- auto d_base_offset = base_offset.data().get();
242
- size_t num_groups = trees.num_outputs;
243
- thrust::for_each(counting, counting + X.NumRows() * trees.num_outputs,
244
- [=] __device__(size_t idx) {
245
- size_t row_idx = idx / num_groups;
246
- size_t group = idx % num_groups;
247
- auto phi_idx = gpu_treeshap::IndexPhi(
248
- row_idx, num_groups, group, X.NumCols(), X.NumCols());
249
- d_phis[phi_idx] += d_base_offset[group];
250
- });
251
-
252
- // Shap uses a slightly different layout for multiclass
253
- thrust::device_vector<float> transposed_phis(phis.size());
254
- auto d_transposed_phis = transposed_phis.data();
255
- thrust::for_each(
256
- counting, counting + phis.size(), [=] __device__(size_t idx) {
257
- size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
258
- size_t old_idx[array_size(old_shape)];
259
- gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
260
- // Define new tensor format, switch num_groups axis to end
261
- size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
262
- size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
263
- size_t transposed_idx =
264
- gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
265
- d_transposed_phis[transposed_idx] = d_phis[idx];
266
- });
267
- thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
268
- }
269
-
270
- inline void dense_tree_path_dependent_interactions_gpu(
271
- const TreeEnsemble &trees, const ExplanationDataset &data,
272
- tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
273
- auto paths = ExtractPaths(trees);
274
- DeviceExplanationDataset device_data(data);
275
- DeviceExplanationDataset::DenseDatasetWrapper X =
276
- device_data.GetDeviceAccessor();
277
-
278
- thrust::device_vector<float> phis((X.NumCols() + 1) * (X.NumCols() + 1) *
279
- X.NumRows() * trees.num_outputs);
280
- gpu_treeshap::GPUTreeShapInteractions(X, paths.begin(), paths.end(),
281
- trees.num_outputs, phis.begin(),
282
- phis.end());
283
- // Add the base offset term to bias
284
- thrust::device_vector<double> base_offset(
285
- trees.base_offset, trees.base_offset + trees.num_outputs);
286
- auto counting = thrust::make_counting_iterator(size_t(0));
287
- auto d_phis = phis.data().get();
288
- auto d_base_offset = base_offset.data().get();
289
- size_t num_groups = trees.num_outputs;
290
- thrust::for_each(counting, counting + X.NumRows() * num_groups,
291
- [=] __device__(size_t idx) {
292
- size_t row_idx = idx / num_groups;
293
- size_t group = idx % num_groups;
294
- auto phi_idx = gpu_treeshap::IndexPhiInteractions(
295
- row_idx, num_groups, group, X.NumCols(), X.NumCols(),
296
- X.NumCols());
297
- d_phis[phi_idx] += d_base_offset[group];
298
- });
299
- // Shap uses a slightly different layout for multiclass
300
- thrust::device_vector<float> transposed_phis(phis.size());
301
- auto d_transposed_phis = transposed_phis.data();
302
- thrust::for_each(
303
- counting, counting + phis.size(), [=] __device__(size_t idx) {
304
- size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1),
305
- (X.NumCols() + 1)};
306
- size_t old_idx[array_size(old_shape)];
307
- gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
308
- // Define new tensor format, switch num_groups axis to end
309
- size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), (X.NumCols() + 1),
310
- num_groups};
311
- size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[3], old_idx[1]};
312
- size_t transposed_idx =
313
- gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
314
- d_transposed_phis[transposed_idx] = d_phis[idx];
315
- });
316
- thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
317
- }
318
-
319
- void dense_tree_shap_gpu(const TreeEnsemble &trees,
320
- const ExplanationDataset &data, tfloat *out_contribs,
321
- const int feature_dependence, unsigned model_transform,
322
- bool interactions) {
323
- // see what transform (if any) we have
324
- transform_f transform = get_transform(model_transform);
325
-
326
- // dispatch to the correct algorithm handler
327
- switch (feature_dependence) {
328
- case FEATURE_DEPENDENCE::independent:
329
- if (interactions) {
330
- std::cerr << "FEATURE_DEPENDENCE::independent with interactions not yet "
331
- "supported\n";
332
- } else {
333
- dense_tree_independent_gpu(trees, data, out_contribs, transform);
334
- }
335
- return;
336
-
337
- case FEATURE_DEPENDENCE::tree_path_dependent:
338
- if (interactions) {
339
- dense_tree_path_dependent_interactions_gpu(trees, data, out_contribs,
340
- transform);
341
- } else {
342
- dense_tree_path_dependent_gpu(trees, data, out_contribs, transform);
343
- }
344
- return;
345
-
346
- case FEATURE_DEPENDENCE::global_path_dependent:
347
- std::cerr << "FEATURE_DEPENDENCE::global_path_dependent not supported\n";
348
- return;
349
- default:
350
- std::cerr << "Unknown feature dependence option\n";
351
- return;
352
- }
353
- }
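Each of the *_gpu routines above finishes by re-ordering the phi buffer: GPUTreeShap writes values in a (row, group, column) layout where the last column holds the bias, while the shap package expects (row, column, group). The host-only C++ sketch below restates that index arithmetic without thrust, for clarity only; the function name and signature are illustrative and not part of the deleted file.

// Plain C++ stand-in for the device-side transposition done above.
#include <cstddef>
#include <vector>

std::vector<float> TransposeGroupsToLast(const std::vector<float> &phis,
                                         std::size_t num_rows,
                                         std::size_t num_groups,
                                         std::size_t num_cols_plus_one) {
  std::vector<float> out(phis.size());
  for (std::size_t row = 0; row < num_rows; row++) {
    for (std::size_t group = 0; group < num_groups; group++) {
      for (std::size_t col = 0; col < num_cols_plus_one; col++) {
        // old layout: [num_rows][num_groups][num_cols + 1]
        std::size_t old_idx =
            (row * num_groups + group) * num_cols_plus_one + col;
        // new layout: [num_rows][num_cols + 1][num_groups]
        std::size_t new_idx =
            (row * num_cols_plus_one + col) * num_groups + group;
        out[new_idx] = phis[old_idx];
      }
    }
  }
  return out;
}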
lib/shap/cext/gpu_treeshap.h DELETED
@@ -1,1535 +0,0 @@
1
- /*
2
- * Copyright (c) 2020, NVIDIA CORPORATION.
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions and
14
- * limitations under the License.
15
- */
16
-
17
- #pragma once
18
- #include <thrust/device_allocator.h>
19
- #include <thrust/device_vector.h>
20
- #include <thrust/iterator/discard_iterator.h>
21
- #include <thrust/logical.h>
22
- #include <thrust/reduce.h>
23
- #include <thrust/host_vector.h>
24
- #if (CUDART_VERSION >= 11000)
25
- #include <cub/cub.cuh>
26
- #else
27
- // Hack to get cub device reduce on older toolkits
28
- #include <thrust/system/cuda/detail/cub/device/device_reduce.cuh>
29
- using namespace thrust::cuda_cub;
30
- #endif
31
- #include <algorithm>
32
- #include <functional>
33
- #include <set>
34
- #include <stdexcept>
35
- #include <utility>
36
- #include <vector>
37
-
38
- namespace gpu_treeshap {
39
-
40
- struct XgboostSplitCondition {
41
- XgboostSplitCondition() = default;
42
- XgboostSplitCondition(float feature_lower_bound, float feature_upper_bound,
43
- bool is_missing_branch)
44
- : feature_lower_bound(feature_lower_bound),
45
- feature_upper_bound(feature_upper_bound),
46
- is_missing_branch(is_missing_branch) {
47
- assert(feature_lower_bound <= feature_upper_bound);
48
- }
49
-
50
- /*! Feature values >= lower and < upper flow down this path. */
51
- float feature_lower_bound;
52
- float feature_upper_bound;
53
- /*! Do missing values flow down this path? */
54
- bool is_missing_branch;
55
-
56
- // Does this instance flow down this path?
57
- __host__ __device__ bool EvaluateSplit(float x) const {
58
- // is nan
59
- if (isnan(x)) {
60
- return is_missing_branch;
61
- }
62
- return x >= feature_lower_bound && x < feature_upper_bound;
63
- }
64
-
65
- // Combine two split conditions on the same feature
66
- __host__ __device__ void Merge(
67
- const XgboostSplitCondition& other) { // Combine duplicate features
68
- feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
69
- feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
70
- is_missing_branch = is_missing_branch && other.is_missing_branch;
71
- }
72
- };
73
-
74
- /*!
75
- * An element of a unique path through a decision tree. Can implement various
76
- * types of splits via the templated SplitConditionT. Some decision tree
77
- * implementations may wish to use double precision or single precision, some
78
- * may use < or <= as the threshold, missing values can be handled differently,
79
- * categoricals may be supported.
80
- *
81
- * \tparam SplitConditionT A split condition implementing the methods
82
- * EvaluateSplit and Merge.
83
- */
84
- template <typename SplitConditionT>
85
- struct PathElement {
86
- using split_type = SplitConditionT;
87
- __host__ __device__ PathElement(size_t path_idx, int64_t feature_idx,
88
- int group, SplitConditionT split_condition,
89
- double zero_fraction, float v)
90
- : path_idx(path_idx),
91
- feature_idx(feature_idx),
92
- group(group),
93
- split_condition(split_condition),
94
- zero_fraction(zero_fraction),
95
- v(v) {}
96
-
97
- PathElement() = default;
98
- __host__ __device__ bool IsRoot() const { return feature_idx == -1; }
99
-
100
- template <typename DatasetT>
101
- __host__ __device__ bool EvaluateSplit(DatasetT X, size_t row_idx) const {
102
- if (this->IsRoot()) {
103
- return 1.0;
104
- }
105
- return split_condition.EvaluateSplit(X.GetElement(row_idx, feature_idx));
106
- }
107
-
108
- /*! Unique path index. */
109
- size_t path_idx;
110
- /*! Feature of this split, -1 indicates bias term. */
111
- int64_t feature_idx;
112
- /*! Indicates class for multiclass problems. */
113
- int group;
114
- SplitConditionT split_condition;
115
- /*! Probability of following this path when feature_idx is not in the active
116
- * set. */
117
- double zero_fraction;
118
- float v; // Leaf weight at the end of the path
119
- };
120
-
121
- // Helper function that accepts an index into a flat contiguous array and the
122
- // dimensions of a tensor and returns the indices with respect to the tensor
123
- template <typename T, size_t N>
124
- __device__ void FlatIdxToTensorIdx(T flat_idx, const T (&shape)[N],
125
- T (&out_idx)[N]) {
126
- T current_size = shape[0];
127
- for (auto i = 1ull; i < N; i++) {
128
- current_size *= shape[i];
129
- }
130
- for (auto i = 0ull; i < N; i++) {
131
- current_size /= shape[i];
132
- out_idx[i] = flat_idx / current_size;
133
- flat_idx -= current_size * out_idx[i];
134
- }
135
- }
136
-
137
- // Given a shape and coordinates into a tensor, return the index into the
138
- // backing storage one-dimensional array
139
- template <typename T, size_t N>
140
- __device__ T TensorIdxToFlatIdx(const T (&shape)[N], const T (&tensor_idx)[N]) {
141
- T current_size = shape[0];
142
- for (auto i = 1ull; i < N; i++) {
143
- current_size *= shape[i];
144
- }
145
- T idx = 0;
146
- for (auto i = 0ull; i < N; i++) {
147
- current_size /= shape[i];
148
- idx += tensor_idx[i] * current_size;
149
- }
150
- return idx;
151
- }
152
-
153
- // Maps values to the phi array according to row, group and column
154
- __host__ __device__ inline size_t IndexPhi(size_t row_idx, size_t num_groups,
155
- size_t group, size_t num_columns,
156
- size_t column_idx) {
157
- return (row_idx * num_groups + group) * (num_columns + 1) + column_idx;
158
- }
159
-
160
- __host__ __device__ inline size_t IndexPhiInteractions(size_t row_idx,
161
- size_t num_groups,
162
- size_t group,
163
- size_t num_columns,
164
- size_t i, size_t j) {
165
- size_t matrix_size = (num_columns + 1) * (num_columns + 1);
166
- size_t matrix_offset = (row_idx * num_groups + group) * matrix_size;
167
- return matrix_offset + i * (num_columns + 1) + j;
168
- }
169
-
170
- namespace detail {
171
-
172
- // Shorthand for creating a device vector with an appropriate allocator type
173
- template <class T, class DeviceAllocatorT>
174
- using RebindVector =
175
- thrust::device_vector<T,
176
- typename DeviceAllocatorT::template rebind<T>::other>;
177
-
178
- #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
179
- __device__ __forceinline__ double atomicAddDouble(double* address, double val) {
180
- return atomicAdd(address, val);
181
- }
182
- #else // In device code and CUDA < 600
183
- __device__ __forceinline__ double atomicAddDouble(double* address,
184
- double val) { // NOLINT
185
- unsigned long long int* address_as_ull = // NOLINT
186
- (unsigned long long int*)address; // NOLINT
187
- unsigned long long int old = *address_as_ull, assumed; // NOLINT
188
-
189
- do {
190
- assumed = old;
191
- old = atomicCAS(address_as_ull, assumed,
192
- __double_as_longlong(val + __longlong_as_double(assumed)));
193
-
194
- // Note: uses integer comparison to avoid hang in case of NaN (since NaN !=
195
- // NaN)
196
- } while (assumed != old);
197
-
198
- return __longlong_as_double(old);
199
- }
200
- #endif
201
-
202
- __forceinline__ __device__ unsigned int lanemask32_lt() {
203
- unsigned int lanemask32_lt;
204
- asm volatile("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask32_lt));
205
- return (lanemask32_lt);
206
- }
207
-
208
- // Like a coalesced group, except we can make the assumption that all threads in
209
- // a group are next to each other. This makes shuffle operations much cheaper.
210
- class ContiguousGroup {
211
- public:
212
- __device__ ContiguousGroup(uint32_t mask) : mask_(mask) {}
213
-
214
- __device__ uint32_t size() const { return __popc(mask_); }
215
- __device__ uint32_t thread_rank() const {
216
- return __popc(mask_ & lanemask32_lt());
217
- }
218
- template <typename T>
219
- __device__ T shfl(T val, uint32_t src) const {
220
- return __shfl_sync(mask_, val, src + __ffs(mask_) - 1);
221
- }
222
- template <typename T>
223
- __device__ T shfl_up(T val, uint32_t delta) const {
224
- return __shfl_up_sync(mask_, val, delta);
225
- }
226
- __device__ uint32_t ballot(int predicate) const {
227
- return __ballot_sync(mask_, predicate) >> (__ffs(mask_) - 1);
228
- }
229
-
230
- template <typename T, typename OpT>
231
- __device__ T reduce(T val, OpT op) {
232
- for (int i = 1; i < this->size(); i *= 2) {
233
- T shfl = shfl_up(val, i);
234
- if (static_cast<int>(thread_rank()) - i >= 0) {
235
- val = op(val, shfl);
236
- }
237
- }
238
- return shfl(val, size() - 1);
239
- }
240
- uint32_t mask_;
241
- };
242
-
243
- // Separate the active threads by labels
244
- // This functionality is available in cuda 11.0 on cc >=7.0
245
- // We reimplement for backwards compatibility
246
- // Assumes partitions are contiguous
247
- inline __device__ ContiguousGroup active_labeled_partition(uint32_t mask,
248
- int label) {
249
- #if __CUDA_ARCH__ >= 700
250
- uint32_t subgroup_mask = __match_any_sync(mask, label);
251
- #else
252
- uint32_t subgroup_mask = 0;
253
- for (int i = 0; i < 32;) {
254
- int current_label = __shfl_sync(mask, label, i);
255
- uint32_t ballot = __ballot_sync(mask, label == current_label);
256
- if (label == current_label) {
257
- subgroup_mask = ballot;
258
- }
259
- uint32_t completed_mask =
260
- (1 << (32 - __clz(ballot))) - 1; // Threads that have finished
261
- // Find the start of the next group, mask off completed threads from active
262
- // threads Then use ffs - 1 to find the position of the next group
263
- int next_i = __ffs(mask & ~completed_mask) - 1;
264
- if (next_i == -1) break; // -1 indicates all finished
265
- assert(next_i > i); // Prevent infinite loops when the constraints are not met
266
- i = next_i;
267
- }
268
- #endif
269
- return ContiguousGroup(subgroup_mask);
270
- }
271
-
272
- // Group of threads where each thread holds a path element
273
- class GroupPath {
274
- protected:
275
- const ContiguousGroup& g_;
276
- // These are combined so we can communicate them in a single 64 bit shuffle
277
- // instruction
278
- float zero_one_fraction_[2];
279
- float pweight_;
280
- int unique_depth_;
281
-
282
- public:
283
- __device__ GroupPath(const ContiguousGroup& g, float zero_fraction,
284
- float one_fraction)
285
- : g_(g),
286
- zero_one_fraction_{zero_fraction, one_fraction},
287
- pweight_(g.thread_rank() == 0 ? 1.0f : 0.0f),
288
- unique_depth_(0) {}
289
-
290
- // Cooperatively extend the path with a group of threads
291
- // Each thread maintains pweight for its path element in register
292
- __device__ void Extend() {
293
- unique_depth_++;
294
-
295
- // Broadcast the zero and one fraction from the newly added path element
296
- // Combine 2 shuffle operations into 64 bit word
297
- const size_t rank = g_.thread_rank();
298
- const float inv_unique_depth =
299
- __fdividef(1.0f, static_cast<float>(unique_depth_ + 1));
300
- uint64_t res = g_.shfl(*reinterpret_cast<uint64_t*>(&zero_one_fraction_),
301
- unique_depth_);
302
- const float new_zero_fraction = reinterpret_cast<float*>(&res)[0];
303
- const float new_one_fraction = reinterpret_cast<float*>(&res)[1];
304
- float left_pweight = g_.shfl_up(pweight_, 1);
305
-
306
- // pweight of threads with rank < unique_depth_ is 0
307
- // We use max(x,0) to avoid using a branch
308
- // pweight_ *=
309
- // new_zero_fraction * max(unique_depth_ - rank, 0llu) * inv_unique_depth;
310
- pweight_ = __fmul_rn(
311
- __fmul_rn(pweight_, new_zero_fraction),
312
- __fmul_rn(max(unique_depth_ - rank, size_t(0)), inv_unique_depth));
313
-
314
- // pweight_ += new_one_fraction * left_pweight * rank * inv_unique_depth;
315
- pweight_ = __fmaf_rn(__fmul_rn(new_one_fraction, left_pweight),
316
- __fmul_rn(rank, inv_unique_depth), pweight_);
317
- }
318
-
319
- // Each thread unwinds the path for its feature and returns the sum
320
- __device__ float UnwoundPathSum() {
321
- float next_one_portion = g_.shfl(pweight_, unique_depth_);
322
- float total = 0.0f;
323
- const float zero_frac_div_unique_depth = __fdividef(
324
- zero_one_fraction_[0], static_cast<float>(unique_depth_ + 1));
325
- for (int i = unique_depth_ - 1; i >= 0; i--) {
326
- float ith_pweight = g_.shfl(pweight_, i);
327
- float precomputed =
328
- __fmul_rn((unique_depth_ - i), zero_frac_div_unique_depth);
329
- const float tmp =
330
- __fdividef(__fmul_rn(next_one_portion, unique_depth_ + 1), i + 1);
331
- total = __fmaf_rn(tmp, zero_one_fraction_[1], total);
332
- next_one_portion = __fmaf_rn(-tmp, precomputed, ith_pweight);
333
- float numerator =
334
- __fmul_rn(__fsub_rn(1.0f, zero_one_fraction_[1]), ith_pweight);
335
- if (precomputed > 0.0f) {
336
- total += __fdividef(numerator, precomputed);
337
- }
338
- }
339
-
340
- return total;
341
- }
342
- };
343
-
344
- // Has different permutation weightings to the above
345
- // Used in Taylor Shapley interaction index
346
- class TaylorGroupPath : GroupPath {
347
- public:
348
- __device__ TaylorGroupPath(const ContiguousGroup& g, float zero_fraction,
349
- float one_fraction)
350
- : GroupPath(g, zero_fraction, one_fraction) {}
351
-
352
- // Extend the path as normal; all reweighting can happen in UnwoundPathSum
353
- __device__ void Extend() { GroupPath::Extend(); }
354
-
355
- // Each thread unwinds the path for its feature and returns the sum
356
- // We use a different permutation weighting for Taylor interactions
357
- // As if the total number of features was one larger
358
- __device__ float UnwoundPathSum() {
359
- float one_fraction = zero_one_fraction_[1];
360
- float zero_fraction = zero_one_fraction_[0];
361
- float next_one_portion = g_.shfl(pweight_, unique_depth_) /
362
- static_cast<float>(unique_depth_ + 2);
363
-
364
- float total = 0.0f;
365
- for (int i = unique_depth_ - 1; i >= 0; i--) {
366
- float ith_pweight =
367
- g_.shfl(pweight_, i) * (static_cast<float>(unique_depth_ - i + 1) /
368
- static_cast<float>(unique_depth_ + 2));
369
- if (one_fraction > 0.0f) {
370
- const float tmp =
371
- next_one_portion * (unique_depth_ + 2) / ((i + 1) * one_fraction);
372
-
373
- total += tmp;
374
- next_one_portion =
375
- ith_pweight - tmp * zero_fraction *
376
- ((unique_depth_ - i + 1) /
377
- static_cast<float>(unique_depth_ + 2));
378
- } else if (zero_fraction > 0.0f) {
379
- total +=
380
- (ith_pweight / zero_fraction) /
381
- ((unique_depth_ - i + 1) / static_cast<float>(unique_depth_ + 2));
382
- }
383
- }
384
-
385
- return 2 * total;
386
- }
387
- };
388
-
389
- template <typename DatasetT, typename SplitConditionT>
390
- __device__ float ComputePhi(const PathElement<SplitConditionT>& e,
391
- size_t row_idx, const DatasetT& X,
392
- const ContiguousGroup& group, float zero_fraction) {
393
- float one_fraction =
394
- e.EvaluateSplit(X, row_idx);
395
- GroupPath path(group, zero_fraction, one_fraction);
396
- size_t unique_path_length = group.size();
397
-
398
- // Extend the path
399
- for (auto unique_depth = 1ull; unique_depth < unique_path_length;
400
- unique_depth++) {
401
- path.Extend();
402
- }
403
-
404
- float sum = path.UnwoundPathSum();
405
- return sum * (one_fraction - zero_fraction) * e.v;
406
- }
407
-
408
- inline __host__ __device__ size_t DivRoundUp(size_t a, size_t b) {
409
- return (a + b - 1) / b;
410
- }
411
-
412
- template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp,
413
- typename SplitConditionT>
414
- void __device__
415
- ConfigureThread(const DatasetT& X, const size_t bins_per_row,
416
- const PathElement<SplitConditionT>* path_elements,
417
- const size_t* bin_segments, size_t* start_row, size_t* end_row,
418
- PathElement<SplitConditionT>* e, bool* thread_active) {
419
- // Partition work
420
- // Each warp processes a set of training instances applied to a path
421
- size_t tid = kBlockSize * blockIdx.x + threadIdx.x;
422
- const size_t warp_size = 32;
423
- size_t warp_rank = tid / warp_size;
424
- if (warp_rank >= bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp)) {
425
- *thread_active = false;
426
- return;
427
- }
428
- size_t bin_idx = warp_rank % bins_per_row;
429
- size_t bank = warp_rank / bins_per_row;
430
- size_t path_start = bin_segments[bin_idx];
431
- size_t path_end = bin_segments[bin_idx + 1];
432
- uint32_t thread_rank = threadIdx.x % warp_size;
433
- if (thread_rank >= path_end - path_start) {
434
- *thread_active = false;
435
- } else {
436
- *e = path_elements[path_start + thread_rank];
437
- *start_row = bank * kRowsPerWarp;
438
- *end_row = min((bank + 1) * kRowsPerWarp, X.NumRows());
439
- *thread_active = true;
440
- }
441
- }
442
-
443
- #define GPUTREESHAP_MAX_THREADS_PER_BLOCK 256
444
- #define FULL_MASK 0xffffffff
445
-
446
- template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp,
447
- typename SplitConditionT>
448
- __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
449
- ShapKernel(DatasetT X, size_t bins_per_row,
450
- const PathElement<SplitConditionT>* path_elements,
451
- const size_t* bin_segments, size_t num_groups, double* phis) {
452
- // Use shared memory for structs, otherwise nvcc puts in local memory
453
- __shared__ DatasetT s_X;
454
- s_X = X;
455
- __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
456
- PathElement<SplitConditionT>& e = s_elements[threadIdx.x];
457
-
458
- size_t start_row, end_row;
459
- bool thread_active;
460
- ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
461
- s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e,
462
- &thread_active);
463
- uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
464
- if (!thread_active) return;
465
-
466
- float zero_fraction = e.zero_fraction;
467
- auto labelled_group = active_labeled_partition(mask, e.path_idx);
468
-
469
- for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) {
470
- float phi = ComputePhi(e, row_idx, X, labelled_group, zero_fraction);
471
-
472
- if (!e.IsRoot()) {
473
- atomicAddDouble(&phis[IndexPhi(row_idx, num_groups, e.group, X.NumCols(),
474
- e.feature_idx)],
475
- phi);
476
- }
477
- }
478
- }
479
-
480
- template <typename DatasetT, typename SizeTAllocatorT, typename PathAllocatorT,
481
- typename SplitConditionT>
482
- void ComputeShap(
483
- DatasetT X,
484
- const thrust::device_vector<size_t, SizeTAllocatorT>& bin_segments,
485
- const thrust::device_vector<PathElement<SplitConditionT>, PathAllocatorT>&
486
- path_elements,
487
- size_t num_groups, double* phis) {
488
- size_t bins_per_row = bin_segments.size() - 1;
489
- const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK;
490
- const int warps_per_block = kBlockThreads / 32;
491
- const int kRowsPerWarp = 1024;
492
- size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp);
493
-
494
- const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block);
495
-
496
- ShapKernel<DatasetT, kBlockThreads, kRowsPerWarp>
497
- <<<grid_size, kBlockThreads>>>(
498
- X, bins_per_row, path_elements.data().get(),
499
- bin_segments.data().get(), num_groups, phis);
500
- }
501
-
502
- template <typename PathT, typename DatasetT, typename SplitConditionT>
503
- __device__ float ComputePhiCondition(const PathElement<SplitConditionT>& e,
504
- size_t row_idx, const DatasetT& X,
505
- const ContiguousGroup& group,
506
- int64_t condition_feature) {
507
- float one_fraction = e.EvaluateSplit(X, row_idx);
508
- PathT path(group, e.zero_fraction, one_fraction);
509
- size_t unique_path_length = group.size();
510
- float condition_on_fraction = 1.0f;
511
- float condition_off_fraction = 1.0f;
512
-
513
- // Extend the path
514
- for (auto i = 1ull; i < unique_path_length; i++) {
515
- bool is_condition_feature =
516
- group.shfl(e.feature_idx, i) == condition_feature;
517
- float o_i = group.shfl(one_fraction, i);
518
- float z_i = group.shfl(e.zero_fraction, i);
519
-
520
- if (is_condition_feature) {
521
- condition_on_fraction = o_i;
522
- condition_off_fraction = z_i;
523
- } else {
524
- path.Extend();
525
- }
526
- }
527
- float sum = path.UnwoundPathSum();
528
- if (e.feature_idx == condition_feature) {
529
- return 0.0f;
530
- }
531
- float phi = sum * (one_fraction - e.zero_fraction) * e.v;
532
- return phi * (condition_on_fraction - condition_off_fraction) * 0.5f;
533
- }
534
-
535
- // If there is a feature in the path we are conditioning on, swap it to the end
536
- // of the path
537
- template <typename SplitConditionT>
538
- inline __device__ void SwapConditionedElement(
539
- PathElement<SplitConditionT>** e, PathElement<SplitConditionT>* s_elements,
540
- uint32_t condition_rank, const ContiguousGroup& group) {
541
- auto last_rank = group.size() - 1;
542
- auto this_rank = group.thread_rank();
543
- if (this_rank == last_rank) {
544
- *e = &s_elements[(threadIdx.x - this_rank) + condition_rank];
545
- } else if (this_rank == condition_rank) {
546
- *e = &s_elements[(threadIdx.x - this_rank) + last_rank];
547
- }
548
- }
549
-
550
- template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp,
551
- typename SplitConditionT>
552
- __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
553
- ShapInteractionsKernel(DatasetT X, size_t bins_per_row,
554
- const PathElement<SplitConditionT>* path_elements,
555
- const size_t* bin_segments, size_t num_groups,
556
- double* phis_interactions) {
557
- // Use shared memory for structs, otherwise nvcc puts in local memory
558
- __shared__ DatasetT s_X;
559
- s_X = X;
560
- __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
561
- PathElement<SplitConditionT>* e = &s_elements[threadIdx.x];
562
-
563
- size_t start_row, end_row;
564
- bool thread_active;
565
- ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
566
- s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e,
567
- &thread_active);
568
- uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
569
- if (!thread_active) return;
570
-
571
- auto labelled_group = active_labeled_partition(mask, e->path_idx);
572
-
573
- for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) {
574
- float phi = ComputePhi(*e, row_idx, X, labelled_group, e->zero_fraction);
575
- if (!e->IsRoot()) {
576
- auto phi_offset =
577
- IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(),
578
- e->feature_idx, e->feature_idx);
579
- atomicAddDouble(phis_interactions + phi_offset, phi);
580
- }
581
-
582
- for (auto condition_rank = 1ull; condition_rank < labelled_group.size();
583
- condition_rank++) {
584
- e = &s_elements[threadIdx.x];
585
- int64_t condition_feature =
586
- labelled_group.shfl(e->feature_idx, condition_rank);
587
- SwapConditionedElement(&e, s_elements, condition_rank, labelled_group);
588
- float x = ComputePhiCondition<GroupPath>(*e, row_idx, X, labelled_group,
589
- condition_feature);
590
- if (!e->IsRoot()) {
591
- auto phi_offset =
592
- IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(),
593
- e->feature_idx, condition_feature);
594
- atomicAddDouble(phis_interactions + phi_offset, x);
595
- // Subtract effect from diagonal
596
- auto phi_diag =
597
- IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(),
598
- e->feature_idx, e->feature_idx);
599
- atomicAddDouble(phis_interactions + phi_diag, -x);
600
- }
601
- }
602
- }
603
- }
604
-
605
- template <typename DatasetT, typename SizeTAllocatorT, typename PathAllocatorT,
606
- typename SplitConditionT>
607
- void ComputeShapInteractions(
608
- DatasetT X,
609
- const thrust::device_vector<size_t, SizeTAllocatorT>& bin_segments,
610
- const thrust::device_vector<PathElement<SplitConditionT>, PathAllocatorT>&
611
- path_elements,
612
- size_t num_groups, double* phis) {
613
- size_t bins_per_row = bin_segments.size() - 1;
614
- const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK;
615
- const int warps_per_block = kBlockThreads / 32;
616
- const int kRowsPerWarp = 100;
617
- size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp);
618
-
619
- const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block);
620
-
621
- ShapInteractionsKernel<DatasetT, kBlockThreads, kRowsPerWarp>
622
- <<<grid_size, kBlockThreads>>>(
623
- X, bins_per_row, path_elements.data().get(),
624
- bin_segments.data().get(), num_groups, phis);
625
- }
626
-
627
- template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp,
628
- typename SplitConditionT>
629
- __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
630
- ShapTaylorInteractionsKernel(
631
- DatasetT X, size_t bins_per_row,
632
- const PathElement<SplitConditionT>* path_elements,
633
- const size_t* bin_segments, size_t num_groups,
634
- double* phis_interactions) {
635
- // Use shared memory for structs, otherwise nvcc puts in local memory
636
- __shared__ DatasetT s_X;
637
- if (threadIdx.x == 0) {
638
- s_X = X;
639
- }
640
- __syncthreads();
641
- __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
642
- PathElement<SplitConditionT>* e = &s_elements[threadIdx.x];
643
-
644
- size_t start_row, end_row;
645
- bool thread_active;
646
- ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
647
- s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e,
648
- &thread_active);
649
- uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
650
- if (!thread_active) return;
651
-
652
- auto labelled_group = active_labeled_partition(mask, e->path_idx);
653
-
654
- for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) {
655
- for (auto condition_rank = 1ull; condition_rank < labelled_group.size();
656
- condition_rank++) {
657
- e = &s_elements[threadIdx.x];
658
- // Compute the diagonal terms
659
- // TODO(Rory): this can be more efficient
660
- float reduce_input =
661
- e->IsRoot() || labelled_group.thread_rank() == condition_rank
662
- ? 1.0f
663
- : e->zero_fraction;
664
- float reduce =
665
- labelled_group.reduce(reduce_input, thrust::multiplies<float>());
666
- if (labelled_group.thread_rank() == condition_rank) {
667
- float one_fraction = e->split_condition.EvaluateSplit(
668
- X.GetElement(row_idx, e->feature_idx));
669
- auto phi_offset =
670
- IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(),
671
- e->feature_idx, e->feature_idx);
672
- atomicAddDouble(phis_interactions + phi_offset,
673
- reduce * (one_fraction - e->zero_fraction) * e->v);
674
- }
675
-
676
- int64_t condition_feature =
677
- labelled_group.shfl(e->feature_idx, condition_rank);
678
-
679
- SwapConditionedElement(&e, s_elements, condition_rank, labelled_group);
680
-
681
- float x = ComputePhiCondition<TaylorGroupPath>(
682
- *e, row_idx, X, labelled_group, condition_feature);
683
- if (!e->IsRoot()) {
684
- auto phi_offset =
685
- IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(),
686
- e->feature_idx, condition_feature);
687
- atomicAddDouble(phis_interactions + phi_offset, x);
688
- }
689
- }
690
- }
691
- }
692
-
693
- template <typename DatasetT, typename SizeTAllocatorT, typename PathAllocatorT,
694
- typename SplitConditionT>
695
- void ComputeShapTaylorInteractions(
696
- DatasetT X,
697
- const thrust::device_vector<size_t, SizeTAllocatorT>& bin_segments,
698
- const thrust::device_vector<PathElement<SplitConditionT>, PathAllocatorT>&
699
- path_elements,
700
- size_t num_groups, double* phis) {
701
- size_t bins_per_row = bin_segments.size() - 1;
702
- const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK;
703
- const int warps_per_block = kBlockThreads / 32;
704
- const int kRowsPerWarp = 100;
705
- size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp);
706
-
707
- const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block);
708
-
709
- ShapTaylorInteractionsKernel<DatasetT, kBlockThreads, kRowsPerWarp>
710
- <<<grid_size, kBlockThreads>>>(
711
- X, bins_per_row, path_elements.data().get(),
712
- bin_segments.data().get(), num_groups, phis);
713
- }
714
-
715
-
716
- inline __host__ __device__ int64_t Factorial(int64_t x) {
717
- int64_t y = 1;
718
- for (auto i = 2; i <= x; i++) {
719
- y *= i;
720
- }
721
- return y;
722
- }
723
-
724
- // Compute factorials in log space using lgamma to avoid overflow
725
- inline __host__ __device__ double W(double s, double n) {
726
- assert(n - s - 1 >= 0);
727
- return exp(lgamma(s + 1) - lgamma(n + 1) + lgamma(n - s));
728
- }
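// (Editor's note, not part of the deleted file.) W(s, n) is the Shapley
// permutation weight s! * (n - s - 1)! / n!, evaluated in log space:
//   exp(lgamma(s + 1) - lgamma(n + 1) + lgamma(n - s)) = s! * (n - s - 1)! / n!
// because lgamma(k + 1) = ln(k!). Working with lgamma keeps the intermediate
// factorials from overflowing a double when the number of features grows.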
729
-
730
- template <typename DatasetT, size_t kBlockSize, size_t kRowsPerWarp,
731
- typename SplitConditionT>
732
- __global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK)
733
- ShapInterventionalKernel(DatasetT X, DatasetT R, size_t bins_per_row,
734
- const PathElement<SplitConditionT>* path_elements,
735
- const size_t* bin_segments, size_t num_groups,
736
- double* phis) {
737
- // Cache W coefficients
738
- __shared__ float s_W[33][33];
739
- for (int i = threadIdx.x; i < 33 * 33; i += kBlockSize) {
740
- auto s = i % 33;
741
- auto n = i / 33;
742
- if (n - s - 1 >= 0) {
743
- s_W[s][n] = W(s, n);
744
- } else {
745
- s_W[s][n] = 0.0;
746
- }
747
- }
748
-
749
- __syncthreads();
750
-
751
- __shared__ PathElement<SplitConditionT> s_elements[kBlockSize];
752
- PathElement<SplitConditionT>& e = s_elements[threadIdx.x];
753
-
754
- size_t start_row, end_row;
755
- bool thread_active;
756
- ConfigureThread<DatasetT, kBlockSize, kRowsPerWarp>(
757
- X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e,
758
- &thread_active);
759
-
760
- uint32_t mask = __ballot_sync(FULL_MASK, thread_active);
761
- if (!thread_active) return;
762
-
763
- auto labelled_group = active_labeled_partition(mask, e.path_idx);
764
-
765
- for (int64_t x_idx = start_row; x_idx < end_row; x_idx++) {
766
- float result = 0.0f;
767
- bool x_cond = e.EvaluateSplit(X, x_idx);
768
- uint32_t x_ballot = labelled_group.ballot(x_cond);
769
- for (int64_t r_idx = 0; r_idx < R.NumRows(); r_idx++) {
770
- bool r_cond = e.EvaluateSplit(R, r_idx);
771
- uint32_t r_ballot = labelled_group.ballot(r_cond);
772
- assert(!e.IsRoot() ||
773
- (x_cond == r_cond)); // These should be the same for the root
774
- uint32_t s = __popc(x_ballot & ~r_ballot);
775
- uint32_t n = __popc(x_ballot ^ r_ballot);
776
- float tmp = 0.0f;
777
- // Theorem 1
778
- if (x_cond && !r_cond) {
779
- tmp += s_W[s - 1][n];
780
- }
781
- tmp -= s_W[s][n] * (r_cond && !x_cond);
782
-
783
- // No foreground samples make it to this leaf, increment bias
784
- if (e.IsRoot() && s == 0) {
785
- tmp += 1.0f;
786
- }
787
- // If neither foreground nor background goes down this path, ignore this path
788
- bool reached_leaf = !labelled_group.ballot(!x_cond && !r_cond);
789
- tmp *= reached_leaf;
790
- result += tmp;
791
- }
792
-
793
- if (result != 0.0) {
794
- result /= R.NumRows();
795
-
796
- // Root writes bias
797
- auto feature = e.IsRoot() ? X.NumCols() : e.feature_idx;
798
- atomicAddDouble(
799
- &phis[IndexPhi(x_idx, num_groups, e.group, X.NumCols(), feature)],
800
- result * e.v);
801
- }
802
- }
803
- }
804
-
805
- template <typename DatasetT, typename SizeTAllocatorT, typename PathAllocatorT,
806
- typename SplitConditionT>
807
- void ComputeShapInterventional(
808
- DatasetT X, DatasetT R,
809
- const thrust::device_vector<size_t, SizeTAllocatorT>& bin_segments,
810
- const thrust::device_vector<PathElement<SplitConditionT>, PathAllocatorT>&
811
- path_elements,
812
- size_t num_groups, double* phis) {
813
- size_t bins_per_row = bin_segments.size() - 1;
814
- const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK;
815
- const int warps_per_block = kBlockThreads / 32;
816
- const int kRowsPerWarp = 100;
817
- size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp);
818
-
819
- const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block);
820
-
821
- ShapInterventionalKernel<DatasetT, kBlockThreads, kRowsPerWarp>
822
- <<<grid_size, kBlockThreads>>>(
823
- X, R, bins_per_row, path_elements.data().get(),
824
- bin_segments.data().get(), num_groups, phis);
825
- }
826
-
827
- template <typename PathVectorT, typename SizeVectorT, typename DeviceAllocatorT>
828
- void GetBinSegments(const PathVectorT& paths, const SizeVectorT& bin_map,
829
- SizeVectorT* bin_segments) {
830
- DeviceAllocatorT alloc;
831
- size_t num_bins =
832
- thrust::reduce(thrust::cuda::par(alloc), bin_map.begin(), bin_map.end(),
833
- size_t(0), thrust::maximum<size_t>()) +
834
- 1;
835
- bin_segments->resize(num_bins + 1, 0);
836
- auto counting = thrust::make_counting_iterator(0llu);
837
- auto d_paths = paths.data().get();
838
- auto d_bin_segments = bin_segments->data().get();
839
- auto d_bin_map = bin_map.data();
840
- thrust::for_each_n(counting, paths.size(), [=] __device__(size_t idx) {
841
- auto path_idx = d_paths[idx].path_idx;
842
- atomicAdd(reinterpret_cast<unsigned long long*>(d_bin_segments) + // NOLINT
843
- d_bin_map[path_idx],
844
- 1);
845
- });
846
- thrust::exclusive_scan(thrust::cuda::par(alloc), bin_segments->begin(),
847
- bin_segments->end(), bin_segments->begin());
848
- }
849
-
850
- struct DeduplicateKeyTransformOp {
851
- template <typename SplitConditionT>
852
- __device__ thrust::pair<size_t, int64_t> operator()(
853
- const PathElement<SplitConditionT>& e) {
854
- return {e.path_idx, e.feature_idx};
855
- }
856
- };
857
-
858
- inline void CheckCuda(cudaError_t err) {
859
- if (err != cudaSuccess) {
860
- throw thrust::system_error(err, thrust::cuda_category());
861
- }
862
- }
863
-
864
- template <typename Return>
865
- class DiscardOverload : public thrust::discard_iterator<Return> {
866
- public:
867
- using value_type = Return; // NOLINT
868
- };
869
-
870
- template <typename PathVectorT, typename DeviceAllocatorT,
871
- typename SplitConditionT>
872
- void DeduplicatePaths(PathVectorT* device_paths,
873
- PathVectorT* deduplicated_paths) {
874
- DeviceAllocatorT alloc;
875
- // Sort by feature
876
- thrust::sort(thrust::cuda::par(alloc), device_paths->begin(),
877
- device_paths->end(),
878
- [=] __device__(const PathElement<SplitConditionT>& a,
879
- const PathElement<SplitConditionT>& b) {
880
- if (a.path_idx < b.path_idx) return true;
881
- if (b.path_idx < a.path_idx) return false;
882
-
883
- if (a.feature_idx < b.feature_idx) return true;
884
- if (b.feature_idx < a.feature_idx) return false;
885
- return false;
886
- });
887
-
888
- deduplicated_paths->resize(device_paths->size());
889
-
890
- using Pair = thrust::pair<size_t, int64_t>;
891
- auto key_transform = thrust::make_transform_iterator(
892
- device_paths->begin(), DeduplicateKeyTransformOp());
893
-
894
- thrust::device_vector<size_t> d_num_runs_out(1);
895
- size_t* h_num_runs_out;
896
- CheckCuda(cudaMallocHost(&h_num_runs_out, sizeof(size_t)));
897
-
898
- auto combine = [] __device__(PathElement<SplitConditionT> a,
899
- PathElement<SplitConditionT> b) {
900
- // Combine duplicate features
901
- a.split_condition.Merge(b.split_condition);
902
- a.zero_fraction *= b.zero_fraction;
903
- return a;
904
- }; // NOLINT
905
- size_t temp_size = 0;
906
- CheckCuda(cub::DeviceReduce::ReduceByKey(
907
- nullptr, temp_size, key_transform, DiscardOverload<Pair>(),
908
- device_paths->begin(), deduplicated_paths->begin(),
909
- d_num_runs_out.begin(), combine, device_paths->size()));
910
- using TempAlloc = RebindVector<char, DeviceAllocatorT>;
911
- TempAlloc tmp(temp_size);
912
- CheckCuda(cub::DeviceReduce::ReduceByKey(
913
- tmp.data().get(), temp_size, key_transform, DiscardOverload<Pair>(),
914
- device_paths->begin(), deduplicated_paths->begin(),
915
- d_num_runs_out.begin(), combine, device_paths->size()));
916
-
917
- CheckCuda(cudaMemcpy(h_num_runs_out, d_num_runs_out.data().get(),
918
- sizeof(size_t), cudaMemcpyDeviceToHost));
919
- deduplicated_paths->resize(*h_num_runs_out);
920
- CheckCuda(cudaFreeHost(h_num_runs_out));
921
- }
922
-
923
- template <typename PathVectorT, typename SplitConditionT, typename SizeVectorT,
924
- typename DeviceAllocatorT>
925
- void SortPaths(PathVectorT* paths, const SizeVectorT& bin_map) {
926
- auto d_bin_map = bin_map.data();
927
- DeviceAllocatorT alloc;
928
- thrust::sort(thrust::cuda::par(alloc), paths->begin(), paths->end(),
929
- [=] __device__(const PathElement<SplitConditionT>& a,
930
- const PathElement<SplitConditionT>& b) {
931
- size_t a_bin = d_bin_map[a.path_idx];
932
- size_t b_bin = d_bin_map[b.path_idx];
933
- if (a_bin < b_bin) return true;
934
- if (b_bin < a_bin) return false;
935
-
936
- if (a.path_idx < b.path_idx) return true;
937
- if (b.path_idx < a.path_idx) return false;
938
-
939
- if (a.feature_idx < b.feature_idx) return true;
940
- if (b.feature_idx < a.feature_idx) return false;
941
- return false;
942
- });
943
- }
944
-
945
- using kv = std::pair<size_t, int>;
946
-
947
- struct BFDCompare {
948
- bool operator()(const kv& lhs, const kv& rhs) const {
949
- if (lhs.second == rhs.second) {
950
- return lhs.first < rhs.first;
951
- }
952
- return lhs.second < rhs.second;
953
- }
954
- };
955
-
956
- // Best Fit Decreasing bin packing
957
- // Efficient O(nlogn) implementation with balanced tree using std::set
958
- template <typename IntVectorT>
959
- std::vector<size_t> BFDBinPacking(const IntVectorT& counts,
960
- int bin_limit = 32) {
961
- thrust::host_vector<int> counts_host(counts);
962
- std::vector<kv> path_lengths(counts_host.size());
963
- for (auto i = 0ull; i < counts_host.size(); i++) {
964
- path_lengths[i] = {i, counts_host[i]};
965
- }
966
-
967
- std::sort(path_lengths.begin(), path_lengths.end(),
968
- [&](const kv& a, const kv& b) {
969
- std::greater<> op;
970
- return op(a.second, b.second);
971
- });
972
-
973
- // map unique_id -> bin
974
- std::vector<size_t> bin_map(counts_host.size());
975
- std::set<kv, BFDCompare> bin_capacities;
976
- bin_capacities.insert({bin_capacities.size(), bin_limit});
977
- for (auto pair : path_lengths) {
978
- int new_size = pair.second;
979
- auto itr = bin_capacities.lower_bound({0, new_size});
980
- // Does not fit in any bin
981
- if (itr == bin_capacities.end()) {
982
- size_t new_bin_idx = bin_capacities.size();
983
- bin_capacities.insert({new_bin_idx, bin_limit - new_size});
984
- bin_map[pair.first] = new_bin_idx;
985
- } else {
986
- kv entry = *itr;
987
- entry.second -= new_size;
988
- bin_map[pair.first] = entry.first;
989
- bin_capacities.erase(itr);
990
- bin_capacities.insert(entry);
991
- }
992
- }
993
-
994
- return bin_map;
995
- }
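// (Editor's sketch, not part of the deleted file.) A host-only, thrust-free
// stand-in for BFDBinPacking above, included to make the packing rule concrete:
// paths are sorted by decreasing length, and each path goes into the fullest
// bin that can still hold it, so every bin stays within the 32-thread warp
// limit. The BestFitDecreasing name is illustrative.
#include <algorithm>
#include <cstddef>
#include <set>
#include <utility>
#include <vector>

std::vector<std::size_t> BestFitDecreasing(const std::vector<int> &counts,
                                           int bin_limit = 32) {
  using kv = std::pair<std::size_t, int>;  // (bin id, remaining capacity)
  auto by_capacity = [](const kv &a, const kv &b) {
    return a.second == b.second ? a.first < b.first : a.second < b.second;
  };
  std::vector<kv> lengths;
  for (std::size_t i = 0; i < counts.size(); i++) lengths.push_back({i, counts[i]});
  std::sort(lengths.begin(), lengths.end(),
            [](const kv &a, const kv &b) { return a.second > b.second; });
  std::set<kv, decltype(by_capacity)> bins(by_capacity);
  bins.insert({0, bin_limit});
  std::vector<std::size_t> bin_map(counts.size());
  for (const kv &p : lengths) {
    auto itr = bins.lower_bound({0, p.second});  // smallest capacity that still fits
    if (itr == bins.end()) {                     // fits nowhere: open a new bin
      bin_map[p.first] = bins.size();
      bins.insert({bins.size(), bin_limit - p.second});
    } else {
      kv bin = *itr;
      bins.erase(itr);
      bin.second -= p.second;
      bin_map[p.first] = bin.first;
      bins.insert(bin);
    }
  }
  return bin_map;
}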
996
-
997
- // First Fit Decreasing bin packing
998
- // Inefficient O(n^2) implementation
999
- template <typename IntVectorT>
1000
- std::vector<size_t> FFDBinPacking(const IntVectorT& counts,
1001
- int bin_limit = 32) {
1002
- thrust::host_vector<int> counts_host(counts);
1003
- std::vector<kv> path_lengths(counts_host.size());
1004
- for (auto i = 0ull; i < counts_host.size(); i++) {
1005
- path_lengths[i] = {i, counts_host[i]};
1006
- }
1007
- std::sort(path_lengths.begin(), path_lengths.end(),
1008
- [&](const kv& a, const kv& b) {
1009
- std::greater<> op;
1010
- return op(a.second, b.second);
1011
- });
1012
-
1013
- // map unique_id -> bin
1014
- std::vector<size_t> bin_map(counts_host.size());
1015
- std::vector<int> bin_capacities(path_lengths.size(), bin_limit);
1016
- for (auto pair : path_lengths) {
1017
- int new_size = pair.second;
1018
- for (auto j = 0ull; j < bin_capacities.size(); j++) {
1019
- int& capacity = bin_capacities[j];
1020
-
1021
- if (capacity >= new_size) {
1022
- capacity -= new_size;
1023
- bin_map[pair.first] = j;
1024
- break;
1025
- }
1026
- }
1027
- }
1028
-
1029
- return bin_map;
1030
- }
1031
-
1032
- // Next Fit bin packing
1033
- // O(n) implementation
1034
- template <typename IntVectorT>
1035
- std::vector<size_t> NFBinPacking(const IntVectorT& counts, int bin_limit = 32) {
1036
- thrust::host_vector<int> counts_host(counts);
1037
- std::vector<size_t> bin_map(counts_host.size());
1038
- size_t current_bin = 0;
1039
- int current_capacity = bin_limit;
1040
- for (auto i = 0ull; i < counts_host.size(); i++) {
1041
- int new_size = counts_host[i];
1042
- size_t path_idx = i;
1043
- if (new_size <= current_capacity) {
1044
- current_capacity -= new_size;
1045
- bin_map[path_idx] = current_bin;
1046
- } else {
1047
- current_capacity = bin_limit - new_size;
1048
- bin_map[path_idx] = ++current_bin;
1049
- }
1050
- }
1051
- return bin_map;
1052
- }
1053
-
1054
- template <typename DeviceAllocatorT, typename SplitConditionT,
1055
- typename PathVectorT, typename LengthVectorT>
1056
- void GetPathLengths(const PathVectorT& device_paths,
1057
- LengthVectorT* path_lengths) {
1058
- path_lengths->resize(
1059
- static_cast<PathElement<SplitConditionT>>(device_paths.back()).path_idx +
1060
- 1,
1061
- 0);
1062
- auto counting = thrust::make_counting_iterator(0llu);
1063
- auto d_paths = device_paths.data().get();
1064
- auto d_lengths = path_lengths->data().get();
1065
- thrust::for_each_n(counting, device_paths.size(), [=] __device__(size_t idx) {
1066
- auto path_idx = d_paths[idx].path_idx;
1067
- atomicAdd(d_lengths + path_idx, 1ull);
1068
- });
1069
- }
1070
-
1071
- struct PathTooLongOp {
1072
- __device__ size_t operator()(size_t length) { return length > 32; }
1073
- };
1074
-
1075
- template <typename SplitConditionT>
1076
- struct IncorrectVOp {
1077
- const PathElement<SplitConditionT>* paths;
1078
- __device__ size_t operator()(size_t idx) {
1079
- auto a = paths[idx - 1];
1080
- auto b = paths[idx];
1081
- return a.path_idx == b.path_idx && a.v != b.v;
1082
- }
1083
- };
1084
-
1085
- template <typename DeviceAllocatorT, typename SplitConditionT,
1086
- typename PathVectorT, typename LengthVectorT>
1087
- void ValidatePaths(const PathVectorT& device_paths,
1088
- const LengthVectorT& path_lengths) {
1089
- DeviceAllocatorT alloc;
1090
- PathTooLongOp too_long_op;
1091
- auto invalid_length =
1092
- thrust::any_of(thrust::cuda::par(alloc), path_lengths.begin(),
1093
- path_lengths.end(), too_long_op);
1094
-
1095
- if (invalid_length) {
1096
- throw std::invalid_argument("Tree depth must be < 32");
1097
- }
1098
-
1099
- IncorrectVOp<SplitConditionT> incorrect_v_op{device_paths.data().get()};
1100
- auto counting = thrust::counting_iterator<size_t>(0);
1101
- auto incorrect_v =
1102
- thrust::any_of(thrust::cuda::par(alloc), counting + 1,
1103
- counting + device_paths.size(), incorrect_v_op);
1104
-
1105
- if (incorrect_v) {
1106
- throw std::invalid_argument(
1107
- "Leaf value v should be the same across a single path");
1108
- }
1109
- }
1110
-
1111
- template <typename DeviceAllocatorT, typename SplitConditionT,
1112
- typename PathVectorT, typename SizeVectorT>
1113
- void PreprocessPaths(PathVectorT* device_paths, PathVectorT* deduplicated_paths,
1114
- SizeVectorT* bin_segments) {
1115
- // Sort paths by length and feature
1116
- detail::DeduplicatePaths<PathVectorT, DeviceAllocatorT, SplitConditionT>(
1117
- device_paths, deduplicated_paths);
1118
- using int_vector = RebindVector<int, DeviceAllocatorT>;
1119
- int_vector path_lengths;
1120
- detail::GetPathLengths<DeviceAllocatorT, SplitConditionT>(*deduplicated_paths,
1121
- &path_lengths);
1122
- SizeVectorT device_bin_map = detail::BFDBinPacking(path_lengths);
1123
- ValidatePaths<DeviceAllocatorT, SplitConditionT>(*deduplicated_paths,
1124
- path_lengths);
1125
- detail::SortPaths<PathVectorT, SplitConditionT, SizeVectorT,
1126
- DeviceAllocatorT>(deduplicated_paths, device_bin_map);
1127
- detail::GetBinSegments<PathVectorT, SizeVectorT, DeviceAllocatorT>(
1128
- *deduplicated_paths, device_bin_map, bin_segments);
1129
- }
1130
-
1131
- struct PathIdxTransformOp {
1132
- template <typename SplitConditionT>
1133
- __device__ size_t operator()(const PathElement<SplitConditionT>& e) {
1134
- return e.path_idx;
1135
- }
1136
- };
1137
-
1138
- struct GroupIdxTransformOp {
1139
- template <typename SplitConditionT>
1140
- __device__ size_t operator()(const PathElement<SplitConditionT>& e) {
1141
- return e.group;
1142
- }
1143
- };
1144
-
1145
- struct BiasTransformOp {
1146
- template <typename SplitConditionT>
1147
- __device__ double operator()(const PathElement<SplitConditionT>& e) {
1148
- return e.zero_fraction * e.v;
1149
- }
1150
- };
1151
-
1152
- // While it is possible to compute bias in the primary kernel, we do it here
1153
- // using double precision to avoid numerical stability issues
1154
- template <typename PathVectorT, typename DoubleVectorT,
1155
- typename DeviceAllocatorT, typename SplitConditionT>
1156
- void ComputeBias(const PathVectorT& device_paths, DoubleVectorT* bias) {
1157
- using double_vector = thrust::device_vector<
1158
- double, typename DeviceAllocatorT::template rebind<double>::other>;
1159
- PathVectorT sorted_paths(device_paths);
1160
- DeviceAllocatorT alloc;
1161
- // Make sure groups are contiguous
1162
- thrust::sort(thrust::cuda::par(alloc), sorted_paths.begin(),
1163
- sorted_paths.end(),
1164
- [=] __device__(const PathElement<SplitConditionT>& a,
1165
- const PathElement<SplitConditionT>& b) {
1166
- if (a.group < b.group) return true;
1167
- if (b.group < a.group) return false;
1168
-
1169
- if (a.path_idx < b.path_idx) return true;
1170
- if (b.path_idx < a.path_idx) return false;
1171
-
1172
- return false;
1173
- });
1174
- // Combine zero fraction for all paths
1175
- auto path_key = thrust::make_transform_iterator(sorted_paths.begin(),
1176
- PathIdxTransformOp());
1177
- PathVectorT combined(sorted_paths.size());
1178
- auto combined_out = thrust::reduce_by_key(
1179
- thrust::cuda::par(alloc), path_key, path_key + sorted_paths.size(),
1180
- sorted_paths.begin(), thrust::make_discard_iterator(), combined.begin(),
1181
- thrust::equal_to<size_t>(),
1182
- [=] __device__(PathElement<SplitConditionT> a,
1183
- const PathElement<SplitConditionT>& b) {
1184
- a.zero_fraction *= b.zero_fraction;
1185
- return a;
1186
- });
1187
- size_t num_paths = combined_out.second - combined.begin();
1188
- // Combine bias for each path, over each group
1189
- using size_vector = thrust::device_vector<
1190
- size_t, typename DeviceAllocatorT::template rebind<size_t>::other>;
1191
- size_vector keys_out(num_paths);
1192
- double_vector values_out(num_paths);
1193
- auto group_key =
1194
- thrust::make_transform_iterator(combined.begin(), GroupIdxTransformOp());
1195
- auto values =
1196
- thrust::make_transform_iterator(combined.begin(), BiasTransformOp());
1197
-
1198
- auto out_itr = thrust::reduce_by_key(thrust::cuda::par(alloc), group_key,
1199
- group_key + num_paths, values,
1200
- keys_out.begin(), values_out.begin());
1201
-
1202
- // Write result
1203
- size_t n = out_itr.first - keys_out.begin();
1204
- auto counting = thrust::make_counting_iterator(0llu);
1205
- auto d_keys_out = keys_out.data().get();
1206
- auto d_values_out = values_out.data().get();
1207
- auto d_bias = bias->data().get();
1208
- thrust::for_each_n(counting, n, [=] __device__(size_t idx) {
1209
- d_bias[d_keys_out[idx]] = d_values_out[idx];
1210
- });
1211
- }
1212
-
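ComputeBias above reduces the per-group bias in double precision: elements of each path are combined by multiplying their zero_fractions, the product is scaled by the path's leaf value v, and the results are summed per output group. A host-side sketch of the same arithmetic on a toy element list (the ToyElement layout is an illustration, not the PathElement layout used by this header):

#include <cstddef>
#include <iostream>
#include <vector>

// One toy path element: owning path, output group of that path, its
// zero_fraction, and the leaf value v shared by the whole path.
struct ToyElement {
  size_t path_idx;
  size_t group;
  double zero_fraction;
  double v;
};

// bias[g] = sum over paths feeding group g of (product of zero_fractions) * v.
// Assumes elements of a path are contiguous; the device version sorts first.
std::vector<double> ComputeBiasHost(const std::vector<ToyElement>& elems,
                                    size_t num_groups) {
  std::vector<double> bias(num_groups, 0.0);
  size_t i = 0;
  while (i < elems.size()) {
    size_t j = i;
    double zero_product = 1.0;
    while (j < elems.size() && elems[j].path_idx == elems[i].path_idx) {
      zero_product *= elems[j].zero_fraction;
      ++j;
    }
    bias[elems[i].group] += zero_product * elems[i].v;
    i = j;
  }
  return bias;
}

int main() {
  // Two paths feeding a single output group.
  std::vector<ToyElement> elems{
      {0, 0, 1.0, 2.0}, {0, 0, 0.5, 2.0},  // path 0: 1.0 * 0.5 * 2.0 = 1.0
      {1, 0, 0.25, 4.0},                   // path 1: 0.25 * 4.0 = 1.0
  };
  std::cout << ComputeBiasHost(elems, 1)[0] << "\n";  // prints 2
}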
1213
- } // namespace detail
1214
-
1215
- /*!
1216
- * Compute feature contributions on the GPU given a set of unique paths through
1217
- * a tree ensemble and a dataset. Uses device memory proportional to the tree
1218
- * ensemble size.
1219
- *
1220
- * \exception std::invalid_argument Thrown when an invalid argument error
1221
- * condition occurs. \tparam PathIteratorT Thrust type iterator, may be
1222
- * thrust::device_ptr for device memory, or stl iterator/raw pointer for host
1223
- * memory. \tparam PhiIteratorT Thrust type iterator, may be
1224
- * thrust::device_ptr for device memory, or stl iterator/raw pointer for host
1225
- * memory. Value type must be floating point. \tparam DatasetT User-specified
1226
- * dataset container. \tparam DeviceAllocatorT Optional thrust style
1227
- * allocator.
1228
- *
1229
- * \param X Thin wrapper over a dataset allocated in device memory. X
1230
- * should be trivially copyable as a kernel parameter (i.e. contain only
1231
- * pointers to actual data) and must implement the methods
1232
- * NumRows()/NumCols()/GetElement(size_t row_idx, size_t col_idx) as __device__
1233
- * functions. GetElement may return NaN where the feature value is missing.
1234
- * \param begin Iterator to paths, where separate paths are delineated by
1235
- * PathElement.path_idx. Each unique path should contain 1
1236
- * root with feature_idx = -1 and zero_fraction = 1.0. The ordering of path
1237
- * elements inside a unique path does not matter - the result will be the same.
1238
- * Paths may contain duplicate features. See the PathElement class for more
1239
- * information. \param end Path end iterator. \param num_groups Number
1240
- * of output groups. In multiclass classification the algorithm outputs feature
1241
- * contributions per output class. \param phis_begin Begin iterator for output
1242
- * phis. \param phis_end End iterator for output phis.
1243
- */
1244
- template <typename DeviceAllocatorT = thrust::device_allocator<int>,
1245
- typename DatasetT, typename PathIteratorT, typename PhiIteratorT>
1246
- void GPUTreeShap(DatasetT X, PathIteratorT begin, PathIteratorT end,
1247
- size_t num_groups, PhiIteratorT phis_begin,
1248
- PhiIteratorT phis_end) {
1249
- if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return;
1250
-
1251
- if (size_t(phis_end - phis_begin) <
1252
- X.NumRows() * (X.NumCols() + 1) * num_groups) {
1253
- throw std::invalid_argument(
1254
- "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * "
1255
- "num_groups");
1256
- }
1257
-
1258
- using size_vector = detail::RebindVector<size_t, DeviceAllocatorT>;
1259
- using double_vector = detail::RebindVector<double, DeviceAllocatorT>;
1260
- using path_vector = detail::RebindVector<
1261
- typename std::iterator_traits<PathIteratorT>::value_type,
1262
- DeviceAllocatorT>;
1263
- using split_condition =
1264
- typename std::iterator_traits<PathIteratorT>::value_type::split_type;
1265
-
1266
- // Compute the global bias
1267
- double_vector temp_phi(phis_end - phis_begin, 0.0);
1268
- path_vector device_paths(begin, end);
1269
- double_vector bias(num_groups, 0.0);
1270
- detail::ComputeBias<path_vector, double_vector, DeviceAllocatorT,
1271
- split_condition>(device_paths, &bias);
1272
- auto d_bias = bias.data().get();
1273
- auto d_temp_phi = temp_phi.data().get();
1274
- thrust::for_each_n(thrust::make_counting_iterator(0llu),
1275
- X.NumRows() * num_groups, [=] __device__(size_t idx) {
1276
- size_t group = idx % num_groups;
1277
- size_t row_idx = idx / num_groups;
1278
- d_temp_phi[IndexPhi(row_idx, num_groups, group,
1279
- X.NumCols(), X.NumCols())] +=
1280
- d_bias[group];
1281
- });
1282
-
1283
- path_vector deduplicated_paths;
1284
- size_vector device_bin_segments;
1285
- detail::PreprocessPaths<DeviceAllocatorT, split_condition>(
1286
- &device_paths, &deduplicated_paths, &device_bin_segments);
1287
-
1288
- detail::ComputeShap(X, device_bin_segments, deduplicated_paths, num_groups,
1289
- temp_phi.data().get());
1290
- thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin);
1291
- }
1292
-
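The size check above is the sizing contract GPUTreeShap states for callers: the output range must hold at least NumRows() * (NumCols() + 1) * num_groups values, and the interaction variants further below require NumRows() * (NumCols() + 1) * (NumCols() + 1) * num_groups. A small host-side helper that mirrors those checks before allocating an output buffer (the helper names are ours, not part of this header):

#include <cstddef>
#include <iostream>

// Minimum phi length for GPUTreeShap: one slot per (row, feature-or-bias,
// group) triple; the extra column holds the bias term.
size_t RequiredPhiSize(size_t num_rows, size_t num_cols, size_t num_groups) {
  return num_rows * (num_cols + 1) * num_groups;
}

// Minimum phi length for the interaction variants: one slot per
// (row, feature-or-bias, feature-or-bias, group) tuple.
size_t RequiredInteractionPhiSize(size_t num_rows, size_t num_cols,
                                  size_t num_groups) {
  return num_rows * (num_cols + 1) * (num_cols + 1) * num_groups;
}

int main() {
  // e.g. 1000 rows, 20 features, a single output group.
  std::cout << RequiredPhiSize(1000, 20, 1) << "\n";             // 21000
  std::cout << RequiredInteractionPhiSize(1000, 20, 1) << "\n";  // 441000
}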
1293
- /*!
1294
- * Compute feature interaction contributions on the GPU given a set of unique
1295
- * paths through a tree ensemble and a dataset. Uses device memory
1296
- * proportional to the tree ensemble size.
1297
- *
1298
- * \exception std::invalid_argument Thrown when an invalid argument error
1299
- * condition occurs.
1300
- * \tparam DeviceAllocatorT Optional thrust style allocator.
1301
- * \tparam DatasetT User-specified dataset container.
1302
- * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr
1303
- * for device memory, or stl iterator/raw pointer for
1304
- * host memory.
1305
- * \tparam PhiIteratorT Thrust type iterator, may be thrust::device_ptr
1306
- * for device memory, or stl iterator/raw pointer for
1307
- * host memory. Value type must be floating point.
1308
- *
1309
- * \param X Thin wrapper over a dataset allocated in device memory. X
1310
- * should be trivially copyable as a kernel parameter (i.e.
1311
- * contain only pointers to actual data) and must implement
1312
- * the methods NumRows()/NumCols()/GetElement(size_t row_idx,
1313
- * size_t col_idx) as __device__ functions. GetElement may
1314
- * return NaN where the feature value is missing.
1315
- * \param begin Iterator to paths, where separate paths are delineated by
1316
- * PathElement.path_idx. Each unique path should contain 1
1317
- * root with feature_idx = -1 and zero_fraction = 1.0. The
1318
- * ordering of path elements inside a unique path does not
1319
- * matter - the result will be the same. Paths may contain
1320
- * duplicate features. See the PathElement class for more
1321
- * information.
1322
- * \param end Path end iterator.
1323
- * \param num_groups Number of output groups. In multiclass classification the
1324
- * algorithm outputs feature contributions per output class.
1325
- * \param phis_begin Begin iterator for output phis.
1326
- * \param phis_end End iterator for output phis.
1327
- */
1328
- template <typename DeviceAllocatorT = thrust::device_allocator<int>,
1329
- typename DatasetT, typename PathIteratorT, typename PhiIteratorT>
1330
- void GPUTreeShapInteractions(DatasetT X, PathIteratorT begin, PathIteratorT end,
1331
- size_t num_groups, PhiIteratorT phis_begin,
1332
- PhiIteratorT phis_end) {
1333
- if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return;
1334
- if (size_t(phis_end - phis_begin) <
1335
- X.NumRows() * (X.NumCols() + 1) * (X.NumCols() + 1) * num_groups) {
1336
- throw std::invalid_argument(
1337
- "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * "
1338
- "(X.NumCols() + 1) * "
1339
- "num_groups");
1340
- }
1341
-
1342
- using size_vector = detail::RebindVector<size_t, DeviceAllocatorT>;
1343
- using double_vector = detail::RebindVector<double, DeviceAllocatorT>;
1344
- using path_vector = detail::RebindVector<
1345
- typename std::iterator_traits<PathIteratorT>::value_type,
1346
- DeviceAllocatorT>;
1347
- using split_condition =
1348
- typename std::iterator_traits<PathIteratorT>::value_type::split_type;
1349
-
1350
- // Compute the global bias
1351
- double_vector temp_phi(phis_end - phis_begin, 0.0);
1352
- path_vector device_paths(begin, end);
1353
- double_vector bias(num_groups, 0.0);
1354
- detail::ComputeBias<path_vector, double_vector, DeviceAllocatorT,
1355
- split_condition>(device_paths, &bias);
1356
- auto d_bias = bias.data().get();
1357
- auto d_temp_phi = temp_phi.data().get();
1358
- thrust::for_each_n(
1359
- thrust::make_counting_iterator(0llu), X.NumRows() * num_groups,
1360
- [=] __device__(size_t idx) {
1361
- size_t group = idx % num_groups;
1362
- size_t row_idx = idx / num_groups;
1363
- d_temp_phi[IndexPhiInteractions(row_idx, num_groups, group, X.NumCols(),
1364
- X.NumCols(), X.NumCols())] +=
1365
- d_bias[group];
1366
- });
1367
-
1368
- path_vector deduplicated_paths;
1369
- size_vector device_bin_segments;
1370
- detail::PreprocessPaths<DeviceAllocatorT, split_condition>(
1371
- &device_paths, &deduplicated_paths, &device_bin_segments);
1372
-
1373
- detail::ComputeShapInteractions(X, device_bin_segments, deduplicated_paths,
1374
- num_groups, temp_phi.data().get());
1375
- thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin);
1376
- }
1377
-
1378
- /*!
1379
- * Compute feature interaction contributions using the Shapley Taylor index on
1380
- * the GPU, given a set of unique paths through a tree ensemble and a dataset.
1381
- * Uses device memory proportional to the tree ensemble size.
1382
- *
1383
- * \exception std::invalid_argument Thrown when an invalid argument error
1384
- * condition occurs.
1385
- * \tparam PhiIteratorT Thrust type iterator, may be thrust::device_ptr
1386
- * for device memory, or stl iterator/raw pointer for
1387
- * host memory. Value type must be floating point.
1388
- * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr
1389
- * for device memory, or stl iterator/raw pointer for
1390
- * host memory.
1391
- * \tparam DatasetT User-specified dataset container.
1392
- * \tparam DeviceAllocatorT Optional thrust style allocator.
1393
- *
1394
- * \param X Thin wrapper over a dataset allocated in device memory. X
1395
- * should be trivially copyable as a kernel parameter (i.e.
1396
- * contain only pointers to actual data) and must implement
1397
- * the methods NumRows()/NumCols()/GetElement(size_t row_idx,
1398
- * size_t col_idx) as __device__ functions. GetElement may
1399
- * return NaN where the feature value is missing.
1400
- * \param begin Iterator to paths, where separate paths are delineated by
1401
- * PathElement.path_idx. Each unique path should contain 1
1402
- * root with feature_idx = -1 and zero_fraction = 1.0. The
1403
- * ordering of path elements inside a unique path does not
1404
- * matter - the result will be the same. Paths may contain
1405
- * duplicate features. See the PathElement class for more
1406
- * information.
1407
- * \param end Path end iterator.
1408
- * \param num_groups Number of output groups. In multiclass classification the
1409
- * algorithm outputs feature contributions per output class.
1410
- * \param phis_begin Begin iterator for output phis.
1411
- * \param phis_end End iterator for output phis.
1412
- */
1413
- template <typename DeviceAllocatorT = thrust::device_allocator<int>,
1414
- typename DatasetT, typename PathIteratorT, typename PhiIteratorT>
1415
- void GPUTreeShapTaylorInteractions(DatasetT X, PathIteratorT begin,
1416
- PathIteratorT end, size_t num_groups,
1417
- PhiIteratorT phis_begin,
1418
- PhiIteratorT phis_end) {
1419
- using phis_type = typename std::iterator_traits<PhiIteratorT>::value_type;
1420
- static_assert(std::is_floating_point<phis_type>::value,
1421
- "Phis type must be floating point");
1422
-
1423
- if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return;
1424
-
1425
- if (size_t(phis_end - phis_begin) <
1426
- X.NumRows() * (X.NumCols() + 1) * (X.NumCols() + 1) * num_groups) {
1427
- throw std::invalid_argument(
1428
- "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * "
1429
- "(X.NumCols() + 1) * "
1430
- "num_groups");
1431
- }
1432
-
1433
- using size_vector = detail::RebindVector<size_t, DeviceAllocatorT>;
1434
- using double_vector = detail::RebindVector<double, DeviceAllocatorT>;
1435
- using path_vector = detail::RebindVector<
1436
- typename std::iterator_traits<PathIteratorT>::value_type,
1437
- DeviceAllocatorT>;
1438
- using split_condition =
1439
- typename std::iterator_traits<PathIteratorT>::value_type::split_type;
1440
-
1441
- // Compute the global bias
1442
- double_vector temp_phi(phis_end - phis_begin, 0.0);
1443
- path_vector device_paths(begin, end);
1444
- double_vector bias(num_groups, 0.0);
1445
- detail::ComputeBias<path_vector, double_vector, DeviceAllocatorT,
1446
- split_condition>(device_paths, &bias);
1447
- auto d_bias = bias.data().get();
1448
- auto d_temp_phi = temp_phi.data().get();
1449
- thrust::for_each_n(
1450
- thrust::make_counting_iterator(0llu), X.NumRows() * num_groups,
1451
- [=] __device__(size_t idx) {
1452
- size_t group = idx % num_groups;
1453
- size_t row_idx = idx / num_groups;
1454
- d_temp_phi[IndexPhiInteractions(row_idx, num_groups, group, X.NumCols(),
1455
- X.NumCols(), X.NumCols())] +=
1456
- d_bias[group];
1457
- });
1458
-
1459
- path_vector deduplicated_paths;
1460
- size_vector device_bin_segments;
1461
- detail::PreprocessPaths<DeviceAllocatorT, split_condition>(
1462
- &device_paths, &deduplicated_paths, &device_bin_segments);
1463
-
1464
- detail::ComputeShapTaylorInteractions(X, device_bin_segments,
1465
- deduplicated_paths, num_groups,
1466
- temp_phi.data().get());
1467
- thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin);
1468
- }
1469
-
1470
- /*!
1471
- * Compute feature contributions on the GPU given a set of unique paths through a tree ensemble
1472
- * and a dataset. Uses device memory proportional to the tree ensemble size. This variant
1473
- * implements the interventional tree shap algorithm described here:
1474
- * https://drafts.distill.pub/HughChen/its_blog/
1475
- *
1476
- * It requires a background dataset R.
1477
- *
1478
- * \exception std::invalid_argument Thrown when an invalid argument error condition occurs.
1479
- * \tparam DeviceAllocatorT Optional thrust style allocator.
1480
- * \tparam DatasetT User-specified dataset container.
1481
- * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr for device memory, or
1482
- * stl iterator/raw pointer for host memory.
1483
- *
1484
- * \param X Thin wrapper over a dataset allocated in device memory. X should be trivially
1485
- * copyable as a kernel parameter (i.e. contain only pointers to actual data) and
1486
- * must implement the methods NumRows()/NumCols()/GetElement(size_t row_idx,
1487
- * size_t col_idx) as __device__ functions. GetElement may return NaN where the
1488
- * feature value is missing.
1489
- * \param R Background dataset.
1490
- * \param begin Iterator to paths, where separate paths are delineated by
1491
- * PathElement.path_idx. Each unique path should contain 1 root with feature_idx =
1492
- * -1 and zero_fraction = 1.0. The ordering of path elements inside a unique path
1493
- * does not matter - the result will be the same. Paths may contain duplicate
1494
- * features. See the PathElement class for more information.
1495
- * \param end Path end iterator.
1496
- * \param num_groups Number of output groups. In multiclass classification the algorithm outputs
1497
- * feature contributions per output class.
1498
- * \param phis_begin Begin iterator for output phis.
1499
- * \param phis_end End iterator for output phis.
1500
- */
1501
- template <typename DeviceAllocatorT = thrust::device_allocator<int>,
1502
- typename DatasetT, typename PathIteratorT, typename PhiIteratorT>
1503
- void GPUTreeShapInterventional(DatasetT X, DatasetT R, PathIteratorT begin,
1504
- PathIteratorT end, size_t num_groups,
1505
- PhiIteratorT phis_begin, PhiIteratorT phis_end) {
1506
- if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return;
1507
-
1508
- if (size_t(phis_end - phis_begin) <
1509
- X.NumRows() * (X.NumCols() + 1) * num_groups) {
1510
- throw std::invalid_argument(
1511
- "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * "
1512
- "num_groups");
1513
- }
1514
-
1515
- using size_vector = detail::RebindVector<size_t, DeviceAllocatorT>;
1516
- using double_vector = detail::RebindVector<double, DeviceAllocatorT>;
1517
- using path_vector = detail::RebindVector<
1518
- typename std::iterator_traits<PathIteratorT>::value_type,
1519
- DeviceAllocatorT>;
1520
- using split_condition =
1521
- typename std::iterator_traits<PathIteratorT>::value_type::split_type;
1522
-
1523
- double_vector temp_phi(phis_end - phis_begin, 0.0);
1524
- path_vector device_paths(begin, end);
1525
-
1526
- path_vector deduplicated_paths;
1527
- size_vector device_bin_segments;
1528
- detail::PreprocessPaths<DeviceAllocatorT, split_condition>(
1529
- &device_paths, &deduplicated_paths, &device_bin_segments);
1530
- detail::ComputeShapInterventional(X, R, device_bin_segments,
1531
- deduplicated_paths, num_groups,
1532
- temp_phi.data().get());
1533
- thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin);
1534
- }
1535
- } // namespace gpu_treeshap
lib/shap/cext/tree_shap.h DELETED
@@ -1,1460 +0,0 @@
1
- /**
2
- * Fast recursive computation of SHAP values in trees.
3
- * See https://arxiv.org/abs/1802.03888 for details.
4
- *
5
- * Scott Lundberg, 2018 (independent algorithm courtesy of Hugh Chen 2018)
6
- */
7
-
8
- #include <algorithm>
9
- #include <iostream>
10
- #include <fstream>
11
- #include <stdio.h>
12
- #include <cmath>
13
- #include <ctime>
14
- #if defined(_WIN32) || defined(WIN32)
15
- #include <malloc.h>
16
- #elif defined(__MVS__)
17
- #include <stdlib.h>
18
- #else
19
- #include <alloca.h>
20
- #endif
21
- using namespace std;
22
-
23
- typedef double tfloat;
24
- typedef tfloat (* transform_f)(const tfloat margin, const tfloat y);
25
-
26
- namespace FEATURE_DEPENDENCE {
27
- const unsigned independent = 0;
28
- const unsigned tree_path_dependent = 1;
29
- const unsigned global_path_dependent = 2;
30
- }
31
-
32
- struct TreeEnsemble {
33
- int *children_left;
34
- int *children_right;
35
- int *children_default;
36
- int *features;
37
- tfloat *thresholds;
38
- tfloat *values;
39
- tfloat *node_sample_weights;
40
- unsigned max_depth;
41
- unsigned tree_limit;
42
- tfloat *base_offset;
43
- unsigned max_nodes;
44
- unsigned num_outputs;
45
-
46
- TreeEnsemble() {}
47
- TreeEnsemble(int *children_left, int *children_right, int *children_default, int *features,
48
- tfloat *thresholds, tfloat *values, tfloat *node_sample_weights,
49
- unsigned max_depth, unsigned tree_limit, tfloat *base_offset,
50
- unsigned max_nodes, unsigned num_outputs) :
51
- children_left(children_left), children_right(children_right),
52
- children_default(children_default), features(features), thresholds(thresholds),
53
- values(values), node_sample_weights(node_sample_weights),
54
- max_depth(max_depth), tree_limit(tree_limit),
55
- base_offset(base_offset), max_nodes(max_nodes), num_outputs(num_outputs) {}
56
-
57
- void get_tree(TreeEnsemble &tree, const unsigned i) const {
58
- const unsigned d = i * max_nodes;
59
-
60
- tree.children_left = children_left + d;
61
- tree.children_right = children_right + d;
62
- tree.children_default = children_default + d;
63
- tree.features = features + d;
64
- tree.thresholds = thresholds + d;
65
- tree.values = values + d * num_outputs;
66
- tree.node_sample_weights = node_sample_weights + d;
67
- tree.max_depth = max_depth;
68
- tree.tree_limit = 1;
69
- tree.base_offset = base_offset;
70
- tree.max_nodes = max_nodes;
71
- tree.num_outputs = num_outputs;
72
- }
73
-
74
- bool is_leaf(unsigned pos)const {
75
- return children_left[pos] < 0;
76
- }
77
-
78
- void allocate(unsigned tree_limit_in, unsigned max_nodes_in, unsigned num_outputs_in) {
79
- tree_limit = tree_limit_in;
80
- max_nodes = max_nodes_in;
81
- num_outputs = num_outputs_in;
82
- children_left = new int[tree_limit * max_nodes];
83
- children_right = new int[tree_limit * max_nodes];
84
- children_default = new int[tree_limit * max_nodes];
85
- features = new int[tree_limit * max_nodes];
86
- thresholds = new tfloat[tree_limit * max_nodes];
87
- values = new tfloat[tree_limit * max_nodes * num_outputs];
88
- node_sample_weights = new tfloat[tree_limit * max_nodes];
89
- }
90
-
91
- void free() {
92
- delete[] children_left;
93
- delete[] children_right;
94
- delete[] children_default;
95
- delete[] features;
96
- delete[] thresholds;
97
- delete[] values;
98
- delete[] node_sample_weights;
99
- }
100
- };
101
-
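TreeEnsemble is a plain view over flat arrays of length tree_limit * max_nodes (values additionally times num_outputs); allocate() news those arrays, free() deletes them, and base_offset and max_depth are left untouched by allocate(), so the caller pairs the two calls and fills the fields itself. A minimal usage sketch under those assumptions:

// Sketch only: assumes this header (and its TreeEnsemble struct) is included.
void ensemble_buffers_example() {
  TreeEnsemble trees;
  trees.allocate(/*tree_limit_in=*/10, /*max_nodes_in=*/63, /*num_outputs_in=*/1);
  // ... fill children_left / children_right / children_default / features /
  //     thresholds / values / node_sample_weights, plus base_offset, here ...
  trees.free();  // allocate() uses new[], so free() must be called exactly once
}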
102
- struct ExplanationDataset {
103
- tfloat *X;
104
- bool *X_missing;
105
- tfloat *y;
106
- tfloat *R;
107
- bool *R_missing;
108
- unsigned num_X;
109
- unsigned M;
110
- unsigned num_R;
111
-
112
- ExplanationDataset() {}
113
- ExplanationDataset(tfloat *X, bool *X_missing, tfloat *y, tfloat *R, bool *R_missing, unsigned num_X,
114
- unsigned M, unsigned num_R) :
115
- X(X), X_missing(X_missing), y(y), R(R), R_missing(R_missing), num_X(num_X), M(M), num_R(num_R) {}
116
-
117
- void get_x_instance(ExplanationDataset &instance, const unsigned i) const {
118
- instance.M = M;
119
- instance.X = X + i * M;
120
- instance.X_missing = X_missing + i * M;
121
- instance.num_X = 1;
122
- }
123
- };
124
-
125
-
126
- // data we keep about our decision path
127
- // note that pweight is included for convenience and is not tied with the other attributes
128
- // the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them
129
- struct PathElement {
130
- int feature_index;
131
- tfloat zero_fraction;
132
- tfloat one_fraction;
133
- tfloat pweight;
134
- PathElement() {}
135
- PathElement(int i, tfloat z, tfloat o, tfloat w) :
136
- feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
137
- };
138
-
139
- inline tfloat logistic_transform(const tfloat margin, const tfloat y) {
140
- return 1 / (1 + exp(-margin));
141
- }
142
-
143
- inline tfloat logistic_nlogloss_transform(const tfloat margin, const tfloat y) {
144
- return log(1 + exp(margin)) - y * margin; // y is in {0, 1}
145
- }
146
-
147
- inline tfloat squared_loss_transform(const tfloat margin, const tfloat y) {
148
- return (margin - y) * (margin - y);
149
- }
150
-
151
- namespace MODEL_TRANSFORM {
152
- const unsigned identity = 0;
153
- const unsigned logistic = 1;
154
- const unsigned logistic_nlogloss = 2;
155
- const unsigned squared_loss = 3;
156
- }
157
-
158
- inline transform_f get_transform(unsigned model_transform) {
159
- transform_f transform = NULL;
160
- switch (model_transform) {
161
- case MODEL_TRANSFORM::logistic:
162
- transform = logistic_transform;
163
- break;
164
-
165
- case MODEL_TRANSFORM::logistic_nlogloss:
166
- transform = logistic_nlogloss_transform;
167
- break;
168
-
169
- case MODEL_TRANSFORM::squared_loss:
170
- transform = squared_loss_transform;
171
- break;
172
- }
173
-
174
- return transform;
175
- }
176
-
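get_transform maps a MODEL_TRANSFORM code to a function pointer and returns NULL for the identity case (and any unknown code), so callers guard before invoking it. A short sketch of that pattern, assuming this header is in scope and using made-up margin and label values:

tfloat apply_optional_transform(unsigned model_transform, tfloat margin, tfloat y) {
  transform_f transform = get_transform(model_transform);
  // NULL means "no transform", i.e. report the raw margin.
  return transform == NULL ? margin : transform(margin, y);
}

void transform_example() {
  std::cout << apply_optional_transform(MODEL_TRANSFORM::identity, 0.3, 0.0) << "\n";  // 0.3
  std::cout << apply_optional_transform(MODEL_TRANSFORM::logistic, 0.0, 0.0) << "\n";  // 0.5
}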
177
- inline tfloat *tree_predict(unsigned i, const TreeEnsemble &trees, const tfloat *x, const bool *x_missing) {
178
- const unsigned offset = i * trees.max_nodes;
179
- unsigned node = 0;
180
- while (true) {
181
- const unsigned pos = offset + node;
182
- const unsigned feature = trees.features[pos];
183
-
184
- // we hit a leaf so return a pointer to the values
185
- if (trees.is_leaf(pos)) {
186
- return trees.values + pos * trees.num_outputs;
187
- }
188
-
189
- // otherwise we are at an internal node and need to recurse
190
- if (x_missing[feature]) {
191
- node = trees.children_default[pos];
192
- } else if (x[feature] <= trees.thresholds[pos]) {
193
- node = trees.children_left[pos];
194
- } else {
195
- node = trees.children_right[pos];
196
- }
197
- }
198
- }
199
-
200
- inline void dense_tree_predict(tfloat *out, const TreeEnsemble &trees, const ExplanationDataset &data, unsigned model_transform) {
201
- tfloat *row_out = out;
202
- const tfloat *x = data.X;
203
- const bool *x_missing = data.X_missing;
204
-
205
- // see what transform (if any) we have
206
- transform_f transform = get_transform(model_transform);
207
-
208
- for (unsigned i = 0; i < data.num_X; ++i) {
209
-
210
- // add the base offset
211
- for (unsigned k = 0; k < trees.num_outputs; ++k) {
212
- row_out[k] += trees.base_offset[k];
213
- }
214
-
215
- // add the leaf values from each tree
216
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
217
- const tfloat *leaf_value = tree_predict(j, trees, x, x_missing);
218
-
219
- for (unsigned k = 0; k < trees.num_outputs; ++k) {
220
- row_out[k] += leaf_value[k];
221
- }
222
- }
223
-
224
- // apply any needed transform
225
- if (transform != NULL) {
226
- const tfloat y_i = data.y == NULL ? 0 : data.y[i];
227
- for (unsigned k = 0; k < trees.num_outputs; ++k) {
228
- row_out[k] = transform(row_out[k], y_i);
229
- }
230
- }
231
-
232
- x += data.M;
233
- x_missing += data.M;
234
- row_out += trees.num_outputs;
235
- }
236
- }
237
-
238
- inline void tree_update_weights(unsigned i, TreeEnsemble &trees, const tfloat *x, const bool *x_missing) {
239
- const unsigned offset = i * trees.max_nodes;
240
- unsigned node = 0;
241
- while (true) {
242
- const unsigned pos = offset + node;
243
- const unsigned feature = trees.features[pos];
244
-
245
- // Record that a sample passed through this node
246
- trees.node_sample_weights[pos] += 1.0;
247
-
248
- // we hit a leaf, so stop descending
249
- if (trees.children_left[pos] < 0) break;
250
-
251
- // otherwise we are at an internal node and need to recurse
252
- if (x_missing[feature]) {
253
- node = trees.children_default[pos];
254
- } else if (x[feature] <= trees.thresholds[pos]) {
255
- node = trees.children_left[pos];
256
- } else {
257
- node = trees.children_right[pos];
258
- }
259
- }
260
- }
261
-
262
- inline void dense_tree_update_weights(TreeEnsemble &trees, const ExplanationDataset &data) {
263
- const tfloat *x = data.X;
264
- const bool *x_missing = data.X_missing;
265
-
266
- for (unsigned i = 0; i < data.num_X; ++i) {
267
-
268
- // add the leaf values from each tree
269
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
270
- tree_update_weights(j, trees, x, x_missing);
271
- }
272
-
273
- x += data.M;
274
- x_missing += data.M;
275
- }
276
- }
277
-
278
- inline void tree_saabas(tfloat *out, const TreeEnsemble &tree, const ExplanationDataset &data) {
279
- unsigned curr_node = 0;
280
- unsigned next_node = 0;
281
- while (true) {
282
-
283
- // we hit a leaf and are done
284
- if (tree.children_left[curr_node] < 0) return;
285
-
286
- // otherwise we are at an internal node and need to recurse
287
- const unsigned feature = tree.features[curr_node];
288
- if (data.X_missing[feature]) {
289
- next_node = tree.children_default[curr_node];
290
- } else if (data.X[feature] <= tree.thresholds[curr_node]) {
291
- next_node = tree.children_left[curr_node];
292
- } else {
293
- next_node = tree.children_right[curr_node];
294
- }
295
-
296
- // assign credit to this feature as the difference in values at the current node vs. the next node
297
- for (unsigned i = 0; i < tree.num_outputs; ++i) {
298
- out[feature * tree.num_outputs + i] += tree.values[next_node * tree.num_outputs + i] - tree.values[curr_node * tree.num_outputs + i];
299
- }
300
-
301
- curr_node = next_node;
302
- }
303
- }
304
-
305
- /**
306
- * This runs Tree SHAP with a per tree path conditional dependence assumption.
307
- */
308
- inline void dense_tree_saabas(tfloat *out_contribs, const TreeEnsemble& trees, const ExplanationDataset &data) {
309
- tfloat *instance_out_contribs;
310
- TreeEnsemble tree;
311
- ExplanationDataset instance;
312
-
313
- // build explanation for each sample
314
- for (unsigned i = 0; i < data.num_X; ++i) {
315
- instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
316
- data.get_x_instance(instance, i);
317
-
318
- // aggregate the effect of explaining each tree
319
- // (this works because of the linearity property of Shapley values)
320
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
321
- trees.get_tree(tree, j);
322
- tree_saabas(instance_out_contribs, tree, instance);
323
- }
324
-
325
- // apply the base offset to the bias term
326
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
327
- instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j];
328
- }
329
- }
330
- }
331
-
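The per-tree aggregation above leans on the linearity (additivity) of Shapley values: the attribution for a sum of models is the sum of the per-model attributions, which is why each tree can be explained separately into the same output buffer. In symbols (a standard property, not stated as a formula in this header):

\phi_i\!\left(\sum_{t=1}^{T} f_t,\; x\right) = \sum_{t=1}^{T} \phi_i(f_t, x), \qquad i = 1, \dots, M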
332
-
333
- // extend our decision path with a fraction of one and zero extensions
334
- inline void extend_path(PathElement *unique_path, unsigned unique_depth,
335
- tfloat zero_fraction, tfloat one_fraction, int feature_index) {
336
- unique_path[unique_depth].feature_index = feature_index;
337
- unique_path[unique_depth].zero_fraction = zero_fraction;
338
- unique_path[unique_depth].one_fraction = one_fraction;
339
- unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f);
340
- for (int i = unique_depth - 1; i >= 0; i--) {
341
- unique_path[i + 1].pweight += one_fraction * unique_path[i].pweight * (i + 1)
342
- / static_cast<tfloat>(unique_depth + 1);
343
- unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i)
344
- / static_cast<tfloat>(unique_depth + 1);
345
- }
346
- }
347
-
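extend_path appends one (zero_fraction, one_fraction, feature_index) triple to the decision path and redistributes the permutation weights held in pweight; tree_shap below seeds the path with a dummy root element (fractions 1 and 1, feature index -1) before any real split is added. A tiny driver showing that calling convention, with a made-up buffer size and split fractions:

// Sketch only: assumes the PathElement struct and extend_path from this header.
void extend_path_example() {
  PathElement path[4];
  // Root element, as seeded by tree_shap(): fractions 1/1, feature index -1.
  extend_path(path, /*unique_depth=*/0, /*zero_fraction=*/1.0,
              /*one_fraction=*/1.0, /*feature_index=*/-1);
  // First real split: half the training samples follow this branch, and x does too.
  extend_path(path, /*unique_depth=*/1, /*zero_fraction=*/0.5,
              /*one_fraction=*/1.0, /*feature_index=*/2);
  for (int i = 0; i <= 1; ++i) {
    std::cout << path[i].feature_index << " " << path[i].pweight << "\n";
  }
}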
348
- // undo a previous extension of the decision path
349
- inline void unwind_path(PathElement *unique_path, unsigned unique_depth, unsigned path_index) {
350
- const tfloat one_fraction = unique_path[path_index].one_fraction;
351
- const tfloat zero_fraction = unique_path[path_index].zero_fraction;
352
- tfloat next_one_portion = unique_path[unique_depth].pweight;
353
-
354
- for (int i = unique_depth - 1; i >= 0; --i) {
355
- if (one_fraction != 0) {
356
- const tfloat tmp = unique_path[i].pweight;
357
- unique_path[i].pweight = next_one_portion * (unique_depth + 1)
358
- / static_cast<tfloat>((i + 1) * one_fraction);
359
- next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i)
360
- / static_cast<tfloat>(unique_depth + 1);
361
- } else {
362
- unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1))
363
- / static_cast<tfloat>(zero_fraction * (unique_depth - i));
364
- }
365
- }
366
-
367
- for (unsigned i = path_index; i < unique_depth; ++i) {
368
- unique_path[i].feature_index = unique_path[i+1].feature_index;
369
- unique_path[i].zero_fraction = unique_path[i+1].zero_fraction;
370
- unique_path[i].one_fraction = unique_path[i+1].one_fraction;
371
- }
372
- }
373
-
374
- // determine what the total permutation weight would be if
375
- // we unwound a previous extension in the decision path
376
- inline tfloat unwound_path_sum(const PathElement *unique_path, unsigned unique_depth,
377
- unsigned path_index) {
378
- const tfloat one_fraction = unique_path[path_index].one_fraction;
379
- const tfloat zero_fraction = unique_path[path_index].zero_fraction;
380
- tfloat next_one_portion = unique_path[unique_depth].pweight;
381
- tfloat total = 0;
382
-
383
- if (one_fraction != 0) {
384
- for (int i = unique_depth - 1; i >= 0; --i) {
385
- const tfloat tmp = next_one_portion / static_cast<tfloat>((i + 1) * one_fraction);
386
- total += tmp;
387
- next_one_portion = unique_path[i].pweight - tmp * zero_fraction * (unique_depth - i);
388
- }
389
- } else {
390
- for (int i = unique_depth - 1; i >= 0; --i) {
391
- total += unique_path[i].pweight / (zero_fraction * (unique_depth - i));
392
- }
393
- }
394
- return total * (unique_depth + 1);
395
- }
396
-
397
- // recursive computation of SHAP values for a decision tree
398
- inline void tree_shap_recursive(const unsigned num_outputs, const int *children_left,
399
- const int *children_right,
400
- const int *children_default, const int *features,
401
- const tfloat *thresholds, const tfloat *values,
402
- const tfloat *node_sample_weight,
403
- const tfloat *x, const bool *x_missing, tfloat *phi,
404
- unsigned node_index, unsigned unique_depth,
405
- PathElement *parent_unique_path, tfloat parent_zero_fraction,
406
- tfloat parent_one_fraction, int parent_feature_index,
407
- int condition, unsigned condition_feature,
408
- tfloat condition_fraction) {
409
-
410
- // stop if we have no weight coming down to us
411
- if (condition_fraction == 0) return;
412
-
413
- // extend the unique path
414
- PathElement *unique_path = parent_unique_path + unique_depth + 1;
415
- std::copy(parent_unique_path, parent_unique_path + unique_depth + 1, unique_path);
416
-
417
- if (condition == 0 || condition_feature != static_cast<unsigned>(parent_feature_index)) {
418
- extend_path(unique_path, unique_depth, parent_zero_fraction,
419
- parent_one_fraction, parent_feature_index);
420
- }
421
- const unsigned split_index = features[node_index];
422
-
423
- // leaf node
424
- if (children_right[node_index] < 0) {
425
- for (unsigned i = 1; i <= unique_depth; ++i) {
426
- const tfloat w = unwound_path_sum(unique_path, unique_depth, i);
427
- const PathElement &el = unique_path[i];
428
- const unsigned phi_offset = el.feature_index * num_outputs;
429
- const unsigned values_offset = node_index * num_outputs;
430
- const tfloat scale = w * (el.one_fraction - el.zero_fraction) * condition_fraction;
431
- for (unsigned j = 0; j < num_outputs; ++j) {
432
- phi[phi_offset + j] += scale * values[values_offset + j];
433
- }
434
- }
435
-
436
- // internal node
437
- } else {
438
- // find which branch is "hot" (meaning x would follow it)
439
- unsigned hot_index = 0;
440
- if (x_missing[split_index]) {
441
- hot_index = children_default[node_index];
442
- } else if (x[split_index] <= thresholds[node_index]) {
443
- hot_index = children_left[node_index];
444
- } else {
445
- hot_index = children_right[node_index];
446
- }
447
- const unsigned cold_index = (static_cast<int>(hot_index) == children_left[node_index] ?
448
- children_right[node_index] : children_left[node_index]);
449
- const tfloat w = node_sample_weight[node_index];
450
- const tfloat hot_zero_fraction = node_sample_weight[hot_index] / w;
451
- const tfloat cold_zero_fraction = node_sample_weight[cold_index] / w;
452
- tfloat incoming_zero_fraction = 1;
453
- tfloat incoming_one_fraction = 1;
454
-
455
- // see if we have already split on this feature,
456
- // if so we undo that split so we can redo it for this node
457
- unsigned path_index = 0;
458
- for (; path_index <= unique_depth; ++path_index) {
459
- if (static_cast<unsigned>(unique_path[path_index].feature_index) == split_index) break;
460
- }
461
- if (path_index != unique_depth + 1) {
462
- incoming_zero_fraction = unique_path[path_index].zero_fraction;
463
- incoming_one_fraction = unique_path[path_index].one_fraction;
464
- unwind_path(unique_path, unique_depth, path_index);
465
- unique_depth -= 1;
466
- }
467
-
468
- // divide up the condition_fraction among the recursive calls
469
- tfloat hot_condition_fraction = condition_fraction;
470
- tfloat cold_condition_fraction = condition_fraction;
471
- if (condition > 0 && split_index == condition_feature) {
472
- cold_condition_fraction = 0;
473
- unique_depth -= 1;
474
- } else if (condition < 0 && split_index == condition_feature) {
475
- hot_condition_fraction *= hot_zero_fraction;
476
- cold_condition_fraction *= cold_zero_fraction;
477
- unique_depth -= 1;
478
- }
479
-
480
- tree_shap_recursive(
481
- num_outputs, children_left, children_right, children_default, features, thresholds, values,
482
- node_sample_weight, x, x_missing, phi, hot_index, unique_depth + 1, unique_path,
483
- hot_zero_fraction * incoming_zero_fraction, incoming_one_fraction,
484
- split_index, condition, condition_feature, hot_condition_fraction
485
- );
486
-
487
- tree_shap_recursive(
488
- num_outputs, children_left, children_right, children_default, features, thresholds, values,
489
- node_sample_weight, x, x_missing, phi, cold_index, unique_depth + 1, unique_path,
490
- cold_zero_fraction * incoming_zero_fraction, 0,
491
- split_index, condition, condition_feature, cold_condition_fraction
492
- );
493
- }
494
- }
495
-
496
- inline int compute_expectations(TreeEnsemble &tree, int i = 0, int depth = 0) {
497
- unsigned max_depth = 0;
498
-
499
- if (tree.children_right[i] >= 0) {
500
- const unsigned li = tree.children_left[i];
501
- const unsigned ri = tree.children_right[i];
502
- const unsigned depth_left = compute_expectations(tree, li, depth + 1);
503
- const unsigned depth_right = compute_expectations(tree, ri, depth + 1);
504
- const tfloat left_weight = tree.node_sample_weights[li];
505
- const tfloat right_weight = tree.node_sample_weights[ri];
506
- const unsigned li_offset = li * tree.num_outputs;
507
- const unsigned ri_offset = ri * tree.num_outputs;
508
- const unsigned i_offset = i * tree.num_outputs;
509
- for (unsigned j = 0; j < tree.num_outputs; ++j) {
510
- if ((left_weight == 0) && (right_weight == 0)) {
511
- tree.values[i_offset + j] = 0.0;
512
- } else {
513
- const tfloat v = (left_weight * tree.values[li_offset + j] + right_weight * tree.values[ri_offset + j]) / (left_weight + right_weight);
514
- tree.values[i_offset + j] = v;
515
- }
516
- }
517
- max_depth = std::max(depth_left, depth_right) + 1;
518
- }
519
-
520
- if (depth == 0) tree.max_depth = max_depth;
521
-
522
- return max_depth;
523
- }
524
-
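compute_expectations overwrites each internal node's value with the sample-weight-weighted average of its children's values, so after the pass every node stores the expected leaf value of its subtree, and the maximum depth is returned as a side product. The averaging step in isolation, with hypothetical numbers:

#include <iostream>

// Expected value of a parent node from its children's expected values and the
// number of training samples that reached each child.
double subtree_expectation(double left_value, double left_weight,
                           double right_value, double right_weight) {
  if (left_weight == 0 && right_weight == 0) return 0.0;  // same guard as above
  return (left_weight * left_value + right_weight * right_value) /
         (left_weight + right_weight);
}

int main() {
  // 30 samples reached the left child (value 2.0), 10 the right (value 5.0).
  std::cout << subtree_expectation(2.0, 30, 5.0, 10) << "\n";  // 2.75
}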
525
- inline void tree_shap(const TreeEnsemble& tree, const ExplanationDataset &data,
526
- tfloat *out_contribs, int condition, unsigned condition_feature) {
527
-
528
- // update the reference value with the expected value of the tree's predictions
529
- if (condition == 0) {
530
- for (unsigned j = 0; j < tree.num_outputs; ++j) {
531
- out_contribs[data.M * tree.num_outputs + j] += tree.values[j];
532
- }
533
- }
534
-
535
- // Pre-allocate space for the unique path data
536
- const unsigned maxd = tree.max_depth + 2; // need a bit more space than the max depth
537
- PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
538
-
539
- tree_shap_recursive(
540
- tree.num_outputs, tree.children_left, tree.children_right, tree.children_default,
541
- tree.features, tree.thresholds, tree.values, tree.node_sample_weights, data.X,
542
- data.X_missing, out_contribs, 0, 0, unique_path_data, 1, 1, -1, condition,
543
- condition_feature, 1
544
- );
545
-
546
- delete[] unique_path_data;
547
- }
548
-
549
-
550
- inline unsigned build_merged_tree_recursive(TreeEnsemble &out_tree, const TreeEnsemble &trees,
551
- const tfloat *data, const bool *data_missing, int *data_inds,
552
- const unsigned num_background_data_inds, unsigned num_data_inds,
553
- unsigned M, unsigned row = 0, unsigned i = 0, unsigned pos = 0,
554
- tfloat *leaf_value = NULL) {
555
- //tfloat new_leaf_value[trees.num_outputs];
556
- tfloat *new_leaf_value = (tfloat *) alloca(sizeof(tfloat) * trees.num_outputs); // allocate on the stack
557
- unsigned row_offset = row * trees.max_nodes;
558
-
559
- // we have hit a terminal leaf!!!
560
- if (trees.children_left[row_offset + i] < 0 && row + 1 == trees.tree_limit) {
561
-
562
- // create the leaf node
563
- const tfloat *vals = trees.values + (row * trees.max_nodes + i) * trees.num_outputs;
564
- if (leaf_value == NULL) {
565
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
566
- out_tree.values[pos * trees.num_outputs + j] = vals[j];
567
- }
568
- } else {
569
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
570
- out_tree.values[pos * trees.num_outputs + j] = leaf_value[j] + vals[j];
571
- }
572
- }
573
- out_tree.children_left[pos] = -1;
574
- out_tree.children_right[pos] = -1;
575
- out_tree.children_default[pos] = -1;
576
- out_tree.features[pos] = -1;
577
- out_tree.thresholds[pos] = 0;
578
- out_tree.node_sample_weights[pos] = num_background_data_inds;
579
-
580
- return pos;
581
- }
582
-
583
- // we hit an intermediate leaf (so just add the value to our accumulator and move to the next tree)
584
- if (trees.children_left[row_offset + i] < 0) {
585
-
586
- // accumulate the value of this original leaf so it will land on all eventual terminal leaves
587
- const tfloat *vals = trees.values + (row * trees.max_nodes + i) * trees.num_outputs;
588
- if (leaf_value == NULL) {
589
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
590
- new_leaf_value[j] = vals[j];
591
- }
592
- } else {
593
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
594
- new_leaf_value[j] = leaf_value[j] + vals[j];
595
- }
596
- }
597
- leaf_value = new_leaf_value;
598
-
599
- // move forward to the next tree
600
- row += 1;
601
- row_offset += trees.max_nodes;
602
- i = 0;
603
- }
604
-
605
- // split the data inds by this node's threshold
606
- const tfloat t = trees.thresholds[row_offset + i];
607
- const int f = trees.features[row_offset + i];
608
- const bool right_default = trees.children_default[row_offset + i] == trees.children_right[row_offset + i];
609
- int low_ptr = 0;
610
- int high_ptr = num_data_inds - 1;
611
- unsigned num_left_background_data_inds = 0;
612
- int low_data_ind;
613
- while (low_ptr <= high_ptr) {
614
- low_data_ind = data_inds[low_ptr];
615
- const int data_ind = std::abs(low_data_ind) * M + f;
616
- const bool is_missing = data_missing[data_ind];
617
- if ((!is_missing && data[data_ind] > t) || (right_default && is_missing)) {
618
- data_inds[low_ptr] = data_inds[high_ptr];
619
- data_inds[high_ptr] = low_data_ind;
620
- high_ptr -= 1;
621
- } else {
622
- if (low_data_ind >= 0) ++num_left_background_data_inds; // negative data_inds are not background samples
623
- low_ptr += 1;
624
- }
625
- }
626
- int *left_data_inds = data_inds;
627
- const unsigned num_left_data_inds = low_ptr;
628
- int *right_data_inds = data_inds + low_ptr;
629
- const unsigned num_right_data_inds = num_data_inds - num_left_data_inds;
630
- const unsigned num_right_background_data_inds = num_background_data_inds - num_left_background_data_inds;
631
-
632
- // all the data went right, so we skip creating this node and just recurse right
633
- if (num_left_data_inds == 0) {
634
- return build_merged_tree_recursive(
635
- out_tree, trees, data, data_missing, data_inds,
636
- num_background_data_inds, num_data_inds, M, row,
637
- trees.children_right[row_offset + i], pos, leaf_value
638
- );
639
-
640
- // all the data went left, so we skip creating this node and just recurse left
641
- } else if (num_right_data_inds == 0) {
642
- return build_merged_tree_recursive(
643
- out_tree, trees, data, data_missing, data_inds,
644
- num_background_data_inds, num_data_inds, M, row,
645
- trees.children_left[row_offset + i], pos, leaf_value
646
- );
647
-
648
- // data went both ways so we create this node and recurse down both paths
649
- } else {
650
-
651
- // build the left subtree
652
- const unsigned new_pos = build_merged_tree_recursive(
653
- out_tree, trees, data, data_missing, left_data_inds,
654
- num_left_background_data_inds, num_left_data_inds, M, row,
655
- trees.children_left[row_offset + i], pos + 1, leaf_value
656
- );
657
-
658
- // fill in the data for this node
659
- out_tree.children_left[pos] = pos + 1;
660
- out_tree.children_right[pos] = new_pos + 1;
661
- if (trees.children_left[row_offset + i] == trees.children_default[row_offset + i]) {
662
- out_tree.children_default[pos] = pos + 1;
663
- } else {
664
- out_tree.children_default[pos] = new_pos + 1;
665
- }
666
-
667
- out_tree.features[pos] = trees.features[row_offset + i];
668
- out_tree.thresholds[pos] = trees.thresholds[row_offset + i];
669
- out_tree.node_sample_weights[pos] = num_background_data_inds;
670
-
671
- // build the right subtree
672
- return build_merged_tree_recursive(
673
- out_tree, trees, data, data_missing, right_data_inds,
674
- num_right_background_data_inds, num_right_data_inds, M, row,
675
- trees.children_right[row_offset + i], new_pos + 1, leaf_value
676
- );
677
- }
678
- }
679
-
680
-
681
- inline void build_merged_tree(TreeEnsemble &out_tree, const ExplanationDataset &data, const TreeEnsemble &trees) {
682
-
683
- // create a joint data matrix from both X and R matrices
684
- tfloat *joined_data = new tfloat[(data.num_X + data.num_R) * data.M];
685
- std::copy(data.X, data.X + data.num_X * data.M, joined_data);
686
- std::copy(data.R, data.R + data.num_R * data.M, joined_data + data.num_X * data.M);
687
- bool *joined_data_missing = new bool[(data.num_X + data.num_R) * data.M];
688
- std::copy(data.X_missing, data.X_missing + data.num_X * data.M, joined_data_missing);
689
- std::copy(data.R_missing, data.R_missing + data.num_R * data.M, joined_data_missing + data.num_X * data.M);
690
-
691
- // create a starting array of data indexes we will recursively sort
692
- int *data_inds = new int[data.num_X + data.num_R];
693
- for (unsigned i = 0; i < data.num_X; ++i) data_inds[i] = i;
694
- for (unsigned i = data.num_X; i < data.num_X + data.num_R; ++i) {
695
- data_inds[i] = -i; // a negative index means it won't be recorded as a background sample
696
- }
697
-
698
- build_merged_tree_recursive(
699
- out_tree, trees, joined_data, joined_data_missing, data_inds, data.num_R,
700
- data.num_X + data.num_R, data.M
701
- );
702
-
703
- delete[] joined_data;
704
- delete[] joined_data_missing;
705
- delete[] data_inds;
706
- }
707
-
708
-
709
- // Independent Tree SHAP functions below here
710
- // ------------------------------------------
711
- struct Node {
712
- short cl, cr, cd, pnode, feat, pfeat; // 16-bit (signed short) node fields
713
- float thres, value;
714
- char from_flag;
715
- };
716
-
717
- #define FROM_NEITHER 0
718
- #define FROM_X_NOT_R 1
719
- #define FROM_R_NOT_X 2
720
-
721
- // https://www.geeksforgeeks.org/space-and-time-efficient-binomial-coefficient/
722
- inline int bin_coeff(int n, int k) {
723
- int res = 1;
724
- if (k > n - k)
725
- k = n - k;
726
- for (int i = 0; i < k; ++i) {
727
- res *= (n - i);
728
- res /= (i + 1);
729
- }
730
- return res;
731
- }
732
-
733
- // note this only handles single output models, so multi-output models get explained using multiple passes
734
- inline void tree_shap_indep(const unsigned max_depth, const unsigned num_feats,
735
- const unsigned num_nodes, const tfloat *x,
736
- const bool *x_missing, const tfloat *r,
737
- const bool *r_missing, tfloat *out_contribs,
738
- float *pos_lst, float *neg_lst, signed short *feat_hist,
739
- float *memoized_weights, int *node_stack, Node *mytree) {
740
-
741
- // const bool DEBUG = true;
742
- // ofstream myfile;
743
- // if (DEBUG) {
744
- // myfile.open ("/homes/gws/hughchen/shap/out.txt",fstream::app);
745
- // myfile << "Entering tree_shap_indep\n";
746
- // }
747
- int ns_ctr = 0;
748
- std::fill_n(feat_hist, num_feats, 0);
749
- short node = 0, feat, cl, cr, cd, pnode, pfeat = -1;
750
- short next_xnode = -1, next_rnode = -1;
751
- short next_node = -1, from_child = -1;
752
- float thres, pos_x = 0, neg_x = 0, pos_r = 0, neg_r = 0;
753
- char from_flag;
754
- unsigned M = 0, N = 0;
755
-
756
- Node curr_node = mytree[node];
757
- feat = curr_node.feat;
758
- thres = curr_node.thres;
759
- cl = curr_node.cl;
760
- cr = curr_node.cr;
761
- cd = curr_node.cd;
762
-
763
- // short circuit when this is a stump tree (with no splits)
764
- if (cl < 0) {
765
- out_contribs[num_feats] += curr_node.value;
766
- return;
767
- }
768
-
769
- // if (DEBUG) {
770
- // myfile << "\nNode: " << node << "\n";
771
- // myfile << "x[feat]: " << x[feat] << ", r[feat]: " << r[feat] << "\n";
772
- // myfile << "thres: " << thres << "\n";
773
- // }
774
-
775
- if (x_missing[feat]) {
776
- next_xnode = cd;
777
- } else if (x[feat] > thres) {
778
- next_xnode = cr;
779
- } else if (x[feat] <= thres) {
780
- next_xnode = cl;
781
- }
782
-
783
- if (r_missing[feat]) {
784
- next_rnode = cd;
785
- } else if (r[feat] > thres) {
786
- next_rnode = cr;
787
- } else if (r[feat] <= thres) {
788
- next_rnode = cl;
789
- }
790
-
791
- if (next_xnode != next_rnode) {
792
- mytree[next_xnode].from_flag = FROM_X_NOT_R;
793
- mytree[next_rnode].from_flag = FROM_R_NOT_X;
794
- } else {
795
- mytree[next_xnode].from_flag = FROM_NEITHER;
796
- }
797
-
798
- // Check if x and r go the same way
799
- if (next_xnode == next_rnode) {
800
- next_node = next_xnode;
801
- }
802
-
803
- // If not, go left
804
- if (next_node < 0) {
805
- next_node = cl;
806
- if (next_rnode == next_node) { // rpath
807
- N = N+1;
808
- feat_hist[feat] -= 1;
809
- } else if (next_xnode == next_node) { // xpath
810
- M = M+1;
811
- N = N+1;
812
- feat_hist[feat] += 1;
813
- }
814
- }
815
- node_stack[ns_ctr] = node;
816
- ns_ctr += 1;
817
- while (true) {
818
- node = next_node;
819
- curr_node = mytree[node];
820
- feat = curr_node.feat;
821
- thres = curr_node.thres;
822
- cl = curr_node.cl;
823
- cr = curr_node.cr;
824
- cd = curr_node.cd;
825
- pnode = curr_node.pnode;
826
- pfeat = curr_node.pfeat;
827
- from_flag = curr_node.from_flag;
828
-
829
-
830
-
831
- // if (DEBUG) {
832
- // myfile << "\nNode: " << node << "\n";
833
- // myfile << "N: " << N << ", M: " << M << "\n";
834
- // myfile << "from_flag==FROM_X_NOT_R: " << (from_flag==FROM_X_NOT_R) << "\n";
835
- // myfile << "from_flag==FROM_R_NOT_X: " << (from_flag==FROM_R_NOT_X) << "\n";
836
- // myfile << "from_flag==FROM_NEITHER: " << (from_flag==FROM_NEITHER) << "\n";
837
- // myfile << "feat_hist[feat]: " << feat_hist[feat] << "\n";
838
- // }
839
-
840
- // At a leaf
841
- if (cl < 0) {
842
- // if (DEBUG) {
843
- // myfile << "At a leaf\n";
844
- // }
845
-
846
- if (M == 0) {
847
- out_contribs[num_feats] += mytree[node].value;
848
- }
849
-
850
- // Currently assuming a single output
851
- if (N != 0) {
852
- if (M != 0) {
853
- pos_lst[node] = mytree[node].value * memoized_weights[N + max_depth * (M-1)];
854
- }
855
- if (M != N) {
856
- neg_lst[node] = -mytree[node].value * memoized_weights[N + max_depth * M];
857
- }
858
- }
859
- // if (DEBUG) {
860
- // myfile << "pos_lst[node]: " << pos_lst[node] << "\n";
861
- // myfile << "neg_lst[node]: " << neg_lst[node] << "\n";
862
- // }
863
- // Pop from node_stack
864
- ns_ctr -= 1;
865
- next_node = node_stack[ns_ctr];
866
- from_child = node;
867
- // Unwind
868
- if (feat_hist[pfeat] > 0) {
869
- feat_hist[pfeat] -= 1;
870
- } else if (feat_hist[pfeat] < 0) {
871
- feat_hist[pfeat] += 1;
872
- }
873
- if (feat_hist[pfeat] == 0) {
874
- if (from_flag == FROM_X_NOT_R) {
875
- N = N-1;
876
- M = M-1;
877
- } else if (from_flag == FROM_R_NOT_X) {
878
- N = N-1;
879
- }
880
- }
881
- continue;
882
- }
883
-
884
- const bool x_right = x[feat] > thres;
885
- const bool r_right = r[feat] > thres;
886
-
887
- if (x_missing[feat]) {
888
- next_xnode = cd;
889
- } else if (x_right) {
890
- next_xnode = cr;
891
- } else if (!x_right) {
892
- next_xnode = cl;
893
- }
894
-
895
- if (r_missing[feat]) {
896
- next_rnode = cd;
897
- } else if (r_right) {
898
- next_rnode = cr;
899
- } else if (!r_right) {
900
- next_rnode = cl;
901
- }
902
-
903
- if (next_xnode >= 0) {
904
- if (next_xnode != next_rnode) {
905
- mytree[next_xnode].from_flag = FROM_X_NOT_R;
906
- mytree[next_rnode].from_flag = FROM_R_NOT_X;
907
- } else {
908
- mytree[next_xnode].from_flag = FROM_NEITHER;
909
- }
910
- }
911
-
912
- // Arriving at node from parent
913
- if (from_child == -1) {
914
- // if (DEBUG) {
915
- // myfile << "Arriving at node from parent\n";
916
- // }
917
- node_stack[ns_ctr] = node;
918
- ns_ctr += 1;
919
- next_node = -1;
920
-
921
- // if (DEBUG) {
922
- // myfile << "feat_hist[feat]" << feat_hist[feat] << "\n";
923
- // }
924
- // Feature is set upstream
925
- if (feat_hist[feat] > 0) {
926
- next_node = next_xnode;
927
- feat_hist[feat] += 1;
928
- } else if (feat_hist[feat] < 0) {
929
- next_node = next_rnode;
930
- feat_hist[feat] -= 1;
931
- }
932
-
933
- // x and r go the same way
934
- if (next_node < 0) {
935
- if (next_xnode == next_rnode) {
936
- next_node = next_xnode;
937
- }
938
- }
939
-
940
- // Go down one path
941
- if (next_node >= 0) {
942
- continue;
943
- }
944
-
945
- // Go down both paths, but go left first
946
- next_node = cl;
947
- if (next_rnode == next_node) {
948
- N = N+1;
949
- feat_hist[feat] -= 1;
950
- } else if (next_xnode == next_node) {
951
- M = M+1;
952
- N = N+1;
953
- feat_hist[feat] += 1;
954
- }
955
- from_child = -1;
956
- continue;
957
- }
958
-
959
- // Arriving at node from child
960
- if (from_child != -1) {
961
- // if (DEBUG) {
962
- // myfile << "Arriving at node from child\n";
963
- // }
964
- next_node = -1;
965
- // Check if we should unroll immediately
966
- if ((next_rnode == next_xnode) || (feat_hist[feat] != 0)) {
967
- next_node = pnode;
968
- }
969
-
970
- // Came from a single path, so unroll
971
- if (next_node >= 0) {
972
- // if (DEBUG) {
973
- // myfile << "Came from a single path, so unroll\n";
974
- // }
975
- // At the root node
976
- if (node == 0) {
977
- break;
978
- }
979
- // Update and unroll
980
- pos_lst[node] = pos_lst[from_child];
981
- neg_lst[node] = neg_lst[from_child];
982
-
983
- // if (DEBUG) {
984
- // myfile << "pos_lst[node]: " << pos_lst[node] << "\n";
985
- // myfile << "neg_lst[node]: " << neg_lst[node] << "\n";
986
- // }
987
- from_child = node;
988
- ns_ctr -= 1;
989
-
990
- // Unwind
991
- if (feat_hist[pfeat] > 0) {
992
- feat_hist[pfeat] -= 1;
993
- } else if (feat_hist[pfeat] < 0) {
994
- feat_hist[pfeat] += 1;
995
- }
996
- if (feat_hist[pfeat] == 0) {
997
- if (from_flag == FROM_X_NOT_R) {
998
- N = N-1;
999
- M = M-1;
1000
- } else if (from_flag == FROM_R_NOT_X) {
1001
- N = N-1;
1002
- }
1003
- }
1004
- continue;
1005
- // Go right - Arriving from the left child
1006
- } else if (from_child == cl) {
1007
- // if (DEBUG) {
1008
- // myfile << "Go right - Arriving from the left child\n";
1009
- // }
1010
- node_stack[ns_ctr] = node;
1011
- ns_ctr += 1;
1012
- next_node = cr;
1013
- if (next_xnode == next_node) {
1014
- M = M+1;
1015
- N = N+1;
1016
- feat_hist[feat] += 1;
1017
- } else if (next_rnode == next_node) {
1018
- N = N+1;
1019
- feat_hist[feat] -= 1;
1020
- }
1021
- from_child = -1;
1022
- continue;
1023
- // Compute stuff and unroll - Arriving from the right child
1024
- } else if (from_child == cr) {
1025
- // if (DEBUG) {
1026
- // myfile << "Compute stuff and unroll - Arriving from the right child\n";
1027
- // }
1028
- pos_x = 0;
1029
- neg_x = 0;
1030
- pos_r = 0;
1031
- neg_r = 0;
1032
- if ((next_xnode == cr) && (next_rnode == cl)) {
1033
- pos_x = pos_lst[cr];
1034
- neg_x = neg_lst[cr];
1035
- pos_r = pos_lst[cl];
1036
- neg_r = neg_lst[cl];
1037
- } else if ((next_xnode == cl) && (next_rnode == cr)) {
1038
- pos_x = pos_lst[cl];
1039
- neg_x = neg_lst[cl];
1040
- pos_r = pos_lst[cr];
1041
- neg_r = neg_lst[cr];
1042
- }
1043
- // out_contribs needs to have been initialized as all zeros
1044
- // if (pos_x + neg_r != 0) {
1045
- // std::cout << "val " << pos_x + neg_r << "\n";
1046
- // }
1047
- out_contribs[feat] += pos_x + neg_r;
1048
- pos_lst[node] = pos_x + pos_r;
1049
- neg_lst[node] = neg_x + neg_r;
1050
-
1051
- // if (DEBUG) {
1052
- // myfile << "out_contribs[feat]: " << out_contribs[feat] << "\n";
1053
- // myfile << "pos_lst[node]: " << pos_lst[node] << "\n";
1054
- // myfile << "neg_lst[node]: " << neg_lst[node] << "\n";
1055
- // }
1056
-
1057
- // Check if at root
1058
- if (node == 0) {
1059
- break;
1060
- }
1061
-
1062
- // Pop
1063
- ns_ctr -= 1;
1064
- next_node = node_stack[ns_ctr];
1065
- from_child = node;
1066
-
1067
- // Unwind
1068
- if (feat_hist[pfeat] > 0) {
1069
- feat_hist[pfeat] -= 1;
1070
- } else if (feat_hist[pfeat] < 0) {
1071
- feat_hist[pfeat] += 1;
1072
- }
1073
- if (feat_hist[pfeat] == 0) {
1074
- if (from_flag == FROM_X_NOT_R) {
1075
- N = N-1;
1076
- M = M-1;
1077
- } else if (from_flag == FROM_R_NOT_X) {
1078
- N = N-1;
1079
- }
1080
- }
1081
- continue;
1082
- }
1083
- }
1084
- }
1085
- // if (DEBUG) {
1086
- // myfile.close();
1087
- // }
1088
- }
1089
-
1090
-
1091
- inline void print_progress_bar(tfloat &last_print, tfloat start_time, unsigned i, unsigned total_count) {
1092
- const tfloat elapsed_seconds = difftime(time(NULL), start_time);
1093
-
1094
- if (elapsed_seconds > 10 && elapsed_seconds - last_print > 0.5) {
1095
- const tfloat fraction = static_cast<tfloat>(i) / total_count;
1096
- const double total_seconds = elapsed_seconds / fraction;
1097
- last_print = elapsed_seconds;
1098
-
1099
- PySys_WriteStderr(
1100
- "\r%3.0f%%|%.*s%.*s| %d/%d [%02d:%02d<%02d:%02d] ",
1101
- fraction * 100, int(0.5 + fraction*20), "===================",
1102
- 20-int(0.5 + fraction*20), " ",
1103
- i, total_count,
1104
- int(elapsed_seconds/60), int(elapsed_seconds) % 60,
1105
- int((total_seconds - elapsed_seconds)/60), int(total_seconds - elapsed_seconds) % 60
1106
- );
1107
-
1108
- // Get handle to python stderr file and flush it (https://mail.python.org/pipermail/python-list/2004-November/294912.html)
1109
- PyObject *pyStderr = PySys_GetObject("stderr");
1110
- if (pyStderr) {
1111
- PyObject *result = PyObject_CallMethod(pyStderr, "flush", NULL);
1112
- Py_XDECREF(result);
1113
- }
1114
- }
1115
- }
1116
-
1117
- /**
1118
- * Runs Tree SHAP with feature independence assumptions on dense data.
1119
- */
1120
- inline void dense_independent(const TreeEnsemble& trees, const ExplanationDataset &data,
1121
- tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
1122
-
1123
- // reformat the trees for faster access
1124
- Node *node_trees = new Node[trees.tree_limit * trees.max_nodes];
1125
- for (unsigned i = 0; i < trees.tree_limit; ++i) {
1126
- Node *node_tree = node_trees + i * trees.max_nodes;
1127
- for (unsigned j = 0; j < trees.max_nodes; ++j) {
1128
- const unsigned en_ind = i * trees.max_nodes + j;
1129
- node_tree[j].cl = trees.children_left[en_ind];
1130
- node_tree[j].cr = trees.children_right[en_ind];
1131
- node_tree[j].cd = trees.children_default[en_ind];
1132
- if (j == 0) {
1133
- node_tree[j].pnode = 0;
1134
- }
1135
- if (trees.children_left[en_ind] >= 0) { // relies on all unused entries having negative values in them
1136
- node_tree[trees.children_left[en_ind]].pnode = j;
1137
- node_tree[trees.children_left[en_ind]].pfeat = trees.features[en_ind];
1138
- }
1139
- if (trees.children_right[en_ind] >= 0) { // relies on all unused entries having negative values in them
1140
- node_tree[trees.children_right[en_ind]].pnode = j;
1141
- node_tree[trees.children_right[en_ind]].pfeat = trees.features[en_ind];
1142
- }
1143
-
1144
- node_tree[j].thres = trees.thresholds[en_ind];
1145
- node_tree[j].feat = trees.features[en_ind];
1146
- }
1147
- }
1148
-
1149
- // preallocate arrays needed by the algorithm
1150
- float *pos_lst = new float[trees.max_nodes];
1151
- float *neg_lst = new float[trees.max_nodes];
1152
- int *node_stack = new int[(unsigned) trees.max_depth];
1153
- signed short *feat_hist = new signed short[data.M];
1154
- tfloat *tmp_out_contribs = new tfloat[(data.M + 1)];
1155
-
1156
- // precompute all the weight coefficients
1157
- float *memoized_weights = new float[(trees.max_depth+1) * (trees.max_depth+1)];
1158
- for (unsigned n = 0; n <= trees.max_depth; ++n) {
1159
- for (unsigned m = 0; m <= trees.max_depth; ++m) {
1160
- memoized_weights[n + trees.max_depth * m] = 1.0 / (n * bin_coeff(n-1, m));
1161
- }
1162
- }
1163
-
1164
- // compute the explanations for each sample
1165
- tfloat *instance_out_contribs;
1166
- tfloat rescale_factor = 1.0;
1167
- tfloat margin_x = 0;
1168
- tfloat margin_r = 0;
1169
- time_t start_time = time(NULL);
1170
- tfloat last_print = 0;
1171
- for (unsigned oind = 0; oind < trees.num_outputs; ++oind) {
1172
- // set the values in the reformatted tree to the current output index
1173
- for (unsigned i = 0; i < trees.tree_limit; ++i) {
1174
- Node *node_tree = node_trees + i * trees.max_nodes;
1175
- for (unsigned j = 0; j < trees.max_nodes; ++j) {
1176
- const unsigned en_ind = i * trees.max_nodes + j;
1177
- node_tree[j].value = trees.values[en_ind * trees.num_outputs + oind];
1178
- }
1179
- }
1180
-
1181
- // loop over all the samples
1182
- for (unsigned i = 0; i < data.num_X; ++i) {
1183
- const tfloat *x = data.X + i * data.M;
1184
- const bool *x_missing = data.X_missing + i * data.M;
1185
- instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
1186
- const tfloat y_i = data.y == NULL ? 0 : data.y[i];
1187
-
1188
- print_progress_bar(last_print, start_time, oind * data.num_X + i, data.num_X * trees.num_outputs);
1189
-
1190
- // compute the model's margin output for x
1191
- if (transform != NULL) {
1192
- margin_x = trees.base_offset[oind];
1193
- for (unsigned k = 0; k < trees.tree_limit; ++k) {
1194
- margin_x += tree_predict(k, trees, x, x_missing)[oind];
1195
- }
1196
- }
1197
-
1198
- for (unsigned j = 0; j < data.num_R; ++j) {
1199
- const tfloat *r = data.R + j * data.M;
1200
- const bool *r_missing = data.R_missing + j * data.M;
1201
- std::fill_n(tmp_out_contribs, (data.M + 1), 0);
1202
-
1203
- // compute the model's margin output for r
1204
- if (transform != NULL) {
1205
- margin_r = trees.base_offset[oind];
1206
- for (unsigned k = 0; k < trees.tree_limit; ++k) {
1207
- margin_r += tree_predict(k, trees, r, r_missing)[oind];
1208
- }
1209
- }
1210
-
1211
- for (unsigned k = 0; k < trees.tree_limit; ++k) {
1212
- tree_shap_indep(
1213
- trees.max_depth, data.M, trees.max_nodes, x, x_missing, r, r_missing,
1214
- tmp_out_contribs, pos_lst, neg_lst, feat_hist, memoized_weights,
1215
- node_stack, node_trees + k * trees.max_nodes
1216
- );
1217
- }
1218
-
1219
- // compute the rescale factor
1220
- if (transform != NULL) {
1221
- if (margin_x == margin_r) {
1222
- rescale_factor = 1.0;
1223
- } else {
1224
- rescale_factor = (*transform)(margin_x, y_i) - (*transform)(margin_r, y_i);
1225
- rescale_factor /= margin_x - margin_r;
1226
- }
1227
- }
1228
-
1229
- // add the effect of the current reference to our running total
1230
- // this is where we can do per reference scaling for non-linear transformations
1231
- for (unsigned k = 0; k < data.M; ++k) {
1232
- instance_out_contribs[k * trees.num_outputs + oind] += tmp_out_contribs[k] * rescale_factor;
1233
- }
1234
-
1235
- // Add the base offset
1236
- if (transform != NULL) {
1237
- instance_out_contribs[data.M * trees.num_outputs + oind] += (*transform)(trees.base_offset[oind] + tmp_out_contribs[data.M], 0);
1238
- } else {
1239
- instance_out_contribs[data.M * trees.num_outputs + oind] += trees.base_offset[oind] + tmp_out_contribs[data.M];
1240
- }
1241
- }
1242
-
1243
- // average the results over all the references.
1244
- for (unsigned j = 0; j < (data.M + 1); ++j) {
1245
- instance_out_contribs[j * trees.num_outputs + oind] /= data.num_R;
1246
- }
1247
-
1248
- // apply the base offset to the bias term
1249
- // for (unsigned j = 0; j < trees.num_outputs; ++j) {
1250
- // instance_out_contribs[data.M * trees.num_outputs + j] += (*transform)(trees.base_offset[j], 0);
1251
- // }
1252
- }
1253
- }
1254
-
1255
- delete[] tmp_out_contribs;
1256
- delete[] node_trees;
1257
- delete[] pos_lst;
1258
- delete[] neg_lst;
1259
- delete[] node_stack;
1260
- delete[] feat_hist;
1261
- delete[] memoized_weights;
1262
- }
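For context on the hunk above: dense_independent explains one row x against every background reference row r, rescales each reference's per-feature contributions when a nonlinear link is supplied, and averages over the reference set. A minimal NumPy sketch of that outer loop, where `margin` and `explain_one_reference` are hypothetical placeholders standing in for the C++ `tree_predict` and `tree_shap_indep` calls (none of these names are part of the shap API):

    import numpy as np

    def interventional_shap(x, R, trees, transform=None):
        # one slot per feature plus a final bias slot
        total = np.zeros(len(x) + 1)
        margin_x = margin(trees, x)                        # placeholder: raw margin for x
        for r in R:                                        # loop over background references
            contribs = explain_one_reference(trees, x, r)  # placeholder: per-reference SHAP values
            scale = 1.0
            if transform is not None:
                margin_r = margin(trees, r)                # placeholder: raw margin for r
                if margin_x != margin_r:
                    # ratio of transformed to raw margin difference, as in the C++ above
                    scale = (transform(margin_x) - transform(margin_r)) / (margin_x - margin_r)
            total[:-1] += contribs[:-1] * scale            # feature effects are rescaled
            total[-1] += contribs[-1]                      # bias slot accumulates unscaled
        return total / len(R)                              # average over all references
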
1263
-
1264
-
1265
- /**
1266
- * This runs Tree SHAP with a per tree path conditional dependence assumption.
1267
- */
1268
- inline void dense_tree_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
1269
- tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
1270
- tfloat *instance_out_contribs;
1271
- TreeEnsemble tree;
1272
- ExplanationDataset instance;
1273
-
1274
- // build explanation for each sample
1275
- for (unsigned i = 0; i < data.num_X; ++i) {
1276
- instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
1277
- data.get_x_instance(instance, i);
1278
-
1279
- // aggregate the effect of explaining each tree
1280
- // (this works because of the linearity property of Shapley values)
1281
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
1282
- trees.get_tree(tree, j);
1283
- tree_shap(tree, instance, instance_out_contribs, 0, 0);
1284
- }
1285
-
1286
- // apply the base offset to the bias term
1287
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
1288
- instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j];
1289
- }
1290
- }
1291
- }
1292
-
1293
- // phi = np.zeros((self._current_X.shape[1] + 1, self._current_X.shape[1] + 1, self.n_outputs))
1294
- // phi_diag = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
1295
- // for t in range(self.tree_limit):
1296
- // self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_diag)
1297
- // for j in self.trees[t].unique_features:
1298
- // phi_on = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
1299
- // phi_off = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
1300
- // self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_on, 1, j)
1301
- // self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_off, -1, j)
1302
- // phi[j] += np.true_divide(np.subtract(phi_on,phi_off),2.0)
1303
- // phi_diag[j] -= np.sum(np.true_divide(np.subtract(phi_on,phi_off),2.0))
1304
- // for j in range(self._current_X.shape[1]+1):
1305
- // phi[j][j] = phi_diag[j]
1306
- // phi /= self.tree_limit
1307
- // return phi
1308
-
1309
- inline void dense_tree_interactions_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
1310
- tfloat *out_contribs,
1311
- tfloat transform(const tfloat, const tfloat)) {
1312
-
1313
- // build a list of all the unique features in each tree
1314
- int amount_of_unique_features = min(data.M, trees.max_nodes);
1315
- int *unique_features = new int[trees.tree_limit * amount_of_unique_features];
1316
- std::fill(unique_features, unique_features + trees.tree_limit * amount_of_unique_features, -1);
1317
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
1318
- const int *features_row = trees.features + j * trees.max_nodes;
1319
- int *unique_features_row = unique_features + j * amount_of_unique_features;
1320
- for (unsigned k = 0; k < trees.max_nodes; ++k) {
1321
- for (unsigned l = 0; l < amount_of_unique_features; ++l) {
1322
- if (features_row[k] == unique_features_row[l]) break;
1323
- if (unique_features_row[l] < 0) {
1324
- unique_features_row[l] = features_row[k];
1325
- break;
1326
- }
1327
- }
1328
- }
1329
- }
1330
-
1331
- // build an interaction explanation for each sample
1332
- tfloat *instance_out_contribs;
1333
- TreeEnsemble tree;
1334
- ExplanationDataset instance;
1335
- const unsigned contrib_row_size = (data.M + 1) * trees.num_outputs;
1336
- tfloat *diag_contribs = new tfloat[contrib_row_size];
1337
- tfloat *on_contribs = new tfloat[contrib_row_size];
1338
- tfloat *off_contribs = new tfloat[contrib_row_size];
1339
- for (unsigned i = 0; i < data.num_X; ++i) {
1340
- instance_out_contribs = out_contribs + i * (data.M + 1) * contrib_row_size;
1341
- data.get_x_instance(instance, i);
1342
-
1343
- // aggregate the effect of explaining each tree
1344
- // (this works because of the linearity property of Shapley values)
1345
- std::fill(diag_contribs, diag_contribs + contrib_row_size, 0);
1346
- for (unsigned j = 0; j < trees.tree_limit; ++j) {
1347
- trees.get_tree(tree, j);
1348
- tree_shap(tree, instance, diag_contribs, 0, 0);
1349
-
1350
- const int *unique_features_row = unique_features + j * amount_of_unique_features;
1351
- for (unsigned k = 0; k < amount_of_unique_features; ++k) {
1352
- const int ind = unique_features_row[k];
1353
- if (ind < 0) break; // < 0 means we have seen all the features for this tree
1354
-
1355
- // compute the shap value with this feature held on and off
1356
- std::fill(on_contribs, on_contribs + contrib_row_size, 0);
1357
- std::fill(off_contribs, off_contribs + contrib_row_size, 0);
1358
- tree_shap(tree, instance, on_contribs, 1, ind);
1359
- tree_shap(tree, instance, off_contribs, -1, ind);
1360
-
1361
- // save the difference between on and off as the interaction value
1362
- for (unsigned l = 0; l < contrib_row_size; ++l) {
1363
- const tfloat val = (on_contribs[l] - off_contribs[l]) / 2;
1364
- instance_out_contribs[ind * contrib_row_size + l] += val;
1365
- diag_contribs[l] -= val;
1366
- }
1367
- }
1368
- }
1369
-
1370
- // set the diagonal
1371
- for (unsigned j = 0; j < data.M + 1; ++j) {
1372
- const unsigned offset = j * contrib_row_size + j * trees.num_outputs;
1373
- for (unsigned k = 0; k < trees.num_outputs; ++k) {
1374
- instance_out_contribs[offset + k] = diag_contribs[j * trees.num_outputs + k];
1375
- }
1376
- }
1377
-
1378
- // apply the base offset to the bias term
1379
- const unsigned last_ind = (data.M * (data.M + 1) + data.M) * trees.num_outputs;
1380
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
1381
- instance_out_contribs[last_ind + j] += trees.base_offset[j];
1382
- }
1383
- }
1384
-
1385
- delete[] diag_contribs;
1386
- delete[] on_contribs;
1387
- delete[] off_contribs;
1388
- delete[] unique_features;
1389
- }
1390
-
1391
- /**
1392
- * This runs Tree SHAP with a global path conditional dependence assumption.
1393
- *
1394
- * By first merging all the trees in a tree ensemble into an equivalent single tree
1395
- * this method allows arbitrary marginal transformations and also ensures that all the
1396
- * evaluations of the model are consistent with some training data point.
1397
- */
1398
- inline void dense_global_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
1399
- tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
1400
-
1401
- // allocate space for our new merged tree (we save enough room to totally split all samples if need be)
1402
- TreeEnsemble merged_tree;
1403
- merged_tree.allocate(1, (data.num_X + data.num_R) * 2, trees.num_outputs);
1404
-
1405
- // collapse the ensemble of trees into a single tree that has the same behavior
1406
- // for all the X and R samples in the dataset
1407
- build_merged_tree(merged_tree, data, trees);
1408
-
1409
- // compute the expected value and depth of the new merged tree
1410
- compute_expectations(merged_tree);
1411
-
1412
- // explain each sample using our new merged tree
1413
- ExplanationDataset instance;
1414
- tfloat *instance_out_contribs;
1415
- for (unsigned i = 0; i < data.num_X; ++i) {
1416
- instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
1417
- data.get_x_instance(instance, i);
1418
-
1419
- // since we now just have a single merged tree we can just use the tree_path_dependent algorithm
1420
- tree_shap(merged_tree, instance, instance_out_contribs, 0, 0);
1421
-
1422
- // apply the base offset to the bias term
1423
- for (unsigned j = 0; j < trees.num_outputs; ++j) {
1424
- instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j];
1425
- }
1426
- }
1427
-
1428
- merged_tree.free();
1429
- }
1430
-
1431
-
1432
- /**
1433
- * The main method for computing Tree SHAP on models using dense data.
1434
- */
1435
- inline void dense_tree_shap(const TreeEnsemble& trees, const ExplanationDataset &data, tfloat *out_contribs,
1436
- const int feature_dependence, unsigned model_transform, bool interactions) {
1437
-
1438
- // see what transform (if any) we have
1439
- transform_f transform = get_transform(model_transform);
1440
-
1441
- // dispatch to the correct algorithm handler
1442
- switch (feature_dependence) {
1443
- case FEATURE_DEPENDENCE::independent:
1444
- if (interactions) {
1445
- std::cerr << "FEATURE_DEPENDENCE::independent does not support interactions!\n";
1446
- } else dense_independent(trees, data, out_contribs, transform);
1447
- return;
1448
-
1449
- case FEATURE_DEPENDENCE::tree_path_dependent:
1450
- if (interactions) dense_tree_interactions_path_dependent(trees, data, out_contribs, transform);
1451
- else dense_tree_path_dependent(trees, data, out_contribs, transform);
1452
- return;
1453
-
1454
- case FEATURE_DEPENDENCE::global_path_dependent:
1455
- if (interactions) {
1456
- std::cerr << "FEATURE_DEPENDENCE::global_path_dependent does not support interactions!\n";
1457
- } else dense_global_path_dependent(trees, data, out_contribs, transform);
1458
- return;
1459
- }
1460
- }
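The dispatcher above is the C entry point behind shap's TreeExplainer: the independent branch needs a background dataset, and interaction values are only wired up for tree_path_dependent, as the error messages note. An illustrative sketch of how these branches are reached from the Python side, assuming xgboost is installed (the model choice is incidental):

    import shap
    import xgboost

    X, y = shap.datasets.adult()
    model = xgboost.XGBClassifier(n_estimators=50, max_depth=3).fit(X, y.astype(int))

    # feature_perturbation="interventional" -> dense_independent (background data required)
    ex_ind = shap.TreeExplainer(model, data=X.iloc[:100], feature_perturbation="interventional")
    phi_ind = ex_ind.shap_values(X.iloc[:10])

    # feature_perturbation="tree_path_dependent" -> dense_tree_path_dependent,
    # the only mode that also supports interaction values
    ex_path = shap.TreeExplainer(model, feature_perturbation="tree_path_dependent")
    phi_path = ex_path.shap_values(X.iloc[:10])
    phi_inter = ex_path.shap_interaction_values(X.iloc[:10])
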
lib/shap/datasets.py DELETED
@@ -1,309 +0,0 @@
1
- import os
2
- from urllib.request import urlretrieve
3
-
4
- import numpy as np
5
- import pandas as pd
6
- import sklearn.datasets
7
-
8
- import shap
9
-
10
- github_data_url = "https://github.com/shap/shap/raw/master/data/"
11
-
12
-
13
- def imagenet50(display=False, resolution=224, n_points=None):
14
- """ This is a set of 50 images representative of ImageNet images.
15
-
16
- This dataset was collected by randomly finding a working ImageNet link and then pasting the
17
- original ImageNet image into Google image search restricted to images licensed for reuse. A
18
- similar image (now with rights to reuse) was downloaded as a rough replacement for the original
19
- ImageNet image. The point is to have a random sample of ImageNet for use as a background
20
- distribution for explaining models trained on ImageNet data.
21
-
22
- Note that because the images are only rough replacements the labels might no longer be correct.
23
- """
24
-
25
- prefix = github_data_url + "imagenet50_"
26
- X = np.load(cache(f"{prefix}{resolution}x{resolution}.npy")).astype(np.float32)
27
- y = np.loadtxt(cache(f"{prefix}labels.csv"))
28
-
29
- if n_points is not None:
30
- X = shap.utils.sample(X, n_points, random_state=0)
31
- y = shap.utils.sample(y, n_points, random_state=0)
32
-
33
- return X, y
34
-
35
-
36
- def california(display=False, n_points=None):
37
- """ Return the california housing data in a nice package. """
38
-
39
- d = sklearn.datasets.fetch_california_housing()
40
- df = pd.DataFrame(data=d.data, columns=d.feature_names)
41
- target = d.target
42
-
43
- if n_points is not None:
44
- df = shap.utils.sample(df, n_points, random_state=0)
45
- target = shap.utils.sample(target, n_points, random_state=0)
46
-
47
- return df, target
48
-
49
-
50
- def linnerud(display=False, n_points=None):
51
- """ Return the linnerud data in a nice package (multi-target regression). """
52
-
53
- d = sklearn.datasets.load_linnerud()
54
- X = pd.DataFrame(d.data, columns=d.feature_names)
55
- y = pd.DataFrame(d.target, columns=d.target_names)
56
-
57
- if n_points is not None:
58
- X = shap.utils.sample(X, n_points, random_state=0)
59
- y = shap.utils.sample(y, n_points, random_state=0)
60
-
61
- return X, y
62
-
63
-
64
- def imdb(display=False, n_points=None):
65
- """ Return the classic IMDB sentiment analysis training data in a nice package.
66
-
67
- Full data is at: http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
68
- Paper to cite when using the data is: http://www.aclweb.org/anthology/P11-1015
69
- """
70
-
71
- with open(cache(github_data_url + "imdb_train.txt"), encoding="utf-8") as f:
72
- data = f.readlines()
73
- y = np.ones(25000, dtype=bool)
74
- y[:12500] = 0
75
-
76
- if n_points is not None:
77
- data = shap.utils.sample(data, n_points, random_state=0)
78
- y = shap.utils.sample(y, n_points, random_state=0)
79
-
80
- return data, y
81
-
82
-
83
- def communitiesandcrime(display=False, n_points=None):
84
- """ Predict total number of non-violent crimes per 100K popuation.
85
-
86
- This dataset is from the classic UCI Machine Learning repository:
87
- https://archive.ics.uci.edu/ml/datasets/Communities+and+Crime+Unnormalized
88
- """
89
-
90
- raw_data = pd.read_csv(
91
- cache(github_data_url + "CommViolPredUnnormalizedData.txt"),
92
- na_values="?"
93
- )
94
-
95
- # find the indices where the total violent crimes are known
96
- valid_inds = np.where(np.invert(np.isnan(raw_data.iloc[:,-2])))[0]
97
-
98
- if n_points is not None:
99
- valid_inds = shap.utils.sample(valid_inds, n_points, random_state=0)
100
-
101
- y = np.array(raw_data.iloc[valid_inds,-2], dtype=float)
102
-
103
- # extract the predictive features and remove columns with missing values
104
- X = raw_data.iloc[valid_inds,5:-18]
105
- valid_cols = np.where(np.isnan(X.values).sum(0) == 0)[0]
106
- X = X.iloc[:,valid_cols]
107
-
108
- return X, y
109
-
110
-
111
- def diabetes(display=False, n_points=None):
112
- """ Return the diabetes data in a nice package. """
113
-
114
- d = sklearn.datasets.load_diabetes()
115
- df = pd.DataFrame(data=d.data, columns=d.feature_names)
116
- target = d.target
117
-
118
- if n_points is not None:
119
- df = shap.utils.sample(df, n_points, random_state=0)
120
- target = shap.utils.sample(target, n_points, random_state=0)
121
-
122
- return df, target
123
-
124
-
125
- def iris(display=False, n_points=None):
126
- """ Return the classic iris data in a nice package. """
127
-
128
- d = sklearn.datasets.load_iris()
129
- df = pd.DataFrame(data=d.data, columns=d.feature_names)
130
- target = d.target
131
-
132
- if n_points is not None:
133
- df = shap.utils.sample(df, n_points, random_state=0)
134
- target = shap.utils.sample(target, n_points, random_state=0)
135
-
136
- if display:
137
- return df, [d.target_names[v] for v in target]
138
- return df, target
139
-
140
-
141
- def adult(display=False, n_points=None):
142
- """ Return the Adult census data in a nice package. """
143
- dtypes = [
144
- ("Age", "float32"), ("Workclass", "category"), ("fnlwgt", "float32"),
145
- ("Education", "category"), ("Education-Num", "float32"), ("Marital Status", "category"),
146
- ("Occupation", "category"), ("Relationship", "category"), ("Race", "category"),
147
- ("Sex", "category"), ("Capital Gain", "float32"), ("Capital Loss", "float32"),
148
- ("Hours per week", "float32"), ("Country", "category"), ("Target", "category")
149
- ]
150
- raw_data = pd.read_csv(
151
- cache(github_data_url + "adult.data"),
152
- names=[d[0] for d in dtypes],
153
- na_values="?",
154
- dtype=dict(dtypes)
155
- )
156
-
157
- if n_points is not None:
158
- raw_data = shap.utils.sample(raw_data, n_points, random_state=0)
159
-
160
- data = raw_data.drop(["Education"], axis=1) # redundant with Education-Num
161
- filt_dtypes = list(filter(lambda x: x[0] not in ["Target", "Education"], dtypes))
162
- data["Target"] = data["Target"] == " >50K"
163
- rcode = {
164
- "Not-in-family": 0,
165
- "Unmarried": 1,
166
- "Other-relative": 2,
167
- "Own-child": 3,
168
- "Husband": 4,
169
- "Wife": 5
170
- }
171
- for k, dtype in filt_dtypes:
172
- if dtype == "category":
173
- if k == "Relationship":
174
- data[k] = np.array([rcode[v.strip()] for v in data[k]])
175
- else:
176
- data[k] = data[k].cat.codes
177
-
178
- if display:
179
- return raw_data.drop(["Education", "Target", "fnlwgt"], axis=1), data["Target"].values
180
- return data.drop(["Target", "fnlwgt"], axis=1), data["Target"].values
181
-
182
-
183
- def nhanesi(display=False, n_points=None):
184
- """ A nicely packaged version of NHANES I data with surivival times as labels.
185
- """
186
- X = pd.read_csv(cache(github_data_url + "NHANESI_X.csv"), index_col=0)
187
- y = pd.read_csv(cache(github_data_url + "NHANESI_y.csv"), index_col=0)["y"]
188
-
189
- if n_points is not None:
190
- X = shap.utils.sample(X, n_points, random_state=0)
191
- y = shap.utils.sample(y, n_points, random_state=0)
192
-
193
- if display:
194
- X_display = X.copy()
195
- # X_display["sex_isFemale"] = ["Female" if v else "Male" for v in X["sex_isFemale"]]
196
- return X_display, np.array(y)
197
- return X, np.array(y)
198
-
199
-
200
- def corrgroups60(display=False, n_points=1_000):
201
- """ Correlated Groups 60
202
-
203
- A simulated dataset with tight correlations among distinct groups of features.
204
- """
205
-
206
- # set a constant seed
207
- old_seed = np.random.seed()
208
- np.random.seed(0)
209
-
210
- # generate dataset with known correlation
211
- N, M = n_points, 60
212
-
213
- # set one coefficient from each group of 3 to 1
214
- beta = np.zeros(M)
215
- beta[0:30:3] = 1
216
-
217
- # build a correlation matrix with groups of 3 tightly correlated features
218
- C = np.eye(M)
219
- for i in range(0,30,3):
220
- C[i,i+1] = C[i+1,i] = 0.99
221
- C[i,i+2] = C[i+2,i] = 0.99
222
- C[i+1,i+2] = C[i+2,i+1] = 0.99
223
- def f(X):
224
- return np.matmul(X, beta)
225
-
226
- # Make sure the sample correlation is a perfect match
227
- X_start = np.random.randn(N, M)
228
- X_centered = X_start - X_start.mean(0)
229
- Sigma = np.matmul(X_centered.T, X_centered) / X_centered.shape[0]
230
- W = np.linalg.cholesky(np.linalg.inv(Sigma)).T
231
- X_white = np.matmul(X_centered, W.T)
232
- assert np.linalg.norm(np.corrcoef(np.matmul(X_centered, W.T).T) - np.eye(M)) < 1e-6 # ensure this decorrelates the data
233
-
234
- # create the final data
235
- X_final = np.matmul(X_white, np.linalg.cholesky(C).T)
236
- X = X_final
237
- y = f(X) + np.random.randn(N) * 1e-2
238
-
239
- # restore the previous numpy random seed
240
- np.random.seed(old_seed)
241
-
242
- return pd.DataFrame(X), y
243
-
244
-
245
- def independentlinear60(display=False, n_points=1_000):
246
- """ A simulated dataset with tight correlations among distinct groups of features.
247
- """
248
-
249
- # set a constant seed
250
- old_seed = np.random.seed()
251
- np.random.seed(0)
252
-
253
- # generate dataset with known correlation
254
- N, M = n_points, 60
255
-
256
- # set one coefficient from each group of 3 to 1
257
- beta = np.zeros(M)
258
- beta[0:30:3] = 1
259
- def f(X):
260
- return np.matmul(X, beta)
261
-
262
- # Make sure the sample correlation is a perfect match
263
- X_start = np.random.randn(N, M)
264
- X = X_start - X_start.mean(0)
265
- y = f(X) + np.random.randn(N) * 1e-2
266
-
267
- # restore the previous numpy random seed
268
- np.random.seed(old_seed)
269
-
270
- return pd.DataFrame(X), y
271
-
272
-
273
- def a1a(n_points=None):
274
- """ A sparse dataset in scipy csr matrix format.
275
- """
276
- data, target = sklearn.datasets.load_svmlight_file(cache(github_data_url + 'a1a.svmlight'))
277
-
278
- if n_points is not None:
279
- data = shap.utils.sample(data, n_points, random_state=0)
280
- target = shap.utils.sample(target, n_points, random_state=0)
281
-
282
- return data, target
283
-
284
-
285
- def rank():
286
- """ Ranking datasets from lightgbm repository.
287
- """
288
- rank_data_url = 'https://raw.githubusercontent.com/Microsoft/LightGBM/master/examples/lambdarank/'
289
- x_train, y_train = sklearn.datasets.load_svmlight_file(cache(rank_data_url + 'rank.train'))
290
- x_test, y_test = sklearn.datasets.load_svmlight_file(cache(rank_data_url + 'rank.test'))
291
- q_train = np.loadtxt(cache(rank_data_url + 'rank.train.query'))
292
- q_test = np.loadtxt(cache(rank_data_url + 'rank.test.query'))
293
-
294
- return x_train, y_train, x_test, y_test, q_train, q_test
295
-
296
-
297
- def cache(url, file_name=None):
298
- """ Loads a file from the URL and caches it locally.
299
- """
300
- if file_name is None:
301
- file_name = os.path.basename(url)
302
- data_dir = os.path.join(os.path.dirname(__file__), "cached_data")
303
- os.makedirs(data_dir, exist_ok=True)
304
-
305
- file_path = os.path.join(data_dir, file_name)
306
- if not os.path.isfile(file_path):
307
- urlretrieve(url, file_path)
308
-
309
- return file_path
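Each loader above downloads its file(s) once via cache() into a cached_data directory next to the module and returns an (X, y) pair. Illustrative usage (network access is assumed on first call):

    import shap

    X, y = shap.datasets.adult()                                 # DataFrame of features, boolean labels
    X_disp, _ = shap.datasets.adult(display=True)                # human-readable categorical columns
    X_small, y_small = shap.datasets.california(n_points=500)    # subsample via n_points
    print(X.shape, X_disp.shape, X_small.shape)
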
lib/shap/explainers/__init__.py DELETED
@@ -1,38 +0,0 @@
1
- from ._additive import AdditiveExplainer
2
- from ._deep import DeepExplainer
3
- from ._exact import ExactExplainer
4
- from ._gpu_tree import GPUTreeExplainer
5
- from ._gradient import GradientExplainer
6
- from ._kernel import KernelExplainer
7
- from ._linear import LinearExplainer
8
- from ._partition import PartitionExplainer
9
- from ._permutation import PermutationExplainer
10
- from ._sampling import SamplingExplainer
11
- from ._tree import TreeExplainer
12
-
13
- # Alternative legacy "short-form" aliases, which are kept here for backwards-compatibility
14
- Additive = AdditiveExplainer
15
- Deep = DeepExplainer
16
- Exact = ExactExplainer
17
- GPUTree = GPUTreeExplainer
18
- Gradient = GradientExplainer
19
- Kernel = KernelExplainer
20
- Linear = LinearExplainer
21
- Partition = PartitionExplainer
22
- Permutation = PermutationExplainer
23
- Sampling = SamplingExplainer
24
- Tree = TreeExplainer
25
-
26
- __all__ = [
27
- "AdditiveExplainer",
28
- "DeepExplainer",
29
- "ExactExplainer",
30
- "GPUTreeExplainer",
31
- "GradientExplainer",
32
- "KernelExplainer",
33
- "LinearExplainer",
34
- "PartitionExplainer",
35
- "PermutationExplainer",
36
- "SamplingExplainer",
37
- "TreeExplainer",
38
- ]
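The short-form names are plain aliases of the long-form classes, so code written against either spelling resolves to the same object; for example:

    import shap

    assert shap.explainers.Tree is shap.explainers.TreeExplainer
    assert shap.explainers.Kernel is shap.explainers.KernelExplainer
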
lib/shap/explainers/_additive.py DELETED
@@ -1,187 +0,0 @@
1
- import numpy as np
2
-
3
- from ..utils import MaskedModel, safe_isinstance
4
- from ._explainer import Explainer
5
-
6
-
7
- class AdditiveExplainer(Explainer):
8
- """ Computes SHAP values for generalized additive models.
9
-
10
- This assumes that the model only has first-order effects. Extending this to
11
- second- and third-order effects is future work (if you apply this to those models right now
12
- you will get incorrect answers that fail additivity).
13
- """
14
-
15
- def __init__(self, model, masker, link=None, feature_names=None, linearize_link=True):
16
- """ Build an Additive explainer for the given model using the given masker object.
17
-
18
- Parameters
19
- ----------
20
- model : function
21
- A callable python object that executes the model given a set of input data samples.
22
-
23
- masker : function or numpy.array or pandas.DataFrame
24
- A callable python object used to "mask" out hidden features of the form `masker(mask, *fargs)`.
25
- It takes a single binary mask and an input sample and returns a matrix of masked samples. These
26
- masked samples are evaluated using the model function and the outputs are then averaged.
27
- As a shortcut for the standard masking used by SHAP you can pass a background data matrix
28
- instead of a function and that matrix will be used for masking. To use a clustering
29
- game structure you can pass a shap.maskers.Tabular(data, hclustering=\"correlation\") object, but
30
- note that this structure information has no effect on the explanations of additive models.
31
- """
32
- super().__init__(model, masker, feature_names=feature_names, linearize_link=linearize_link)
33
-
34
- if safe_isinstance(model, "interpret.glassbox.ExplainableBoostingClassifier"):
35
- self.model = model.decision_function
36
-
37
- if self.masker is None:
38
- self._expected_value = model.intercept_
39
- # num_features = len(model.additive_terms_)
40
-
41
- # fm = MaskedModel(self.model, self.masker, self.link, np.zeros(num_features))
42
- # masks = np.ones((1, num_features), dtype=bool)
43
- # outputs = fm(masks)
44
- # self.model(np.zeros(num_features))
45
- # self._zero_offset = self.model(np.zeros(num_features))#model.intercept_#outputs[0]
46
- # self._input_offsets = np.zeros(num_features) #* self._zero_offset
47
- raise NotImplementedError("Masker not given and we don't yet support pulling the distribution centering directly from the EBM model!")
48
- return
49
-
50
- # here we need to compute the offsets ourselves because we can't pull them directly from a model we know about
51
- assert safe_isinstance(self.masker, "shap.maskers.Independent"), "The Additive explainer only supports the Tabular masker at the moment!"
52
-
53
- # pre-compute per-feature offsets
54
- fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, np.zeros(self.masker.shape[1]))
55
- masks = np.ones((self.masker.shape[1]+1, self.masker.shape[1]), dtype=bool)
56
- for i in range(1, self.masker.shape[1]+1):
57
- masks[i,i-1] = False
58
- outputs = fm(masks)
59
- self._zero_offset = outputs[0]
60
- self._input_offsets = np.zeros(masker.shape[1])
61
- for i in range(1, self.masker.shape[1]+1):
62
- self._input_offsets[i-1] = outputs[i] - self._zero_offset
63
-
64
- self._expected_value = self._input_offsets.sum() + self._zero_offset
65
-
66
- def __call__(self, *args, max_evals=None, silent=False):
67
- """ Explains the output of model(*args), where args represents one or more parallel iterable args.
68
- """
69
-
70
- # we entirely rely on the general call implementation, we override just to remove **kwargs
71
- # from the function signature
72
- return super().__call__(*args, max_evals=max_evals, silent=silent)
73
-
74
- @staticmethod
75
- def supports_model_with_masker(model, masker):
76
- """ Determines if this explainer can handle the given model.
77
-
78
- This is an abstract static method meant to be implemented by each subclass.
79
- """
80
- if safe_isinstance(model, "interpret.glassbox.ExplainableBoostingClassifier"):
81
- if model.interactions != 0:
82
- raise NotImplementedError("Need to add support for interaction effects!")
83
- return True
84
-
85
- return False
86
-
87
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, silent):
88
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes).
89
- """
90
-
91
- x = row_args[0]
92
- inputs = np.zeros((len(x), len(x)))
93
- for i in range(len(x)):
94
- inputs[i,i] = x[i]
95
-
96
- phi = self.model(inputs) - self._zero_offset - self._input_offsets
97
-
98
- return {
99
- "values": phi,
100
- "expected_values": self._expected_value,
101
- "mask_shapes": [a.shape for a in row_args],
102
- "main_effects": phi,
103
- "clustering": getattr(self.masker, "clustering", None)
104
- }
105
-
106
- # class AdditiveExplainer(Explainer):
107
- # """ Computes SHAP values for generalized additive models.
108
-
109
- # This assumes that the model only has first order effects. Extending this to
110
- # 2nd and third order effects is future work (if you apply this to those models right now
111
- # you will get incorrect answers that fail additivity).
112
-
113
- # Parameters
114
- # ----------
115
- # model : function or ExplainableBoostingRegressor
116
- # User supplied additive model either as either a function or a model object.
117
-
118
- # data : numpy.array, pandas.DataFrame
119
- # The background dataset to use for computing conditional expectations.
120
- # feature_perturbation : "interventional"
121
- # Only the standard interventional SHAP values are supported by AdditiveExplainer right now.
122
- # """
123
-
124
- # def __init__(self, model, data, feature_perturbation="interventional"):
125
- # if feature_perturbation != "interventional":
126
- # raise Exception("Unsupported type of feature_perturbation provided: " + feature_perturbation)
127
-
128
- # if safe_isinstance(model, "interpret.glassbox.ebm.ebm.ExplainableBoostingRegressor"):
129
- # self.f = model.predict
130
- # elif callable(model):
131
- # self.f = model
132
- # else:
133
- # raise ValueError("The passed model must be a recognized object or a function!")
134
-
135
- # # convert dataframes
136
- # if isinstance(data, (pd.Series, pd.DataFrame)):
137
- # data = data.values
138
- # self.data = data
139
-
140
- # # compute the expected value of the model output
141
- # self.expected_value = self.f(data).mean()
142
-
143
- # # pre-compute per-feature offsets
144
- # tmp = np.zeros(data.shape)
145
- # self._zero_offset = self.f(tmp).mean()
146
- # self._feature_offset = np.zeros(data.shape[1])
147
- # for i in range(data.shape[1]):
148
- # tmp[:,i] = data[:,i]
149
- # self._feature_offset[i] = self.f(tmp).mean() - self._zero_offset
150
- # tmp[:,i] = 0
151
-
152
-
153
- # def shap_values(self, X):
154
- # """ Estimate the SHAP values for a set of samples.
155
-
156
- # Parameters
157
- # ----------
158
- # X : numpy.array, pandas.DataFrame or scipy.csr_matrix
159
- # A matrix of samples (# samples x # features) on which to explain the model's output.
160
-
161
- # Returns
162
- # -------
163
- # For models with a single output this returns a matrix of SHAP values
164
- # (# samples x # features). Each row sums to the difference between the model output for that
165
- # sample and the expected value of the model output (which is stored as expected_value
166
- # attribute of the explainer).
167
- # """
168
-
169
- # # convert dataframes
170
- # if isinstance(X, (pd.Series, pd.DataFrame)):
171
- # X = X.values
172
-
173
- # # assert isinstance(X, np.ndarray), "Unknown instance type: " + str(type(X))
174
- # assert len(X.shape) == 1 or len(X.shape) == 2, "Instance must have 1 or 2 dimensions!"
175
-
176
- # # convert dataframes
177
- # if isinstance(X, (pd.Series, pd.DataFrame)):
178
- # X = X.values
179
-
180
- # phi = np.zeros(X.shape)
181
- # tmp = np.zeros(X.shape)
182
- # for i in range(X.shape[1]):
183
- # tmp[:,i] = X[:,i]
184
- # phi[:,i] = self.f(tmp) - self._zero_offset - self._feature_offset[i]
185
- # tmp[:,i] = 0
186
-
187
- # return phi
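A minimal sketch of how the explainer above is typically constructed, assuming the interpret package is installed; interactions=0 is needed because the explainer does not support EBM interaction terms (see supports_model_with_masker above), and only the Independent masker is accepted:

    import shap
    from interpret.glassbox import ExplainableBoostingClassifier

    X, y = shap.datasets.adult(n_points=2000)
    ebm = ExplainableBoostingClassifier(interactions=0).fit(X, y)

    masker = shap.maskers.Independent(X, max_samples=100)   # the only masker supported above
    explainer = shap.explainers.Additive(ebm, masker)
    explanation = explainer(X.iloc[:5])                     # first-order per-feature effects
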
lib/shap/explainers/_deep/__init__.py DELETED
@@ -1,125 +0,0 @@
1
- from .._explainer import Explainer
2
- from .deep_pytorch import PyTorchDeep
3
- from .deep_tf import TFDeep
4
-
5
-
6
- class DeepExplainer(Explainer):
7
- """ Meant to approximate SHAP values for deep learning models.
8
-
9
- This is an enhanced version of the DeepLIFT algorithm (Deep SHAP) where, similar to Kernel SHAP, we
10
- approximate the conditional expectations of SHAP values using a selection of background samples.
11
- Lundberg and Lee, NIPS 2017 showed that the per node attribution rules in DeepLIFT (Shrikumar,
12
- Greenside, and Kundaje, arXiv 2017) can be chosen to approximate Shapley values. By integrating
13
- over many background samples Deep estimates approximate SHAP values such that they sum
14
- up to the difference between the expected model output on the passed background samples and the
15
- current model output (f(x) - E[f(x)]).
16
-
17
- Examples
18
- --------
19
- See :ref:`Deep Explainer Examples <deep_explainer_examples>`
20
- """
21
-
22
- def __init__(self, model, data, session=None, learning_phase_flags=None):
23
- """ An explainer object for a differentiable model using a given background dataset.
24
-
25
- Note that the complexity of the method scales linearly with the number of background data
26
- samples. Passing the entire training dataset as `data` will give very accurate expected
27
- values, but be unreasonably expensive. The variance of the expectation estimates scale by
28
- roughly 1/sqrt(N) for N background data samples. So 100 samples will give a good estimate,
29
- and 1000 samples a very good estimate of the expected values.
30
-
31
- Parameters
32
- ----------
33
- model : if framework == 'tensorflow', (input : [tf.Tensor], output : tf.Tensor)
34
- A pair of TensorFlow tensors (or a list and a tensor) that specifies the input and
35
- output of the model to be explained. Note that SHAP values are specific to a single
36
- output value, so the output tf.Tensor should be a single dimensional output (,1).
37
-
38
- if framework == 'pytorch', an nn.Module object (model), or a tuple (model, layer),
39
- where both are nn.Module objects
40
- The model is an nn.Module object which takes as input a tensor (or list of tensors) of
41
- shape data, and returns a single dimensional output.
42
- If the input is a tuple, the returned shap values will be for the input of the
43
- layer argument. layer must be a layer in the model, i.e. model.conv2
44
-
45
- data :
46
- if framework == 'tensorflow': [numpy.array] or [pandas.DataFrame]
47
- if framework == 'pytorch': [torch.tensor]
48
- The background dataset to use for integrating out features. Deep integrates
49
- over these samples. The data passed here must match the input tensors given in the
50
- first argument. Note that since these samples are integrated over for each sample you
51
- should use only something like 100 or 1000 random background samples, not the whole training
52
- dataset.
53
-
54
- if framework == 'tensorflow':
55
-
56
- session : None or tensorflow.Session
57
- The TensorFlow session that has the model we are explaining. If None is passed then
58
- we do our best to find the right session, first looking for a keras session, then
59
- falling back to the default TensorFlow session.
60
-
61
- learning_phase_flags : None or list of tensors
62
- If you have your own custom learning phase flags pass them here. When explaining a prediction
63
- we need to ensure we are not in training mode, since this changes the behavior of ops like
64
- batch norm or dropout. If None is passed then we look for tensors in the graph that look like
65
- learning phase flags (this works for Keras models). Note that we assume all the flags should
66
- have a value of False during predictions (and hence explanations).
67
- """
68
- # first, we need to find the framework
69
- if type(model) is tuple:
70
- a, b = model
71
- try:
72
- a.named_parameters()
73
- framework = 'pytorch'
74
- except Exception:
75
- framework = 'tensorflow'
76
- else:
77
- try:
78
- model.named_parameters()
79
- framework = 'pytorch'
80
- except Exception:
81
- framework = 'tensorflow'
82
-
83
- if framework == 'tensorflow':
84
- self.explainer = TFDeep(model, data, session, learning_phase_flags)
85
- elif framework == 'pytorch':
86
- self.explainer = PyTorchDeep(model, data)
87
-
88
- self.expected_value = self.explainer.expected_value
89
- self.explainer.framework = framework
90
-
91
- def shap_values(self, X, ranked_outputs=None, output_rank_order='max', check_additivity=True):
92
- """ Return approximate SHAP values for the model applied to the data given by X.
93
-
94
- Parameters
95
- ----------
96
- X : list,
97
- if framework == 'tensorflow': numpy.array, or pandas.DataFrame
98
- if framework == 'pytorch': torch.tensor
99
- A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
100
- explain the model's output.
101
-
102
- ranked_outputs : None or int
103
- If ranked_outputs is None then we explain all the outputs in a multi-output model. If
104
- ranked_outputs is a positive integer then we only explain that many of the top model
105
- outputs (where "top" is determined by output_rank_order). Note that this causes a pair
106
- of values to be returned (shap_values, indexes), where shap_values is a list of numpy
107
- arrays for each of the output ranks, and indexes is a matrix that indicates for each sample
108
- which output indexes were chosen as "top".
109
-
110
- output_rank_order : "max", "min", or "max_abs"
111
- How to order the model outputs when using ranked_outputs, either by maximum, minimum, or
112
- maximum absolute value.
113
-
114
- Returns
115
- -------
116
- array or list
117
- For models with a single output this returns a tensor of SHAP values with the same shape
118
- as X. For a model with multiple outputs this returns a list of SHAP value tensors, each of
119
- which are the same shape as X. If ranked_outputs is None then this list of tensors matches
120
- the number of model outputs. If ranked_outputs is a positive integer a pair is returned
121
- (shap_values, indexes), where shap_values is a list of tensors with a length of
122
- ranked_outputs, and indexes is a matrix that indicates for each sample which output indexes
123
- were chosen as "top".
124
- """
125
- return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity)
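Typical usage of the wrapper above, as a sketch assuming PyTorch is installed (the framework is detected by probing model.named_parameters(), as shown in __init__):

    import torch
    import torch.nn as nn
    import shap

    model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 1))
    background = torch.randn(100, 10)     # ~100 background samples usually suffices
    explainer = shap.DeepExplainer(model, background)
    shap_values = explainer.shap_values(torch.randn(5, 10))
    # for this single-output model the result has the same shape as the explained batch
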
lib/shap/explainers/_deep/deep_pytorch.py DELETED
@@ -1,386 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
- from packaging import version
5
-
6
- from .._explainer import Explainer
7
- from .deep_utils import _check_additivity
8
-
9
- torch = None
10
-
11
-
12
- class PyTorchDeep(Explainer):
13
-
14
- def __init__(self, model, data):
15
- # try and import pytorch
16
- global torch
17
- if torch is None:
18
- import torch
19
- if version.parse(torch.__version__) < version.parse("0.4"):
20
- warnings.warn("Your PyTorch version is older than 0.4 and not supported.")
21
-
22
- # check if we have multiple inputs
23
- self.multi_input = False
24
- if isinstance(data, list):
25
- self.multi_input = True
26
- if not isinstance(data, list):
27
- data = [data]
28
- self.data = data
29
- self.layer = None
30
- self.input_handle = None
31
- self.interim = False
32
- self.interim_inputs_shape = None
33
- self.expected_value = None # to keep the DeepExplainer base happy
34
- if type(model) == tuple:
35
- self.interim = True
36
- model, layer = model
37
- model = model.eval()
38
- self.layer = layer
39
- self.add_target_handle(self.layer)
40
-
41
- # if we are taking an interim layer, the 'data' is going to be the input
42
- # of the interim layer; we will capture this using a forward hook
43
- with torch.no_grad():
44
- _ = model(*data)
45
- interim_inputs = self.layer.target_input
46
- if type(interim_inputs) is tuple:
47
- # this should always be true, but just to be safe
48
- self.interim_inputs_shape = [i.shape for i in interim_inputs]
49
- else:
50
- self.interim_inputs_shape = [interim_inputs.shape]
51
- self.target_handle.remove()
52
- del self.layer.target_input
53
- self.model = model.eval()
54
-
55
- self.multi_output = False
56
- self.num_outputs = 1
57
- with torch.no_grad():
58
- outputs = model(*data)
59
-
60
- # also get the device everything is running on
61
- self.device = outputs.device
62
- if outputs.shape[1] > 1:
63
- self.multi_output = True
64
- self.num_outputs = outputs.shape[1]
65
- self.expected_value = outputs.mean(0).cpu().numpy()
66
-
67
- def add_target_handle(self, layer):
68
- input_handle = layer.register_forward_hook(get_target_input)
69
- self.target_handle = input_handle
70
-
71
- def add_handles(self, model, forward_handle, backward_handle):
72
- """
73
- Add handles to all non-container layers in the model.
74
- The hooks are registered recursively, on non-container (leaf) layers only.
75
- """
76
- handles_list = []
77
- model_children = list(model.children())
78
- if model_children:
79
- for child in model_children:
80
- handles_list.extend(self.add_handles(child, forward_handle, backward_handle))
81
- else: # leaves
82
- handles_list.append(model.register_forward_hook(forward_handle))
83
- handles_list.append(model.register_full_backward_hook(backward_handle))
84
- return handles_list
85
-
86
- def remove_attributes(self, model):
87
- """
88
- Removes the x and y attributes which were added by the forward handles
89
- Recursively searches for non-container layers
90
- """
91
- for child in model.children():
92
- if 'nn.modules.container' in str(type(child)):
93
- self.remove_attributes(child)
94
- else:
95
- try:
96
- del child.x
97
- except AttributeError:
98
- pass
99
- try:
100
- del child.y
101
- except AttributeError:
102
- pass
103
-
104
- def gradient(self, idx, inputs):
105
- self.model.zero_grad()
106
- X = [x.requires_grad_() for x in inputs]
107
- outputs = self.model(*X)
108
- selected = [val for val in outputs[:, idx]]
109
- grads = []
110
- if self.interim:
111
- interim_inputs = self.layer.target_input
112
- for idx, input in enumerate(interim_inputs):
113
- grad = torch.autograd.grad(selected, input,
114
- retain_graph=True if idx + 1 < len(interim_inputs) else None,
115
- allow_unused=True)[0]
116
- if grad is not None:
117
- grad = grad.cpu().numpy()
118
- else:
119
- grad = torch.zeros_like(X[idx]).cpu().numpy()
120
- grads.append(grad)
121
- del self.layer.target_input
122
- return grads, [i.detach().cpu().numpy() for i in interim_inputs]
123
- else:
124
- for idx, x in enumerate(X):
125
- grad = torch.autograd.grad(selected, x,
126
- retain_graph=True if idx + 1 < len(X) else None,
127
- allow_unused=True)[0]
128
- if grad is not None:
129
- grad = grad.cpu().numpy()
130
- else:
131
- grad = torch.zeros_like(X[idx]).cpu().numpy()
132
- grads.append(grad)
133
- return grads
134
-
135
- def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True):
136
- # X ~ self.model_input
137
- # X_data ~ self.data
138
-
139
- # check if we have multiple inputs
140
- if not self.multi_input:
141
- assert not isinstance(X, list), "Expected a single tensor model input!"
142
- X = [X]
143
- else:
144
- assert isinstance(X, list), "Expected a list of model inputs!"
145
-
146
- X = [x.detach().to(self.device) for x in X]
147
-
148
- model_output_values = None
149
-
150
- if ranked_outputs is not None and self.multi_output:
151
- with torch.no_grad():
152
- model_output_values = self.model(*X)
153
- # rank and determine the model outputs that we will explain
154
- if output_rank_order == "max":
155
- _, model_output_ranks = torch.sort(model_output_values, descending=True)
156
- elif output_rank_order == "min":
157
- _, model_output_ranks = torch.sort(model_output_values, descending=False)
158
- elif output_rank_order == "max_abs":
159
- _, model_output_ranks = torch.sort(torch.abs(model_output_values), descending=True)
160
- else:
161
- emsg = "output_rank_order must be max, min, or max_abs!"
162
- raise ValueError(emsg)
163
- model_output_ranks = model_output_ranks[:, :ranked_outputs]
164
- else:
165
- model_output_ranks = (torch.ones((X[0].shape[0], self.num_outputs)).int() *
166
- torch.arange(0, self.num_outputs).int())
167
-
168
- # add the gradient handles
169
- handles = self.add_handles(self.model, add_interim_values, deeplift_grad)
170
- if self.interim:
171
- self.add_target_handle(self.layer)
172
-
173
- # compute the attributions
174
- output_phis = []
175
- for i in range(model_output_ranks.shape[1]):
176
- phis = []
177
- if self.interim:
178
- for k in range(len(self.interim_inputs_shape)):
179
- phis.append(np.zeros((X[0].shape[0], ) + self.interim_inputs_shape[k][1: ]))
180
- else:
181
- for k in range(len(X)):
182
- phis.append(np.zeros(X[k].shape))
183
- for j in range(X[0].shape[0]):
184
- # tile the inputs to line up with the background data samples
185
- tiled_X = [X[t][j:j + 1].repeat(
186
- (self.data[t].shape[0],) + tuple([1 for k in range(len(X[t].shape) - 1)])) for t
187
- in range(len(X))]
188
- joint_x = [torch.cat((tiled_X[t], self.data[t]), dim=0) for t in range(len(X))]
189
- # run attribution computation graph
190
- feature_ind = model_output_ranks[j, i]
191
- sample_phis = self.gradient(feature_ind, joint_x)
192
- # assign the attributions to the right part of the output arrays
193
- if self.interim:
194
- sample_phis, output = sample_phis
195
- x, data = [], []
196
- for k in range(len(output)):
197
- x_temp, data_temp = np.split(output[k], 2)
198
- x.append(x_temp)
199
- data.append(data_temp)
200
- for t in range(len(self.interim_inputs_shape)):
201
- phis[t][j] = (sample_phis[t][self.data[t].shape[0]:] * (x[t] - data[t])).mean(0)
202
- else:
203
- for t in range(len(X)):
204
- phis[t][j] = (torch.from_numpy(sample_phis[t][self.data[t].shape[0]:]).to(self.device) * (X[t][j: j + 1] - self.data[t])).cpu().detach().numpy().mean(0)
205
- output_phis.append(phis[0] if not self.multi_input else phis)
206
- # cleanup; remove all gradient handles
207
- for handle in handles:
208
- handle.remove()
209
- self.remove_attributes(self.model)
210
- if self.interim:
211
- self.target_handle.remove()
212
-
213
- # check that the SHAP values sum up to the model output
214
- if check_additivity:
215
- if model_output_values is None:
216
- with torch.no_grad():
217
- model_output_values = self.model(*X)
218
-
219
- _check_additivity(self, model_output_values.cpu(), output_phis)
220
-
221
- if not self.multi_output:
222
- return output_phis[0]
223
- elif ranked_outputs is not None:
224
- return output_phis, model_output_ranks
225
- else:
226
- return output_phis
227
-
228
- # Module hooks
229
-
230
-
231
- def deeplift_grad(module, grad_input, grad_output):
232
- """The backward hook which computes the deeplift
233
- gradient for an nn.Module
234
- """
235
- # first, get the module type
236
- module_type = module.__class__.__name__
237
- # first, check the module is supported
238
- if module_type in op_handler:
239
- if op_handler[module_type].__name__ not in ['passthrough', 'linear_1d']:
240
- return op_handler[module_type](module, grad_input, grad_output)
241
- else:
242
- warnings.warn(f'unrecognized nn.Module: {module_type}')
243
- return grad_input
244
-
245
-
246
- def add_interim_values(module, input, output):
247
- """The forward hook used to save interim tensors, detached
248
- from the graph. Used to calculate the multipliers
249
- """
250
- try:
251
- del module.x
252
- except AttributeError:
253
- pass
254
- try:
255
- del module.y
256
- except AttributeError:
257
- pass
258
- module_type = module.__class__.__name__
259
- if module_type in op_handler:
260
- func_name = op_handler[module_type].__name__
261
- # First, check for cases where we don't need to save the x and y tensors
262
- if func_name == 'passthrough':
263
- pass
264
- else:
265
- # check only the 0th input varies
266
- for i in range(len(input)):
267
- if i != 0 and type(output) is tuple:
268
- assert input[i] == output[i], "Only the 0th input may vary!"
269
- # if a new method is added, it must be added here too. This ensures tensors
270
- # are only saved if necessary
271
- if func_name in ['maxpool', 'nonlinear_1d']:
272
- # only save tensors if necessary
273
- if type(input) is tuple:
274
- setattr(module, 'x', torch.nn.Parameter(input[0].detach()))
275
- else:
276
- setattr(module, 'x', torch.nn.Parameter(input.detach()))
277
- if type(output) is tuple:
278
- setattr(module, 'y', torch.nn.Parameter(output[0].detach()))
279
- else:
280
- setattr(module, 'y', torch.nn.Parameter(output.detach()))
281
-
282
-
283
- def get_target_input(module, input, output):
284
- """A forward hook which saves the tensor - attached to its graph.
285
- Used if we want to explain the interim outputs of a model
286
- """
287
- try:
288
- del module.target_input
289
- except AttributeError:
290
- pass
291
- setattr(module, 'target_input', input)
292
-
293
-
294
- def passthrough(module, grad_input, grad_output):
295
- """No change made to gradients"""
296
- return None
297
-
298
-
299
- def maxpool(module, grad_input, grad_output):
300
- pool_to_unpool = {
301
- 'MaxPool1d': torch.nn.functional.max_unpool1d,
302
- 'MaxPool2d': torch.nn.functional.max_unpool2d,
303
- 'MaxPool3d': torch.nn.functional.max_unpool3d
304
- }
305
- pool_to_function = {
306
- 'MaxPool1d': torch.nn.functional.max_pool1d,
307
- 'MaxPool2d': torch.nn.functional.max_pool2d,
308
- 'MaxPool3d': torch.nn.functional.max_pool3d
309
- }
310
- delta_in = module.x[: int(module.x.shape[0] / 2)] - module.x[int(module.x.shape[0] / 2):]
311
- dup0 = [2] + [1 for i in delta_in.shape[1:]]
312
- # we also need to check if the output is a tuple
313
- y, ref_output = torch.chunk(module.y, 2)
314
- cross_max = torch.max(y, ref_output)
315
- diffs = torch.cat([cross_max - ref_output, y - cross_max], 0)
316
-
317
- # all of this just to unpool the outputs
318
- with torch.no_grad():
319
- _, indices = pool_to_function[module.__class__.__name__](
320
- module.x, module.kernel_size, module.stride, module.padding,
321
- module.dilation, module.ceil_mode, True)
322
- xmax_pos, rmax_pos = torch.chunk(pool_to_unpool[module.__class__.__name__](
323
- grad_output[0] * diffs, indices, module.kernel_size, module.stride,
324
- module.padding, list(module.x.shape)), 2)
325
-
326
- grad_input = [None for _ in grad_input]
327
- grad_input[0] = torch.where(torch.abs(delta_in) < 1e-7, torch.zeros_like(delta_in),
328
- (xmax_pos + rmax_pos) / delta_in).repeat(dup0)
329
-
330
- return tuple(grad_input)
331
-
332
-
333
- def linear_1d(module, grad_input, grad_output):
334
- """No change made to gradients."""
335
- return None
336
-
337
-
338
- def nonlinear_1d(module, grad_input, grad_output):
339
- delta_out = module.y[: int(module.y.shape[0] / 2)] - module.y[int(module.y.shape[0] / 2):]
340
-
341
- delta_in = module.x[: int(module.x.shape[0] / 2)] - module.x[int(module.x.shape[0] / 2):]
342
- dup0 = [2] + [1 for i in delta_in.shape[1:]]
343
- # handles numerical instabilities where delta_in is very small by
344
- # just taking the gradient in those cases
345
- grads = [None for _ in grad_input]
346
- grads[0] = torch.where(torch.abs(delta_in.repeat(dup0)) < 1e-6, grad_input[0],
347
- grad_output[0] * (delta_out / delta_in).repeat(dup0))
348
- return tuple(grads)
349
-
350
-
351
- op_handler = {}
352
-
353
- # passthrough ops, where we make no change to the gradient
354
- op_handler['Dropout3d'] = passthrough
355
- op_handler['Dropout2d'] = passthrough
356
- op_handler['Dropout'] = passthrough
357
- op_handler['AlphaDropout'] = passthrough
358
-
359
- op_handler['Conv1d'] = linear_1d
360
- op_handler['Conv2d'] = linear_1d
361
- op_handler['Conv3d'] = linear_1d
362
- op_handler['ConvTranspose1d'] = linear_1d
363
- op_handler['ConvTranspose2d'] = linear_1d
364
- op_handler['ConvTranspose3d'] = linear_1d
365
- op_handler['Linear'] = linear_1d
366
- op_handler['AvgPool1d'] = linear_1d
367
- op_handler['AvgPool2d'] = linear_1d
368
- op_handler['AvgPool3d'] = linear_1d
369
- op_handler['AdaptiveAvgPool1d'] = linear_1d
370
- op_handler['AdaptiveAvgPool2d'] = linear_1d
371
- op_handler['AdaptiveAvgPool3d'] = linear_1d
372
- op_handler['BatchNorm1d'] = linear_1d
373
- op_handler['BatchNorm2d'] = linear_1d
374
- op_handler['BatchNorm3d'] = linear_1d
375
-
376
- op_handler['LeakyReLU'] = nonlinear_1d
377
- op_handler['ReLU'] = nonlinear_1d
378
- op_handler['ELU'] = nonlinear_1d
379
- op_handler['Sigmoid'] = nonlinear_1d
380
- op_handler["Tanh"] = nonlinear_1d
381
- op_handler["Softplus"] = nonlinear_1d
382
- op_handler['Softmax'] = nonlinear_1d
383
-
384
- op_handler['MaxPool1d'] = maxpool
385
- op_handler['MaxPool2d'] = maxpool
386
- op_handler['MaxPool3d'] = maxpool
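For orientation, a minimal usage sketch of how this PyTorch backend is normally reached through the public shap.DeepExplainer API; the model, background batch, and shapes below are illustrative assumptions, not taken from this repository:

import torch
import torch.nn as nn
import shap

# toy multi-output model and a random background batch (illustrative only)
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 3)).eval()
background = torch.randn(100, 10)      # samples used to integrate out features
to_explain = torch.randn(5, 10)        # rows we want attributions for

explainer = shap.DeepExplainer(model, background)    # dispatches to the PyTorch backend above
shap_values = explainer.shap_values(to_explain)      # one attribution array per model output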
lib/shap/explainers/_deep/deep_tf.py DELETED
@@ -1,763 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
- from packaging import version
5
-
6
- from ...utils._exceptions import DimensionError
7
- from .._explainer import Explainer
8
- from ..tf_utils import _get_graph, _get_model_inputs, _get_model_output, _get_session
9
- from .deep_utils import _check_additivity
10
-
11
- tf = None
12
- tf_ops = None
13
- tf_backprop = None
14
- tf_execute = None
15
- tf_gradients_impl = None
16
-
17
- def custom_record_gradient(op_name, inputs, attrs, results):
18
- """ This overrides tensorflow.python.eager.backprop._record_gradient.
19
-
20
- We need to override _record_gradient in order to get gradient backprop to
21
- get called for ResourceGather operations. In order to make this work we
22
- temporarily "lie" about the input type to prevent the node from getting
23
- pruned from the gradient backprop process. We then reset the type directly
24
- afterwards back to what it was (an integer type).
25
- """
26
- reset_input = False
27
- if op_name == "ResourceGather" and inputs[1].dtype == tf.int32:
28
- inputs[1].__dict__["_dtype"] = tf.float32
29
- reset_input = True
30
- try:
31
- out = tf_backprop._record_gradient("shap_"+op_name, inputs, attrs, results)
32
- except AttributeError:
33
- out = tf_backprop.record_gradient("shap_"+op_name, inputs, attrs, results)
34
-
35
- if reset_input:
36
- inputs[1].__dict__["_dtype"] = tf.int32
37
-
38
- return out
39
-
40
- class TFDeep(Explainer):
41
- """
42
- Using tf.gradients to implement the backpropagation was
43
- inspired by the gradient-based implementation approach proposed by Ancona et al, ICLR 2018. Note
44
- that this package does not currently use the reveal-cancel rule for ReLu units proposed in DeepLIFT.
45
- """
46
-
47
- def __init__(self, model, data, session=None, learning_phase_flags=None):
48
- """ An explainer object for a deep model using a given background dataset.
49
-
50
- Note that the complexity of the method scales linearly with the number of background data
51
- samples. Passing the entire training dataset as `data` will give very accurate expected
52
- values, but will be computationally expensive. The variance of the expectation estimates scales by
53
- roughly 1/sqrt(N) for N background data samples. So 100 samples will give a good estimate,
54
- and 1000 samples a very good estimate of the expected values.
55
-
56
- Parameters
57
- ----------
58
- model : tf.keras.Model or (input : [tf.Operation], output : tf.Operation)
59
- A keras model object or a pair of TensorFlow operations (or a list and an op) that
60
- specifies the input and output of the model to be explained. Note that SHAP values
61
- are specific to a single output value, so you get an explanation for each element of
62
- the output tensor (which must be a flat rank one vector).
63
-
64
- data : [numpy.array] or [pandas.DataFrame] or function
65
- The background dataset to use for integrating out features. DeepExplainer integrates
66
- over all these samples for each explanation. The data passed here must match the input
67
- operations given to the model. If a function is supplied, it must be a function that
68
- takes a particular input example and generates the background dataset for that example
69
- session : None or tensorflow.Session
70
- The TensorFlow session that has the model we are explaining. If None is passed then
71
- we do our best to find the right session, first looking for a keras session, then
72
- falling back to the default TensorFlow session.
73
-
74
- learning_phase_flags : None or list of tensors
75
- If you have your own custom learning phase flags pass them here. When explaining a prediction
76
- we need to ensure we are not in training mode, since this changes the behavior of ops like
77
- batch norm or dropout. If None is passed then we look for tensors in the graph that look like
78
- learning phase flags (this works for Keras models). Note that we assume all the flags should
79
- have a value of False during predictions (and hence explanations).
80
-
81
- """
82
- # try to import tensorflow
83
- global tf, tf_ops, tf_backprop, tf_execute, tf_gradients_impl
84
- if tf is None:
85
- from tensorflow.python.eager import backprop as tf_backprop
86
- from tensorflow.python.eager import execute as tf_execute
87
- from tensorflow.python.framework import (
88
- ops as tf_ops,
89
- )
90
- from tensorflow.python.ops import (
91
- gradients_impl as tf_gradients_impl,
92
- )
93
- if not hasattr(tf_gradients_impl, "_IsBackpropagatable"):
94
- from tensorflow.python.ops import gradients_util as tf_gradients_impl
95
- import tensorflow as tf
96
- if version.parse(tf.__version__) < version.parse("1.4.0"):
97
- warnings.warn("Your TensorFlow version is older than 1.4.0 and not supported.")
98
-
99
- if version.parse(tf.__version__) >= version.parse("2.4.0"):
100
- warnings.warn("Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.")
101
-
102
- # determine the model inputs and outputs
103
- self.model_inputs = _get_model_inputs(model)
104
- self.model_output = _get_model_output(model)
105
- assert not isinstance(self.model_output, list), "The model output to be explained must be a single tensor!"
106
- assert len(self.model_output.shape) < 3, "The model output must be a vector or a single value!"
107
- self.multi_output = True
108
- if len(self.model_output.shape) == 1:
109
- self.multi_output = False
110
-
111
- if tf.executing_eagerly():
112
- if isinstance(model, tuple) or isinstance(model, list):
113
- assert len(model) == 2, "When a tuple is passed it must be of the form (inputs, outputs)"
114
- from tensorflow.keras import Model
115
- self.model = Model(model[0], model[1])
116
- else:
117
- self.model = model
118
-
119
- # check if we have multiple inputs
120
- self.multi_input = True
121
- if not isinstance(self.model_inputs, list) or len(self.model_inputs) == 1:
122
- self.multi_input = False
123
- if not isinstance(self.model_inputs, list):
124
- self.model_inputs = [self.model_inputs]
125
- if not isinstance(data, list) and (hasattr(data, "__call__") is False):
126
- data = [data]
127
- self.data = data
128
-
129
- self._vinputs = {} # used to track what op inputs depends on the model inputs
130
- self.orig_grads = {}
131
-
132
- if not tf.executing_eagerly():
133
- self.session = _get_session(session)
134
-
135
- self.graph = _get_graph(self)
136
-
137
- # if no learning phase flags were given we go looking for them
138
- # ...this will catch the one that keras uses
139
- # we need to find them since we want to make sure learning phase flags are set to False
140
- if learning_phase_flags is None:
141
- self.learning_phase_ops = []
142
- for op in self.graph.get_operations():
143
- if 'learning_phase' in op.name and op.type == "Const" and len(op.outputs[0].shape) == 0:
144
- if op.outputs[0].dtype == tf.bool:
145
- self.learning_phase_ops.append(op)
146
- self.learning_phase_flags = [op.outputs[0] for op in self.learning_phase_ops]
147
- else:
148
- self.learning_phase_ops = [t.op for t in learning_phase_flags]
149
-
150
- # save the expected output of the model
151
- # if self.data is a function, set self.expected_value to None
152
- if (hasattr(self.data, '__call__')):
153
- self.expected_value = None
154
- else:
155
- if self.data[0].shape[0] > 5000:
156
- warnings.warn("You have provided over 5k background samples! For better performance consider using a smaller random sample.")
157
- if not tf.executing_eagerly():
158
- self.expected_value = self.run(self.model_output, self.model_inputs, self.data).mean(0)
159
- else:
160
- #if type(self.model)is tuple:
161
- # self.fModel(cnn.inputs, cnn.get_layer(theNameYouWant).outputs)
162
- self.expected_value = tf.reduce_mean(self.model(self.data), 0)
163
-
164
- if not tf.executing_eagerly():
165
- self._init_between_tensors(self.model_output.op, self.model_inputs)
166
-
167
- # make a blank array that will get lazily filled in with the SHAP value computation
168
- # graphs for each output. Lazy is important since if there are 1000 outputs and we
169
- # only explain the top 5 it would be a waste to build graphs for the other 995
170
- if not self.multi_output:
171
- self.phi_symbolics = [None]
172
- else:
173
- noutputs = self.model_output.shape.as_list()[1]
174
- if noutputs is not None:
175
- self.phi_symbolics = [None for i in range(noutputs)]
176
- else:
177
- raise DimensionError("The model output tensor to be explained cannot have a static shape in dim 1 of None!")
178
-
179
- def _get_model_output(self, model):
180
- if len(model.layers[-1]._inbound_nodes) == 0:
181
- if len(model.outputs) > 1:
182
- warnings.warn("Only one model output supported.")
183
- return model.outputs[0]
184
- else:
185
- return model.layers[-1].output
186
-
187
- def _init_between_tensors(self, out_op, model_inputs):
188
- # find all the operations in the graph between our inputs and outputs
189
- tensor_blacklist = tensors_blocked_by_false(self.learning_phase_ops) # don't follow learning phase branches
190
- dependence_breakers = [k for k in op_handlers if op_handlers[k] == break_dependence]
191
- back_ops = backward_walk_ops(
192
- [out_op], tensor_blacklist,
193
- dependence_breakers
194
- )
195
- start_ops = []
196
- for minput in model_inputs:
197
- for op in minput.consumers():
198
- start_ops.append(op)
199
- self.between_ops = forward_walk_ops(
200
- start_ops,
201
- tensor_blacklist, dependence_breakers,
202
- within_ops=back_ops
203
- )
204
-
205
- # note all the tensors that are on the path between the inputs and the output
206
- self.between_tensors = {}
207
- for op in self.between_ops:
208
- for t in op.outputs:
209
- self.between_tensors[t.name] = True
210
- for t in model_inputs:
211
- self.between_tensors[t.name] = True
212
-
213
- # save what types are being used
214
- self.used_types = {}
215
- for op in self.between_ops:
216
- self.used_types[op.type] = True
217
-
218
- def _variable_inputs(self, op):
219
- """ Return which inputs of this operation are variable (i.e. depend on the model inputs).
220
- """
221
- if op not in self._vinputs:
222
- out = np.zeros(len(op.inputs), dtype=bool)
223
- for i,t in enumerate(op.inputs):
224
- out[i] = t.name in self.between_tensors
225
- self._vinputs[op] = out
226
- return self._vinputs[op]
227
-
228
- def phi_symbolic(self, i):
229
- """ Get the SHAP value computation graph for a given model output.
230
- """
231
- if self.phi_symbolics[i] is None:
232
-
233
- if not tf.executing_eagerly():
234
- def anon():
235
- out = self.model_output[:,i] if self.multi_output else self.model_output
236
- return tf.gradients(out, self.model_inputs)
237
-
238
- self.phi_symbolics[i] = self.execute_with_overridden_gradients(anon)
239
- else:
240
- @tf.function
241
- def grad_graph(shap_rAnD):
242
- phase = tf.keras.backend.learning_phase()
243
- tf.keras.backend.set_learning_phase(0)
244
-
245
- with tf.GradientTape(watch_accessed_variables=False) as tape:
246
- tape.watch(shap_rAnD)
247
- out = self.model(shap_rAnD)
248
- if self.multi_output:
249
- out = out[:,i]
250
-
251
- self._init_between_tensors(out.op, shap_rAnD)
252
- x_grad = tape.gradient(out, shap_rAnD)
253
- tf.keras.backend.set_learning_phase(phase)
254
- return x_grad
255
-
256
- self.phi_symbolics[i] = grad_graph
257
-
258
- return self.phi_symbolics[i]
259
-
260
- def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True):
261
- # check if we have multiple inputs
262
- if not self.multi_input:
263
- if isinstance(X, list) and len(X) != 1:
264
- raise ValueError("Expected a single tensor as model input!")
265
- elif not isinstance(X, list):
266
- X = [X]
267
- else:
268
- assert isinstance(X, list), "Expected a list of model inputs!"
269
- assert len(self.model_inputs) == len(X), "Number of model inputs (%d) does not match the number given (%d)!" % (len(self.model_inputs), len(X))
270
-
271
- # rank and determine the model outputs that we will explain
272
- if ranked_outputs is not None and self.multi_output:
273
- if not tf.executing_eagerly():
274
- model_output_values = self.run(self.model_output, self.model_inputs, X)
275
- else:
276
- model_output_values = self.model(X)
277
-
278
- if output_rank_order == "max":
279
- model_output_ranks = np.argsort(-model_output_values)
280
- elif output_rank_order == "min":
281
- model_output_ranks = np.argsort(model_output_values)
282
- elif output_rank_order == "max_abs":
283
- model_output_ranks = np.argsort(np.abs(model_output_values))
284
- else:
285
- emsg = "output_rank_order must be max, min, or max_abs!"
286
- raise ValueError(emsg)
287
- model_output_ranks = model_output_ranks[:,:ranked_outputs]
288
- else:
289
- model_output_ranks = np.tile(np.arange(len(self.phi_symbolics)), (X[0].shape[0], 1))
290
-
291
- # compute the attributions
292
- output_phis = []
293
- for i in range(model_output_ranks.shape[1]):
294
- phis = []
295
- for k in range(len(X)):
296
- phis.append(np.zeros(X[k].shape))
297
- for j in range(X[0].shape[0]):
298
- if (hasattr(self.data, '__call__')):
299
- bg_data = self.data([X[t][j] for t in range(len(X))])
300
- if not isinstance(bg_data, list):
301
- bg_data = [bg_data]
302
- else:
303
- bg_data = self.data
304
-
305
- # tile the inputs to line up with the background data samples
306
- tiled_X = [np.tile(X[t][j:j+1], (bg_data[t].shape[0],) + tuple([1 for k in range(len(X[t].shape)-1)])) for t in range(len(X))]
307
-
308
- # we use the first sample for the current sample and the rest for the references
309
- joint_input = [np.concatenate([tiled_X[t], bg_data[t]], 0) for t in range(len(X))]
310
-
311
- # run attribution computation graph
312
- feature_ind = model_output_ranks[j,i]
313
- sample_phis = self.run(self.phi_symbolic(feature_ind), self.model_inputs, joint_input)
314
-
315
- # assign the attributions to the right part of the output arrays
316
- for t in range(len(X)):
317
- phis[t][j] = (sample_phis[t][bg_data[t].shape[0]:] * (X[t][j] - bg_data[t])).mean(0)
318
-
319
- output_phis.append(phis[0] if not self.multi_input else phis)
320
-
321
- # check that the SHAP values sum up to the model output
322
- if check_additivity:
323
- if not tf.executing_eagerly():
324
- model_output = self.run(self.model_output, self.model_inputs, X)
325
- else:
326
- model_output = self.model(X)
327
-
328
- _check_additivity(self, model_output, output_phis)
329
-
330
- if not self.multi_output:
331
- return output_phis[0]
332
- elif ranked_outputs is not None:
333
- return output_phis, model_output_ranks
334
- else:
335
- return output_phis
336
-
337
- def run(self, out, model_inputs, X):
338
- """ Runs the model while also setting the learning phase flags to False.
339
- """
340
- if not tf.executing_eagerly():
341
- feed_dict = dict(zip(model_inputs, X))
342
- for t in self.learning_phase_flags:
343
- feed_dict[t] = False
344
- return self.session.run(out, feed_dict)
345
- else:
346
- def anon():
347
- tf_execute.record_gradient = custom_record_gradient
348
-
349
- # build inputs that are correctly shaped, typed, and tf-wrapped
350
- inputs = []
351
- for i in range(len(X)):
352
- shape = list(self.model_inputs[i].shape)
353
- shape[0] = -1
354
- data = X[i].reshape(shape)
355
- v = tf.constant(data, dtype=self.model_inputs[i].dtype)
356
- inputs.append(v)
357
- final_out = out(inputs)
358
- try:
359
- tf_execute.record_gradient = tf_backprop._record_gradient
360
- except AttributeError:
361
- tf_execute.record_gradient = tf_backprop.record_gradient
362
-
363
- return final_out
364
- return self.execute_with_overridden_gradients(anon)
365
-
366
- def custom_grad(self, op, *grads):
367
- """ Passes a gradient op creation request to the correct handler.
368
- """
369
- type_name = op.type[5:] if op.type.startswith("shap_") else op.type
370
- out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefix before the lookup
371
- return out
372
-
373
- def execute_with_overridden_gradients(self, f):
374
- # replace the gradients for all the non-linear activations
375
- # we do this by hacking our way into the registry (TODO: find a public API for this if it exists)
376
- reg = tf_ops._gradient_registry._registry
377
- ops_not_in_registry = ['TensorListReserve']
378
- # NOTE: location_tag taken from tensorflow source for None type ops
379
- location_tag = ("UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN")
380
- # TODO: unclear why some ops are not in the registry with TF 2.0 like TensorListReserve
381
- for non_reg_ops in ops_not_in_registry:
382
- reg[non_reg_ops] = {'type': None, 'location': location_tag}
383
- for n in op_handlers:
384
- if n in reg:
385
- self.orig_grads[n] = reg[n]["type"]
386
- reg["shap_"+n] = {
387
- "type": self.custom_grad,
388
- "location": reg[n]["location"]
389
- }
390
- reg[n]["type"] = self.custom_grad
391
-
392
- # In TensorFlow 1.10 they started pruning out nodes that they think can't be backpropped
393
- # unfortunately that includes the index of embedding layers so we disable that check here
394
- if hasattr(tf_gradients_impl, "_IsBackpropagatable"):
395
- orig_IsBackpropagatable = tf_gradients_impl._IsBackpropagatable
396
- tf_gradients_impl._IsBackpropagatable = lambda tensor: True
397
-
398
- # define the computation graph for the attribution values using a custom gradient-like computation
399
- try:
400
- out = f()
401
- finally:
402
- # reinstate the backpropagatable check
403
- if hasattr(tf_gradients_impl, "_IsBackpropagatable"):
404
- tf_gradients_impl._IsBackpropagatable = orig_IsBackpropagatable
405
-
406
- # restore the original gradient definitions
407
- for n in op_handlers:
408
- if n in reg:
409
- del reg["shap_"+n]
410
- reg[n]["type"] = self.orig_grads[n]
411
- for non_reg_ops in ops_not_in_registry:
412
- del reg[non_reg_ops]
413
- if not tf.executing_eagerly():
414
- return out
415
- else:
416
- return [v.numpy() for v in out]
417
-
418
- def tensors_blocked_by_false(ops):
419
- """ Follows a set of ops assuming their value is False and find blocked Switch paths.
420
-
421
- This is used to prune away parts of the model graph that are only used during the training
422
- phase (like dropout, batch norm, etc.).
423
- """
424
- blocked = []
425
- def recurse(op):
426
- if op.type == "Switch":
427
- blocked.append(op.outputs[1]) # the true path is blocked since we assume the ops we trace are False
428
- else:
429
- for out in op.outputs:
430
- for c in out.consumers():
431
- recurse(c)
432
- for op in ops:
433
- recurse(op)
434
-
435
- return blocked
436
-
437
- def backward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist):
438
- found_ops = []
439
- op_stack = [op for op in start_ops]
440
- while len(op_stack) > 0:
441
- op = op_stack.pop()
442
- if op.type not in op_type_blacklist and op not in found_ops:
443
- found_ops.append(op)
444
- for input in op.inputs:
445
- if input not in tensor_blacklist:
446
- op_stack.append(input.op)
447
- return found_ops
448
-
449
- def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, within_ops):
450
- found_ops = []
451
- op_stack = [op for op in start_ops]
452
- while len(op_stack) > 0:
453
- op = op_stack.pop()
454
- if op.type not in op_type_blacklist and op in within_ops and op not in found_ops:
455
- found_ops.append(op)
456
- for out in op.outputs:
457
- if out not in tensor_blacklist:
458
- for c in out.consumers():
459
- op_stack.append(c)
460
- return found_ops
461
-
462
-
463
- def softmax(explainer, op, *grads):
464
- """ Just decompose softmax into its components and recurse, we can handle all of them :)
465
-
466
- We assume the 'axis' is the last dimension because the TF codebase swaps the 'axis' to
467
- the last dimension before the softmax op if 'axis' is not already the last dimension.
468
- We also don't subtract the max before tf.exp for numerical stability since that might
469
- mess up the attributions and it seems like TensorFlow doesn't define softmax that way
470
- (according to the docs)
471
- """
472
- in0 = op.inputs[0]
473
- in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name="in0_max")
474
- in0_centered = in0 - in0_max
475
- evals = tf.exp(in0_centered, name="custom_exp")
476
- rsum = tf.reduce_sum(evals, axis=-1, keepdims=True)
477
- div = evals / rsum
478
-
479
- # mark these as in-between the inputs and outputs
480
- for op in [evals.op, rsum.op, div.op, in0_centered.op]:
481
- for t in op.outputs:
482
- if t.name not in explainer.between_tensors:
483
- explainer.between_tensors[t.name] = False
484
-
485
- out = tf.gradients(div, in0_centered, grad_ys=grads[0])[0]
486
-
487
- # remove the names we just added
488
- for op in [evals.op, rsum.op, div.op, in0_centered.op]:
489
- for t in op.outputs:
490
- if explainer.between_tensors[t.name] is False:
491
- del explainer.between_tensors[t.name]
492
-
493
- # rescale to account for our shift by in0_max (which we did for numerical stability)
494
- xin0,rin0 = tf.split(in0, 2)
495
- xin0_centered,rin0_centered = tf.split(in0_centered, 2)
496
- delta_in0 = xin0 - rin0
497
- dup0 = [2] + [1 for i in delta_in0.shape[1:]]
498
- return tf.where(
499
- tf.tile(tf.abs(delta_in0), dup0) < 1e-6,
500
- out,
501
- out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0)
502
- )
503
-
504
- def maxpool(explainer, op, *grads):
505
- xin0,rin0 = tf.split(op.inputs[0], 2)
506
- xout,rout = tf.split(op.outputs[0], 2)
507
- delta_in0 = xin0 - rin0
508
- dup0 = [2] + [1 for i in delta_in0.shape[1:]]
509
- cross_max = tf.maximum(xout, rout)
510
- diffs = tf.concat([cross_max - rout, xout - cross_max], 0)
511
- if op.type.startswith("shap_"):
512
- op.type = op.type[5:]
513
- xmax_pos,rmax_pos = tf.split(explainer.orig_grads[op.type](op, grads[0] * diffs), 2)
514
- return tf.tile(tf.where(
515
- tf.abs(delta_in0) < 1e-7,
516
- tf.zeros_like(delta_in0),
517
- (xmax_pos + rmax_pos) / delta_in0
518
- ), dup0)
519
-
520
- def gather(explainer, op, *grads):
521
- #params = op.inputs[0]
522
- indices = op.inputs[1]
523
- #axis = op.inputs[2]
524
- var = explainer._variable_inputs(op)
525
- if var[1] and not var[0]:
526
- assert len(indices.shape) == 2, "Only scalar indices supported right now in GatherV2!"
527
-
528
- xin1,rin1 = tf.split(tf.cast(op.inputs[1], tf.float32), 2)
529
- xout,rout = tf.split(op.outputs[0], 2)
530
- dup_in1 = [2] + [1 for i in xin1.shape[1:]]
531
- dup_out = [2] + [1 for i in xout.shape[1:]]
532
- delta_in1_t = tf.tile(xin1 - rin1, dup_in1)
533
- out_sum = tf.reduce_sum(grads[0] * tf.tile(xout - rout, dup_out), list(range(len(indices.shape), len(grads[0].shape))))
534
- if op.type == "ResourceGather":
535
- return [None, tf.where(
536
- tf.abs(delta_in1_t) < 1e-6,
537
- tf.zeros_like(delta_in1_t),
538
- out_sum / delta_in1_t
539
- )]
540
- return [None, tf.where(
541
- tf.abs(delta_in1_t) < 1e-6,
542
- tf.zeros_like(delta_in1_t),
543
- out_sum / delta_in1_t
544
- ), None]
545
- elif var[0] and not var[1]:
546
- if op.type.startswith("shap_"):
547
- op.type = op.type[5:]
548
- return [explainer.orig_grads[op.type](op, grads[0]), None] # linear in this case
549
- else:
550
- raise ValueError("Axis not yet supported to be varying for gather op!")
551
-
552
-
553
- def linearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
554
- def handler(explainer, op, *grads):
555
- var = explainer._variable_inputs(op)
556
- if var[input_ind0] and not var[input_ind1]:
557
- return linearity_1d_handler(input_ind0, explainer, op, *grads)
558
- elif var[input_ind1] and not var[input_ind0]:
559
- return linearity_1d_handler(input_ind1, explainer, op, *grads)
560
- elif var[input_ind0] and var[input_ind1]:
561
- return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
562
- else:
563
- return [None for _ in op.inputs] # no inputs vary, we must be hidden by a switch function
564
- return handler
565
-
566
- def nonlinearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
567
- def handler(explainer, op, *grads):
568
- var = explainer._variable_inputs(op)
569
- if var[input_ind0] and not var[input_ind1]:
570
- return nonlinearity_1d_handler(input_ind0, explainer, op, *grads)
571
- elif var[input_ind1] and not var[input_ind0]:
572
- return nonlinearity_1d_handler(input_ind1, explainer, op, *grads)
573
- elif var[input_ind0] and var[input_ind1]:
574
- return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
575
- else:
576
- return [None for _ in op.inputs] # no inputs vary, we must be hidden by a switch function
577
- return handler
578
-
579
- def nonlinearity_1d(input_ind):
580
- def handler(explainer, op, *grads):
581
- return nonlinearity_1d_handler(input_ind, explainer, op, *grads)
582
- return handler
583
-
584
- def nonlinearity_1d_handler(input_ind, explainer, op, *grads):
585
- # make sure only the given input varies
586
- op_inputs = op.inputs
587
- if op_inputs is None:
588
- op_inputs = op.outputs[0].op.inputs
589
-
590
- for i in range(len(op_inputs)):
591
- if i != input_ind:
592
- assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
593
-
594
- xin0, rin0 = tf.split(op_inputs[input_ind], 2)
595
- xout, rout = tf.split(op.outputs[input_ind], 2)
596
- delta_in0 = xin0 - rin0
597
- if delta_in0.shape is None:
598
- dup0 = [2, 1]
599
- else:
600
- dup0 = [2] + [1 for i in delta_in0.shape[1:]]
601
- out = [None for _ in op_inputs]
602
- if op.type.startswith("shap_"):
603
- op.type = op.type[5:]
604
- orig_grad = explainer.orig_grads[op.type](op, grads[0])
605
- out[input_ind] = tf.where(
606
- tf.tile(tf.abs(delta_in0), dup0) < 1e-6,
607
- orig_grad[input_ind] if len(op_inputs) > 1 else orig_grad,
608
- grads[0] * tf.tile((xout - rout) / delta_in0, dup0)
609
- )
610
- return out
611
-
612
- def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads):
613
- if not (input_ind0 == 0 and input_ind1 == 1):
614
- emsg = "TODO: Can't yet handle double inputs that are not first!"
615
- raise Exception(emsg)
616
- xout,rout = tf.split(op.outputs[0], 2)
617
- in0 = op.inputs[input_ind0]
618
- in1 = op.inputs[input_ind1]
619
- xin0,rin0 = tf.split(in0, 2)
620
- xin1,rin1 = tf.split(in1, 2)
621
- delta_in0 = xin0 - rin0
622
- delta_in1 = xin1 - rin1
623
- dup0 = [2] + [1 for i in delta_in0.shape[1:]]
624
- out10 = op_func(xin0, rin1)
625
- out01 = op_func(rin0, xin1)
626
- out11,out00 = xout,rout
627
- out0 = 0.5 * (out11 - out01 + out10 - out00)
628
- out0 = grads[0] * tf.tile(out0 / delta_in0, dup0)
629
- out1 = 0.5 * (out11 - out10 + out01 - out00)
630
- out1 = grads[0] * tf.tile(out1 / delta_in1, dup0)
631
-
632
- # Avoid divide by zero nans
633
- out0 = tf.where(tf.abs(tf.tile(delta_in0, dup0)) < 1e-7, tf.zeros_like(out0), out0)
634
- out1 = tf.where(tf.abs(tf.tile(delta_in1, dup0)) < 1e-7, tf.zeros_like(out1), out1)
635
-
636
- # see if due to broadcasting our gradient shapes don't match our input shapes
637
- if (np.any(np.array(out1.shape) != np.array(in1.shape))):
638
- broadcast_index = np.where(np.array(out1.shape) != np.array(in1.shape))[0][0]
639
- out1 = tf.reduce_sum(out1, axis=broadcast_index, keepdims=True)
640
- elif (np.any(np.array(out0.shape) != np.array(in0.shape))):
641
- broadcast_index = np.where(np.array(out0.shape) != np.array(in0.shape))[0][0]
642
- out0 = tf.reduce_sum(out0, axis=broadcast_index, keepdims=True)
643
-
644
- return [out0, out1]
645
-
646
- def linearity_1d(input_ind):
647
- def handler(explainer, op, *grads):
648
- return linearity_1d_handler(input_ind, explainer, op, *grads)
649
- return handler
650
-
651
- def linearity_1d_handler(input_ind, explainer, op, *grads):
652
- # make sure only the given input varies (negative means only that input cannot vary, and is measured from the end of the list)
653
- for i in range(len(op.inputs)):
654
- if i != input_ind:
655
- assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
656
- if op.type.startswith("shap_"):
657
- op.type = op.type[5:]
658
- return explainer.orig_grads[op.type](op, *grads)
659
-
660
- def linearity_with_excluded(input_inds):
661
- def handler(explainer, op, *grads):
662
- return linearity_with_excluded_handler(input_inds, explainer, op, *grads)
663
- return handler
664
-
665
- def linearity_with_excluded_handler(input_inds, explainer, op, *grads):
666
- # make sure the given inputs don't vary (negative is measured from the end of the list)
667
- for i in range(len(op.inputs)):
668
- if i in input_inds or i - len(op.inputs) in input_inds:
669
- assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
670
- if op.type.startswith("shap_"):
671
- op.type = op.type[5:]
672
- return explainer.orig_grads[op.type](op, *grads)
673
-
674
- def passthrough(explainer, op, *grads):
675
- if op.type.startswith("shap_"):
676
- op.type = op.type[5:]
677
- return explainer.orig_grads[op.type](op, *grads)
678
-
679
- def break_dependence(explainer, op, *grads):
680
- """ This function name is used to break attribution dependence in the graph traversal.
681
-
682
- These operation types may be connected above input data values in the graph but their outputs
683
- don't depend on the input values (for example they just depend on the shape).
684
- """
685
- return [None for _ in op.inputs]
686
-
687
-
688
- op_handlers = {}
689
-
690
- # ops that are always linear
691
- op_handlers["Identity"] = passthrough
692
- op_handlers["StridedSlice"] = passthrough
693
- op_handlers["Squeeze"] = passthrough
694
- op_handlers["ExpandDims"] = passthrough
695
- op_handlers["Pack"] = passthrough
696
- op_handlers["BiasAdd"] = passthrough
697
- op_handlers["Unpack"] = passthrough
698
- op_handlers["Add"] = passthrough
699
- op_handlers["Sub"] = passthrough
700
- op_handlers["Merge"] = passthrough
701
- op_handlers["Sum"] = passthrough
702
- op_handlers["Mean"] = passthrough
703
- op_handlers["Cast"] = passthrough
704
- op_handlers["Transpose"] = passthrough
705
- op_handlers["Enter"] = passthrough
706
- op_handlers["Exit"] = passthrough
707
- op_handlers["NextIteration"] = passthrough
708
- op_handlers["Tile"] = passthrough
709
- op_handlers["TensorArrayScatterV3"] = passthrough
710
- op_handlers["TensorArrayReadV3"] = passthrough
711
- op_handlers["TensorArrayWriteV3"] = passthrough
712
-
713
-
714
- # ops that don't pass any attributions to their inputs
715
- op_handlers["Shape"] = break_dependence
716
- op_handlers["RandomUniform"] = break_dependence
717
- op_handlers["ZerosLike"] = break_dependence
718
- #op_handlers["StopGradient"] = break_dependence # this allows us to stop attributions when we want to (like softmax re-centering)
719
-
720
- # ops that are linear and only allow a single input to vary
721
- op_handlers["Reshape"] = linearity_1d(0)
722
- op_handlers["Pad"] = linearity_1d(0)
723
- op_handlers["ReverseV2"] = linearity_1d(0)
724
- op_handlers["ConcatV2"] = linearity_with_excluded([-1])
725
- op_handlers["Conv2D"] = linearity_1d(0)
726
- op_handlers["Switch"] = linearity_1d(0)
727
- op_handlers["AvgPool"] = linearity_1d(0)
728
- op_handlers["FusedBatchNorm"] = linearity_1d(0)
729
-
730
- # ops that are nonlinear and only allow a single input to vary
731
- op_handlers["Relu"] = nonlinearity_1d(0)
732
- op_handlers["Elu"] = nonlinearity_1d(0)
733
- op_handlers["Sigmoid"] = nonlinearity_1d(0)
734
- op_handlers["Tanh"] = nonlinearity_1d(0)
735
- op_handlers["Softplus"] = nonlinearity_1d(0)
736
- op_handlers["Exp"] = nonlinearity_1d(0)
737
- op_handlers["ClipByValue"] = nonlinearity_1d(0)
738
- op_handlers["Rsqrt"] = nonlinearity_1d(0)
739
- op_handlers["Square"] = nonlinearity_1d(0)
740
- op_handlers["Max"] = nonlinearity_1d(0)
741
-
742
- # ops that are nonlinear and allow two inputs to vary
743
- op_handlers["SquaredDifference"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y))
744
- op_handlers["Minimum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y))
745
- op_handlers["Maximum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y))
746
-
747
- # ops that allow up to two inputs to vary but are linear when only one input varies
748
- op_handlers["Mul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y)
749
- op_handlers["RealDiv"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y)
750
- op_handlers["MatMul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y))
751
-
752
- # ops that need their own custom attribution functions
753
- op_handlers["GatherV2"] = gather
754
- op_handlers["ResourceGather"] = gather
755
- op_handlers["MaxPool"] = maxpool
756
- op_handlers["Softmax"] = softmax
757
-
758
-
759
- # TODO items
760
- # TensorArrayGatherV3
761
- # Max
762
- # TensorArraySizeV3
763
- # Range
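Likewise, a minimal sketch of reaching this TensorFlow backend through the public shap.DeepExplainer API with a Keras model; the model and data below are illustrative assumptions:

import numpy as np
import tensorflow as tf
import shap

# toy Keras model and random background data (illustrative only)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(8,)),
    tf.keras.layers.Dense(1),
])
background = np.random.randn(50, 8).astype(np.float32)
to_explain = np.random.randn(3, 8).astype(np.float32)

explainer = shap.DeepExplainer(model, background)   # uses the TF backend shown above
shap_values = explainer.shap_values(to_explain)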
lib/shap/explainers/_deep/deep_utils.py DELETED
@@ -1,23 +0,0 @@
1
- import numpy as np
2
-
3
-
4
- def _check_additivity(explainer, model_output_values, output_phis):
5
- TOLERANCE = 1e-2
6
-
7
- assert len(explainer.expected_value) == model_output_values.shape[1], "Length of expected values and model outputs does not match."
8
-
9
- for t in range(len(explainer.expected_value)):
10
- if not explainer.multi_input:
11
- diffs = model_output_values[:, t] - explainer.expected_value[t] - output_phis[t].sum(axis=tuple(range(1, output_phis[t].ndim)))
12
- else:
13
- diffs = model_output_values[:, t] - explainer.expected_value[t]
14
-
15
- for i in range(len(output_phis[t])):
16
- diffs -= output_phis[t][i].sum(axis=tuple(range(1, output_phis[t][i].ndim)))
17
-
18
- maxdiff = np.abs(diffs).max()
19
-
20
- assert maxdiff < TOLERANCE, "The SHAP explanations do not sum up to the model's output! This is either because of a " \
21
- "rounding error or because an operator in your computation graph was not fully supported. If " \
22
- "the sum difference of %f is significant compared to the scale of your model outputs, please post " \
23
- f"as a github issue, with a reproducible example so we can debug it. Used framework: {explainer.framework} - Max. diff: {maxdiff} - Tolerance: {TOLERANCE}"
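The property enforced above is that, for every sample, the expected value plus the summed attributions reproduces the model output within the tolerance; a small numeric sketch with made-up values:

import numpy as np

expected_value = 0.2
phis = np.array([[0.10, -0.05, 0.30],      # attributions: 2 samples x 3 features
                 [0.00,  0.25, -0.10]])
model_output = np.array([0.551, 0.348])    # hypothetical model outputs
diffs = model_output - expected_value - phis.sum(axis=1)
assert np.abs(diffs).max() < 1e-2          # max deviation here is 0.002, so the check passes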
lib/shap/explainers/_exact.py DELETED
@@ -1,366 +0,0 @@
1
- import logging
2
-
3
- import numpy as np
4
- from numba import njit
5
-
6
- from .. import links
7
- from ..models import Model
8
- from ..utils import (
9
- MaskedModel,
10
- delta_minimization_order,
11
- make_masks,
12
- shapley_coefficients,
13
- )
14
- from ._explainer import Explainer
15
-
16
- log = logging.getLogger('shap')
17
-
18
-
19
- class ExactExplainer(Explainer):
20
- """ Computes SHAP values via an optimized exact enumeration.
21
-
22
- This works well for standard Shapley value maskers for models with less than ~15 features that vary
23
- from the background per sample. It also works well for Owen values from hclustering structured
24
- maskers when there are less than ~100 features that vary from the background per sample. This
25
- explainer minimizes the number of function evaluations needed by ordering the masking sets to
26
- minimize sequential differences. This is done using gray codes for standard Shapley values
27
- and a greedy sorting method for hclustering structured maskers.
28
- """
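The gray-code ordering mentioned in the docstring above can be illustrated with the standard reflected binary code, where consecutive indices differ in exactly one bit, so each successive mask flips a single feature; the helper below is an illustrative sketch, not the library's gray_code_indexes:

def gray_code_masks(n):
    # reflected binary Gray code: g(i) = i ^ (i >> 1)
    for i in range(2 ** n):
        g = i ^ (i >> 1)
        yield [(g >> b) & 1 for b in range(n)]

masks = list(gray_code_masks(3))
for a, b in zip(masks, masks[1:]):
    # exactly one feature changes between consecutive masks
    assert sum(x != y for x, y in zip(a, b)) == 1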
29
-
30
- def __init__(self, model, masker, link=links.identity, linearize_link=True, feature_names=None):
31
- """ Build an explainers.Exact object for the given model using the given masker object.
32
-
33
- Parameters
34
- ----------
35
- model : function
36
- A callable python object that executes the model given a set of input data samples.
37
-
38
- masker : function or numpy.array or pandas.DataFrame
39
- A callable python object used to "mask" out hidden features of the form `masker(mask, *fargs)`.
40
- It takes a single a binary mask and an input sample and returns a matrix of masked samples. These
41
- masked samples are evaluated using the model function and the outputs are then averaged.
42
- As a shortcut for the standard masking used by SHAP you can pass a background data matrix
43
- instead of a function and that matrix will be used for masking. To use a clustering
44
- game structure you can pass a shap.maskers.TabularPartitions(data) object.
45
-
46
- link : function
47
- The link function used to map between the output units of the model and the SHAP value units. By
48
- default it is shap.links.identity, but shap.links.logit can be useful so that expectations are
49
- computed in probability units while explanations remain in the (more naturally additive) log-odds
50
- units. For more details on how link functions work see any overview of link functions for generalized
51
- linear models.
52
-
53
- linearize_link : bool
54
- If we use a non-linear link function to take expectations then models that are additive with respect to that
55
- link function for a single background sample will no longer be additive when using a background masker with
56
- many samples. This for example means that a linear logistic regression model would have interaction effects
57
- that arise from the non-linear changes in expectation averaging. To retain the additively of the model with
58
- still respecting the link function we linearize the link function by default.
59
- """ # TODO link to the link linearization paper when done
60
- super().__init__(model, masker, link=link, linearize_link=linearize_link, feature_names=feature_names)
61
-
62
- self.model = Model(model)
63
-
64
- if getattr(masker, "clustering", None) is not None:
65
- self._partition_masks, self._partition_masks_inds = partition_masks(masker.clustering)
66
- self._partition_delta_indexes = partition_delta_indexes(masker.clustering, self._partition_masks)
67
-
68
- self._gray_code_cache = {} # used to avoid regenerating the same gray code patterns
69
-
70
- def __call__(self, *args, max_evals=100000, main_effects=False, error_bounds=False, batch_size="auto", interactions=1, silent=False):
71
- """ Explains the output of model(*args), where args represents one or more parallel iterators.
72
- """
73
-
74
- # we entirely rely on the general call implementation, we override just to remove **kwargs
75
- # from the function signature
76
- return super().__call__(
77
- *args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
78
- batch_size=batch_size, interactions=interactions, silent=silent
79
- )
80
-
81
- def _cached_gray_codes(self, n):
82
- if n not in self._gray_code_cache:
83
- self._gray_code_cache[n] = gray_code_indexes(n)
84
- return self._gray_code_cache[n]
85
-
86
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, interactions, silent):
87
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes).
88
- """
89
-
90
- # build a masked version of the model for the current input sample
91
- fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args)
92
-
93
- # do the standard Shapley values
94
- inds = None
95
- if getattr(self.masker, "clustering", None) is None:
96
-
97
- # see which elements we actually need to perturb
98
- inds = fm.varying_inputs()
99
-
100
- # make sure we have enough evals
101
- if max_evals is not None and max_evals != "auto" and max_evals < 2**len(inds):
102
- raise ValueError(
103
- f"It takes {2**len(inds)} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!"
104
- )
105
-
106
- # generate the masks in gray code order (so that we change the inputs as little
107
- # as possible while we iterate to minimize the need to re-eval when the inputs
108
- # don't vary from the background)
109
- delta_indexes = self._cached_gray_codes(len(inds))
110
-
111
- # map to a larger mask that includes the invariant entries
112
- extended_delta_indexes = np.zeros(2**len(inds), dtype=int)
113
- for i in range(2**len(inds)):
114
- if delta_indexes[i] == MaskedModel.delta_mask_noop_value:
115
- extended_delta_indexes[i] = delta_indexes[i]
116
- else:
117
- extended_delta_indexes[i] = inds[delta_indexes[i]]
118
-
119
- # run the model
120
- outputs = fm(extended_delta_indexes, zero_index=0, batch_size=batch_size)
121
-
122
- # Shapley values
123
- # Care: Need to distinguish between `True` and `1`
124
- if interactions is False or (interactions == 1 and interactions is not True):
125
-
126
- # loop over all the outputs to update the rows
127
- coeff = shapley_coefficients(len(inds))
128
- row_values = np.zeros((len(fm),) + outputs.shape[1:])
129
- mask = np.zeros(len(fm), dtype=bool)
130
- _compute_grey_code_row_values(row_values, mask, inds, outputs, coeff, extended_delta_indexes, MaskedModel.delta_mask_noop_value)
131
-
132
- # Shapley-Taylor interaction values
133
- elif interactions is True or interactions == 2:
134
-
135
- # loop over all the outputs to update the rows
136
- coeff = shapley_coefficients(len(inds))
137
- row_values = np.zeros((len(fm), len(fm)) + outputs.shape[1:])
138
- mask = np.zeros(len(fm), dtype=bool)
139
- _compute_grey_code_row_values_st(row_values, mask, inds, outputs, coeff, extended_delta_indexes, MaskedModel.delta_mask_noop_value)
140
-
141
- elif interactions > 2:
142
- raise NotImplementedError("Currently the Exact explainer does not support interactions higher than order 2!")
143
-
144
- # do a partition tree constrained version of Shapley values
145
- else:
146
-
147
- # make sure we have enough evals
148
- if max_evals is not None and max_evals != "auto" and max_evals < len(fm)**2:
149
- raise ValueError(
150
- f"It takes {len(fm)**2} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!"
151
- )
152
-
153
- # generate the masks in a hclust order (so that we change the inputs as little
154
- # as possible while we iterate to minimize the need to re-eval when the inputs
155
- # don't vary from the background)
156
- delta_indexes = self._partition_delta_indexes
157
-
158
- # run the model
159
- outputs = fm(delta_indexes, batch_size=batch_size)
160
-
161
- # loop over each output feature
162
- row_values = np.zeros((len(fm),) + outputs.shape[1:])
163
- for i in range(len(fm)):
164
- on_outputs = outputs[self._partition_masks_inds[i][1]]
165
- off_outputs = outputs[self._partition_masks_inds[i][0]]
166
- row_values[i] = (on_outputs - off_outputs).mean(0)
167
-
168
- # compute the main effects if we need to
169
- main_effect_values = None
170
- if main_effects or interactions is True or interactions == 2:
171
- if inds is None:
172
- inds = np.arange(len(fm))
173
- main_effect_values = fm.main_effects(inds)
174
- if interactions is True or interactions == 2:
175
- for i in range(len(fm)):
176
- row_values[i, i] = main_effect_values[i]
177
-
178
- return {
179
- "values": row_values,
180
- "expected_values": outputs[0],
181
- "mask_shapes": fm.mask_shapes,
182
- "main_effects": main_effect_values if main_effects else None,
183
- "clustering": getattr(self.masker, "clustering", None)
184
- }
185
-
186
- @njit
187
- def _compute_grey_code_row_values(row_values, mask, inds, outputs, shapley_coeff, extended_delta_indexes, noop_code):
188
- set_size = 0
189
- M = len(inds)
190
- for i in range(2**M):
191
-
192
- # update the mask
193
- delta_ind = extended_delta_indexes[i]
194
- if delta_ind != noop_code:
195
- mask[delta_ind] = ~mask[delta_ind]
196
- if mask[delta_ind]:
197
- set_size += 1
198
- else:
199
- set_size -= 1
200
-
201
- # update the output row values
202
- on_coeff = shapley_coeff[set_size-1]
203
- if set_size < M:
204
- off_coeff = shapley_coeff[set_size]
205
- out = outputs[i]
206
- for j in inds:
207
- if mask[j]:
208
- row_values[j] += out * on_coeff
209
- else:
210
- row_values[j] -= out * off_coeff
211
-
212
- @njit
213
- def _compute_grey_code_row_values_st(row_values, mask, inds, outputs, shapley_coeff, extended_delta_indexes, noop_code):
214
- set_size = 0
215
- M = len(inds)
216
- for i in range(2**M):
217
-
218
- # update the mask
219
- delta_ind = extended_delta_indexes[i]
220
- if delta_ind != noop_code:
221
- mask[delta_ind] = ~mask[delta_ind]
222
- if mask[delta_ind]:
223
- set_size += 1
224
- else:
225
- set_size -= 1
226
-
227
- # distribute the effect of this mask set over all the terms it impacts
228
- out = outputs[i]
229
- for j in range(M):
230
- for k in range(j+1, M):
231
- if not mask[j] and not mask[k]:
232
- delta = out * shapley_coeff[set_size] # * 2
233
- elif (not mask[j] and mask[k]) or (mask[j] and not mask[k]):
234
- delta = -out * shapley_coeff[set_size - 1] # * 2
235
- else: # both true
236
- delta = out * shapley_coeff[set_size - 2] # * 2
237
- row_values[j,k] += delta
238
- row_values[k,j] += delta
239
-
240
- def partition_delta_indexes(partition_tree, all_masks):
241
- """ Return an delta index encoded array of all the masks possible while following the given partition tree.
242
- """
243
-
244
- # convert the masks to delta index format
245
- mask = np.zeros(all_masks.shape[1], dtype=bool)
246
- delta_inds = []
247
- for i in range(len(all_masks)):
248
- inds = np.where(mask ^ all_masks[i,:])[0]
249
-
250
- for j in inds[:-1]:
251
- delta_inds.append(-j - 1) # negative + (-1) means we have more inds still to change...
252
- if len(inds) == 0:
253
- delta_inds.append(MaskedModel.delta_mask_noop_value)
254
- else:
255
- delta_inds.extend(inds[-1:])
256
- mask = all_masks[i,:]
257
-
258
- return np.array(delta_inds)
259
-
260
- def partition_masks(partition_tree):
261
- """ Return an array of all the masks possible while following the given partition tree.
262
- """
263
-
264
- M = partition_tree.shape[0] + 1
265
- mask_matrix = make_masks(partition_tree)
266
- all_masks = []
267
- m00 = np.zeros(M, dtype=bool)
268
- all_masks.append(m00)
269
- all_masks.append(~m00)
270
- #inds_stack = [0,1]
271
- inds_lists = [[[], []] for i in range(M)]
272
- _partition_masks_recurse(len(partition_tree)-1, m00, 0, 1, inds_lists, mask_matrix, partition_tree, M, all_masks)
273
-
274
- all_masks = np.array(all_masks)
275
-
276
- # we resort the clustering matrix to minimize the sequential difference between the masks
277
- # this minimizes the number of model evaluations we need to run when the background sometimes
278
- # matches the foreground. We seem to average about 1.5 feature changes per mask with this
279
- # approach. This is not as clean as the grey code ordering, but a perfect 1 feature change
280
- # ordering is not possible with a clustering tree
281
- order = delta_minimization_order(all_masks)
282
- inverse_order = np.arange(len(order))[np.argsort(order)]
283
-
284
- for inds_list0,inds_list1 in inds_lists:
285
- for i in range(len(inds_list0)):
286
- inds_list0[i] = inverse_order[inds_list0[i]]
287
- inds_list1[i] = inverse_order[inds_list1[i]]
288
-
289
- # Care: inds_lists have different lengths, so partition_masks_inds is a "ragged" array. See GH #3063
290
- partition_masks = all_masks[order]
291
- partition_masks_inds = [[np.array(on), np.array(off)] for on, off in inds_lists]
292
- return partition_masks, partition_masks_inds
293
-
294
- # TODO: this should be a jit function... which would require preallocating the inds_lists (sizes are 2**depth of that ind)
295
- # TODO: we could also probably avoid making the masks at all and just record the deltas if we want...
296
- def _partition_masks_recurse(index, m00, ind00, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks):
297
- if index < 0:
298
- inds_lists[index + M][0].append(ind00)
299
- inds_lists[index + M][1].append(ind11)
300
- return
301
-
302
- # get our children indexes
303
- left_index = int(partition_tree[index,0] - M)
304
- right_index = int(partition_tree[index,1] - M)
305
-
306
- # build more refined masks
307
- m10 = m00.copy() # we separate the copy from the add so as to not get converted to a matrix
308
- m10[:] += mask_matrix[left_index+M, :]
309
- m01 = m00.copy()
310
- m01[:] += mask_matrix[right_index+M, :]
311
-
312
- # record the new masks we made
313
- ind01 = len(all_masks)
314
- all_masks.append(m01)
315
- ind10 = len(all_masks)
316
- all_masks.append(m10)
317
-
318
- # inds_stack.append(len(all_masks) - 2)
319
- # inds_stack.append(len(all_masks) - 1)
320
-
321
- # recurse left and right with both 1 (True) and 0 (False) contexts
322
- _partition_masks_recurse(left_index, m00, ind00, ind10, inds_lists, mask_matrix, partition_tree, M, all_masks)
323
- _partition_masks_recurse(right_index, m10, ind10, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks)
324
- _partition_masks_recurse(left_index, m01, ind01, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks)
325
- _partition_masks_recurse(right_index, m00, ind00, ind01, inds_lists, mask_matrix, partition_tree, M, all_masks)
326
-
327
-
328
- def gray_code_masks(nbits):
329
- """ Produces an array of all binary patterns of size nbits in gray code order.
330
-
331
- This is based on code from: http://code.activestate.com/recipes/576592-gray-code-generatoriterator/
332
- """
333
- out = np.zeros((2**nbits, nbits), dtype=bool)
334
- li = np.zeros(nbits, dtype=bool)
335
-
336
- for term in range(2, (1<<nbits)+1):
337
- if term % 2 == 1: # odd
338
- for i in range(-1,-nbits,-1):
339
- if li[i] == 1:
340
- li[i-1] = li[i-1]^1
341
- break
342
- else: # even
343
- li[-1] = li[-1]^1
344
-
345
- out[term-1,:] = li
346
- return out
347
-
348
- def gray_code_indexes(nbits):
349
- """ Produces an array of which bits flip at which position.
350
-
351
- We assume the masks start at all zero and -1 means don't do a flip.
352
- This is a more efficient representation of the gray_code_masks version.
353
- """
354
- out = np.ones(2**nbits, dtype=int) * MaskedModel.delta_mask_noop_value
355
- li = np.zeros(nbits, dtype=bool)
356
- for term in range((1<<nbits)-1):
357
- if term % 2 == 1: # odd
358
- for i in range(-1,-nbits,-1):
359
- if li[i] == 1:
360
- li[i-1] = li[i-1]^1
361
- out[term+1] = nbits + (i-1)
362
- break
363
- else: # even
364
- li[-1] = li[-1]^1
365
- out[term+1] = nbits-1
366
- return out
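The two gray-code helpers above are what let the Exact explainer enumerate all 2**nbits coalitions while toggling only a single feature between consecutive model evaluations. A minimal standalone sketch of that property (illustrative only, not the removed implementation; the function name below is hypothetical and the bit ordering may differ):

import numpy as np

def gray_code_masks_sketch(nbits):
    # the i-th reflected Gray code is i ^ (i >> 1); unpack each code into a boolean mask of length nbits
    codes = np.arange(2 ** nbits)
    gray = codes ^ (codes >> 1)
    return ((gray[:, None] >> np.arange(nbits)) & 1).astype(bool)

masks = gray_code_masks_sketch(4)              # shape (16, 4), starting from the all-False mask
flips = (masks[1:] ^ masks[:-1]).sum(axis=1)   # number of features toggled between consecutive masks
assert np.all(flips == 1)                      # exactly one feature changes per step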
 
lib/shap/explainers/_explainer.py DELETED
@@ -1,457 +0,0 @@
1
- import copy
2
- import time
3
-
4
- import numpy as np
5
- import pandas as pd
6
- import scipy.sparse
7
-
8
- from .. import explainers, links, maskers, models
9
- from .._explanation import Explanation
10
- from .._serializable import Deserializer, Serializable, Serializer
11
- from ..maskers import Masker
12
- from ..models import Model
13
- from ..utils import safe_isinstance, show_progress
14
- from ..utils._exceptions import InvalidAlgorithmError
15
- from ..utils.transformers import is_transformers_lm
16
-
17
-
18
- class Explainer(Serializable):
19
- """ Uses Shapley values to explain any machine learning model or python function.
20
-
21
- This is the primary explainer interface for the SHAP library. It takes any combination
22
- of a model and masker and returns a callable subclass object that implements
23
- the particular estimation algorithm that was chosen.
24
- """
25
-
26
- def __init__(self, model, masker=None, link=links.identity, algorithm="auto", output_names=None, feature_names=None, linearize_link=True,
27
- seed=None, **kwargs):
28
- """ Build a new explainer for the passed model.
29
-
30
- Parameters
31
- ----------
32
- model : object or function
33
- User supplied function or model object that takes a dataset of samples and
34
- computes the output of the model for those samples.
35
-
36
- masker : function, numpy.array, pandas.DataFrame, tokenizer, None, or a list of these for each model input
37
- The function used to "mask" out hidden features of the form `masked_args = masker(*model_args, mask=mask)`.
38
- It takes input in the same form as the model, but for just a single sample with a binary
39
- mask, then returns an iterable of masked samples. These
40
- masked samples will then be evaluated using the model function and the outputs averaged.
41
- As a shortcut for the standard masking used by SHAP you can pass a background data matrix
42
- instead of a function and that matrix will be used for masking. Domain specific masking
43
- functions are available in shap such as shap.ImageMasker for images and shap.TokenMasker
44
- for text. In addition to determining how to replace hidden features, the masker can also
45
- constrain the rules of the cooperative game used to explain the model. For example
46
- shap.TabularMasker(data, hclustering="correlation") will enforce a hierarchical clustering
47
- of coalitions for the game (in this special case the attributions are known as the Owen values).
48
-
49
- link : function
50
- The link function used to map between the output units of the model and the SHAP value units. By
51
- default it is shap.links.identity, but shap.links.logit can be useful so that expectations are
52
- computed in probability units while explanations remain in the (more naturally additive) log-odds
53
- units. For more details on how link functions work see any overview of link functions for generalized
54
- linear models.
55
-
56
- algorithm : "auto", "permutation", "partition", "tree", or "linear"
57
- The algorithm used to estimate the Shapley values. There are many different algorithms that
58
- can be used to estimate the Shapley values (and the related value for constrained games), each
59
- of these algorithms have various tradeoffs and are preferable in different situations. By
60
- default the "auto" options attempts to make the best choice given the passed model and masker,
61
- but this choice can always be overridden by passing the name of a specific algorithm. The type of
62
- algorithm used will determine what type of subclass object is returned by this constructor, and
63
- you can also build those subclasses directly if you prefer or need more fine grained control over
64
- their options.
65
-
66
- output_names : None or list of strings
67
- The names of the model outputs. For example if the model is an image classifier, then output_names would
68
- be the names of all the output classes. This parameter is optional. When output_names is None then
69
- the Explanation objects produced by this explainer will not have any output_names, which could affect
70
- downstream plots.
71
-
72
- seed: None or int
73
- seed for reproducibility
74
-
75
- """
76
-
77
- self.model = model
78
- self.output_names = output_names
79
- self.feature_names = feature_names
80
-
81
- # wrap the incoming masker object as a shap.Masker object
82
- if (
83
- isinstance(masker, pd.DataFrame)
84
- or ((isinstance(masker, np.ndarray) or scipy.sparse.issparse(masker)) and len(masker.shape) == 2)
85
- ):
86
- if algorithm == "partition":
87
- self.masker = maskers.Partition(masker)
88
- else:
89
- self.masker = maskers.Independent(masker)
90
- elif safe_isinstance(masker, ["transformers.PreTrainedTokenizer", "transformers.tokenization_utils_base.PreTrainedTokenizerBase"]):
91
- if is_transformers_lm(self.model):
92
- # auto assign text infilling if model is a transformer model with lm head
93
- self.masker = maskers.Text(masker, mask_token="...", collapse_mask_token=True)
94
- else:
95
- self.masker = maskers.Text(masker)
96
- elif (masker is list or masker is tuple) and masker[0] is not str:
97
- self.masker = maskers.Composite(*masker)
98
- elif (masker is dict) and ("mean" in masker):
99
- self.masker = maskers.Independent(masker)
100
- elif masker is None and isinstance(self.model, models.TransformersPipeline):
101
- return self.__init__(
102
- self.model, self.model.inner_model.tokenizer,
103
- link=link, algorithm=algorithm, output_names=output_names, feature_names=feature_names, linearize_link=linearize_link, **kwargs
104
- )
105
- else:
106
- self.masker = masker
107
-
108
- # Check for transformer pipeline objects and wrap them
109
- if safe_isinstance(self.model, "transformers.pipelines.Pipeline"):
110
- if is_transformers_lm(self.model.model):
111
- return self.__init__(
112
- self.model.model, self.model.tokenizer if self.masker is None else self.masker,
113
- link=link, algorithm=algorithm, output_names=output_names, feature_names=feature_names, linearize_link=linearize_link, **kwargs
114
- )
115
- else:
116
- return self.__init__(
117
- models.TransformersPipeline(self.model), self.masker,
118
- link=link, algorithm=algorithm, output_names=output_names, feature_names=feature_names, linearize_link=linearize_link, **kwargs
119
- )
120
-
121
- # wrap self.masker and self.model for output text explanation algorithm
122
- if is_transformers_lm(self.model):
123
- self.model = models.TeacherForcing(self.model, self.masker.tokenizer)
124
- self.masker = maskers.OutputComposite(self.masker, self.model.text_generate)
125
- elif safe_isinstance(self.model, "shap.models.TeacherForcing") and safe_isinstance(self.masker, ["shap.maskers.Text", "shap.maskers.Image"]):
126
- self.masker = maskers.OutputComposite(self.masker, self.model.text_generate)
127
- elif safe_isinstance(self.model, "shap.models.TopKLM") and safe_isinstance(self.masker, "shap.maskers.Text"):
128
- self.masker = maskers.FixedComposite(self.masker)
129
-
130
- #self._brute_force_fallback = explainers.BruteForce(self.model, self.masker)
131
-
132
- # validate and save the link function
133
- if callable(link):
134
- self.link = link
135
- else:
136
- raise TypeError("The passed link function needs to be callable!")
137
- self.linearize_link = linearize_link
138
-
139
- # if we are called directly (as opposed to through super()) then we convert ourselves to the subclass
140
- # that implements the specific algorithm that was chosen
141
- if self.__class__ is Explainer:
142
-
143
- # do automatic algorithm selection
144
- #from .. import explainers
145
- if algorithm == "auto":
146
-
147
- # use implementation-aware methods if possible
148
- if explainers.LinearExplainer.supports_model_with_masker(model, self.masker):
149
- algorithm = "linear"
150
- elif explainers.TreeExplainer.supports_model_with_masker(model, self.masker): # TODO: check for Partition?
151
- algorithm = "tree"
152
- elif explainers.AdditiveExplainer.supports_model_with_masker(model, self.masker):
153
- algorithm = "additive"
154
-
155
- # otherwise use a model agnostic method
156
- elif callable(self.model):
157
- if issubclass(type(self.masker), maskers.Independent):
158
- if self.masker.shape[1] <= 10:
159
- algorithm = "exact"
160
- else:
161
- algorithm = "permutation"
162
- elif issubclass(type(self.masker), maskers.Partition):
163
- if self.masker.shape[1] <= 32:
164
- algorithm = "exact"
165
- else:
166
- algorithm = "permutation"
167
- elif (getattr(self.masker, "text_data", False) or getattr(self.masker, "image_data", False)) and hasattr(self.masker, "clustering"):
168
- algorithm = "partition"
169
- else:
170
- algorithm = "permutation"
171
-
172
- # if we get here then we don't know how to handle what was given to us
173
- else:
174
- raise TypeError("The passed model is not callable and cannot be analyzed directly with the given masker! Model: " + str(model))
175
-
176
- # build the right subclass
177
- if algorithm == "exact":
178
- self.__class__ = explainers.ExactExplainer
179
- explainers.ExactExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs)
180
- elif algorithm == "permutation":
181
- self.__class__ = explainers.PermutationExplainer
182
- explainers.PermutationExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, seed=seed, **kwargs)
183
- elif algorithm == "partition":
184
- self.__class__ = explainers.PartitionExplainer
185
- explainers.PartitionExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, output_names=self.output_names, **kwargs)
186
- elif algorithm == "tree":
187
- self.__class__ = explainers.TreeExplainer
188
- explainers.TreeExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs)
189
- elif algorithm == "additive":
190
- self.__class__ = explainers.AdditiveExplainer
191
- explainers.AdditiveExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs)
192
- elif algorithm == "linear":
193
- self.__class__ = explainers.LinearExplainer
194
- explainers.LinearExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs)
195
- elif algorithm == "deep":
196
- self.__class__ = explainers.DeepExplainer
197
- explainers.DeepExplainer.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs)
198
- else:
199
- raise InvalidAlgorithmError("Unknown algorithm type passed: %s!" % algorithm)
200
-
201
-
202
- def __call__(self, *args, max_evals="auto", main_effects=False, error_bounds=False, batch_size="auto",
203
- outputs=None, silent=False, **kwargs):
204
- """ Explains the output of model(*args), where args is a list of parallel iterable datasets.
205
-
206
- Note this default version could be an abstract method that is implemented by each algorithm-specific
207
- subclass of Explainer. Descriptions of each subclasses' __call__ arguments
208
- are available in their respective doc-strings.
209
- """
210
-
211
- # if max_evals == "auto":
212
- # self._brute_force_fallback
213
-
214
- start_time = time.time()
215
-
216
- if issubclass(type(self.masker), maskers.OutputComposite) and len(args)==2:
217
- self.masker.model = models.TextGeneration(target_sentences=args[1])
218
- args = args[:1]
219
- # parse our incoming arguments
220
- num_rows = None
221
- args = list(args)
222
- if self.feature_names is None:
223
- feature_names = [None for _ in range(len(args))]
224
- elif issubclass(type(self.feature_names[0]), (list, tuple)):
225
- feature_names = copy.deepcopy(self.feature_names)
226
- else:
227
- feature_names = [copy.deepcopy(self.feature_names)]
228
- for i in range(len(args)):
229
-
230
- # try and see if we can get a length from any of the arguments for our progress bar
231
- if num_rows is None:
232
- try:
233
- num_rows = len(args[i])
234
- except Exception:
235
- pass
236
-
237
- # convert DataFrames to numpy arrays
238
- if isinstance(args[i], pd.DataFrame):
239
- feature_names[i] = list(args[i].columns)
240
- args[i] = args[i].to_numpy()
241
-
242
- # convert nlp Dataset objects to lists
243
- if safe_isinstance(args[i], "nlp.arrow_dataset.Dataset"):
244
- args[i] = args[i]["text"]
245
- elif issubclass(type(args[i]), dict) and "text" in args[i]:
246
- args[i] = args[i]["text"]
247
-
248
- if batch_size == "auto":
249
- if hasattr(self.masker, "default_batch_size"):
250
- batch_size = self.masker.default_batch_size
251
- else:
252
- batch_size = 10
253
-
254
- # loop over each sample, filling in the values array
255
- values = []
256
- output_indices = []
257
- expected_values = []
258
- mask_shapes = []
259
- main_effects = []
260
- hierarchical_values = []
261
- clustering = []
262
- output_names = []
263
- error_std = []
264
- if callable(getattr(self.masker, "feature_names", None)):
265
- feature_names = [[] for _ in range(len(args))]
266
- for row_args in show_progress(zip(*args), num_rows, self.__class__.__name__+" explainer", silent):
267
- row_result = self.explain_row(
268
- *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
269
- batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
270
- )
271
- values.append(row_result.get("values", None))
272
- output_indices.append(row_result.get("output_indices", None))
273
- expected_values.append(row_result.get("expected_values", None))
274
- mask_shapes.append(row_result["mask_shapes"])
275
- main_effects.append(row_result.get("main_effects", None))
276
- clustering.append(row_result.get("clustering", None))
277
- hierarchical_values.append(row_result.get("hierarchical_values", None))
278
- tmp = row_result.get("output_names", None)
279
- output_names.append(tmp(*row_args) if callable(tmp) else tmp)
280
- error_std.append(row_result.get("error_std", None))
281
- if callable(getattr(self.masker, "feature_names", None)):
282
- row_feature_names = self.masker.feature_names(*row_args)
283
- for i in range(len(row_args)):
284
- feature_names[i].append(row_feature_names[i])
285
-
286
- # split the values up according to each input
287
- arg_values = [[] for a in args]
288
- for i, v in enumerate(values):
289
- pos = 0
290
- for j in range(len(args)):
291
- mask_length = np.prod(mask_shapes[i][j])
292
- arg_values[j].append(values[i][pos:pos+mask_length])
293
- pos += mask_length
294
-
295
- # collapse the arrays as possible
296
- expected_values = pack_values(expected_values)
297
- main_effects = pack_values(main_effects)
298
- output_indices = pack_values(output_indices)
299
- main_effects = pack_values(main_effects)
300
- hierarchical_values = pack_values(hierarchical_values)
301
- error_std = pack_values(error_std)
302
- clustering = pack_values(clustering)
303
-
304
- # getting output labels
305
- ragged_outputs = False
306
- if output_indices is not None:
307
- ragged_outputs = not all(len(x) == len(output_indices[0]) for x in output_indices)
308
- if self.output_names is None:
309
- if None not in output_names:
310
- if not ragged_outputs:
311
- sliced_labels = np.array(output_names)
312
- else:
313
- sliced_labels = [np.array(output_names[i])[index_list] for i,index_list in enumerate(output_indices)]
314
- else:
315
- sliced_labels = None
316
- else:
317
- assert output_indices is not None, "You have passed a list for output_names but the model seems to not have multiple outputs!"
318
- labels = np.array(self.output_names)
319
- sliced_labels = [labels[index_list] for index_list in output_indices]
320
- if not ragged_outputs:
321
- sliced_labels = np.array(sliced_labels)
322
-
323
- if isinstance(sliced_labels, np.ndarray) and len(sliced_labels.shape) == 2:
324
- if np.all(sliced_labels[0,:] == sliced_labels):
325
- sliced_labels = sliced_labels[0]
326
-
327
- # allow the masker to transform the input data to better match the masking pattern
328
- # (such as breaking text into token segments)
329
- if hasattr(self.masker, "data_transform"):
330
- new_args = []
331
- for row_args in zip(*args):
332
- new_args.append([pack_values(v) for v in self.masker.data_transform(*row_args)])
333
- args = list(zip(*new_args))
334
-
335
- # build the explanation objects
336
- out = []
337
- for j, data in enumerate(args):
338
-
339
- # reshape the attribution values using the mask_shapes
340
- tmp = []
341
- for i, v in enumerate(arg_values[j]):
342
- if np.prod(mask_shapes[i][j]) != np.prod(v.shape): # see if we have multiple outputs
343
- tmp.append(v.reshape(*mask_shapes[i][j], -1))
344
- else:
345
- tmp.append(v.reshape(*mask_shapes[i][j]))
346
- arg_values[j] = pack_values(tmp)
347
-
348
- if feature_names[j] is None:
349
- feature_names[j] = ["Feature " + str(i) for i in range(data.shape[1])]
350
-
351
-
352
- # build an explanation object for this input argument
353
- out.append(Explanation(
354
- arg_values[j], expected_values, data,
355
- feature_names=feature_names[j], main_effects=main_effects,
356
- clustering=clustering,
357
- hierarchical_values=hierarchical_values,
358
- output_names=sliced_labels, # self.output_names
359
- error_std=error_std,
360
- compute_time=time.time() - start_time
361
- # output_shape=output_shape,
362
- #lower_bounds=v_min, upper_bounds=v_max
363
- ))
364
- return out[0] if len(out) == 1 else out
365
-
366
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, outputs, silent, **kwargs):
367
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes, main_effects).
368
-
369
- This is an abstract method meant to be implemented by each subclass.
370
-
371
- Returns
372
- -------
373
- tuple
374
- A tuple of (row_values, row_expected_values, row_mask_shapes), where row_values is an array of the
375
- attribution values for each sample, row_expected_values is an array (or single value) representing
376
- the expected value of the model for each sample (which is the same for all samples unless there
377
- are fixed inputs present, like labels when explaining the loss), and row_mask_shapes is a list
378
- of all the input shapes (since the row_values is always flattened),
379
- """
380
-
381
- return {}
382
-
383
- @staticmethod
384
- def supports_model_with_masker(model, masker):
385
- """ Determines if this explainer can handle the given model.
386
-
387
- This is an abstract static method meant to be implemented by each subclass.
388
- """
389
- return False
390
-
391
- @staticmethod
392
- def _compute_main_effects(fm, expected_value, inds):
393
- """ A utility method to compute the main effects from a MaskedModel.
394
- """
395
-
396
- # mask each input on in isolation
397
- masks = np.zeros(2*len(inds)-1, dtype=int)
398
- last_ind = -1
399
- for i in range(len(inds)):
400
- if i > 0:
401
- masks[2*i - 1] = -last_ind - 1 # turn off the last input
402
- masks[2*i] = inds[i] # turn on this input
403
- last_ind = inds[i]
404
-
405
- # compute the main effects for the given indexes
406
- main_effects = fm(masks) - expected_value
407
-
408
- # expand the vector to the full input size
409
- expanded_main_effects = np.zeros(len(fm))
410
- for i, ind in enumerate(inds):
411
- expanded_main_effects[ind] = main_effects[i]
412
-
413
- return expanded_main_effects
414
-
415
- def save(self, out_file, model_saver=".save", masker_saver=".save"):
416
- """ Write the explainer to the given file stream.
417
- """
418
- super().save(out_file)
419
- with Serializer(out_file, "shap.Explainer", version=0) as s:
420
- s.save("model", self.model, model_saver)
421
- s.save("masker", self.masker, masker_saver)
422
- s.save("link", self.link)
423
-
424
- @classmethod
425
- def load(cls, in_file, model_loader=Model.load, masker_loader=Masker.load, instantiate=True):
426
- """ Load an Explainer from the given file stream.
427
-
428
- Parameters
429
- ----------
430
- in_file : The file stream to load objects from.
431
- """
432
- if instantiate:
433
- return cls._instantiated_load(in_file, model_loader=model_loader, masker_loader=masker_loader)
434
-
435
- kwargs = super().load(in_file, instantiate=False)
436
- with Deserializer(in_file, "shap.Explainer", min_version=0, max_version=0) as s:
437
- kwargs["model"] = s.load("model", model_loader)
438
- kwargs["masker"] = s.load("masker", masker_loader)
439
- kwargs["link"] = s.load("link")
440
- return kwargs
441
-
442
- def pack_values(values):
443
- """ Used the clean up arrays before putting them into an Explanation object.
444
- """
445
-
446
- if not hasattr(values, "__len__"):
447
- return values
448
-
449
- # collapse the values if we didn't compute them
450
- if values is None or values[0] is None:
451
- return None
452
-
453
- # convert to a single numpy matrix when the array is not ragged
454
- elif np.issubdtype(type(values[0]), np.number) or len(np.unique([len(v) for v in values])) == 1:
455
- return np.array(values)
456
- else:
457
- return np.array(values, dtype=object)
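The Explainer class removed above is the library's single entry point: it wraps tabular background data in a masker, auto-selects an estimation algorithm, and re-casts itself as the matching subclass. A hedged usage sketch (the model and data are made up; with a plain callable and a small background matrix the "auto" path falls back to a model-agnostic explainer as described in the docstring):

import numpy as np
import shap

def model(X):
    return 2.0 * X[:, 0] + X[:, 1]            # any callable mapping a batch of samples to one output per row

background = np.random.randn(100, 5)          # 2D background matrix -> wrapped as an Independent masker

explainer = shap.Explainer(model, background) # algorithm="auto" dispatches to a subclass
explanation = explainer(np.random.randn(3, 5))
print(explanation.values.shape)               # (3, 5): one attribution per sample and feature
# explanation.base_values holds the expected model output that the attributions sum up from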
 
lib/shap/explainers/_gpu_tree.py DELETED
@@ -1,179 +0,0 @@
1
- """GPU accelerated tree explanations"""
2
- import numpy as np
3
-
4
- from ..utils import assert_import, record_import_error
5
- from ._tree import TreeExplainer, feature_perturbation_codes, output_transform_codes
6
-
7
- try:
8
- from .. import _cext_gpu
9
- except ImportError as e:
10
- record_import_error("cext_gpu", "cuda extension was not built during install!", e)
11
-
12
-
13
- class GPUTreeExplainer(TreeExplainer):
14
- """
15
- Experimental GPU accelerated version of TreeExplainer. Currently requires source build with
16
- cuda available and 'CUDA_PATH' environment variable defined.
17
-
18
- Parameters
19
- ----------
20
- model : model object
21
- The tree based machine learning model that we want to explain. XGBoost, LightGBM,
22
- CatBoost, Pyspark and most tree-based scikit-learn models are supported.
23
-
24
- data : numpy.array or pandas.DataFrame
25
- The background dataset to use for integrating out features. This argument is optional when
26
- feature_perturbation="tree_path_dependent", since in that case we can use the number of
27
- training samples that went down each tree path as our background dataset (this is recorded
28
- in the model object).
29
-
30
- feature_perturbation : "interventional" (default) or "tree_path_dependent" (default when data=None)
31
- Since SHAP values rely on conditional expectations we need to decide how to handle correlated
32
- (or otherwise dependent) input features. The "interventional" approach breaks the dependencies
33
- between features according to the rules dictated by causal inference (Janzing et al. 2019). Note
34
- that the "interventional" option requires a background dataset and its runtime scales linearly
35
- with the size of the background dataset you use. Anywhere from 100 to 1000 random background samples
36
- are good sizes to use. The "tree_path_dependent" approach is to just follow the trees and use the
37
- number of training examples that went down each leaf to represent the background distribution.
38
- This approach does not require a background dataset and so is used by default when no background
39
- dataset is provided.
40
-
41
- model_output : "raw", "probability", "log_loss", or model method name
42
- What output of the model should be explained. If "raw" then we explain the raw output of the
43
- trees, which varies by model. For regression models "raw" is the standard output, for binary
44
- classification in XGBoost this is the log odds ratio. If model_output is the name of a
45
- supported prediction method on the model object then we explain the output of that model
46
- method name. For example model_output="predict_proba" explains the result of calling
47
- model.predict_proba. If "probability" then we explain the output of the model transformed into
48
- probability space (note that this means the SHAP values now sum to the probability output of the
49
- model). If "logloss" then we explain the log base e of the model loss function, so that the SHAP
50
- values sum up to the log loss of the model for each sample. This is helpful for breaking
51
- down model performance by feature. Currently the probability and logloss options are only
52
- supported when
53
- feature_dependence="independent".
54
-
55
- Examples
56
- --------
57
- See `GPUTree explainer examples <https://shap.readthedocs.io/en/latest/api_examples/explainers/GPUTreeExplainer.html>`_
58
- """
59
-
60
- def shap_values(self, X, y=None, tree_limit=None, approximate=False, check_additivity=True,
61
- from_call=False):
62
- """ Estimate the SHAP values for a set of samples.
63
-
64
- Parameters
65
- ----------
66
- X : numpy.array, pandas.DataFrame or catboost.Pool (for catboost)
67
- A matrix of samples (# samples x # features) on which to explain the model's output.
68
-
69
- y : numpy.array
70
- An array of label values for each sample. Used when explaining loss functions.
71
-
72
- tree_limit : None (default) or int
73
- Limit the number of trees used by the model. By default None means to use the limit
74
- of the
75
- original model, and -1 means no limit.
76
-
77
- approximate : bool
78
- Not supported.
79
-
80
- check_additivity : bool
81
- Run a validation check that the sum of the SHAP values equals the output of the
82
- model. This
83
- check takes only a small amount of time, and will catch potential unforeseen errors.
84
- Note that this check only runs right now when explaining the margin of the model.
85
-
86
- Returns
87
- -------
88
- array or list
89
- For models with a single output this returns a matrix of SHAP values
90
- (# samples x # features). Each row sums to the difference between the model output
91
- for that
92
- sample and the expected value of the model output (which is stored in the expected_value
93
- attribute of the explainer when it is constant). For models with vector outputs this
94
- returns
95
- a list of such matrices, one for each output.
96
- """
97
- assert not approximate, "approximate not supported"
98
-
99
- X, y, X_missing, flat_output, tree_limit, check_additivity = \
100
- self._validate_inputs(X, y,
101
- tree_limit,
102
- check_additivity)
103
- transform = self.model.get_transform()
104
-
105
- # run the core algorithm using the C extension
106
- assert_import("cext_gpu")
107
- phi = np.zeros((X.shape[0], X.shape[1] + 1, self.model.num_outputs))
108
- _cext_gpu.dense_tree_shap(
109
- self.model.children_left, self.model.children_right, self.model.children_default,
110
- self.model.features, self.model.thresholds, self.model.values,
111
- self.model.node_sample_weight,
112
- self.model.max_depth, X, X_missing, y, self.data, self.data_missing, tree_limit,
113
- self.model.base_offset, phi, feature_perturbation_codes[self.feature_perturbation],
114
- output_transform_codes[transform], False
115
- )
116
-
117
- out = self._get_shap_output(phi, flat_output)
118
- if check_additivity and self.model.model_output == "raw":
119
- self.assert_additivity(out, self.model.predict(X))
120
-
121
- return out
122
-
123
- def shap_interaction_values(self, X, y=None, tree_limit=None):
124
- """ Estimate the SHAP interaction values for a set of samples.
125
-
126
- Parameters
127
- ----------
128
- X : numpy.array, pandas.DataFrame or catboost.Pool (for catboost)
129
- A matrix of samples (# samples x # features) on which to explain the model's output.
130
-
131
- y : numpy.array
132
- An array of label values for each sample. Used when explaining loss functions (not
133
- yet supported).
134
-
135
- tree_limit : None (default) or int
136
- Limit the number of trees used by the model. By default None means to use the limit
137
- of the
138
- original model, and -1 means no limit.
139
-
140
- Returns
141
- -------
142
- array or list
143
- For models with a single output this returns a tensor of SHAP values
144
- (# samples x # features x # features). The matrix (# features x # features) for each
145
- sample sums
146
- to the difference between the model output for that sample and the expected value of
147
- the model output
148
- (which is stored in the expected_value attribute of the explainer). Each row of this
149
- matrix sums to the
150
- SHAP value for that feature for that sample. The diagonal entries of the matrix
151
- represent the
152
- "main effect" of that feature on the prediction and the symmetric off-diagonal
153
- entries represent the
154
- interaction effects between all pairs of features for that sample. For models with
155
- vector outputs
156
- this returns a list of tensors, one for each output.
157
- """
158
-
159
- assert self.model.model_output == "raw", "Only model_output = \"raw\" is supported for " \
160
- "SHAP interaction values right now!"
161
- assert self.feature_perturbation != "interventional", 'feature_perturbation="interventional" is not yet supported for ' + \
162
- 'interaction values. Use feature_perturbation="tree_path_dependent" instead.'
163
- transform = "identity"
164
-
165
- X, y, X_missing, flat_output, tree_limit, _ = self._validate_inputs(X, y, tree_limit,
166
- False)
167
- # run the core algorithm using the C extension
168
- assert_import("cext_gpu")
169
- phi = np.zeros((X.shape[0], X.shape[1] + 1, X.shape[1] + 1, self.model.num_outputs))
170
- _cext_gpu.dense_tree_shap(
171
- self.model.children_left, self.model.children_right, self.model.children_default,
172
- self.model.features, self.model.thresholds, self.model.values,
173
- self.model.node_sample_weight,
174
- self.model.max_depth, X, X_missing, y, self.data, self.data_missing, tree_limit,
175
- self.model.base_offset, phi, feature_perturbation_codes[self.feature_perturbation],
176
- output_transform_codes[transform], True
177
- )
178
-
179
- return self._get_shap_interactions_output(phi, flat_output)
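The class above only overrides the two estimation entry points of TreeExplainer, swapping in the CUDA kernel. A hedged usage sketch, assuming a source build of shap with the CUDA extension available (as the docstring requires) and an XGBoost model; the data below is synthetic and purely illustrative:

import numpy as np
import xgboost
from shap.explainers._gpu_tree import GPUTreeExplainer  # module path as in the file removed above

X = np.random.randn(500, 8)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
model = xgboost.XGBClassifier(n_estimators=50, max_depth=3).fit(X, y)

explainer = GPUTreeExplainer(model)                       # no background data -> "tree_path_dependent"
shap_values = explainer.shap_values(X[:10])               # per-feature attributions for the raw (log-odds) output
interactions = explainer.shap_interaction_values(X[:10])  # pairwise interaction tensor (features x features per row)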
 
lib/shap/explainers/_gradient.py DELETED
@@ -1,592 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from packaging import version
6
-
7
- from .._explanation import Explanation
8
- from ..explainers._explainer import Explainer
9
- from ..explainers.tf_utils import (
10
- _get_graph,
11
- _get_model_inputs,
12
- _get_model_output,
13
- _get_session,
14
- )
15
-
16
- keras = None
17
- tf = None
18
- torch = None
19
-
20
-
21
- class GradientExplainer(Explainer):
22
- """ Explains a model using expected gradients (an extension of integrated gradients).
23
-
24
- Expected gradients is an extension of the integrated gradients method (Sundararajan et al. 2017), a
25
- feature attribution method designed for differentiable models based on an extension of Shapley
26
- values to infinite player games (Aumann-Shapley values). Integrated gradients values are a bit
27
- different from SHAP values, and require a single reference value to integrate from. As an adaptation
28
- to make them approximate SHAP values, expected gradients reformulates the integral as an expectation
29
- and combines that expectation with sampling reference values from the background dataset. This leads
30
- to a single combined expectation of gradients that converges to attributions that sum to the
31
- difference between the expected model output and the current output.
32
-
33
- Examples
34
- --------
35
- See :ref:`Gradient Explainer Examples <gradient_explainer_examples>`
36
- """
37
-
38
- def __init__(self, model, data, session=None, batch_size=50, local_smoothing=0):
39
- """ An explainer object for a differentiable model using a given background dataset.
40
-
41
- Parameters
42
- ----------
43
- model : tf.keras.Model, (input : [tf.Tensor], output : tf.Tensor), torch.nn.Module, or a tuple
44
- (model, layer), where both are torch.nn.Module objects
45
-
46
- For TensorFlow this can be a model object, or a pair of TensorFlow tensors (or a list and
47
- a tensor) that specifies the input and output of the model to be explained. Note that for
48
- TensorFlow 2 you must pass a tensorflow function, not a tuple of input/output tensors.
49
-
50
- For PyTorch this can be a nn.Module object (model), or a tuple (model, layer), where both
51
- are nn.Module objects. The model is an nn.Module object which takes as input a tensor
52
- (or list of tensors) of shape data, and returns a single dimensional output. If the input
53
- is a tuple, the returned shap values will be for the input of the layer argument. layer must
54
- be a layer in the model, i.e. model.conv2.
55
-
56
- data : [numpy.array] or [pandas.DataFrame] or [torch.tensor]
57
- The background dataset to use for integrating out features. Gradient explainer integrates
58
- over these samples. The data passed here must match the input tensors given in the
59
- first argument. Single element lists can be passed unwrapped.
60
- """
61
-
62
- # first, we need to find the framework
63
- if type(model) is tuple:
64
- a, b = model
65
- try:
66
- a.named_parameters()
67
- framework = 'pytorch'
68
- except Exception:
69
- framework = 'tensorflow'
70
- else:
71
- try:
72
- model.named_parameters()
73
- framework = 'pytorch'
74
- except Exception:
75
- framework = 'tensorflow'
76
-
77
- if isinstance(data, pd.DataFrame):
78
- self.features = data.columns.values
79
- else:
80
- self.features = None
81
-
82
- if framework == 'tensorflow':
83
- self.explainer = _TFGradient(model, data, session, batch_size, local_smoothing)
84
- elif framework == 'pytorch':
85
- self.explainer = _PyTorchGradient(model, data, batch_size, local_smoothing)
86
-
87
- def __call__(self, X, nsamples=200):
88
- """ Return an explanation object for the model applied to X.
89
-
90
- Parameters
91
- ----------
92
- X : list,
93
- if framework == 'tensorflow': numpy.array, or pandas.DataFrame
94
- if framework == 'pytorch': torch.tensor
95
- A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
96
- explain the model's output.
97
- nsamples : int
98
- number of background samples
99
- Returns
100
- -------
101
- shap.Explanation:
102
- """
103
- shap_values = self.shap_values(X, nsamples)
104
- return Explanation(values=shap_values, data=X, feature_names=self.features)
105
-
106
- def shap_values(self, X, nsamples=200, ranked_outputs=None, output_rank_order="max", rseed=None, return_variances=False):
107
- """ Return the values for the model applied to X.
108
-
109
- Parameters
110
- ----------
111
- X : list,
112
- if framework == 'tensorflow': numpy.array, or pandas.DataFrame
113
- if framework == 'pytorch': torch.tensor
114
- A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
115
- explain the model's output.
116
-
117
- ranked_outputs : None or int
118
- If ranked_outputs is None then we explain all the outputs in a multi-output model. If
119
- ranked_outputs is a positive integer then we only explain that many of the top model
120
- outputs (where "top" is determined by output_rank_order). Note that this causes a pair
121
- of values to be returned (shap_values, indexes), where shap_values is a list of numpy arrays
122
- for each of the output ranks, and indexes is a matrix that tells for each sample which output
123
- indexes were chosen as "top".
124
-
125
- output_rank_order : "max", "min", "max_abs", or "custom"
126
- How to order the model outputs when using ranked_outputs, either by maximum, minimum, or
127
- maximum absolute value. If "custom" Then "ranked_outputs" contains a list of output nodes.
128
-
129
- rseed : None or int
130
- Seeding the randomness in shap value computation (background example choice,
131
- interpolation between current and background example, smoothing).
132
-
133
- Returns
134
- -------
135
- array or list
136
- For models with a single output this returns a tensor of SHAP values with the same shape
137
- as X. For a model with multiple outputs this returns a list of SHAP value tensors, each of
138
- which are the same shape as X. If ranked_outputs is None then this list of tensors matches
139
- the number of model outputs. If ranked_outputs is a positive integer a pair is returned
140
- (shap_values, indexes), where shap_values is a list of tensors with a length of
141
- ranked_outputs, and indexes is a matrix that tells for each sample which output indexes
142
- were chosen as "top".
143
- """
144
- return self.explainer.shap_values(X, nsamples, ranked_outputs, output_rank_order, rseed, return_variances)
145
-
146
-
147
- class _TFGradient(Explainer):
148
-
149
- def __init__(self, model, data, session=None, batch_size=50, local_smoothing=0):
150
-
151
- # try and import keras and tensorflow
152
- global tf, keras
153
- if tf is None:
154
- import tensorflow as tf
155
- if version.parse(tf.__version__) < version.parse("1.4.0"):
156
- warnings.warn("Your TensorFlow version is older than 1.4.0 and not supported.")
157
- if keras is None:
158
- try:
159
- from tensorflow import keras
160
- if version.parse(keras.__version__) < version.parse("2.1.0"):
161
- warnings.warn("Your Keras version is older than 2.1.0 and not supported.")
162
- except Exception:
163
- pass
164
-
165
- # determine the model inputs and outputs
166
- self.model = model
167
- self.model_inputs = _get_model_inputs(model)
168
- self.model_output = _get_model_output(model)
169
- assert not isinstance(self.model_output, list), "The model output to be explained must be a single tensor!"
170
- assert len(self.model_output.shape) < 3, "The model output must be a vector or a single value!"
171
- self.multi_output = True
172
- if len(self.model_output.shape) == 1:
173
- self.multi_output = False
174
-
175
- # check if we have multiple inputs
176
- self.multi_input = True
177
- if not isinstance(self.model_inputs, list):
178
- self.model_inputs = [self.model_inputs]
179
- self.multi_input = len(self.model_inputs) > 1
180
- if isinstance(data, pd.DataFrame):
181
- data = [data.values]
182
- if not isinstance(data, list):
183
- data = [data]
184
-
185
- self.data = data
186
- self._num_vinputs = {}
187
- self.batch_size = batch_size
188
- self.local_smoothing = local_smoothing
189
-
190
- if not tf.executing_eagerly():
191
- self.session = _get_session(session)
192
- self.graph = _get_graph(self)
193
- # see if there is a keras operation we need to save
194
- self.keras_phase_placeholder = None
195
- for op in self.graph.get_operations():
196
- if 'keras_learning_phase' in op.name:
197
- self.keras_phase_placeholder = op.outputs[0]
198
-
199
- # save the expected output of the model (commented out because self.data could be huge for GradientExplainer)
200
- #self.expected_value = self.run(self.model_output, self.model_inputs, self.data).mean(0)
201
-
202
- if not self.multi_output:
203
- self.gradients = [None]
204
- else:
205
- self.gradients = [None for i in range(self.model_output.shape[1])]
206
-
207
- def gradient(self, i):
208
- global tf, keras
209
-
210
- if self.gradients[i] is None:
211
- if not tf.executing_eagerly():
212
- out = self.model_output[:,i] if self.multi_output else self.model_output
213
- self.gradients[i] = tf.gradients(out, self.model_inputs)
214
- else:
215
- @tf.function
216
- def grad_graph(x):
217
- phase = tf.keras.backend.learning_phase()
218
- tf.keras.backend.set_learning_phase(0)
219
-
220
- with tf.GradientTape(watch_accessed_variables=False) as tape:
221
- tape.watch(x)
222
- out = self.model(x)
223
- if self.multi_output:
224
- out = out[:,i]
225
-
226
- x_grad = tape.gradient(out, x)
227
-
228
- tf.keras.backend.set_learning_phase(phase)
229
-
230
- return x_grad
231
-
232
- self.gradients[i] = grad_graph
233
-
234
- return self.gradients[i]
235
-
236
- def shap_values(self, X, nsamples=200, ranked_outputs=None, output_rank_order="max", rseed=None, return_variances=False):
237
- global tf, keras
238
-
239
- import tensorflow as tf
240
- import tensorflow.keras as keras
241
-
242
- # check if we have multiple inputs
243
- if not self.multi_input:
244
- assert not isinstance(X, list), "Expected a single tensor model input!"
245
- X = [X]
246
- else:
247
- assert isinstance(X, list), "Expected a list of model inputs!"
248
- assert len(self.model_inputs) == len(X), "Number of model inputs does not match the number given!"
249
-
250
- # rank and determine the model outputs that we will explain
251
- if not tf.executing_eagerly():
252
- model_output_values = self.run(self.model_output, self.model_inputs, X)
253
- else:
254
- model_output_values = self.run(self.model, self.model_inputs, X)
255
- if ranked_outputs is not None and self.multi_output:
256
- if output_rank_order == "max":
257
- model_output_ranks = np.argsort(-model_output_values)
258
- elif output_rank_order == "min":
259
- model_output_ranks = np.argsort(model_output_values)
260
- elif output_rank_order == "max_abs":
261
- model_output_ranks = np.argsort(np.abs(model_output_values))
262
- elif output_rank_order == "custom":
263
- model_output_ranks = ranked_outputs
264
- else:
265
- emsg = "output_rank_order must be max, min, max_abs or custom!"
266
- raise ValueError(emsg)
267
-
268
- if output_rank_order in ["max", "min", "max_abs"]:
269
- model_output_ranks = model_output_ranks[:,:ranked_outputs]
270
- else:
271
- model_output_ranks = np.tile(np.arange(len(self.gradients)), (X[0].shape[0], 1))
272
-
273
- # compute the attributions
274
- output_phis = []
275
- output_phi_vars = []
276
- samples_input = [np.zeros((nsamples,) + X[t].shape[1:], dtype=np.float32) for t in range(len(X))]
277
- samples_delta = [np.zeros((nsamples,) + X[t].shape[1:], dtype=np.float32) for t in range(len(X))]
278
- # use random seed if no argument given
279
- if rseed is None:
280
- rseed = np.random.randint(0, 1e6)
281
-
282
- for i in range(model_output_ranks.shape[1]):
283
- np.random.seed(rseed) # so we get the same noise patterns for each output class
284
- phis = []
285
- phi_vars = []
286
- for k in range(len(X)):
287
- phis.append(np.zeros(X[k].shape))
288
- phi_vars.append(np.zeros(X[k].shape))
289
- for j in range(X[0].shape[0]):
290
-
291
- # fill in the samples arrays
292
- for k in range(nsamples):
293
- rind = np.random.choice(self.data[0].shape[0])
294
- t = np.random.uniform()
295
- for u in range(len(X)):
296
- if self.local_smoothing > 0:
297
- x = X[u][j] + np.random.randn(*X[u][j].shape) * self.local_smoothing
298
- else:
299
- x = X[u][j]
300
- samples_input[u][k] = t * x + (1 - t) * self.data[u][rind]
301
- samples_delta[u][k] = x - self.data[u][rind]
302
-
303
- # compute the gradients at all the sample points
304
- find = model_output_ranks[j,i]
305
- grads = []
306
- for b in range(0, nsamples, self.batch_size):
307
- batch = [samples_input[a][b:min(b+self.batch_size,nsamples)] for a in range(len(X))]
308
- grads.append(self.run(self.gradient(find), self.model_inputs, batch))
309
- grad = [np.concatenate([g[a] for g in grads], 0) for a in range(len(X))]
310
-
311
- # assign the attributions to the right part of the output arrays
312
- for a in range(len(X)):
313
- samples = grad[a] * samples_delta[a]
314
- phis[a][j] = samples.mean(0)
315
- phi_vars[a][j] = samples.var(0) / np.sqrt(samples.shape[0]) # estimate variance of means
316
-
317
- # TODO: this could be avoided by integrating between endpoints if no local smoothing is used
318
- # correct the sum of the values to equal the output of the model using a linear
319
- # regression model with priors of the coefficients equal to the estimated variances for each
320
- # value (note that 1e-6 is designed to increase the weight of the sample and so closely
321
- # match the correct sum)
322
- # if False and self.local_smoothing == 0: # disabled right now to make sure it doesn't mask problems
323
- # phis_sum = np.sum([phis[l][j].sum() for l in range(len(X))])
324
- # phi_vars_s = np.stack([phi_vars[l][j] for l in range(len(X))], 0).flatten()
325
- # if self.multi_output:
326
- # sum_error = model_output_values[j,find] - phis_sum - self.expected_value[find]
327
- # else:
328
- # sum_error = model_output_values[j] - phis_sum - self.expected_value
329
-
330
- # # this is a ridge regression with one sample of all ones with sum_error as the label
331
- # # and 1/v as the ridge penalties. This simplified (and stable) form comes from the
332
- # # Sherman-Morrison formula
333
- # v = (phi_vars_s / phi_vars_s.max()) * 1e6
334
- # adj = sum_error * (v - (v * v.sum()) / (1 + v.sum()))
335
-
336
- # # add the adjustment to the output so the sum matches
337
- # offset = 0
338
- # for l in range(len(X)):
339
- # s = np.prod(phis[l][j].shape)
340
- # phis[l][j] += adj[offset:offset+s].reshape(phis[l][j].shape)
341
- # offset += s
342
-
343
- output_phis.append(phis[0] if not self.multi_input else phis)
344
- output_phi_vars.append(phi_vars[0] if not self.multi_input else phi_vars)
345
- if not self.multi_output:
346
- if return_variances:
347
- return output_phis[0], output_phi_vars[0]
348
- else:
349
- return output_phis[0]
350
- elif ranked_outputs is not None:
351
- if return_variances:
352
- return output_phis, output_phi_vars, model_output_ranks
353
- else:
354
- return output_phis, model_output_ranks
355
- else:
356
- if return_variances:
357
- return output_phis, output_phi_vars
358
- else:
359
- return output_phis
360
-
361
- def run(self, out, model_inputs, X):
362
- global tf, keras
363
-
364
- if not tf.executing_eagerly():
365
- feed_dict = dict(zip(model_inputs, X))
366
- if self.keras_phase_placeholder is not None:
367
- feed_dict[self.keras_phase_placeholder] = 0
368
- return self.session.run(out, feed_dict)
369
- else:
370
- # build inputs that are correctly shaped, typed, and tf-wrapped
371
- inputs = []
372
- for i in range(len(X)):
373
- shape = list(self.model_inputs[i].shape)
374
- shape[0] = -1
375
- v = tf.constant(X[i].reshape(shape), dtype=self.model_inputs[i].dtype)
376
- inputs.append(v)
377
- return out(inputs)
378
-
379
-
380
- class _PyTorchGradient(Explainer):
381
-
382
- def __init__(self, model, data, batch_size=50, local_smoothing=0):
383
-
384
- # try and import pytorch
385
- global torch
386
- if torch is None:
387
- import torch
388
- if version.parse(torch.__version__) < version.parse("0.4"):
389
- warnings.warn("Your PyTorch version is older than 0.4 and not supported.")
390
-
391
- # check if we have multiple inputs
392
- self.multi_input = False
393
- if isinstance(data, list):
394
- self.multi_input = True
395
- if not isinstance(data, list):
396
- data = [data]
397
-
398
- # for consistency, the method signature calls for data as the model input.
399
- # However, within this class, self.model_inputs is the input (i.e. the data passed by the user)
400
- # and self.data is the background data for the layer we want to assign importances to. If this layer is
401
- # the input, then self.data = self.model_inputs
402
- self.model_inputs = data
403
- self.batch_size = batch_size
404
- self.local_smoothing = local_smoothing
405
-
406
- self.layer = None
407
- self.input_handle = None
408
- self.interim = False
409
- if type(model) == tuple:
410
- self.interim = True
411
- model, layer = model
412
- model = model.eval()
413
- self.add_handles(layer)
414
- self.layer = layer
415
-
416
- # now, if we are taking an interim layer, the 'data' is going to be the input
417
- # of the interim layer; we will capture this using a forward hook
418
- with torch.no_grad():
419
- _ = model(*data)
420
- interim_inputs = self.layer.target_input
421
- if type(interim_inputs) is tuple:
422
- # this should always be true, but just to be safe
423
- self.data = [i.clone().detach() for i in interim_inputs]
424
- else:
425
- self.data = [interim_inputs.clone().detach()]
426
- else:
427
- self.data = data
428
- self.model = model.eval()
429
-
430
- multi_output = False
431
- outputs = self.model(*self.model_inputs)
432
- if len(outputs.shape) > 1 and outputs.shape[1] > 1:
433
- multi_output = True
434
- self.multi_output = multi_output
435
-
436
- if not self.multi_output:
437
- self.gradients = [None]
438
- else:
439
- self.gradients = [None for i in range(outputs.shape[1])]
440
-
441
- def gradient(self, idx, inputs):
442
- self.model.zero_grad()
443
- X = [x.requires_grad_() for x in inputs]
444
- outputs = self.model(*X)
445
- selected = [val for val in outputs[:, idx]]
446
- if self.input_handle is not None:
447
- interim_inputs = self.layer.target_input
448
- grads = [torch.autograd.grad(selected, input,
449
- retain_graph=True if idx + 1 < len(interim_inputs) else None)[0].cpu().numpy()
450
- for idx, input in enumerate(interim_inputs)]
451
- del self.layer.target_input
452
- else:
453
- grads = [torch.autograd.grad(selected, x,
454
- retain_graph=True if idx + 1 < len(X) else None)[0].cpu().numpy()
455
- for idx, x in enumerate(X)]
456
- return grads
457
-
458
- @staticmethod
459
- def get_interim_input(self, input, output):
460
- try:
461
- del self.target_input
462
- except AttributeError:
463
- pass
464
- setattr(self, 'target_input', input)
465
-
466
- def add_handles(self, layer):
467
- input_handle = layer.register_forward_hook(self.get_interim_input)
468
- self.input_handle = input_handle
469
-
470
- def shap_values(self, X, nsamples=200, ranked_outputs=None, output_rank_order="max", rseed=None, return_variances=False):
471
-
472
- # X ~ self.model_input
473
- # X_data ~ self.data
474
-
475
- # check if we have multiple inputs
476
- if not self.multi_input:
477
- assert not isinstance(X, list), "Expected a single tensor model input!"
478
- X = [X]
479
- else:
480
- assert isinstance(X, list), "Expected a list of model inputs!"
481
-
482
- if ranked_outputs is not None and self.multi_output:
483
- with torch.no_grad():
484
- model_output_values = self.model(*X)
485
- # rank and determine the model outputs that we will explain
486
- if output_rank_order == "max":
487
- _, model_output_ranks = torch.sort(model_output_values, descending=True)
488
- elif output_rank_order == "min":
489
- _, model_output_ranks = torch.sort(model_output_values, descending=False)
490
- elif output_rank_order == "max_abs":
491
- _, model_output_ranks = torch.sort(torch.abs(model_output_values), descending=True)
492
- else:
493
- emsg = "output_rank_order must be max, min, or max_abs!"
494
- raise ValueError(emsg)
495
- model_output_ranks = model_output_ranks[:, :ranked_outputs]
496
- else:
497
- model_output_ranks = (torch.ones((X[0].shape[0], len(self.gradients))).int() *
498
- torch.arange(0, len(self.gradients)).int())
499
-
500
- # if a cleanup happened, we need to add the handles back
501
- # this allows shap_values to be called multiple times, but the model to be
502
- # 'clean' at the end of each run for other uses
503
- if self.input_handle is None and self.interim is True:
504
- self.add_handles(self.layer)
505
-
506
- # compute the attributions
507
- X_batches = X[0].shape[0]
508
- output_phis = []
509
- output_phi_vars = []
510
- # samples_input = input to the model
511
- # samples_delta = (x - x') for the input being explained - may be an interim input
512
- samples_input = [torch.zeros((nsamples,) + X[t].shape[1:], device=X[t].device) for t in range(len(X))]
513
- samples_delta = [np.zeros((nsamples, ) + self.data[t].shape[1:]) for t in range(len(self.data))]
514
-
515
- # use random seed if no argument given
516
- if rseed is None:
517
- rseed = np.random.randint(0, 1e6)
518
-
519
- for i in range(model_output_ranks.shape[1]):
520
- np.random.seed(rseed) # so we get the same noise patterns for each output class
521
- phis = []
522
- phi_vars = []
523
- for k in range(len(self.data)):
524
- # for each of the inputs being explained - may be an interim input
525
- phis.append(np.zeros((X_batches,) + self.data[k].shape[1:]))
526
- phi_vars.append(np.zeros((X_batches, ) + self.data[k].shape[1:]))
527
- for j in range(X[0].shape[0]):
528
- # fill in the samples arrays
529
- for k in range(nsamples):
530
- rind = np.random.choice(self.data[0].shape[0])
531
- t = np.random.uniform()
532
- for a in range(len(X)):
533
- if self.local_smoothing > 0:
534
- # local smoothing is added to the base input, unlike in the TF gradient explainer
535
- x = X[a][j].clone().detach() + torch.empty(X[a][j].shape, device=X[a].device).normal_() \
536
- * self.local_smoothing
537
- else:
538
- x = X[a][j].clone().detach()
539
- samples_input[a][k] = (t * x + (1 - t) * (self.model_inputs[a][rind]).clone().detach()).\
540
- clone().detach()
541
- if self.input_handle is None:
542
- samples_delta[a][k] = (x - (self.data[a][rind]).clone().detach()).cpu().numpy()
543
-
544
- if self.interim is True:
545
- with torch.no_grad():
546
- _ = self.model(*[samples_input[a][k].unsqueeze(0) for a in range(len(X))])
547
- interim_inputs = self.layer.target_input
548
- del self.layer.target_input
549
- if type(interim_inputs) is tuple:
550
- if type(interim_inputs) is tuple:
551
- # this should always be true, but just to be safe
552
- for a in range(len(interim_inputs)):
553
- samples_delta[a][k] = interim_inputs[a].cpu().numpy()
554
- else:
555
- samples_delta[0][k] = interim_inputs.cpu().numpy()
556
-
557
- # compute the gradients at all the sample points
558
- find = model_output_ranks[j, i]
559
- grads = []
560
- for b in range(0, nsamples, self.batch_size):
561
- batch = [samples_input[c][b:min(b+self.batch_size,nsamples)].clone().detach() for c in range(len(X))]
562
- grads.append(self.gradient(find, batch))
563
- grad = [np.concatenate([g[z] for g in grads], 0) for z in range(len(self.data))]
564
- # assign the attributions to the right part of the output arrays
565
- for t in range(len(self.data)):
566
- samples = grad[t] * samples_delta[t]
567
- phis[t][j] = samples.mean(0)
568
- phi_vars[t][j] = samples.var(0) / np.sqrt(samples.shape[0]) # estimate variance of means
569
-
570
- output_phis.append(phis[0] if len(self.data) == 1 else phis)
571
- output_phi_vars.append(phi_vars[0] if not self.multi_input else phi_vars)
572
- # cleanup: remove the handles, if they were added
573
- if self.input_handle is not None:
574
- self.input_handle.remove()
575
- self.input_handle = None
576
- # note: the target input attribute is deleted in the loop
577
-
578
- if not self.multi_output:
579
- if return_variances:
580
- return output_phis[0], output_phi_vars[0]
581
- else:
582
- return output_phis[0]
583
- elif ranked_outputs is not None:
584
- if return_variances:
585
- return output_phis, output_phi_vars, model_output_ranks
586
- else:
587
- return output_phis, model_output_ranks
588
- else:
589
- if return_variances:
590
- return output_phis, output_phi_vars
591
- else:
592
- return output_phis
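
The class above implements the expected-gradients estimator for PyTorch: for each instance it draws background samples, interpolates between the instance and a background sample, evaluates gradients at those interpolation points, and averages gradient * (x - x'). A minimal usage sketch through the public shap.GradientExplainer wrapper follows; the toy network, tensor shapes and sample counts are illustrative assumptions, not part of this commit.

import torch
import torch.nn as nn
import shap

# toy two-output network and background data (assumed shapes)
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2)).eval()
background = torch.randn(100, 10)  # samples used as the baseline x'
X = torch.randn(5, 10)             # instances to explain

explainer = shap.GradientExplainer(model, background)
# nsamples = number of interpolation points averaged per instance (200 is the default above)
shap_values = explainer.shap_values(X, nsamples=200)

# to attribute an intermediate layer instead of the raw input, pass a (model, layer) pair,
# e.g. shap.GradientExplainer((model, model[0]), background)

With two model outputs, shap_values is a list holding one (samples x features) array per output, matching the multi_output branch above.
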
lib/shap/explainers/_kernel.py DELETED
@@ -1,696 +0,0 @@
1
- import copy
2
- import gc
3
- import itertools
4
- import logging
5
- import time
6
- import warnings
7
-
8
- import numpy as np
9
- import pandas as pd
10
- import scipy.sparse
11
- import sklearn
12
- from packaging import version
13
- from scipy.special import binom
14
- from sklearn.linear_model import Lasso, LassoLarsIC, lars_path
15
- from sklearn.pipeline import make_pipeline
16
- from sklearn.preprocessing import StandardScaler
17
- from tqdm.auto import tqdm
18
-
19
- from .._explanation import Explanation
20
- from ..utils import safe_isinstance
21
- from ..utils._exceptions import DimensionError
22
- from ..utils._legacy import (
23
- DenseData,
24
- SparseData,
25
- convert_to_data,
26
- convert_to_instance,
27
- convert_to_instance_with_index,
28
- convert_to_link,
29
- convert_to_model,
30
- match_instance_to_data,
31
- match_model_to_data,
32
- )
33
- from ._explainer import Explainer
34
-
35
- log = logging.getLogger('shap')
36
-
37
-
38
- class KernelExplainer(Explainer):
39
- """Uses the Kernel SHAP method to explain the output of any function.
40
-
41
- Kernel SHAP is a method that uses a special weighted linear regression
42
- to compute the importance of each feature. The computed importance values
43
- are Shapley values from game theory and also coefficients from a local linear
44
- regression.
45
-
46
- Parameters
47
- ----------
48
- model : function or iml.Model
49
- User supplied function that takes a matrix of samples (# samples x # features) and
50
- computes the output of the model for those samples. The output can be a vector
51
- (# samples) or a matrix (# samples x # model outputs).
52
-
53
- data : numpy.array or pandas.DataFrame or shap.common.DenseData or any scipy.sparse matrix
54
- The background dataset to use for integrating out features. To determine the impact
55
- of a feature, that feature is set to "missing" and the change in the model output
56
- is observed. Since most models aren't designed to handle arbitrary missing data at test
57
- time, we simulate "missing" by replacing the feature with the values it takes in the
58
- background dataset. So if the background dataset is a simple sample of all zeros, then
59
- we would approximate a feature being missing by setting it to zero. For small problems,
60
- this background dataset can be the whole training set, but for larger problems consider
61
- using a single reference value or using the ``kmeans`` function to summarize the dataset.
62
- Note: for the sparse case, we accept any sparse matrix but convert to lil format for
63
- performance.
64
-
65
- feature_names : list
66
- The names of the features in the background dataset. If the background dataset is
67
- supplied as a pandas.DataFrame, then ``feature_names`` can be set to ``None`` (default),
68
- and the feature names will be taken as the column names of the dataframe.
69
-
70
- link : "identity" or "logit"
71
- A generalized linear model link to connect the feature importance values to the model
72
- output. Since the feature importance values, phi, sum up to the model output, it often makes
73
- sense to connect them to the output with a link function where link(output) = sum(phi).
74
- Default is "identity" (a no-op).
75
- If the model output is a probability, then "logit" can be used to transform the SHAP values
76
- into log-odds units.
77
-
78
- Examples
79
- --------
80
- See :ref:`Kernel Explainer Examples <kernel_explainer_examples>`.
81
- """
82
-
83
- def __init__(self, model, data, feature_names=None, link="identity", **kwargs):
84
-
85
- if feature_names is not None:
86
- self.data_feature_names=feature_names
87
- elif isinstance(data, pd.DataFrame):
88
- self.data_feature_names = list(data.columns)
89
-
90
- # convert incoming inputs to standardized iml objects
91
- self.link = convert_to_link(link)
92
- self.keep_index = kwargs.get("keep_index", False)
93
- self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
94
- self.model = convert_to_model(model, keep_index=self.keep_index)
95
- self.data = convert_to_data(data, keep_index=self.keep_index)
96
- model_null = match_model_to_data(self.model, self.data)
97
-
98
- # enforce our current input type limitations
99
- if not isinstance(self.data, (DenseData, SparseData)):
100
- emsg = "Shap explainer only supports the DenseData and SparseData input currently."
101
- raise TypeError(emsg)
102
- if self.data.transposed:
103
- emsg = "Shap explainer does not support transposed DenseData or SparseData currently."
104
- raise DimensionError(emsg)
105
-
106
- # warn users about large background data sets
107
- if len(self.data.weights) > 100:
108
- log.warning("Using " + str(len(self.data.weights)) + " background data samples could cause " +
109
- "slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to " +
110
- "summarize the background as K samples.")
111
-
112
- # init our parameters
113
- self.N = self.data.data.shape[0]
114
- self.P = self.data.data.shape[1]
115
- self.linkfv = np.vectorize(self.link.f)
116
- self.nsamplesAdded = 0
117
- self.nsamplesRun = 0
118
-
119
- # find E_x[f(x)]
120
- if isinstance(model_null, (pd.DataFrame, pd.Series)):
121
- model_null = np.squeeze(model_null.values)
122
- if safe_isinstance(model_null, "tensorflow.python.framework.ops.EagerTensor"):
123
- model_null = model_null.numpy()
124
- self.fnull = np.sum((model_null.T * self.data.weights).T, 0)
125
- self.expected_value = self.linkfv(self.fnull)
126
-
127
- # see if we have a vector output
128
- self.vector_out = True
129
- if len(self.fnull.shape) == 0:
130
- self.vector_out = False
131
- self.fnull = np.array([self.fnull])
132
- self.D = 1
133
- self.expected_value = float(self.expected_value)
134
- else:
135
- self.D = self.fnull.shape[0]
136
-
137
- def __call__(self, X):
138
-
139
- start_time = time.time()
140
-
141
- if isinstance(X, pd.DataFrame):
142
- feature_names = list(X.columns)
143
- else:
144
- feature_names = getattr(self, "data_feature_names", None)
145
-
146
- v = self.shap_values(X)
147
- if isinstance(v, list):
148
- v = np.stack(v, axis=-1) # put outputs at the end
149
-
150
- # the explanation object expects an expected value for each row
151
- if hasattr(self.expected_value, "__len__"):
152
- ev_tiled = np.tile(self.expected_value, (v.shape[0],1))
153
- else:
154
- ev_tiled = np.tile(self.expected_value, v.shape[0])
155
-
156
- return Explanation(
157
- v,
158
- base_values=ev_tiled,
159
- data=X.to_numpy() if isinstance(X, pd.DataFrame) else X,
160
- feature_names=feature_names,
161
- compute_time=time.time() - start_time,
162
- )
163
-
164
- def shap_values(self, X, **kwargs):
165
- """ Estimate the SHAP values for a set of samples.
166
-
167
- Parameters
168
- ----------
169
- X : numpy.array or pandas.DataFrame or any scipy.sparse matrix
170
- A matrix of samples (# samples x # features) on which to explain the model's output.
171
-
172
- nsamples : "auto" or int
173
- Number of times to re-evaluate the model when explaining each prediction. More samples
174
- lead to lower variance estimates of the SHAP values. The "auto" setting uses
175
- `nsamples = 2 * X.shape[1] + 2048`.
176
-
177
- l1_reg : "num_features(int)", "auto" (default for now, but deprecated), "aic", "bic", or float
178
- The l1 regularization to use for feature selection (the estimation procedure is based on
179
- a debiased lasso). The auto option currently uses "aic" when less than 20% of the possible sample
180
- space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF "auto" WILL CHANGE
181
- in a future version to be based on num_features instead of AIC.
182
- The "aic" and "bic" options use the AIC and BIC rules for regularization.
183
- Using "num_features(int)" selects a fix number of top features. Passing a float directly sets the
184
- "alpha" parameter of the sklearn.linear_model.Lasso model used for feature selection.
185
-
186
- gc_collect : bool
187
- Run garbage collection after each explanation round. Sometimes needed for memory-intensive explanations (default False).
188
-
189
- Returns
190
- -------
191
- array or list
192
- For models with a single output this returns a matrix of SHAP values
193
- (# samples x # features). Each row sums to the difference between the model output for that
194
- sample and the expected value of the model output (which is stored as expected_value
195
- attribute of the explainer). For models with vector outputs this returns a list
196
- of such matrices, one for each output.
197
- """
198
-
199
- # convert dataframes
200
- if isinstance(X, pd.Series):
201
- X = X.values
202
- elif isinstance(X, pd.DataFrame):
203
- if self.keep_index:
204
- index_value = X.index.values
205
- index_name = X.index.name
206
- column_name = list(X.columns)
207
- X = X.values
208
-
209
- x_type = str(type(X))
210
- arr_type = "'numpy.ndarray'>"
211
- # if sparse, convert to lil for performance
212
- if scipy.sparse.issparse(X) and not scipy.sparse.isspmatrix_lil(X):
213
- X = X.tolil()
214
- assert x_type.endswith(arr_type) or scipy.sparse.isspmatrix_lil(X), "Unknown instance type: " + x_type
215
-
216
- # single instance
217
- if len(X.shape) == 1:
218
- data = X.reshape((1, X.shape[0]))
219
- if self.keep_index:
220
- data = convert_to_instance_with_index(data, column_name, index_name, index_value)
221
- explanation = self.explain(data, **kwargs)
222
-
223
- # vector-output
224
- s = explanation.shape
225
- if len(s) == 2:
226
- outs = [np.zeros(s[0]) for j in range(s[1])]
227
- for j in range(s[1]):
228
- outs[j] = explanation[:, j]
229
- return outs
230
-
231
- # single-output
232
- else:
233
- out = np.zeros(s[0])
234
- out[:] = explanation
235
- return out
236
-
237
- # explain the whole dataset
238
- elif len(X.shape) == 2:
239
- explanations = []
240
- for i in tqdm(range(X.shape[0]), disable=kwargs.get("silent", False)):
241
- data = X[i:i + 1, :]
242
- if self.keep_index:
243
- data = convert_to_instance_with_index(data, column_name, index_value[i:i + 1], index_name)
244
- explanations.append(self.explain(data, **kwargs))
245
- if kwargs.get("gc_collect", False):
246
- gc.collect()
247
-
248
- # vector-output
249
- s = explanations[0].shape
250
- if len(s) == 2:
251
- outs = [np.zeros((X.shape[0], s[0])) for j in range(s[1])]
252
- for i in range(X.shape[0]):
253
- for j in range(s[1]):
254
- outs[j][i] = explanations[i][:, j]
255
- return outs
256
-
257
- # single-output
258
- else:
259
- out = np.zeros((X.shape[0], s[0]))
260
- for i in range(X.shape[0]):
261
- out[i] = explanations[i]
262
- return out
263
-
264
- else:
265
- emsg = "Instance must have 1 or 2 dimensions!"
266
- raise DimensionError(emsg)
267
-
268
- def explain(self, incoming_instance, **kwargs):
269
- # convert incoming input to a standardized iml object
270
- instance = convert_to_instance(incoming_instance)
271
- match_instance_to_data(instance, self.data)
272
-
273
- # find the feature groups we will test. If a feature does not change from its
274
- # current value then we know it doesn't impact the model
275
- self.varyingInds = self.varying_groups(instance.x)
276
- if self.data.groups is None:
277
- self.varyingFeatureGroups = np.array([i for i in self.varyingInds])
278
- self.M = self.varyingFeatureGroups.shape[0]
279
- else:
280
- self.varyingFeatureGroups = [self.data.groups[i] for i in self.varyingInds]
281
- self.M = len(self.varyingFeatureGroups)
282
- groups = self.data.groups
283
- # convert to a numpy array, which is much faster when the array is not jagged (all groups of the same length)
284
- if self.varyingFeatureGroups and all(len(groups[i]) == len(groups[0]) for i in self.varyingInds):
285
- self.varyingFeatureGroups = np.array(self.varyingFeatureGroups)
286
- # further performance optimization in case each group has a single value
287
- if self.varyingFeatureGroups.shape[1] == 1:
288
- self.varyingFeatureGroups = self.varyingFeatureGroups.flatten()
289
-
290
- # find f(x)
291
- if self.keep_index:
292
- model_out = self.model.f(instance.convert_to_df())
293
- else:
294
- model_out = self.model.f(instance.x)
295
- if isinstance(model_out, (pd.DataFrame, pd.Series)):
296
- model_out = model_out.values
297
- self.fx = model_out[0]
298
-
299
- if not self.vector_out:
300
- self.fx = np.array([self.fx])
301
-
302
- # if no features vary then no feature has an effect
303
- if self.M == 0:
304
- phi = np.zeros((self.data.groups_size, self.D))
305
- phi_var = np.zeros((self.data.groups_size, self.D))
306
-
307
- # if only one feature varies then it has all the effect
308
- elif self.M == 1:
309
- phi = np.zeros((self.data.groups_size, self.D))
310
- phi_var = np.zeros((self.data.groups_size, self.D))
311
- diff = self.link.f(self.fx) - self.link.f(self.fnull)
312
- for d in range(self.D):
313
- phi[self.varyingInds[0],d] = diff[d]
314
-
315
- # if more than one feature varies then we have to do real work
316
- else:
317
- self.l1_reg = kwargs.get("l1_reg", "auto")
318
-
319
- # pick a reasonable number of samples if the user didn't specify how many they wanted
320
- self.nsamples = kwargs.get("nsamples", "auto")
321
- if self.nsamples == "auto":
322
- self.nsamples = 2 * self.M + 2**11
323
-
324
- # if we have enough samples to enumerate all subsets then ignore the unneeded samples
325
- self.max_samples = 2 ** 30
326
- if self.M <= 30:
327
- self.max_samples = 2 ** self.M - 2
328
- if self.nsamples > self.max_samples:
329
- self.nsamples = self.max_samples
330
-
331
- # reserve space for some of our computations
332
- self.allocate()
333
-
334
- # weight the different subset sizes
335
- num_subset_sizes = int(np.ceil((self.M - 1) / 2.0))
336
- num_paired_subset_sizes = int(np.floor((self.M - 1) / 2.0))
337
- weight_vector = np.array([(self.M - 1.0) / (i * (self.M - i)) for i in range(1, num_subset_sizes + 1)])
338
- weight_vector[:num_paired_subset_sizes] *= 2
339
- weight_vector /= np.sum(weight_vector)
340
- log.debug(f"{weight_vector = }")
341
- log.debug(f"{num_subset_sizes = }")
342
- log.debug(f"{num_paired_subset_sizes = }")
343
- log.debug(f"{self.M = }")
344
-
345
- # fill out all the subset sizes we can completely enumerate
346
- # given nsamples*remaining_weight_vector[subset_size]
347
- num_full_subsets = 0
348
- num_samples_left = self.nsamples
349
- group_inds = np.arange(self.M, dtype='int64')
350
- mask = np.zeros(self.M)
351
- remaining_weight_vector = copy.copy(weight_vector)
352
- for subset_size in range(1, num_subset_sizes + 1):
353
-
354
- # determine how many subsets (and their complements) are of the current size
355
- nsubsets = binom(self.M, subset_size)
356
- if subset_size <= num_paired_subset_sizes:
357
- nsubsets *= 2
358
- log.debug(f"{subset_size = }")
359
- log.debug(f"{nsubsets = }")
360
- log.debug("self.nsamples*weight_vector[subset_size-1] = {}".format(
361
- num_samples_left * remaining_weight_vector[subset_size - 1]))
362
- log.debug("self.nsamples*weight_vector[subset_size-1]/nsubsets = {}".format(
363
- num_samples_left * remaining_weight_vector[subset_size - 1] / nsubsets))
364
-
365
- # see if we have enough samples to enumerate all subsets of this size
366
- if num_samples_left * remaining_weight_vector[subset_size - 1] / nsubsets >= 1.0 - 1e-8:
367
- num_full_subsets += 1
368
- num_samples_left -= nsubsets
369
-
370
- # rescale what's left of the remaining weight vector to sum to 1
371
- if remaining_weight_vector[subset_size - 1] < 1.0:
372
- remaining_weight_vector /= (1 - remaining_weight_vector[subset_size - 1])
373
-
374
- # add all the samples of the current subset size
375
- w = weight_vector[subset_size - 1] / binom(self.M, subset_size)
376
- if subset_size <= num_paired_subset_sizes:
377
- w /= 2.0
378
- for inds in itertools.combinations(group_inds, subset_size):
379
- mask[:] = 0.0
380
- mask[np.array(inds, dtype='int64')] = 1.0
381
- self.addsample(instance.x, mask, w)
382
- if subset_size <= num_paired_subset_sizes:
383
- mask[:] = np.abs(mask - 1)
384
- self.addsample(instance.x, mask, w)
385
- else:
386
- break
387
- log.info(f"{num_full_subsets = }")
388
-
389
- # add random samples from what is left of the subset space
390
- nfixed_samples = self.nsamplesAdded
391
- samples_left = self.nsamples - self.nsamplesAdded
392
- log.debug(f"{samples_left = }")
393
- if num_full_subsets != num_subset_sizes:
394
- remaining_weight_vector = copy.copy(weight_vector)
395
- remaining_weight_vector[:num_paired_subset_sizes] /= 2 # because we draw two samples each below
396
- remaining_weight_vector = remaining_weight_vector[num_full_subsets:]
397
- remaining_weight_vector /= np.sum(remaining_weight_vector)
398
- log.info(f"{remaining_weight_vector = }")
399
- log.info(f"{num_paired_subset_sizes = }")
400
- ind_set = np.random.choice(len(remaining_weight_vector), 4 * samples_left, p=remaining_weight_vector)
401
- ind_set_pos = 0
402
- used_masks = {}
403
- while samples_left > 0 and ind_set_pos < len(ind_set):
404
- mask.fill(0.0)
405
- ind = ind_set[ind_set_pos] # we call np.random.choice once to save time and then just read it here
406
- ind_set_pos += 1
407
- subset_size = ind + num_full_subsets + 1
408
- mask[np.random.permutation(self.M)[:subset_size]] = 1.0
409
-
410
- # only add the sample if we have not seen it before, otherwise just
411
- # increment a previous sample's weight
412
- mask_tuple = tuple(mask)
413
- new_sample = False
414
- if mask_tuple not in used_masks:
415
- new_sample = True
416
- used_masks[mask_tuple] = self.nsamplesAdded
417
- samples_left -= 1
418
- self.addsample(instance.x, mask, 1.0)
419
- else:
420
- self.kernelWeights[used_masks[mask_tuple]] += 1.0
421
-
422
- # add the complement sample
423
- if samples_left > 0 and subset_size <= num_paired_subset_sizes:
424
- mask[:] = np.abs(mask - 1)
425
-
426
- # only add the sample if we have not seen it before, otherwise just
427
- # increment a previous sample's weight
428
- if new_sample:
429
- samples_left -= 1
430
- self.addsample(instance.x, mask, 1.0)
431
- else:
432
- # we know the complement sample is the next one after the original sample, so + 1
433
- self.kernelWeights[used_masks[mask_tuple] + 1] += 1.0
434
-
435
- # normalize the kernel weights for the random samples to equal the weight left after
436
- # the fixed enumerated samples have been already counted
437
- weight_left = np.sum(weight_vector[num_full_subsets:])
438
- log.info(f"{weight_left = }")
439
- self.kernelWeights[nfixed_samples:] *= weight_left / self.kernelWeights[nfixed_samples:].sum()
440
-
441
- # execute the model on the synthetic samples we have created
442
- self.run()
443
-
444
- # solve then expand the feature importance (Shapley value) vector to contain the non-varying features
445
- phi = np.zeros((self.data.groups_size, self.D))
446
- phi_var = np.zeros((self.data.groups_size, self.D))
447
- for d in range(self.D):
448
- vphi, vphi_var = self.solve(self.nsamples / self.max_samples, d)
449
- phi[self.varyingInds, d] = vphi
450
- phi_var[self.varyingInds, d] = vphi_var
451
-
452
- if not self.vector_out:
453
- phi = np.squeeze(phi, axis=1)
454
- phi_var = np.squeeze(phi_var, axis=1)
455
-
456
- return phi
457
-
458
- @staticmethod
459
- def not_equal(i, j):
460
- number_types = (int, float, np.number)
461
- if isinstance(i, number_types) and isinstance(j, number_types):
462
- return 0 if np.isclose(i, j, equal_nan=True) else 1
463
- else:
464
- return 0 if i == j else 1
465
-
466
- def varying_groups(self, x):
467
- if not scipy.sparse.issparse(x):
468
- varying = np.zeros(self.data.groups_size)
469
- for i in range(0, self.data.groups_size):
470
- inds = self.data.groups[i]
471
- x_group = x[0, inds]
472
- if scipy.sparse.issparse(x_group):
473
- if all(j not in x.nonzero()[1] for j in inds):
474
- varying[i] = False
475
- continue
476
- x_group = x_group.todense()
477
- num_mismatches = np.sum(np.frompyfunc(self.not_equal, 2, 1)(x_group, self.data.data[:, inds]))
478
- varying[i] = num_mismatches > 0
479
- varying_indices = np.nonzero(varying)[0]
480
- return varying_indices
481
- else:
482
- varying_indices = []
483
- # go over all nonzero columns in background and evaluation data
484
- # if both background and evaluation are zero, the column does not vary
485
- varying_indices = np.unique(np.union1d(self.data.data.nonzero()[1], x.nonzero()[1]))
486
- remove_unvarying_indices = []
487
- for i in range(0, len(varying_indices)):
488
- varying_index = varying_indices[i]
489
- # now verify the nonzero values do vary
490
- data_rows = self.data.data[:, [varying_index]]
491
- nonzero_rows = data_rows.nonzero()[0]
492
-
493
- if nonzero_rows.size > 0:
494
- background_data_rows = data_rows[nonzero_rows]
495
- if scipy.sparse.issparse(background_data_rows):
496
- background_data_rows = background_data_rows.toarray()
497
- num_mismatches = np.sum(np.abs(background_data_rows - x[0, varying_index]) > 1e-7)
498
- # Note: If feature column non-zero but some background zero, can't remove index
499
- if num_mismatches == 0 and not \
500
- (np.abs(x[0, [varying_index]][0, 0]) > 1e-7 and len(nonzero_rows) < data_rows.shape[0]):
501
- remove_unvarying_indices.append(i)
502
- mask = np.ones(len(varying_indices), dtype=bool)
503
- mask[remove_unvarying_indices] = False
504
- varying_indices = varying_indices[mask]
505
- return varying_indices
506
-
507
- def allocate(self):
508
- if scipy.sparse.issparse(self.data.data):
509
- # We tile the sparse matrix in csr format but convert it to lil
510
- # for performance when adding samples
511
- shape = self.data.data.shape
512
- nnz = self.data.data.nnz
513
- data_rows, data_cols = shape
514
- rows = data_rows * self.nsamples
515
- shape = rows, data_cols
516
- if nnz == 0:
517
- self.synth_data = scipy.sparse.csr_matrix(shape, dtype=self.data.data.dtype).tolil()
518
- else:
519
- data = self.data.data.data
520
- indices = self.data.data.indices
521
- indptr = self.data.data.indptr
522
- last_indptr_idx = indptr[len(indptr) - 1]
523
- indptr_wo_last = indptr[:-1]
524
- new_indptrs = []
525
- for i in range(0, self.nsamples - 1):
526
- new_indptrs.append(indptr_wo_last + (i * last_indptr_idx))
527
- new_indptrs.append(indptr + ((self.nsamples - 1) * last_indptr_idx))
528
- new_indptr = np.concatenate(new_indptrs)
529
- new_data = np.tile(data, self.nsamples)
530
- new_indices = np.tile(indices, self.nsamples)
531
- self.synth_data = scipy.sparse.csr_matrix((new_data, new_indices, new_indptr), shape=shape).tolil()
532
- else:
533
- self.synth_data = np.tile(self.data.data, (self.nsamples, 1))
534
-
535
- self.maskMatrix = np.zeros((self.nsamples, self.M))
536
- self.kernelWeights = np.zeros(self.nsamples)
537
- self.y = np.zeros((self.nsamples * self.N, self.D))
538
- self.ey = np.zeros((self.nsamples, self.D))
539
- self.lastMask = np.zeros(self.nsamples)
540
- self.nsamplesAdded = 0
541
- self.nsamplesRun = 0
542
- if self.keep_index:
543
- self.synth_data_index = np.tile(self.data.index_value, self.nsamples)
544
-
545
- def addsample(self, x, m, w):
546
- offset = self.nsamplesAdded * self.N
547
- if isinstance(self.varyingFeatureGroups, (list,)):
548
- for j in range(self.M):
549
- for k in self.varyingFeatureGroups[j]:
550
- if m[j] == 1.0:
551
- self.synth_data[offset:offset+self.N, k] = x[0, k]
552
- else:
553
- # for non-jagged numpy array we can significantly boost performance
554
- mask = m == 1.0
555
- groups = self.varyingFeatureGroups[mask]
556
- if len(groups.shape) == 2:
557
- for group in groups:
558
- self.synth_data[offset:offset+self.N, group] = x[0, group]
559
- else:
560
- # further performance optimization in case each group has a single feature
561
- evaluation_data = x[0, groups]
562
- # In edge case where background is all dense but evaluation data
563
- # is all sparse, make evaluation data dense
564
- if scipy.sparse.issparse(x) and not scipy.sparse.issparse(self.synth_data):
565
- evaluation_data = evaluation_data.toarray()
566
- self.synth_data[offset:offset+self.N, groups] = evaluation_data
567
- self.maskMatrix[self.nsamplesAdded, :] = m
568
- self.kernelWeights[self.nsamplesAdded] = w
569
- self.nsamplesAdded += 1
570
-
571
- def run(self):
572
- num_to_run = self.nsamplesAdded * self.N - self.nsamplesRun * self.N
573
- data = self.synth_data[self.nsamplesRun*self.N:self.nsamplesAdded*self.N,:]
574
- if self.keep_index:
575
- index = self.synth_data_index[self.nsamplesRun*self.N:self.nsamplesAdded*self.N]
576
- index = pd.DataFrame(index, columns=[self.data.index_name])
577
- data = pd.DataFrame(data, columns=self.data.group_names)
578
- data = pd.concat([index, data], axis=1).set_index(self.data.index_name)
579
- if self.keep_index_ordered:
580
- data = data.sort_index()
581
- modelOut = self.model.f(data)
582
- if isinstance(modelOut, (pd.DataFrame, pd.Series)):
583
- modelOut = modelOut.values
584
- self.y[self.nsamplesRun * self.N:self.nsamplesAdded * self.N, :] = np.reshape(modelOut, (num_to_run, self.D))
585
-
586
- # find the expected value of each output
587
- for i in range(self.nsamplesRun, self.nsamplesAdded):
588
- eyVal = np.zeros(self.D)
589
- for j in range(0, self.N):
590
- eyVal += self.y[i * self.N + j, :] * self.data.weights[j]
591
-
592
- self.ey[i, :] = eyVal
593
- self.nsamplesRun += 1
594
-
595
- def solve(self, fraction_evaluated, dim):
596
- eyAdj = self.linkfv(self.ey[:, dim]) - self.link.f(self.fnull[dim])
597
- s = np.sum(self.maskMatrix, 1)
598
-
599
- # do feature selection if we have not well enumerated the space
600
- nonzero_inds = np.arange(self.M)
601
- log.debug(f"{fraction_evaluated = }")
602
- # if self.l1_reg == "auto":
603
- # warnings.warn(
604
- # "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
605
- # "conditional use of AIC to simply \"num_features(10)\"!"
606
- # )
607
- if (self.l1_reg not in ["auto", False, 0]) or (fraction_evaluated < 0.2 and self.l1_reg == "auto"):
608
- w_aug = np.hstack((self.kernelWeights * (self.M - s), self.kernelWeights * s))
609
- log.info(f"{np.sum(w_aug) = }")
610
- log.info(f"{np.sum(self.kernelWeights) = }")
611
- w_sqrt_aug = np.sqrt(w_aug)
612
- eyAdj_aug = np.hstack((eyAdj, eyAdj - (self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))))
613
- eyAdj_aug *= w_sqrt_aug
614
- mask_aug = np.transpose(w_sqrt_aug * np.transpose(np.vstack((self.maskMatrix, self.maskMatrix - 1))))
615
- #var_norms = np.array([np.linalg.norm(mask_aug[:, i]) for i in range(mask_aug.shape[1])])
616
-
617
- # select a fixed number of top features
618
- if isinstance(self.l1_reg, str) and self.l1_reg.startswith("num_features("):
619
- r = int(self.l1_reg[len("num_features("):-1])
620
- nonzero_inds = lars_path(mask_aug, eyAdj_aug, max_iter=r)[1]
621
-
622
- # use an adaptive regularization method
623
- elif self.l1_reg == "auto" or self.l1_reg == "bic" or self.l1_reg == "aic":
624
- c = "aic" if self.l1_reg == "auto" else self.l1_reg
625
-
626
- # "Normalize" parameter of LassoLarsIC was deprecated in sklearn version 1.2
627
- if version.parse(sklearn.__version__) < version.parse("1.2.0"):
628
- kwg = dict(normalize=False)
629
- else:
630
- kwg = {}
631
- model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC(criterion=c, **kwg))
632
- nonzero_inds = np.nonzero(model.fit(mask_aug, eyAdj_aug)[1].coef_)[0]
633
-
634
- # use a fixed regularization coefficient
635
- else:
636
- nonzero_inds = np.nonzero(Lasso(alpha=self.l1_reg).fit(mask_aug, eyAdj_aug).coef_)[0]
637
-
638
- if len(nonzero_inds) == 0:
639
- return np.zeros(self.M), np.ones(self.M)
640
-
641
- # eliminate one variable with the constraint that all features sum to the output
642
- eyAdj2 = eyAdj - self.maskMatrix[:, nonzero_inds[-1]] * (
643
- self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))
644
- etmp = np.transpose(np.transpose(self.maskMatrix[:, nonzero_inds[:-1]]) - self.maskMatrix[:, nonzero_inds[-1]])
645
- log.debug(f"{etmp[:4, :] = }")
646
-
647
- # solve a weighted least squares equation to estimate phi
648
- # least squares:
649
- # phi = min_w ||W^(1/2) (y - X w)||^2
650
- # the corresponding normal equation:
651
- # (X' W X) phi = X' W y
652
- # with
653
- # X = etmp
654
- # W = np.diag(self.kernelWeights)
655
- # y = eyAdj2
656
- #
657
- # We could just rely on scikit-learn
658
- # from sklearn.linear_model import LinearRegression
659
- # lm = LinearRegression(fit_intercept=False).fit(etmp, eyAdj2, sample_weight=self.kernelWeights)
660
- # Under the hood, as of scikit-learn version 1.3, LinearRegression still uses np.linalg.lstsq and
661
- # there are more performant options. See https://github.com/scikit-learn/scikit-learn/issues/22855.
662
- y = eyAdj2
663
- X = etmp
664
- WX = self.kernelWeights[:, None] * X
665
- try:
666
- w = np.linalg.solve(X.T @ WX, WX.T @ y)
667
- except np.linalg.LinAlgError:
668
- warnings.warn(
669
- "Linear regression equation is singular, a least squares solutions is used instead.\n"
670
- "To avoid this situation and get a regular matrix do one of the following:\n"
671
- "1) turn up the number of samples,\n"
672
- "2) turn up the L1 regularization with num_features(N) where N is less than the number of samples,\n"
673
- "3) group features together to reduce the number of inputs that need to be explained."
674
- )
675
- # XWX = np.linalg.pinv(X.T @ WX)
676
- # w = np.dot(XWX, np.dot(np.transpose(WX), y))
677
- sqrt_W = np.sqrt(self.kernelWeights)
678
- w = np.linalg.lstsq(sqrt_W[:, None] * X, sqrt_W * y, rcond=None)[0]
679
- log.debug(f"{np.sum(w) = }")
680
- log.debug("self.link(self.fx) - self.link(self.fnull) = {}".format(
681
- self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim])))
682
- log.debug(f"self.fx = {self.fx[dim]}")
683
- log.debug(f"self.link(self.fx) = {self.link.f(self.fx[dim])}")
684
- log.debug(f"self.fnull = {self.fnull[dim]}")
685
- log.debug(f"self.link(self.fnull) = {self.link.f(self.fnull[dim])}")
686
- phi = np.zeros(self.M)
687
- phi[nonzero_inds[:-1]] = w
688
- phi[nonzero_inds[-1]] = (self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim])) - sum(w)
689
- log.info(f"{phi = }")
690
-
691
- # clean up any rounding errors
692
- for i in range(self.M):
693
- if np.abs(phi[i]) < 1e-10:
694
- phi[i] = 0
695
-
696
- return phi, np.ones(len(phi))
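
A minimal usage sketch of the KernelExplainer deleted above, tying together the docstring recommendations: a k-means background summary, the "auto" sampling budget, and num_features(...) l1 regularization. The scikit-learn model and synthetic data are assumptions for illustration only.

import numpy as np
import shap
from sklearn.ensemble import RandomForestClassifier

# synthetic data: 200 samples, 8 features, binary labels (illustrative)
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 8))
y = (X[:, 0] + X[:, 1] > 0).astype(int)
model = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

# summarize the background as 10 weighted samples to keep run times reasonable
background = shap.kmeans(X, 10)

explainer = shap.KernelExplainer(model.predict_proba, background)
# "auto" uses roughly 2 * n_features + 2048 model re-evaluations per explained instance;
# l1_reg="num_features(5)" keeps only the top 5 features in each explanation
shap_values = explainer.shap_values(X[:5], nsamples="auto", l1_reg="num_features(5)")

Because predict_proba has two outputs, shap_values is a list of two (samples x features) arrays, and each row of shap_values[k] sums to the class-k probability minus explainer.expected_value[k].
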
lib/shap/explainers/_linear.py DELETED
@@ -1,406 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from scipy.sparse import issparse
6
- from tqdm.auto import tqdm
7
-
8
- from .. import links, maskers
9
- from ..utils._exceptions import (
10
- DimensionError,
11
- InvalidFeaturePerturbationError,
12
- InvalidModelError,
13
- )
14
- from ._explainer import Explainer
15
-
16
-
17
- class LinearExplainer(Explainer):
18
- """ Computes SHAP values for a linear model, optionally accounting for inter-feature correlations.
19
-
20
- This computes the SHAP values for a linear model and can account for the correlations among
21
- the input features. Assuming features are independent leads to interventional SHAP values which
22
- for a linear model are coef[i] * (x[i] - X.mean(0)[i]) for the ith feature. If instead we account
23
- for correlations then we prevent any problems arising from collinearity and share credit among
24
- correlated features. Accounting for correlations can be computationally challenging, but
25
- LinearExplainer uses sampling to estimate a transform that can then be applied to explain
26
- any prediction of the model.
27
-
28
- Parameters
29
- ----------
30
- model : (coef, intercept) or sklearn.linear_model.*
31
- User supplied linear model, given as either a parameter pair or an sklearn object.
32
-
33
- data : (mean, cov), numpy.array, pandas.DataFrame, iml.DenseData or scipy.csr_matrix
34
- The background dataset to use for computing conditional expectations. Note that only the
35
- mean and covariance of the dataset are used. This means passing a raw data matrix is just
36
- a convenient alternative to passing the mean and covariance directly.
37
- nsamples : int
38
- Number of samples to use when estimating the transformation matrix used to account for
39
- feature correlations.
40
- feature_perturbation : "interventional" (default) or "correlation_dependent"
41
- There are two ways we might want to compute SHAP values, either the full conditional SHAP
42
- values or the interventional SHAP values. For interventional SHAP values we break any
43
- dependence structure between features in the model and so uncover how the model would behave if we
44
- intervened and changed some of the inputs. For the full conditional SHAP values we respect
45
- the correlations among the input features, so if the model depends on one input but that
46
- input is correlated with another input, then both get some credit for the model's behavior. The
47
- interventional option stays "true to the model" meaning it will only give credit to features that are
48
- actually used by the model, while the correlation option stays "true to the data" in the sense that
49
- it only considers how the model would behave when respecting the correlations in the input data.
50
- For the sparse case, only the interventional option is supported.
51
-
52
- Examples
53
- --------
54
- See `Linear explainer examples <https://shap.readthedocs.io/en/latest/api_examples/explainers/LinearExplainer.html>`_
55
- """
56
-
57
- def __init__(self, model, masker, link=links.identity, nsamples=1000, feature_perturbation=None, **kwargs):
58
- if 'feature_dependence' in kwargs:
59
- warnings.warn('The option feature_dependence has been renamed to feature_perturbation!')
60
- feature_perturbation = kwargs["feature_dependence"]
61
- if feature_perturbation == "independent":
62
- warnings.warn('The option feature_perturbation="independent" is has been renamed to feature_perturbation="interventional"!')
63
- feature_perturbation = "interventional"
64
- elif feature_perturbation == "correlation":
65
- warnings.warn('The option feature_perturbation="correlation" is has been renamed to feature_perturbation="correlation_dependent"!')
66
- feature_perturbation = "correlation_dependent"
67
- if feature_perturbation is not None:
68
- warnings.warn("The feature_perturbation option is now deprecated in favor of using the appropriate masker (maskers.Independent, or maskers.Impute)")
69
- else:
70
- feature_perturbation = "interventional"
71
- self.feature_perturbation = feature_perturbation
72
-
73
- # wrap the incoming masker object as a shap.Masker object before calling
74
- # parent class constructor, which does the same but without respecting
75
- # the user-provided feature_perturbation choice
76
- if isinstance(masker, pd.DataFrame) or ((isinstance(masker, np.ndarray) or issparse(masker)) and len(masker.shape) == 2):
77
- if self.feature_perturbation == "correlation_dependent":
78
- masker = maskers.Impute(masker)
79
- else:
80
- masker = maskers.Independent(masker)
81
- elif issubclass(type(masker), tuple) and len(masker) == 2:
82
- if self.feature_perturbation == "correlation_dependent":
83
- masker = maskers.Impute({"mean": masker[0], "cov": masker[1]}, method="linear")
84
- else:
85
- masker = maskers.Independent({"mean": masker[0], "cov": masker[1]})
86
-
87
- super().__init__(model, masker, link=link, **kwargs)
88
-
89
- self.nsamples = nsamples
90
-
91
-
92
- # extract what we need from the given model object
93
- self.coef, self.intercept = LinearExplainer._parse_model(model)
94
-
95
- # extract the data
96
- if issubclass(type(self.masker), (maskers.Independent, maskers.Partition)):
97
- self.feature_perturbation = "interventional"
98
- elif issubclass(type(self.masker), maskers.Impute):
99
- self.feature_perturbation = "correlation_dependent"
100
- else:
101
- raise NotImplementedError("The Linear explainer only supports the Independent, Partition, and Impute maskers right now!")
102
- data = getattr(self.masker, "data", None)
103
-
104
- # convert DataFrame's to numpy arrays
105
- if isinstance(data, pd.DataFrame):
106
- data = data.values
107
-
108
- # get the mean and covariance of the model
109
- if getattr(self.masker, "mean", None) is not None:
110
- self.mean = self.masker.mean
111
- self.cov = self.masker.cov
112
- elif isinstance(data, dict) and len(data) == 2:
113
- self.mean = data["mean"]
114
- if isinstance(self.mean, pd.Series):
115
- self.mean = self.mean.values
116
-
117
- self.cov = data["cov"]
118
- if isinstance(self.cov, pd.DataFrame):
119
- self.cov = self.cov.values
120
- elif isinstance(data, tuple) and len(data) == 2:
121
- self.mean = data[0]
122
- if isinstance(self.mean, pd.Series):
123
- self.mean = self.mean.values
124
-
125
- self.cov = data[1]
126
- if isinstance(self.cov, pd.DataFrame):
127
- self.cov = self.cov.values
128
- elif data is None:
129
- raise ValueError("A background data distribution must be provided!")
130
- else:
131
- if issparse(data):
132
- self.mean = np.array(np.mean(data, 0))[0]
133
- if self.feature_perturbation != "interventional":
134
- raise NotImplementedError("Only feature_perturbation = 'interventional' is supported for sparse data")
135
- else:
136
- self.mean = np.array(np.mean(data, 0)).flatten() # assumes it is an array
137
- if self.feature_perturbation == "correlation_dependent":
138
- self.cov = np.cov(data, rowvar=False)
139
- #print(self.coef, self.mean.flatten(), self.intercept)
140
- # Note: mean can be numpy.matrixlib.defmatrix.matrix or numpy.matrix type depending on numpy version
141
- if issparse(self.mean) or str(type(self.mean)).endswith("matrix'>"):
142
- # accept both sparse and dense coef
143
- # if not issparse(self.coef):
144
- # self.coef = np.asmatrix(self.coef)
145
- self.expected_value = np.dot(self.coef, self.mean) + self.intercept
146
-
147
- # unwrap the matrix form
148
- if len(self.expected_value) == 1:
149
- self.expected_value = self.expected_value[0,0]
150
- else:
151
- self.expected_value = np.array(self.expected_value)[0]
152
- else:
153
- self.expected_value = np.dot(self.coef, self.mean) + self.intercept
154
-
155
- self.M = len(self.mean)
156
-
157
- # if needed, estimate the transform matrices
158
- if self.feature_perturbation == "correlation_dependent":
159
- self.valid_inds = np.where(np.diag(self.cov) > 1e-8)[0]
160
- self.mean = self.mean[self.valid_inds]
161
- self.cov = self.cov[:,self.valid_inds][self.valid_inds,:]
162
- self.coef = self.coef[self.valid_inds]
163
-
164
- # group perfectly redundant variables together
165
- self.avg_proj,sum_proj = duplicate_components(self.cov)
166
- self.cov = np.matmul(np.matmul(self.avg_proj, self.cov), self.avg_proj.T)
167
- self.mean = np.matmul(self.avg_proj, self.mean)
168
- self.coef = np.matmul(sum_proj, self.coef)
169
-
170
- # if we still have some multi-collinearity present then we just add regularization...
171
- e,_ = np.linalg.eig(self.cov)
172
- if e.min() < 1e-7:
173
- self.cov = self.cov + np.eye(self.cov.shape[0]) * 1e-6
174
-
175
- mean_transform, x_transform = self._estimate_transforms(nsamples)
176
- self.mean_transformed = np.matmul(mean_transform, self.mean)
177
- self.x_transform = x_transform
178
- elif self.feature_perturbation == "interventional":
179
- if nsamples != 1000:
180
- warnings.warn("Setting nsamples has no effect when feature_perturbation = 'interventional'!")
181
- else:
182
- raise InvalidFeaturePerturbationError("Unknown type of feature_perturbation provided: " + self.feature_perturbation)
183
-
184
- def _estimate_transforms(self, nsamples):
185
- """ Uses block matrix inversion identities to quickly estimate transforms.
186
-
187
- After a bit of matrix math we can isolate a transform matrix (# features x # features)
188
- that is independent of any sample we are explaining. It is the result of averaging over
189
- all feature permutations, but we just use a fixed number of samples to estimate the value.
190
-
191
- TODO: Do a brute force enumeration when # feature subsets is less than nsamples. This could
192
- happen through a recursive method that uses the same block matrix inversion as below.
193
- """
194
- M = len(self.coef)
195
-
196
- mean_transform = np.zeros((M,M))
197
- x_transform = np.zeros((M,M))
198
- inds = np.arange(M, dtype=int)
199
- for _ in tqdm(range(nsamples), "Estimating transforms"):
200
- np.random.shuffle(inds)
201
- cov_inv_SiSi = np.zeros((0,0))
202
- cov_Si = np.zeros((M,0))
203
- for j in range(M):
204
- i = inds[j]
205
-
206
- # use the last Si as the new S
207
- cov_S = cov_Si
208
- cov_inv_SS = cov_inv_SiSi
209
-
210
- # get the new cov_Si
211
- cov_Si = self.cov[:,inds[:j+1]]
212
-
213
- # compute the new cov_inv_SiSi from cov_inv_SS
214
- d = cov_Si[i,:-1].T
215
- t = np.matmul(cov_inv_SS, d)
216
- Z = self.cov[i, i]
217
- u = Z - np.matmul(t.T, d)
218
- cov_inv_SiSi = np.zeros((j+1, j+1))
219
- if j > 0:
220
- cov_inv_SiSi[:-1, :-1] = cov_inv_SS + np.outer(t, t) / u
221
- cov_inv_SiSi[:-1, -1] = cov_inv_SiSi[-1,:-1] = -t / u
222
- cov_inv_SiSi[-1, -1] = 1 / u
223
-
224
- # + coef @ (Q(bar(Sui)) - Q(bar(S)))
225
- mean_transform[i, i] += self.coef[i]
226
-
227
- # + coef @ R(Sui)
228
- coef_R_Si = np.matmul(self.coef[inds[j+1:]], np.matmul(cov_Si, cov_inv_SiSi)[inds[j+1:]])
229
- mean_transform[i, inds[:j+1]] += coef_R_Si
230
-
231
- # - coef @ R(S)
232
- coef_R_S = np.matmul(self.coef[inds[j:]], np.matmul(cov_S, cov_inv_SS)[inds[j:]])
233
- mean_transform[i, inds[:j]] -= coef_R_S
234
-
235
- # - coef @ (Q(Sui) - Q(S))
236
- x_transform[i, i] += self.coef[i]
237
-
238
- # + coef @ R(Sui)
239
- x_transform[i, inds[:j+1]] += coef_R_Si
240
-
241
- # - coef @ R(S)
242
- x_transform[i, inds[:j]] -= coef_R_S
243
-
244
- mean_transform /= nsamples
245
- x_transform /= nsamples
246
- return mean_transform, x_transform
247
-
248
- @staticmethod
249
- def _parse_model(model):
250
- """ Attempt to pull out the coefficients and intercept from the given model object.
251
- """
252
- # raw coefficients
253
- if type(model) == tuple and len(model) == 2:
254
- coef = model[0]
255
- intercept = model[1]
256
-
257
- # sklearn style model
258
- elif hasattr(model, "coef_") and hasattr(model, "intercept_"):
259
- # work around for multi-class with a single class
260
- if len(model.coef_.shape) > 1 and model.coef_.shape[0] == 1:
261
- coef = model.coef_[0]
262
- try:
263
- intercept = model.intercept_[0]
264
- except TypeError:
265
- intercept = model.intercept_
266
- else:
267
- coef = model.coef_
268
- intercept = model.intercept_
269
- else:
270
- raise InvalidModelError("An unknown model type was passed: " + str(type(model)))
271
-
272
- return coef,intercept
273
-
274
- @staticmethod
275
- def supports_model_with_masker(model, masker):
276
- """ Determines if we can parse the given model.
277
- """
278
-
279
- if not isinstance(masker, (maskers.Independent, maskers.Partition, maskers.Impute)):
280
- return False
281
-
282
- try:
283
- LinearExplainer._parse_model(model)
284
- except Exception:
285
- return False
286
- return True
287
-
288
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, silent):
289
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes).
290
- """
291
-
292
- assert len(row_args) == 1, "Only single-argument functions are supported by the Linear explainer!"
293
-
294
- X = row_args[0]
295
- if len(X.shape) == 1:
296
- X = X.reshape(1, -1)
297
-
298
- # convert dataframes
299
- if isinstance(X, (pd.Series, pd.DataFrame)):
300
- X = X.values
301
-
302
- if len(X.shape) not in (1, 2):
303
- raise DimensionError("Instance must have 1 or 2 dimensions! Not: %s" %len(X.shape))
304
-
305
- if self.feature_perturbation == "correlation_dependent":
306
- if issparse(X):
307
- raise InvalidFeaturePerturbationError("Only feature_perturbation = 'interventional' is supported for sparse data")
308
- phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed
309
- phi = np.matmul(phi, self.avg_proj)
310
-
311
- full_phi = np.zeros((phi.shape[0], self.M))
312
- full_phi[:,self.valid_inds] = phi
313
- phi = full_phi
314
-
315
- elif self.feature_perturbation == "interventional":
316
- if issparse(X):
317
- phi = np.array(np.multiply(X - self.mean, self.coef))
318
-
319
- # if len(self.coef.shape) == 1:
320
- # return np.array(np.multiply(X - self.mean, self.coef))
321
- # else:
322
- # return [np.array(np.multiply(X - self.mean, self.coef[i])) for i in range(self.coef.shape[0])]
323
- else:
324
- phi = np.array(X - self.mean) * self.coef
325
- # if len(self.coef.shape) == 1:
326
- # phi = np.array(X - self.mean) * self.coef
327
- # return np.array(X - self.mean) * self.coef
328
- # else:
329
- # return [np.array(X - self.mean) * self.coef[i] for i in range(self.coef.shape[0])]
330
-
331
- return {
332
- "values": phi.T,
333
- "expected_values": self.expected_value,
334
- "mask_shapes": (X.shape[1:],),
335
- "main_effects": phi.T,
336
- "clustering": None
337
- }
338
-
339
-
340
- def shap_values(self, X):
341
- """ Estimate the SHAP values for a set of samples.
342
-
343
- Parameters
344
- ----------
345
- X : numpy.array, pandas.DataFrame or scipy.csr_matrix
346
- A matrix of samples (# samples x # features) on which to explain the model's output.
347
-
348
- Returns
349
- -------
350
- array or list
351
- For models with a single output this returns a matrix of SHAP values
352
- (# samples x # features). Each row sums to the difference between the model output for that
353
- sample and the expected value of the model output (which is stored as expected_value
354
- attribute of the explainer).
355
- """
356
-
357
- # convert dataframes
358
- if isinstance(X, (pd.Series, pd.DataFrame)):
359
- X = X.values
360
-
361
- # assert isinstance(X, np.ndarray), "Unknown instance type: " + str(type(X))
362
- if len(X.shape) not in (1, 2):
363
- raise DimensionError("Instance must have 1 or 2 dimensions! Not: %s" % len(X.shape))
364
-
365
- if self.feature_perturbation == "correlation_dependent":
366
- if issparse(X):
367
- raise InvalidFeaturePerturbationError("Only feature_perturbation = 'interventional' is supported for sparse data")
368
- phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed
369
- phi = np.matmul(phi, self.avg_proj)
370
-
371
- full_phi = np.zeros((phi.shape[0], self.M))
372
- full_phi[:,self.valid_inds] = phi
373
-
374
- return full_phi
375
-
376
- elif self.feature_perturbation == "interventional":
377
- if issparse(X):
378
- if len(self.coef.shape) == 1:
379
- return np.array(np.multiply(X - self.mean, self.coef))
380
- else:
381
- return [np.array(np.multiply(X - self.mean, self.coef[i])) for i in range(self.coef.shape[0])]
382
- else:
383
- if len(self.coef.shape) == 1:
384
- return np.array(X - self.mean) * self.coef
385
- else:
386
- return [np.array(X - self.mean) * self.coef[i] for i in range(self.coef.shape[0])]
387
-
388
- def duplicate_components(C):
389
- D = np.diag(1/np.sqrt(np.diag(C)))
390
- C = np.matmul(np.matmul(D, C), D)
391
- components = -np.ones(C.shape[0], dtype=int)
392
- count = -1
393
- for i in range(C.shape[0]):
394
- found_group = False
395
- for j in range(C.shape[0]):
396
- if components[j] < 0 and np.abs(2*C[i,j] - C[i,i] - C[j,j]) < 1e-8:
397
- if not found_group:
398
- count += 1
399
- found_group = True
400
- components[j] = count
401
-
402
- proj = np.zeros((len(np.unique(components)), C.shape[0]))
403
- proj[0, 0] = 1
404
- for i in range(1,C.shape[0]):
405
- proj[components[i], i] = 1
406
- return (proj.T / proj.sum(1)).T, proj
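
A minimal usage sketch of the LinearExplainer deleted above, checking the interventional identity coef[i] * (x[i] - mean[i]) stated in its docstring. The Ridge model and synthetic data are assumptions for illustration only.

import numpy as np
import shap
from sklearn.linear_model import Ridge

# synthetic data (illustrative); 100 background rows so the Independent masker keeps them all
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=100)
model = Ridge(alpha=1.0).fit(X, y)

explainer = shap.LinearExplainer(model, X)  # a 2D array is wrapped in maskers.Independent
shap_values = explainer.shap_values(X[:3])

# for the interventional case this reduces to coef * (x - background mean)
manual = (X[:3] - X.mean(0)) * model.coef_
print(np.allclose(shap_values, manual))     # expected: True
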
lib/shap/explainers/_partition.py DELETED
@@ -1,681 +0,0 @@
1
- import queue
2
- import time
3
-
4
- import numpy as np
5
- from numba import njit
6
- from tqdm.auto import tqdm
7
-
8
- from .. import Explanation, links
9
- from ..models import Model
10
- from ..utils import MaskedModel, OpChain, make_masks, safe_isinstance
11
- from ._explainer import Explainer
12
-
13
-
14
- class PartitionExplainer(Explainer):
15
- """Uses the Partition SHAP method to explain the output of any function.
16
-
17
- Partition SHAP computes Shapley values recursively through a hierarchy of features; this
18
- hierarchy defines feature coalitions and results in the Owen values from game theory.
19
-
20
- The PartitionExplainer has two particularly nice properties:
21
-
22
- 1) PartitionExplainer is model-agnostic but when using a balanced partition tree only has
23
- quadratic exact runtime (in terms of the number of input features). This is in contrast to the
24
- exponential exact runtime of KernelExplainer or SamplingExplainer.
25
- 2) PartitionExplainer always assigns to groups of correlated features the credit that set of features
26
- would have had if treated as a group. This means if the hierarchical clustering given to
27
- PartitionExplainer groups correlated features together, then feature correlations are
28
- "accounted for" in the sense that the total credit assigned to a group of tightly dependent features
29
- does not depend on how they behave if their correlation structure was broken during the explanation's
30
- perturbation process.
31
- Note that for linear models the Owen values that PartitionExplainer returns are the same as the standard
32
- non-hierarchical Shapley values.
33
- """
34
-
35
- def __init__(self, model, masker, *, output_names=None, link=links.identity, linearize_link=True,
36
- feature_names=None, **call_args):
37
- """Build a PartitionExplainer for the given model with the given masker.
38
-
39
- Parameters
40
- ----------
41
- model : function
42
- User supplied function that takes a matrix of samples (# samples x # features) and
43
- computes the output of the model for those samples.
44
-
45
- masker : function or numpy.array or pandas.DataFrame or tokenizer
46
- The function used to "mask" out hidden features of the form `masker(mask, x)`. It takes a
47
- single input sample and a binary mask and returns a matrix of masked samples. These
48
- masked samples will then be evaluated using the model function and the outputs averaged.
49
- As a shortcut for the standard masking used by SHAP you can pass a background data matrix
50
- instead of a function and that matrix will be used for masking. Domain specific masking
51
- functions are available in shap such as shap.maskers.Image for images and shap.maskers.Text
52
- for text.
53
-
54
- partition_tree : None or function or numpy.array
55
- A hierarchical clustering of the input features represented by a matrix that follows the format
56
- used by scipy.cluster.hierarchy (see the notebooks_html/partition_explainer directory for an example).
57
- If this is a function then the function produces a clustering matrix when given a single input
58
- example. If you are using a standard SHAP masker object then you can pass masker.clustering
59
- to use that masker's built-in clustering of the features, or if partition_tree is None then
60
- masker.clustering will be used by default.
61
-
62
- Examples
63
- --------
64
- See `Partition explainer examples <https://shap.readthedocs.io/en/latest/api_examples/explainers/PartitionExplainer.html>`_
65
- """
66
-
67
- super().__init__(model, masker, link=link, linearize_link=linearize_link, algorithm="partition", \
68
- output_names = output_names, feature_names=feature_names)
69
-
70
- # convert dataframes
71
- # if isinstance(masker, pd.DataFrame):
72
- # masker = TabularMasker(masker)
73
- # elif isinstance(masker, np.ndarray) and len(masker.shape) == 2:
74
- # masker = TabularMasker(masker)
75
- # elif safe_isinstance(masker, "transformers.PreTrainedTokenizer"):
76
- # masker = TextMasker(masker)
77
- # self.masker = masker
78
-
79
- # TODO: maybe? if we have a tabular masker then we build a PermutationExplainer that we
80
- # will use for sampling
81
- self.input_shape = masker.shape[1:] if hasattr(masker, "shape") and not callable(masker.shape) else None
82
- # self.output_names = output_names
83
- if not safe_isinstance(self.model, "shap.models.Model"):
84
- self.model = Model(self.model)#lambda *args: np.array(model(*args))
85
- self.expected_value = None
86
- self._curr_base_value = None
87
- if getattr(self.masker, "clustering", None) is None:
88
- raise ValueError("The passed masker must have a .clustering attribute defined! Try shap.maskers.Partition(data) for example.")
89
- # if partition_tree is None:
90
- # if not hasattr(masker, "partition_tree"):
91
- # raise ValueError("The passed masker does not have masker.clustering, so the partition_tree must be passed!")
92
- # self.partition_tree = masker.clustering
93
- # else:
94
- # self.partition_tree = partition_tree
95
-
96
- # handle higher dimensional tensor inputs
97
- if self.input_shape is not None and len(self.input_shape) > 1:
98
- self._reshaped_model = lambda x: self.model(x.reshape(x.shape[0], *self.input_shape))
99
- else:
100
- self._reshaped_model = self.model
101
-
102
- # if we don't have a dynamic clustering algorithm then we can precompute
103
- # a lot of information
104
- if not callable(self.masker.clustering):
105
- self._clustering = self.masker.clustering
106
- self._mask_matrix = make_masks(self._clustering)
107
-
108
- # if we have gotten default arguments for the call function we need to wrap ourselves in a new class that
109
- # has a call function with those new default arguments
110
- if len(call_args) > 0:
111
- class PartitionExplainer(self.__class__):
112
- # this signature should match the __call__ signature of the class defined below
113
- def __call__(self, *args, max_evals=500, fixed_context=None, main_effects=False, error_bounds=False, batch_size="auto",
114
- outputs=None, silent=False):
115
- return super().__call__(
116
- *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds,
117
- batch_size=batch_size, outputs=outputs, silent=silent
118
- )
119
- PartitionExplainer.__call__.__doc__ = self.__class__.__call__.__doc__
120
- self.__class__ = PartitionExplainer
121
- for k, v in call_args.items():
122
- self.__call__.__kwdefaults__[k] = v
123
-
124
- # note that changes to this function signature should be copied to the default call argument wrapper above
125
- def __call__(self, *args, max_evals=500, fixed_context=None, main_effects=False, error_bounds=False, batch_size="auto",
126
- outputs=None, silent=False):
127
- """ Explain the output of the model on the given arguments.
128
- """
129
- return super().__call__(
130
- *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
131
- outputs=outputs, silent=silent
132
- )
133
-
134
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, silent, fixed_context = "auto"):
135
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes).
136
- """
137
-
138
- if fixed_context == "auto":
139
- # if isinstance(self.masker, maskers.Text):
140
- # fixed_context = 1 # we err on the side of speed for text models
141
- # else:
142
- fixed_context = None
143
- elif fixed_context not in [0, 1, None]:
144
- raise ValueError("Unknown fixed_context value passed (must be 0, 1 or None): %s" %fixed_context)
145
-
146
- # build a masked version of the model for the current input sample
147
- fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args)
148
-
149
- # make sure we have the base value and current value outputs
150
- M = len(fm)
151
- m00 = np.zeros(M, dtype=bool)
152
- # if not fixed background or no base value assigned then compute base value for a row
153
- if self._curr_base_value is None or not getattr(self.masker, "fixed_background", False):
154
- self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
155
- f11 = fm(~m00.reshape(1, -1))[0]
156
-
157
- if callable(self.masker.clustering):
158
- self._clustering = self.masker.clustering(*row_args)
159
- self._mask_matrix = make_masks(self._clustering)
160
-
161
- if hasattr(self._curr_base_value, 'shape') and len(self._curr_base_value.shape) > 0:
162
- if outputs is None:
163
- outputs = np.arange(len(self._curr_base_value))
164
- elif isinstance(outputs, OpChain):
165
- outputs = outputs.apply(Explanation(f11)).values
166
-
167
- out_shape = (2*self._clustering.shape[0]+1, len(outputs))
168
- else:
169
- out_shape = (2*self._clustering.shape[0]+1,)
170
-
171
- if max_evals == "auto":
172
- max_evals = 500
173
-
174
- self.values = np.zeros(out_shape)
175
- self.dvalues = np.zeros(out_shape)
176
-
177
- self.owen(fm, self._curr_base_value, f11, max_evals - 2, outputs, fixed_context, batch_size, silent)
178
-
179
- # if False:
180
- # if self.multi_output:
181
- # return [self.dvalues[:,i] for i in range(self.dvalues.shape[1])], oinds
182
- # else:
183
- # return self.dvalues.copy(), oinds
184
- # else:
185
- # drop the interaction terms down onto self.values
186
- self.values[:] = self.dvalues
187
-
188
- lower_credit(len(self.dvalues) - 1, 0, M, self.values, self._clustering)
189
-
190
- return {
191
- "values": self.values[:M].copy(),
192
- "expected_values": self._curr_base_value if outputs is None else self._curr_base_value[outputs],
193
- "mask_shapes": [s + out_shape[1:] for s in fm.mask_shapes],
194
- "main_effects": None,
195
- "hierarchical_values": self.dvalues.copy(),
196
- "clustering": self._clustering,
197
- "output_indices": outputs,
198
- "output_names": getattr(self.model, "output_names", None)
199
- }
200
-
201
- def __str__(self):
202
- return "shap.explainers.PartitionExplainer()"
203
-
204
- def owen(self, fm, f00, f11, max_evals, output_indexes, fixed_context, batch_size, silent):
205
- """ Compute a nested set of recursive Owen values based on an ordering recursion.
206
- """
207
-
208
- #f = self._reshaped_model
209
- #r = self.masker
210
- #masks = np.zeros(2*len(inds)+1, dtype=int)
211
- M = len(fm)
212
- m00 = np.zeros(M, dtype=bool)
213
- #f00 = fm(m00.reshape(1,-1))[0]
214
- base_value = f00
215
- #f11 = fm(~m00.reshape(1,-1))[0]
216
- #f11 = self._reshaped_model(r(~m00, x)).mean(0)
217
- ind = len(self.dvalues)-1
218
-
219
- # make sure output_indexes is a list of indexes
220
- if output_indexes is not None:
221
- # assert self.multi_output, "output_indexes is only valid for multi-output models!"
222
- # inds = output_indexes.apply(f11, 0)
223
- # out_len = output_indexes_len(output_indexes)
224
- # if output_indexes.startswith("max("):
225
- # output_indexes = np.argsort(-f11)[:out_len]
226
- # elif output_indexes.startswith("min("):
227
- # output_indexes = np.argsort(f11)[:out_len]
228
- # elif output_indexes.startswith("max(abs("):
229
- # output_indexes = np.argsort(np.abs(f11))[:out_len]
230
-
231
- f00 = f00[output_indexes]
232
- f11 = f11[output_indexes]
233
-
234
- q = queue.PriorityQueue()
235
- q.put((0, 0, (m00, f00, f11, ind, 1.0)))
236
- eval_count = 0
237
- total_evals = min(max_evals, (M-1)*M) # TODO: (M-1)*M is only right for balanced clusterings, but this is just for plotting progress...
238
- pbar = None
239
- start_time = time.time()
240
- while not q.empty():
241
-
242
- # if we passed our execution limit then leave everything else on the internal nodes
243
- if eval_count >= max_evals:
244
- while not q.empty():
245
- m00, f00, f11, ind, weight = q.get()[2]
246
- self.dvalues[ind] += (f11 - f00) * weight
247
- break
248
-
249
- # create a batch of work to do
250
- batch_args = []
251
- batch_masks = []
252
- while not q.empty() and len(batch_masks) < batch_size and eval_count + len(batch_masks) < max_evals:
253
-
254
- # get our next set of arguments
255
- m00, f00, f11, ind, weight = q.get()[2]
256
-
257
- # get the left and right children of this cluster
258
- lind = int(self._clustering[ind-M, 0]) if ind >= M else -1
259
- rind = int(self._clustering[ind-M, 1]) if ind >= M else -1
260
-
261
- # get the distance of this cluster's children
262
- if ind < M:
263
- distance = -1
264
- else:
265
- if self._clustering.shape[1] >= 3:
266
- distance = self._clustering[ind-M, 2]
267
- else:
268
- distance = 1
269
-
270
- # check if we are a leaf node (or other negative distance cluster) and so should terminate our descent
271
- if distance < 0:
272
- self.dvalues[ind] += (f11 - f00) * weight
273
- continue
274
-
275
- # build the masks
276
- m10 = m00.copy() # we separate the copy from the add so as to not get converted to a matrix
277
- m10[:] += self._mask_matrix[lind, :]
278
- m01 = m00.copy()
279
- m01[:] += self._mask_matrix[rind, :]
280
-
281
- batch_args.append((m00, m10, m01, f00, f11, ind, lind, rind, weight))
282
- batch_masks.append(m10)
283
- batch_masks.append(m01)
284
-
285
- batch_masks = np.array(batch_masks)
286
-
287
- # run the batch
288
- if len(batch_args) > 0:
289
- fout = fm(batch_masks)
290
- if output_indexes is not None:
291
- fout = fout[:,output_indexes]
292
-
293
- eval_count += len(batch_masks)
294
-
295
- if pbar is None and time.time() - start_time > 5:
296
- pbar = tqdm(total=total_evals, disable=silent, leave=False)
297
- pbar.update(eval_count)
298
- if pbar is not None:
299
- pbar.update(len(batch_masks))
300
-
301
- # use the results of the batch to add new nodes
302
- for i in range(len(batch_args)):
303
-
304
- m00, m10, m01, f00, f11, ind, lind, rind, weight = batch_args[i]
305
-
306
- # get the evaluated model output on the two new masked inputs
307
- f10 = fout[2*i]
308
- f01 = fout[2*i+1]
309
-
310
- new_weight = weight
311
- if fixed_context is None:
312
- new_weight /= 2
313
- elif fixed_context == 0:
314
- self.dvalues[ind] += (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
315
- elif fixed_context == 1:
316
- self.dvalues[ind] -= (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
317
-
318
- if fixed_context is None or fixed_context == 0:
319
- # recurse on the left node with zero context
320
- args = (m00, f00, f10, lind, new_weight)
321
- q.put((-np.max(np.abs(f10 - f00)) * new_weight, np.random.randn(), args))
322
-
323
- # recurse on the right node with zero context
324
- args = (m00, f00, f01, rind, new_weight)
325
- q.put((-np.max(np.abs(f01 - f00)) * new_weight, np.random.randn(), args))
326
-
327
- if fixed_context is None or fixed_context == 1:
328
- # recurse on the left node with one context
329
- args = (m01, f01, f11, lind, new_weight)
330
- q.put((-np.max(np.abs(f11 - f01)) * new_weight, np.random.randn(), args))
331
-
332
- # recurse on the right node with one context
333
- args = (m10, f10, f11, rind, new_weight)
334
- q.put((-np.max(np.abs(f11 - f10)) * new_weight, np.random.randn(), args))
335
-
336
- if pbar is not None:
337
- pbar.close()
338
-
339
- self.last_eval_count = eval_count
340
-
341
- return output_indexes, base_value
342
-
343
- def owen3(self, fm, f00, f11, max_evals, output_indexes, fixed_context, batch_size, silent):
344
- """ Compute a nested set of recursive Owen values based on an ordering recursion.
345
- """
346
-
347
- #f = self._reshaped_model
348
- #r = self.masker
349
- #masks = np.zeros(2*len(inds)+1, dtype=int)
350
- M = len(fm)
351
- m00 = np.zeros(M, dtype=bool)
352
- #f00 = fm(m00.reshape(1,-1))[0]
353
- base_value = f00
354
- #f11 = fm(~m00.reshape(1,-1))[0]
355
- #f11 = self._reshaped_model(r(~m00, x)).mean(0)
356
- ind = len(self.dvalues)-1
357
-
358
- # make sure output_indexes is a list of indexes
359
- if output_indexes is not None:
360
- # assert self.multi_output, "output_indexes is only valid for multi-output models!"
361
- # inds = output_indexes.apply(f11, 0)
362
- # out_len = output_indexes_len(output_indexes)
363
- # if output_indexes.startswith("max("):
364
- # output_indexes = np.argsort(-f11)[:out_len]
365
- # elif output_indexes.startswith("min("):
366
- # output_indexes = np.argsort(f11)[:out_len]
367
- # elif output_indexes.startswith("max(abs("):
368
- # output_indexes = np.argsort(np.abs(f11))[:out_len]
369
-
370
- f00 = f00[output_indexes]
371
- f11 = f11[output_indexes]
372
-
373
- # our starting plan is to evaluate all the nodes with a fixed_context
374
- evals_planned = M
375
-
376
- q = queue.PriorityQueue()
377
- q.put((0, 0, (m00, f00, f11, ind, 1.0, fixed_context))) # (m00, f00, f11, tree_index, weight)
378
- eval_count = 0
379
- total_evals = min(max_evals, (M-1)*M) # TODO: (M-1)*M is only right for balanced clusterings, but this is just for plotting progress...
380
- pbar = None
381
- start_time = time.time()
382
- while not q.empty():
383
-
384
- # if we passed our execution limit then leave everything else on the internal nodes
385
- if eval_count >= max_evals:
386
- while not q.empty():
387
- m00, f00, f11, ind, weight, _ = q.get()[2]
388
- self.dvalues[ind] += (f11 - f00) * weight
389
- break
390
-
391
- # create a batch of work to do
392
- batch_args = []
393
- batch_masks = []
394
- while not q.empty() and len(batch_masks) < batch_size and eval_count < max_evals:
395
-
396
- # get our next set of arguments
397
- m00, f00, f11, ind, weight, context = q.get()[2]
398
-
399
- # get the left and right children of this cluster
400
- lind = int(self._clustering[ind-M, 0]) if ind >= M else -1
401
- rind = int(self._clustering[ind-M, 1]) if ind >= M else -1
402
-
403
- # get the distance of this cluster's children
404
- if ind < M:
405
- distance = -1
406
- else:
407
- distance = self._clustering[ind-M, 2]
408
-
409
- # check if we are a leaf node (or other negative distance cluster) and so should terminate our decent
410
- if distance < 0:
411
- self.dvalues[ind] += (f11 - f00) * weight
412
- continue
413
-
414
- # build the masks
415
- m10 = m00.copy() # we separate the copy from the add so as to not get converted to a matrix
416
- m10[:] += self._mask_matrix[lind, :]
417
- m01 = m00.copy()
418
- m01[:] += self._mask_matrix[rind, :]
419
-
420
- batch_args.append((m00, m10, m01, f00, f11, ind, lind, rind, weight, context))
421
- batch_masks.append(m10)
422
- batch_masks.append(m01)
423
-
424
- batch_masks = np.array(batch_masks)
425
-
426
- # run the batch
427
- if len(batch_args) > 0:
428
- fout = fm(batch_masks)
429
- if output_indexes is not None:
430
- fout = fout[:,output_indexes]
431
-
432
- eval_count += len(batch_masks)
433
-
434
- if pbar is None and time.time() - start_time > 5:
435
- pbar = tqdm(total=total_evals, disable=silent, leave=False)
436
- pbar.update(eval_count)
437
- if pbar is not None:
438
- pbar.update(len(batch_masks))
439
-
440
- # use the results of the batch to add new nodes
441
- for i in range(len(batch_args)):
442
-
443
- m00, m10, m01, f00, f11, ind, lind, rind, weight, context = batch_args[i]
444
-
445
- # get the number of leaves in this cluster
446
- if ind < M:
447
- num_leaves = 0
448
- else:
449
- num_leaves = self._clustering[ind-M, 3]
450
-
451
- # get the evaluated model output on the two new masked inputs
452
- f10 = fout[2*i]
453
- f01 = fout[2*i+1]
454
-
455
- # see if we have enough evaluations left to get both sides of a fixed context
456
- if max_evals - evals_planned > num_leaves:
457
- evals_planned += num_leaves
458
- ignore_context = True
459
- else:
460
- ignore_context = False
461
-
462
- new_weight = weight
463
- if context is None or ignore_context:
464
- new_weight /= 2
465
-
466
- if context is None or context == 0 or ignore_context:
467
- self.dvalues[ind] += (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
468
-
469
- # recurse on the left node with zero context, flip the context for all descendents if we are ignoring it
470
- args = (m00, f00, f10, lind, new_weight, 0 if context == 1 else context)
471
- q.put((-np.max(np.abs(f10 - f00)) * new_weight, np.random.randn(), args))
472
-
473
- # recurse on the right node with zero context, flip the context for all descendents if we are ignoring it
474
- args = (m00, f00, f01, rind, new_weight, 0 if context == 1 else context)
475
- q.put((-np.max(np.abs(f01 - f00)) * new_weight, np.random.randn(), args))
476
-
477
- if context is None or context == 1 or ignore_context:
478
- self.dvalues[ind] -= (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
479
-
480
- # recurse on the left node with one context, flip the context for all descendents if we are ignoring it
481
- args = (m01, f01, f11, lind, new_weight, 1 if context == 0 else context)
482
- q.put((-np.max(np.abs(f11 - f01)) * new_weight, np.random.randn(), args))
483
-
484
- # recurse on the right node with one context, flip the context for all descendents if we are ignoring it
485
- args = (m10, f10, f11, rind, new_weight, 1 if context == 0 else context)
486
- q.put((-np.max(np.abs(f11 - f10)) * new_weight, np.random.randn(), args))
487
-
488
- if pbar is not None:
489
- pbar.close()
490
-
491
- self.last_eval_count = eval_count
492
-
493
- return output_indexes, base_value
494
-
495
-
496
-
497
- # def owen2(self, fm, f00, f11, max_evals, output_indexes, fixed_context, batch_size, silent):
498
- # """ Compute a nested set of recursive Owen values based on an ordering recursion.
499
- # """
500
-
501
- # #f = self._reshaped_model
502
- # #r = self.masker
503
- # #masks = np.zeros(2*len(inds)+1, dtype=int)
504
- # M = len(fm)
505
- # m00 = np.zeros(M, dtype=bool)
506
- # #f00 = fm(m00.reshape(1,-1))[0]
507
- # base_value = f00
508
- # #f11 = fm(~m00.reshape(1,-1))[0]
509
- # #f11 = self._reshaped_model(r(~m00, x)).mean(0)
510
- # ind = len(self.dvalues)-1
511
-
512
- # # make sure output_indexes is a list of indexes
513
- # if output_indexes is not None:
514
- # # assert self.multi_output, "output_indexes is only valid for multi-output models!"
515
- # # inds = output_indexes.apply(f11, 0)
516
- # # out_len = output_indexes_len(output_indexes)
517
- # # if output_indexes.startswith("max("):
518
- # # output_indexes = np.argsort(-f11)[:out_len]
519
- # # elif output_indexes.startswith("min("):
520
- # # output_indexes = np.argsort(f11)[:out_len]
521
- # # elif output_indexes.startswith("max(abs("):
522
- # # output_indexes = np.argsort(np.abs(f11))[:out_len]
523
-
524
- # f00 = f00[output_indexes]
525
- # f11 = f11[output_indexes]
526
-
527
- # fc_owen(m00, m11, 1)
528
- # fc_owen(m00, m11, 0)
529
-
530
- # def fc_owen(m00, m11, context):
531
-
532
- # # recurse on the left node with zero context
533
- # args = (m00, f00, f10, lind, new_weight)
534
- # q.put((-np.max(np.abs(f10 - f00)) * new_weight, np.random.randn(), args))
535
-
536
- # # recurse on the right node with zero context
537
- # args = (m00, f00, f01, rind, new_weight)
538
- # q.put((-np.max(np.abs(f01 - f00)) * new_weight, np.random.randn(), args))
539
- # fc_owen(m00, m11, 1)
540
- # m00 m11
541
- # owen(fc=1)
542
- # owen(fc=0)
543
-
544
- # q = queue.PriorityQueue()
545
- # q.put((0, 0, (m00, f00, f11, ind, 1.0, 1)))
546
- # eval_count = 0
547
- # total_evals = min(max_evals, (M-1)*M) # TODO: (M-1)*M is only right for balanced clusterings, but this is just for plotting progress...
548
- # pbar = None
549
- # start_time = time.time()
550
- # while not q.empty():
551
-
552
- # # if we passed our execution limit then leave everything else on the internal nodes
553
- # if eval_count >= max_evals:
554
- # while not q.empty():
555
- # m00, f00, f11, ind, weight, _ = q.get()[2]
556
- # self.dvalues[ind] += (f11 - f00) * weight
557
- # break
558
-
559
- # # create a batch of work to do
560
- # batch_args = []
561
- # batch_masks = []
562
- # while not q.empty() and len(batch_masks) < batch_size and eval_count < max_evals:
563
-
564
- # # get our next set of arguments
565
- # m00, f00, f11, ind, weight, context = q.get()[2]
566
-
567
- # # get the left and right children of this cluster
568
- # lind = int(self._clustering[ind-M, 0]) if ind >= M else -1
569
- # rind = int(self._clustering[ind-M, 1]) if ind >= M else -1
570
-
571
- # # get the distance of this cluster's children
572
- # if ind < M:
573
- # distance = -1
574
- # else:
575
- # if self._clustering.shape[1] >= 3:
576
- # distance = self._clustering[ind-M, 2]
577
- # else:
578
- # distance = 1
579
-
580
- # # check if we are a leaf node (or other negative distance cluster) and so should terminate our decent
581
- # if distance < 0:
582
- # self.dvalues[ind] += (f11 - f00) * weight
583
- # continue
584
-
585
- # # build the masks
586
- # m10 = m00.copy() # we separate the copy from the add so as to not get converted to a matrix
587
- # m10[:] += self._mask_matrix[lind, :]
588
- # m01 = m00.copy()
589
- # m01[:] += self._mask_matrix[rind, :]
590
-
591
- # batch_args.append((m00, m10, m01, f00, f11, ind, lind, rind, weight, context))
592
- # batch_masks.append(m10)
593
- # batch_masks.append(m01)
594
-
595
- # batch_masks = np.array(batch_masks)
596
-
597
- # # run the batch
598
- # if len(batch_args) > 0:
599
- # fout = fm(batch_masks)
600
- # if output_indexes is not None:
601
- # fout = fout[:,output_indexes]
602
-
603
- # eval_count += len(batch_masks)
604
-
605
- # if pbar is None and time.time() - start_time > 5:
606
- # pbar = tqdm(total=total_evals, disable=silent, leave=False)
607
- # pbar.update(eval_count)
608
- # if pbar is not None:
609
- # pbar.update(len(batch_masks))
610
-
611
- # # use the results of the batch to add new nodes
612
- # for i in range(len(batch_args)):
613
-
614
- # m00, m10, m01, f00, f11, ind, lind, rind, weight, context = batch_args[i]
615
-
616
- # # get the evaluated model output on the two new masked inputs
617
- # f10 = fout[2*i]
618
- # f01 = fout[2*i+1]
619
-
620
- # new_weight = weight
621
- # if fixed_context is None:
622
- # new_weight /= 2
623
- # elif fixed_context == 0:
624
- # self.dvalues[ind] += (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
625
- # elif fixed_context == 1:
626
- # self.dvalues[ind] -= (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
627
-
628
- # if fixed_context is None or fixed_context == 0:
629
- # self.dvalues[ind] += (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
630
-
631
-
632
- # # recurse on the left node with zero context
633
- # args = (m00, f00, f10, lind, new_weight)
634
- # q.put((-np.max(np.abs(f10 - f00)) * new_weight, np.random.randn(), args))
635
-
636
- # # recurse on the right node with zero context
637
- # args = (m00, f00, f01, rind, new_weight)
638
- # q.put((-np.max(np.abs(f01 - f00)) * new_weight, np.random.randn(), args))
639
-
640
- # if fixed_context is None or fixed_context == 1:
641
- # self.dvalues[ind] -= (f11 - f10 - f01 + f00) * weight # leave the interaction effect on the internal node
642
-
643
-
644
- # # recurse on the left node with one context
645
- # args = (m01, f01, f11, lind, new_weight)
646
- # q.put((-np.max(np.abs(f11 - f01)) * new_weight, np.random.randn(), args))
647
-
648
- # # recurse on the right node with one context
649
- # args = (m10, f10, f11, rind, new_weight)
650
- # q.put((-np.max(np.abs(f11 - f10)) * new_weight, np.random.randn(), args))
651
-
652
- # if pbar is not None:
653
- # pbar.close()
654
-
655
- # return output_indexes, base_value
656
-
657
-
658
- def output_indexes_len(output_indexes):
659
- if output_indexes.startswith("max("):
660
- return int(output_indexes[4:-1])
661
- elif output_indexes.startswith("min("):
662
- return int(output_indexes[4:-1])
663
- elif output_indexes.startswith("max(abs("):
664
- return int(output_indexes[8:-2])
665
- elif not isinstance(output_indexes, str):
666
- return len(output_indexes)
667
-
668
- @njit
669
- def lower_credit(i, value, M, values, clustering):
670
- if i < M:
671
- values[i] += value
672
- return
673
- li = int(clustering[i-M,0])
674
- ri = int(clustering[i-M,1])
675
- group_size = int(clustering[i-M,3])
676
- lsize = int(clustering[li-M,3]) if li >= M else 1
677
- rsize = int(clustering[ri-M,3]) if ri >= M else 1
678
- assert lsize+rsize == group_size
679
- values[i] += value
680
- lower_credit(li, values[i] * lsize / group_size, M, values, clustering)
681
- lower_credit(ri, values[i] * rsize / group_size, M, values, clustering)
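For reference, a minimal usage sketch of the deleted `PartitionExplainer` (assuming the public `shap` API; the toy model and data below are illustrative, not from this repository). The masker must expose a `.clustering` attribute, e.g. a `shap.maskers.Partition` masker built from background data:

```python
import numpy as np
import shap

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 6))                         # hypothetical background data

def f(X):
    # toy model with an interaction term
    return X[:, 0] * X[:, 1] + X[:, 2]

# Partition masker = background data + hierarchical clustering of the features
masker = shap.maskers.Partition(X, clustering="correlation")
explainer = shap.PartitionExplainer(f, masker)

explanation = explainer(X[:5], max_evals=200)         # returns an Explanation object
print(explanation.values.shape)                       # (5, 6)
```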
lib/shap/explainers/_permutation.py DELETED
@@ -1,217 +0,0 @@
1
- import warnings
2
-
3
- import numpy as np
4
-
5
- from .. import links
6
- from ..models import Model
7
- from ..utils import MaskedModel, partition_tree_shuffle
8
- from ._explainer import Explainer
9
-
10
-
11
- class PermutationExplainer(Explainer):
12
- """ This method approximates the Shapley values by iterating through permutations of the inputs.
13
-
14
- This is a model agnostic explainer that guarantees local accuracy (additivity) by iterating completely
15
- through an entire permutation of the features in both forward and reverse directions (antithetic sampling).
16
- If we do this once, then we get the exact SHAP values for models with up to second order interaction effects.
17
- We can iterate this many times over many random permutations to get better SHAP value estimates for models
18
- with higher order interactions. This sequential ordering formulation also allows for easy reuse of
19
- model evaluations and the ability to efficiently avoid evaluating the model when the background values
20
- for a feature are the same as the current input value. We can also account for hierarchical data
21
- structures with partition trees, something not currently implemented for KernelExplainer or SamplingExplainer.
22
- """
23
-
24
- def __init__(self, model, masker, link=links.identity, feature_names=None, linearize_link=True, seed=None, **call_args):
25
- """ Build an explainers.Permutation object for the given model using the given masker object.
26
-
27
- Parameters
28
- ----------
29
- model : function
30
- A callable python object that executes the model given a set of input data samples.
31
-
32
- masker : function or numpy.array or pandas.DataFrame
33
- A callable python object used to "mask" out hidden features of the form `masker(binary_mask, x)`.
34
- It takes a single input sample and a binary mask and returns a matrix of masked samples. These
35
- masked samples are evaluated using the model function and the outputs are then averaged.
36
- As a shortcut for the standard masking used by SHAP you can pass a background data matrix
37
- instead of a function and that matrix will be used for masking. To use a clustering
38
- game structure you can pass a shap.maskers.Tabular(data, clustering=\"correlation\") object.
39
-
40
- seed: None or int
41
- Seed for reproducibility
42
-
43
- **call_args : valid argument to the __call__ method
44
- These arguments are saved and passed to the __call__ method as the new default values for these arguments.
45
- """
46
-
47
- # setting seed for random generation: if seed is not None, then shap values computation should be reproducible
48
- np.random.seed(seed)
49
-
50
- if masker is None:
51
- raise ValueError("masker cannot be None.")
52
-
53
- super().__init__(model, masker, link=link, linearize_link=linearize_link, feature_names=feature_names)
54
-
55
- if not isinstance(self.model, Model):
56
- self.model = Model(self.model)
57
-
58
- # if we have gotten default arguments for the call function we need to wrap ourselves in a new class that
59
- # has a call function with those new default arguments
60
- if len(call_args) > 0:
61
- # this signature should match the __call__ signature of the class defined below
62
- class PermutationExplainer(self.__class__):
63
- def __call__(self, *args, max_evals=500, main_effects=False, error_bounds=False, batch_size="auto",
64
- outputs=None, silent=False):
65
- return super().__call__(
66
- *args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
67
- batch_size=batch_size, outputs=outputs, silent=silent
68
- )
69
- PermutationExplainer.__call__.__doc__ = self.__class__.__call__.__doc__
70
- self.__class__ = PermutationExplainer
71
- for k, v in call_args.items():
72
- self.__call__.__kwdefaults__[k] = v
73
-
74
- # note that changes to this function signature should be copied to the default call argument wrapper above
75
- def __call__(self, *args, max_evals=500, main_effects=False, error_bounds=False, batch_size="auto",
76
- outputs=None, silent=False):
77
- """ Explain the output of the model on the given arguments.
78
- """
79
- return super().__call__(
80
- *args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
81
- outputs=outputs, silent=silent
82
- )
83
-
84
- def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, silent):
85
- """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes).
86
- """
87
-
88
- # build a masked version of the model for the current input sample
89
- fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args)
90
-
91
- # by default we run 10 permutations forward and backward
92
- if max_evals == "auto":
93
- max_evals = 10 * 2 * len(fm)
94
-
95
- # compute any custom clustering for this row
96
- row_clustering = None
97
- if getattr(self.masker, "clustering", None) is not None:
98
- if isinstance(self.masker.clustering, np.ndarray):
99
- row_clustering = self.masker.clustering
100
- elif callable(self.masker.clustering):
101
- row_clustering = self.masker.clustering(*row_args)
102
- else:
103
- raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the Permutation explainer!")
104
-
105
- # loop over many permutations
106
- inds = fm.varying_inputs()
107
- inds_mask = np.zeros(len(fm), dtype=bool)
108
- inds_mask[inds] = True
109
- masks = np.zeros(2*len(inds)+1, dtype=int)
110
- masks[0] = MaskedModel.delta_mask_noop_value
111
- npermutations = max_evals // (2*len(inds)+1)
112
- row_values = None
113
- row_values_history = None
114
- history_pos = 0
115
- main_effect_values = None
116
- if len(inds) > 0:
117
- for _ in range(npermutations):
118
-
119
- # shuffle the indexes so we get a random permutation ordering
120
- if row_clustering is not None:
121
- # [TODO] This shuffle does not work when inds is not a complete set of integers from 0 to M. TODO: still true?
122
- #assert len(inds) == len(fm), "Need to support partition shuffle when not all the inds vary!!"
123
- partition_tree_shuffle(inds, inds_mask, row_clustering)
124
- else:
125
- np.random.shuffle(inds)
126
-
127
- # create a large batch of masks to evaluate
128
- i = 1
129
- for ind in inds:
130
- masks[i] = ind
131
- i += 1
132
- for ind in inds:
133
- masks[i] = ind
134
- i += 1
135
-
136
- # evaluate the masked model
137
- outputs = fm(masks, zero_index=0, batch_size=batch_size)
138
-
139
- if row_values is None:
140
- row_values = np.zeros((len(fm),) + outputs.shape[1:])
141
-
142
- if error_bounds:
143
- row_values_history = np.zeros((2 * npermutations, len(fm),) + outputs.shape[1:])
144
-
145
- # update our SHAP value estimates
146
- i = 0
147
- for ind in inds: # forward
148
- row_values[ind] += outputs[i + 1] - outputs[i]
149
- if error_bounds:
150
- row_values_history[history_pos][ind] = outputs[i + 1] - outputs[i]
151
- i += 1
152
- history_pos += 1
153
- for ind in inds: # backward
154
- row_values[ind] += outputs[i] - outputs[i + 1]
155
- if error_bounds:
156
- row_values_history[history_pos][ind] = outputs[i] - outputs[i + 1]
157
- i += 1
158
- history_pos += 1
159
-
160
- if npermutations == 0:
161
- raise ValueError(f"max_evals={max_evals} is too low for the Permutation explainer, it must be at least 2 * num_features + 1 = {2 * len(inds) + 1}!")
162
-
163
- expected_value = outputs[0]
164
-
165
- # compute the main effects if we need to
166
- if main_effects:
167
- main_effect_values = fm.main_effects(inds, batch_size=batch_size)
168
- else:
169
- masks = np.zeros(1, dtype=int)
170
- outputs = fm(masks, zero_index=0, batch_size=1)
171
- expected_value = outputs[0]
172
- row_values = np.zeros((len(fm),) + outputs.shape[1:])
173
- if error_bounds:
174
- row_values_history = np.zeros((2 * npermutations, len(fm),) + outputs.shape[1:])
175
-
176
- return {
177
- "values": row_values / (2 * npermutations),
178
- "expected_values": expected_value,
179
- "mask_shapes": fm.mask_shapes,
180
- "main_effects": main_effect_values,
181
- "clustering": row_clustering,
182
- "error_std": None if row_values_history is None else row_values_history.std(0),
183
- "output_names": self.model.output_names if hasattr(self.model, "output_names") else None
184
- }
185
-
186
-
187
- def shap_values(self, X, npermutations=10, main_effects=False, error_bounds=False, batch_evals=True, silent=False):
188
- """ Legacy interface to estimate the SHAP values for a set of samples.
189
-
190
- Parameters
191
- ----------
192
- X : numpy.array or pandas.DataFrame or any scipy.sparse matrix
193
- A matrix of samples (# samples x # features) on which to explain the model's output.
194
-
195
- npermutations : int
196
- Number of times to cycle through all the features, re-evaluating the model at each step.
197
- Each cycle evaluates the model function 2 * (# features + 1) times on a data matrix of
198
- (# background data samples) rows. An exception to this is when PermutationExplainer can
199
- avoid evaluating the model because a feature's value is the same in X and the background
200
- dataset (which is common for example with sparse features).
201
-
202
- Returns
203
- -------
204
- array or list
205
- For models with a single output this returns a matrix of SHAP values
206
- (# samples x # features). Each row sums to the difference between the model output for that
207
- sample and the expected value of the model output (which is stored as expected_value
208
- attribute of the explainer). For models with vector outputs this returns a list
209
- of such matrices, one for each output.
210
- """
211
- warnings.warn("shap_values() is deprecated; use __call__().", DeprecationWarning)
212
-
213
- explanation = self(X, max_evals=npermutations * X.shape[1], main_effects=main_effects)
214
- return explanation.values
215
-
216
- def __str__(self):
217
- return "shap.explainers.PermutationExplainer()"
lib/shap/explainers/_sampling.py DELETED
@@ -1,199 +0,0 @@
1
- import logging
2
-
3
- import numpy as np
4
- import pandas as pd
5
-
6
- from .._explanation import Explanation
7
- from ..utils._exceptions import ExplainerError
8
- from ..utils._legacy import convert_to_instance, match_instance_to_data
9
- from ._kernel import KernelExplainer
10
-
11
- log = logging.getLogger('shap')
12
-
13
-
14
- class SamplingExplainer(KernelExplainer):
15
- """Computes SHAP values using an extension of the Shapley sampling values explanation method
16
- (also known as IME).
17
-
18
- SamplingExplainer computes SHAP values under the assumption of feature independence and is an
19
- extension of the algorithm proposed in "An Efficient Explanation of Individual Classifications
20
- using Game Theory", Erik Strumbelj, Igor Kononenko, JMLR 2010. It is a good alternative to
21
- KernelExplainer when you want to use a large background set (as opposed to a single reference
22
- value for example).
23
-
24
- Parameters
25
- ----------
26
- model : function
27
- User supplied function that takes a matrix of samples (# samples x # features) and
28
- computes the output of the model for those samples. The output can be a vector
29
- (# samples) or a matrix (# samples x # model outputs).
30
-
31
- data : numpy.array or pandas.DataFrame
32
- The background dataset to use for integrating out features. To determine the impact
33
- of a feature, that feature is set to "missing" and the change in the model output
34
- is observed. Since most models aren't designed to handle arbitrary missing data at test
35
- time, we simulate "missing" by replacing the feature with the values it takes in the
36
- background dataset. So if the background dataset is a simple sample of all zeros, then
37
- we would approximate a feature being missing by setting it to zero. Unlike the
38
- KernelExplainer, this data can be the whole training set, even if that is a large set. This
39
- is because SamplingExplainer only samples from this background dataset.
40
- """
41
-
42
- def __init__(self, model, data, **kwargs):
43
- # silence warning about large datasets
44
- level = log.level
45
- log.setLevel(logging.ERROR)
46
- super().__init__(model, data, **kwargs)
47
- log.setLevel(level)
48
-
49
- if str(self.link) != "identity":
50
- emsg = f"SamplingExplainer only supports the identity link, not {self.link}"
51
- raise ValueError(emsg)
52
-
53
- def __call__(self, X, y=None, nsamples=2000):
54
-
55
- if isinstance(X, pd.DataFrame):
56
- feature_names = list(X.columns)
57
- X = X.values
58
- else:
59
- feature_names = None # we can make self.feature_names from background data eventually if we have it
60
-
61
- v = self.shap_values(X, nsamples=nsamples)
62
- if isinstance(v, list):
63
- v = np.stack(v, axis=-1) # put outputs at the end
64
- e = Explanation(v, self.expected_value, X, feature_names=feature_names)
65
- return e
66
-
67
- def explain(self, incoming_instance, **kwargs):
68
- # convert incoming input to a standardized iml object
69
- instance = convert_to_instance(incoming_instance)
70
- match_instance_to_data(instance, self.data)
71
-
72
- if len(self.data.groups) != self.P:
73
- emsg = "SamplingExplainer does not support feature groups!"
74
- raise ExplainerError(emsg)
75
-
76
- # find the feature groups we will test. If a feature does not change from its
77
- # current value then we know it doesn't impact the model
78
- self.varyingInds = self.varying_groups(instance.x)
79
- #self.varyingFeatureGroups = [self.data.groups[i] for i in self.varyingInds]
80
- self.M = len(self.varyingInds)
81
-
82
- # find f(x)
83
- if self.keep_index:
84
- model_out = self.model.f(instance.convert_to_df())
85
- else:
86
- model_out = self.model.f(instance.x)
87
- if isinstance(model_out, (pd.DataFrame, pd.Series)):
88
- model_out = model_out.values[0]
89
- self.fx = model_out[0]
90
-
91
- if not self.vector_out:
92
- self.fx = np.array([self.fx])
93
-
94
- # if no features vary then no feature has an effect
95
- if self.M == 0:
96
- phi = np.zeros((len(self.data.groups), self.D))
97
- phi_var = np.zeros((len(self.data.groups), self.D))
98
-
99
- # if only one feature varies then it has all the effect
100
- elif self.M == 1:
101
- phi = np.zeros((len(self.data.groups), self.D))
102
- phi_var = np.zeros((len(self.data.groups), self.D))
103
- diff = self.fx - self.fnull
104
- for d in range(self.D):
105
- phi[self.varyingInds[0],d] = diff[d]
106
-
107
- # if more than one feature varies then we have to do real work
108
- else:
109
-
110
- # pick a reasonable number of samples if the user didn't specify how many they wanted
111
- self.nsamples = kwargs.get("nsamples", "auto")
112
- if self.nsamples == "auto":
113
- self.nsamples = 1000 * self.M
114
-
115
- min_samples_per_feature = kwargs.get("min_samples_per_feature", 100)
116
- round1_samples = self.nsamples
117
- round2_samples = 0
118
- if round1_samples > self.M * min_samples_per_feature:
119
- round2_samples = round1_samples - self.M * min_samples_per_feature
120
- round1_samples -= round2_samples
121
-
122
- # divide up the samples among the features for round 1
123
- nsamples_each1 = np.ones(self.M, dtype=np.int64) * 2 * (round1_samples // (self.M * 2))
124
- for i in range((round1_samples % (self.M * 2)) // 2):
125
- nsamples_each1[i] += 2
126
-
127
- # explain every feature in round 1
128
- phi = np.zeros((self.P, self.D))
129
- phi_var = np.zeros((self.P, self.D))
130
- self.X_masked = np.zeros((nsamples_each1.max() * 2, self.data.data.shape[1]))
131
- for i,ind in enumerate(self.varyingInds):
132
- phi[ind,:],phi_var[ind,:] = self.sampling_estimate(ind, self.model.f, instance.x, self.data.data, nsamples=nsamples_each1[i])
133
-
134
- # optimally allocate samples according to the variance
135
- if phi_var.sum() == 0:
136
- phi_var += 1 # spread samples uniformly if we found no variability
137
- phi_var /= phi_var.sum(0)[np.newaxis, :]
138
- nsamples_each2 = (phi_var[self.varyingInds,:].mean(1) * round2_samples).astype(int)
139
- for i in range(len(nsamples_each2)):
140
- if nsamples_each2[i] % 2 == 1:
141
- nsamples_each2[i] += 1
142
- for i in range(len(nsamples_each2)):
143
- if nsamples_each2.sum() > round2_samples:
144
- nsamples_each2[i] -= 2
145
- elif nsamples_each2.sum() < round2_samples:
146
- nsamples_each2[i] += 2
147
- else:
148
- break
149
-
150
- self.X_masked = np.zeros((nsamples_each2.max() * 2, self.data.data.shape[1]))
151
- for i,ind in enumerate(self.varyingInds):
152
- if nsamples_each2[i] > 0:
153
- val,var = self.sampling_estimate(ind, self.model.f, instance.x, self.data.data, nsamples=nsamples_each2[i])
154
-
155
- total_samples = nsamples_each1[i] + nsamples_each2[i]
156
- phi[ind,:] = (phi[ind,:] * nsamples_each1[i] + val * nsamples_each2[i]) / total_samples
157
- phi_var[ind,:] = (phi_var[ind,:] * nsamples_each1[i] + var * nsamples_each2[i]) / total_samples
158
-
159
- # convert from the variance of the differences to the variance of the mean (phi)
160
- for i,ind in enumerate(self.varyingInds):
161
- phi_var[ind,:] /= np.sqrt(nsamples_each1[i] + nsamples_each2[i])
162
-
163
- # correct the sum of the SHAP values to equal the output of the model using a linear
164
- # regression model with priors of the coefficients equal to the estimated variances for each
165
- # SHAP value (note that 1e6 is designed to increase the weight of the sample and so closely
166
- # match the correct sum)
167
- sum_error = self.fx - phi.sum(0) - self.fnull
168
- for i in range(self.D):
169
- # this is a ridge regression with one sample of all ones with sum_error[i] as the label
170
- # and 1/v as the ridge penalties. This simplified (and stable) form comes from the
171
- # Sherman-Morrison formula
172
- v = (phi_var[:,i] / phi_var[:,i].max()) * 1e6
173
- adj = sum_error[i] * (v - (v * v.sum()) / (1 + v.sum()))
174
- phi[:,i] += adj
175
-
176
- if phi.shape[1] == 1:
177
- phi = phi[:,0]
178
-
179
- return phi
180
-
181
- def sampling_estimate(self, j, f, x, X, nsamples=10):
182
- X_masked = self.X_masked[:nsamples * 2,:]
183
- inds = np.arange(X.shape[1])
184
-
185
- for i in range(0, nsamples):
186
- np.random.shuffle(inds)
187
- pos = np.where(inds == j)[0][0]
188
- rind = np.random.randint(X.shape[0])
189
- X_masked[i, :] = x
190
- X_masked[i, inds[pos+1:]] = X[rind, inds[pos+1:]]
191
- X_masked[-(i+1), :] = x
192
- X_masked[-(i+1), inds[pos:]] = X[rind, inds[pos:]]
193
-
194
- evals = f(X_masked)
195
- evals_on = evals[:nsamples]
196
- evals_off = evals[nsamples:][::-1]
197
- d = evals_on - evals_off
198
-
199
- return np.mean(d, 0), np.var(d, 0)
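Finally, a minimal usage sketch of the deleted `SamplingExplainer` (assuming the public `shap` API; data and model are illustrative). Unlike `KernelExplainer`, it can take a large background dataset directly, because it only samples from that set:

```python
import numpy as np
import shap

rng = np.random.default_rng(0)
X = rng.normal(size=(2000, 5))                        # a large background set is fine here

def f(X):
    return X[:, 0] - 2.0 * X[:, 1] + X[:, 2] * X[:, 3]

explainer = shap.SamplingExplainer(f, X)
explanation = explainer(X[:3], nsamples=2000)         # Explanation with values of shape (3, 5)

# the ridge adjustment in explain() above makes the values approximately additive
print(explanation.values.sum(axis=1) + explainer.expected_value)
print(f(X[:3]))
```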