diff --git a/analysis/shap_model.py b/analysis/shap_model.py index 3ced9109c65c7a9a503f8f22daa8070763edf24c..0f6976d972a72b1e93ccbae377cb383f83a26200 100644 --- a/analysis/shap_model.py +++ b/analysis/shap_model.py @@ -1,6 +1,7 @@ -import shap import matplotlib.pyplot as plt +import lib.shap as shap + def shap_calculate(model, x, feature_names): explainer = shap.Explainer(model.predict, x) diff --git a/diagram/__init__.py b/diagram/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lib/shap/__init__.py b/lib/shap/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..622f0eb97d264a6c8f2a634f9ddd2347f36b70f7 --- /dev/null +++ b/lib/shap/__init__.py @@ -0,0 +1,144 @@ +from ._explanation import Cohorts, Explanation + +# explainers +from .explainers import other +from .explainers._additive import AdditiveExplainer +from .explainers._deep import DeepExplainer +from .explainers._exact import ExactExplainer +from .explainers._explainer import Explainer +from .explainers._gpu_tree import GPUTreeExplainer +from .explainers._gradient import GradientExplainer +from .explainers._kernel import KernelExplainer +from .explainers._linear import LinearExplainer +from .explainers._partition import PartitionExplainer +from .explainers._permutation import PermutationExplainer +from .explainers._sampling import SamplingExplainer +from .explainers._tree import TreeExplainer + +try: + # Version from setuptools-scm + from ._version import version as __version__ +except ImportError: + # Expected when running locally without build + __version__ = "0.0.0-not-built" + +_no_matplotlib_warning = "matplotlib is not installed so plotting is not available! Run `pip install matplotlib` " \ + "to fix this." + + +# plotting (only loaded if matplotlib is present) +def unsupported(*args, **kwargs): + raise ImportError(_no_matplotlib_warning) + + +class UnsupportedModule: + def __getattribute__(self, item): + raise ImportError(_no_matplotlib_warning) + + +try: + import matplotlib # noqa: F401 + have_matplotlib = True +except ImportError: + have_matplotlib = False +if have_matplotlib: + from . 
import plots + from .plots._bar import bar_legacy as bar_plot + from .plots._beeswarm import summary_legacy as summary_plot + from .plots._decision import decision as decision_plot + from .plots._decision import multioutput_decision as multioutput_decision_plot + from .plots._embedding import embedding as embedding_plot + from .plots._force import force as force_plot + from .plots._force import getjs, initjs, save_html + from .plots._group_difference import group_difference as group_difference_plot + from .plots._heatmap import heatmap as heatmap_plot + from .plots._image import image as image_plot + from .plots._monitoring import monitoring as monitoring_plot + from .plots._partial_dependence import partial_dependence as partial_dependence_plot + from .plots._scatter import dependence_legacy as dependence_plot + from .plots._text import text as text_plot + from .plots._violin import violin as violin_plot + from .plots._waterfall import waterfall as waterfall_plot +else: + bar_plot = unsupported + summary_plot = unsupported + decision_plot = unsupported + multioutput_decision_plot = unsupported + embedding_plot = unsupported + force_plot = unsupported + getjs = unsupported + initjs = unsupported + save_html = unsupported + group_difference_plot = unsupported + heatmap_plot = unsupported + image_plot = unsupported + monitoring_plot = unsupported + partial_dependence_plot = unsupported + dependence_plot = unsupported + text_plot = unsupported + violin_plot = unsupported + waterfall_plot = unsupported + # If matplotlib is available, then the plots submodule will be directly available. + # If not, we need to define something that will issue a meaningful warning message + # (rather than ModuleNotFound). + plots = UnsupportedModule() + + +# other stuff :) +from . import datasets, links, utils # noqa: E402 +from .actions._optimizer import ActionOptimizer # noqa: E402 +from .utils import approximate_interactions, sample # noqa: E402 + +#from . import benchmark +from .utils._legacy import kmeans # noqa: E402 + +# Use __all__ to let type checkers know what is part of the public API. 
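+# Illustrative usage of this vendored package (assumes the project root is on
+# sys.path, as in analysis/shap_model.py; `model` and `x` are placeholders, and
+# summary_plot requires matplotlib):
+#
+#     import lib.shap as shap
+#     explainer = shap.Explainer(model.predict, x)
+#     shap_values = explainer(x)
+#     shap.summary_plot(shap_values, x)
+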
+__all__ = [ + "Cohorts", + "Explanation", + + # Explainers + "other", + "AdditiveExplainer", + "DeepExplainer", + "ExactExplainer", + "Explainer", + "GPUTreeExplainer", + "GradientExplainer", + "KernelExplainer", + "LinearExplainer", + "PartitionExplainer", + "PermutationExplainer", + "SamplingExplainer", + "TreeExplainer", + + # Plots + "plots", + "bar_plot", + "summary_plot", + "decision_plot", + "multioutput_decision_plot", + "embedding_plot", + "force_plot", + "getjs", + "initjs", + "save_html", + "group_difference_plot", + "heatmap_plot", + "image_plot", + "monitoring_plot", + "partial_dependence_plot", + "dependence_plot", + "text_plot", + "violin_plot", + "waterfall_plot", + + # Other stuff + "datasets", + "links", + "utils", + "ActionOptimizer", + "approximate_interactions", + "sample", + "kmeans", +] diff --git a/lib/shap/_cext.cp310-win_amd64.pyd b/lib/shap/_cext.cp310-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..5c91703fe2cfb8f7f76402f0f576a9bac07b7e09 Binary files /dev/null and b/lib/shap/_cext.cp310-win_amd64.pyd differ diff --git a/lib/shap/_explanation.py b/lib/shap/_explanation.py new file mode 100644 index 0000000000000000000000000000000000000000..0a67fe4c15acfa3e700560823c435bb5de2563fb --- /dev/null +++ b/lib/shap/_explanation.py @@ -0,0 +1,901 @@ + +import copy +import operator + +import numpy as np +import pandas as pd +import scipy.cluster +import scipy.sparse +import scipy.spatial +import sklearn +from slicer import Alias, Obj, Slicer + +from .utils._exceptions import DimensionError +from .utils._general import OpChain + +op_chain_root = OpChain("shap.Explanation") +class MetaExplanation(type): + """ This metaclass exposes the Explanation object's methods for creating template op chains. + """ + + def __getitem__(cls, item): + return op_chain_root.__getitem__(item) + + @property + def abs(cls): + """ Element-wise absolute value op. + """ + return op_chain_root.abs + + @property + def identity(cls): + """ A no-op. + """ + return op_chain_root.identity + + @property + def argsort(cls): + """ Numpy style argsort. + """ + return op_chain_root.argsort + + @property + def sum(cls): + """ Numpy style sum. + """ + return op_chain_root.sum + + @property + def max(cls): + """ Numpy style max. + """ + return op_chain_root.max + + @property + def min(cls): + """ Numpy style min. + """ + return op_chain_root.min + + @property + def mean(cls): + """ Numpy style mean. + """ + return op_chain_root.mean + + @property + def sample(cls): + """ Numpy style sample. + """ + return op_chain_root.sample + + @property + def hclust(cls): + """ Hierarchical clustering op. + """ + return op_chain_root.hclust + + +class Explanation(metaclass=MetaExplanation): + """ A sliceable set of parallel arrays representing a SHAP explanation. + """ + def __init__( + self, + values, + base_values=None, + data=None, + display_data=None, + instance_names=None, + feature_names=None, + output_names=None, + output_indexes=None, + lower_bounds=None, + upper_bounds=None, + error_std=None, + main_effects=None, + hierarchical_values=None, + clustering=None, + compute_time=None + ): + self.op_history = [] + + self.compute_time = compute_time + + # cloning. 
TODOsomeday: better cloning :) + if issubclass(type(values), Explanation): + e = values + values = e.values + base_values = e.base_values + data = e.data + + self.output_dims = compute_output_dims(values, base_values, data, output_names) + values_shape = _compute_shape(values) + + if output_names is None and len(self.output_dims) == 1: + output_names = [f"Output {i}" for i in range(values_shape[self.output_dims[0]])] + + if len(_compute_shape(feature_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases + if len(values_shape) >= 2 and len(feature_names) == values_shape[1]: + feature_names = Alias(list(feature_names), 1) + elif len(values_shape) >= 1 and len(feature_names) == values_shape[0]: + feature_names = Alias(list(feature_names), 0) + + if len(_compute_shape(output_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases + output_names = Alias(list(output_names), self.output_dims[0]) + # if len(values_shape) >= 1 and len(output_names) == values_shape[0]: + # output_names = Alias(list(output_names), 0) + # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]: + # output_names = Alias(list(output_names), 1) + + if output_names is not None and not isinstance(output_names, Alias): + output_names_order = len(_compute_shape(output_names)) + if output_names_order == 0: + pass + elif output_names_order == 1: + output_names = Obj(output_names, self.output_dims) + elif output_names_order == 2: + output_names = Obj(output_names, [0] + list(self.output_dims)) + else: + raise ValueError("shap.Explanation does not yet support output_names of order greater than 3!") + + if not hasattr(base_values, "__len__") or len(base_values) == 0: + pass + elif len(_compute_shape(base_values)) == len(self.output_dims): + base_values = Obj(base_values, list(self.output_dims)) + else: + base_values = Obj(base_values, [0] + list(self.output_dims)) + + self._s = Slicer( + values=values, + base_values=base_values, + data=list_wrap(data), + display_data=list_wrap(display_data), + instance_names=None if instance_names is None else Alias(instance_names, 0), + feature_names=feature_names, + output_names=output_names, + output_indexes=None if output_indexes is None else (self.output_dims, output_indexes), + lower_bounds=list_wrap(lower_bounds), + upper_bounds=list_wrap(upper_bounds), + error_std=list_wrap(error_std), + main_effects=list_wrap(main_effects), + hierarchical_values=list_wrap(hierarchical_values), + clustering=None if clustering is None else Obj(clustering, [0]) + ) + + @property + def shape(self): + """ Compute the shape over potentially complex data nesting. + """ + return _compute_shape(self._s.values) + + @property + def values(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.values + @values.setter + def values(self, new_values): + self._s.values = new_values + + @property + def base_values(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.base_values + @base_values.setter + def base_values(self, new_base_values): + self._s.base_values = new_base_values + + @property + def data(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.data + @data.setter + def data(self, new_data): + self._s.data = new_data + + @property + def display_data(self): + """ Pass-through from the underlying slicer object. 
+ """ + return self._s.display_data + @display_data.setter + def display_data(self, new_display_data): + if issubclass(type(new_display_data), pd.DataFrame): + new_display_data = new_display_data.values + self._s.display_data = new_display_data + + @property + def instance_names(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.instance_names + + @property + def output_names(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.output_names + @output_names.setter + def output_names(self, new_output_names): + self._s.output_names = new_output_names + + @property + def output_indexes(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.output_indexes + + @property + def feature_names(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.feature_names + @feature_names.setter + def feature_names(self, new_feature_names): + self._s.feature_names = new_feature_names + + @property + def lower_bounds(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.lower_bounds + + @property + def upper_bounds(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.upper_bounds + + @property + def error_std(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.error_std + + @property + def main_effects(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.main_effects + @main_effects.setter + def main_effects(self, new_main_effects): + self._s.main_effects = new_main_effects + + @property + def hierarchical_values(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.hierarchical_values + @hierarchical_values.setter + def hierarchical_values(self, new_hierarchical_values): + self._s.hierarchical_values = new_hierarchical_values + + @property + def clustering(self): + """ Pass-through from the underlying slicer object. + """ + return self._s.clustering + @clustering.setter + def clustering(self, new_clustering): + self._s.clustering = new_clustering + + def cohorts(self, cohorts): + """ Split this explanation into several cohorts. + + Parameters + ---------- + cohorts : int or array + If this is an integer then we auto build that many cohorts using a decision tree. If this is + an array then we treat that as an array of cohort names/ids for each instance. + """ + + if isinstance(cohorts, int): + return _auto_cohorts(self, max_cohorts=cohorts) + if isinstance(cohorts, (list, tuple, np.ndarray)): + cohorts = np.array(cohorts) + return Cohorts(**{name: self[cohorts == name] for name in np.unique(cohorts)}) + raise TypeError("The given set of cohort indicators is not recognized! Please give an array or int.") + + def __repr__(self): + """ Display some basic printable info, but not everything. + """ + out = ".values =\n"+self.values.__repr__() + if self.base_values is not None: + out += "\n\n.base_values =\n"+self.base_values.__repr__() + if self.data is not None: + out += "\n\n.data =\n"+self.data.__repr__() + return out + + def __getitem__(self, item): + """ This adds support for OpChain indexing. 
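+
+        For example, ``exp[:, "age"]`` selects a single named feature column
+        ("age" is an illustrative feature name, not one defined in this module).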
+ """ + new_self = None + if not isinstance(item, tuple): + item = (item,) + + # convert any OpChains or magic strings + pos = -1 + for t in item: + pos += 1 + + # skip over Ellipsis + if t is Ellipsis: + pos += len(self.shape) - len(item) + continue + + orig_t = t + if issubclass(type(t), OpChain): + t = t.apply(self) + if issubclass(type(t), (np.int64, np.int32)): # because slicer does not like numpy indexes + t = int(t) + elif issubclass(type(t), np.ndarray): + t = [int(v) for v in t] # slicer wants lists not numpy arrays for indexing + elif issubclass(type(t), Explanation): + t = t.values + elif isinstance(t, str): + + # work around for 2D output_names since they are not yet slicer supported + output_names_dims = [] + if "output_names" in self._s._objects: + output_names_dims = self._s._objects["output_names"].dim + elif "output_names" in self._s._aliases: + output_names_dims = self._s._aliases["output_names"].dim + if pos != 0 and pos in output_names_dims: + if len(output_names_dims) == 1: + t = np.argwhere(np.array(self.output_names) == t)[0][0] + elif len(output_names_dims) == 2: + new_values = [] + new_base_values = [] + new_data = [] + new_self = copy.deepcopy(self) + for i, v in enumerate(self.values): + for j, s in enumerate(self.output_names[i]): + if s == t: + new_values.append(np.array(v[:,j])) + new_data.append(np.array(self.data[i])) + new_base_values.append(self.base_values[i][j]) + + new_self = Explanation( + np.array(new_values), + np.array(new_base_values), + np.array(new_data), + self.display_data, + self.instance_names, + np.array(new_data), + t, # output_names + self.output_indexes, + self.lower_bounds, + self.upper_bounds, + self.error_std, + self.main_effects, + self.hierarchical_values, + self.clustering + ) + new_self.op_history = copy.copy(self.op_history) + # new_self = copy.deepcopy(self) + # new_self.values = np.array(new_values) + # new_self.base_values = np.array(new_base_values) + # new_self.data = np.array(new_data) + # new_self.output_names = t + # new_self.feature_names = np.array(new_data) + # new_self.clustering = None + + # work around for 2D feature_names since they are not yet slicer supported + feature_names_dims = [] + if "feature_names" in self._s._objects: + feature_names_dims = self._s._objects["feature_names"].dim + if pos != 0 and pos in feature_names_dims and len(feature_names_dims) == 2: + new_values = [] + new_data = [] + for i, val_i in enumerate(self.values): + for s,v,d in zip(self.feature_names[i], val_i, self.data[i]): + if s == t: + new_values.append(v) + new_data.append(d) + new_self = copy.deepcopy(self) + new_self.values = new_values + new_self.data = new_data + new_self.feature_names = t + new_self.clustering = None + # return new_self + + if issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)): + t = int(t) + + if t is not orig_t: + tmp = list(item) + tmp[pos] = t + item = tuple(tmp) + + # call slicer for the real work + item = tuple(v for v in item) # SML I cut out: `if not isinstance(v, str)` + if len(item) == 0: + return new_self + if new_self is None: + new_self = copy.copy(self) + new_self._s = new_self._s.__getitem__(item) + new_self.op_history.append({ + "name": "__getitem__", + "args": (item,), + "prev_shape": self.shape + }) + + return new_self + + def __len__(self): + return self.shape[0] + + def __copy__(self): + new_exp = Explanation( + self.values, + self.base_values, + self.data, + self.display_data, + self.instance_names, + self.feature_names, + self.output_names, + self.output_indexes, + 
self.lower_bounds, + self.upper_bounds, + self.error_std, + self.main_effects, + self.hierarchical_values, + self.clustering + ) + new_exp.op_history = copy.copy(self.op_history) + return new_exp + + def _apply_binary_operator(self, other, binary_op, op_name): + new_exp = self.__copy__() + new_exp.op_history = copy.copy(self.op_history) + new_exp.op_history.append({ + "name": op_name, + "args": (other,), + "prev_shape": self.shape + }) + if isinstance(other, Explanation): + new_exp.values = binary_op(new_exp.values, other.values) + if new_exp.data is not None: + new_exp.data = binary_op(new_exp.data, other.data) + if new_exp.base_values is not None: + new_exp.base_values = binary_op(new_exp.base_values, other.base_values) + else: + new_exp.values = binary_op(new_exp.values, other) + if new_exp.data is not None: + new_exp.data = binary_op(new_exp.data, other) + if new_exp.base_values is not None: + new_exp.base_values = binary_op(new_exp.base_values, other) + return new_exp + + def __add__(self, other): + return self._apply_binary_operator(other, operator.add, "__add__") + + def __radd__(self, other): + return self._apply_binary_operator(other, operator.add, "__add__") + + def __sub__(self, other): + return self._apply_binary_operator(other, operator.sub, "__sub__") + + def __rsub__(self, other): + return self._apply_binary_operator(other, operator.sub, "__sub__") + + def __mul__(self, other): + return self._apply_binary_operator(other, operator.mul, "__mul__") + + def __rmul__(self, other): + return self._apply_binary_operator(other, operator.mul, "__mul__") + + def __truediv__(self, other): + return self._apply_binary_operator(other, operator.truediv, "__truediv__") + + # @property + # def abs(self): + # """ Element-size absolute value operator. + # """ + # new_self = copy.copy(self) + # new_self.values = np.abs(new_self.values) + # new_self.op_history.append({ + # "name": "abs", + # "prev_shape": self.shape + # }) + # return new_self + + def _numpy_func(self, fname, **kwargs): + """ Apply a numpy-style function to this Explanation. 
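+
+        For example, ``Explanation.mean(0)`` delegates here as
+        ``self._numpy_func("mean", axis=0)``, collapsing the instance dimension.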
+        """
+        new_self = copy.copy(self)
+        axis = kwargs.get("axis", None)
+
+        # collapse the slicer to right shape
+        if axis == 0:
+            new_self = new_self[0]
+        elif axis == 1:
+            new_self = new_self[1]
+        elif axis == 2:
+            new_self = new_self[2]
+        if axis in [0,1,2]:
+            new_self.op_history = new_self.op_history[:-1] # pop off the slicing operation we just used
+
+        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
+            new_values = self._flatten_feature_names()
+            new_self.feature_names = np.array(list(new_values.keys()))
+            new_self.values = np.array([getattr(np, fname)(v,0) for v in new_values.values()])
+            new_self.clustering = None
+        else:
+            new_self.values = getattr(np, fname)(np.array(self.values), **kwargs)
+            if new_self.data is not None:
+                try:
+                    new_self.data = getattr(np, fname)(np.array(self.data), **kwargs)
+                except Exception:
+                    new_self.data = None
+            if new_self.base_values is not None and issubclass(type(axis), int) and len(self.base_values.shape) > axis:
+                new_self.base_values = getattr(np, fname)(self.base_values, **kwargs)
+            elif issubclass(type(axis), int):
+                new_self.base_values = None
+
+        if axis == 0 and self.clustering is not None and len(self.clustering.shape) == 3:
+            if self.clustering.std(0).sum() < 1e-8:
+                new_self.clustering = self.clustering[0]
+            else:
+                new_self.clustering = None
+
+        new_self.op_history.append({
+            "name": fname,
+            "kwargs": kwargs,
+            "prev_shape": self.shape,
+            "collapsed_instances": axis == 0
+        })
+
+        return new_self
+
+    def mean(self, axis):
+        """ Numpy-style mean function.
+        """
+        return self._numpy_func("mean", axis=axis)
+
+    def max(self, axis):
+        """ Numpy-style max function.
+        """
+        return self._numpy_func("max", axis=axis)
+
+    def min(self, axis):
+        """ Numpy-style min function.
+        """
+        return self._numpy_func("min", axis=axis)
+
+    def sum(self, axis=None, grouping=None):
+        """ Numpy-style sum function.
+        """
+        if grouping is None:
+            return self._numpy_func("sum", axis=axis)
+        elif axis == 1 or len(self.shape) == 1:
+            return group_features(self, grouping)
+        else:
+            raise DimensionError("Only axis = 1 is supported for grouping right now...")
+
+    def hstack(self, other):
+        """ Stack two explanations column-wise.
+        """
+        assert self.shape[0] == other.shape[0], "Can't hstack explanations with different numbers of rows!"
+        assert np.max(np.abs(self.base_values - other.base_values)) < 1e-6, "Can't hstack explanations with different base values!"
+
+        new_exp = Explanation(
+            values=np.hstack([self.values, other.values]),
+            base_values=self.base_values,
+            data=self.data,
+            display_data=self.display_data,
+            instance_names=self.instance_names,
+            feature_names=self.feature_names,
+            output_names=self.output_names,
+            output_indexes=self.output_indexes,
+            lower_bounds=self.lower_bounds,
+            upper_bounds=self.upper_bounds,
+            error_std=self.error_std,
+            main_effects=self.main_effects,
+            hierarchical_values=self.hierarchical_values,
+            clustering=self.clustering,
+        )
+        return new_exp
+
+    # def reshape(self, *args):
+    #     return self._numpy_func("reshape", newshape=args)
+
+    @property
+    def abs(self):
+        return self._numpy_func("abs")
+
+    @property
+    def identity(self):
+        return self
+
+    @property
+    def argsort(self):
+        return self._numpy_func("argsort")
+
+    @property
+    def flip(self):
+        return self._numpy_func("flip")
+
+
+    def hclust(self, metric="sqeuclidean", axis=0):
+        """ Computes an optimal leaf ordering sort order using hclustering.
+
+        hclust(metric="sqeuclidean")
+
+        Parameters
+        ----------
+        metric : string
+            A metric supported by scipy clustering.
+
+        axis : int
+            The axis to cluster along.
+        """
+        values = self.values
+
+        if len(values.shape) != 2:
+            raise DimensionError("The hclust order only supports 2D arrays right now!")
+
+        if axis == 1:
+            values = values.T
+
+        # compute a hierarchical clustering and return the optimal leaf ordering
+        D = scipy.spatial.distance.pdist(values, metric)
+        cluster_matrix = scipy.cluster.hierarchy.complete(D)
+        inds = scipy.cluster.hierarchy.leaves_list(scipy.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
+        return inds
+
+    def sample(self, max_samples, replace=False, random_state=0):
+        """ Randomly samples the instances (rows) of the Explanation object.
+
+        Parameters
+        ----------
+        max_samples : int
+            The number of rows to sample. Note that if replace=False then fewer than
+            max_samples will be drawn if explanation.shape[0] < max_samples.
+
+        replace : bool
+            Sample with or without replacement.
+        """
+        prev_seed = np.random.seed(random_state)
+        inds = np.random.choice(self.shape[0], min(max_samples, self.shape[0]), replace=replace)
+        np.random.seed(prev_seed)
+        return self[list(inds)]
+
+    def _flatten_feature_names(self):
+        new_values = {}
+        for i in range(len(self.values)):
+            for s,v in zip(self.feature_names[i], self.values[i]):
+                if s not in new_values:
+                    new_values[s] = []
+                new_values[s].append(v)
+        return new_values
+
+    def _use_data_as_feature_names(self):
+        new_values = {}
+        for i in range(len(self.values)):
+            for s,v in zip(self.data[i], self.values[i]):
+                if s not in new_values:
+                    new_values[s] = []
+                new_values[s].append(v)
+        return new_values
+
+    def percentile(self, q, axis=None):
+        new_self = copy.deepcopy(self)
+        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
+            new_values = self._flatten_feature_names()
+            new_self.feature_names = np.array(list(new_values.keys()))
+            new_self.values = np.array([np.percentile(v, q) for v in new_values.values()])
+            new_self.clustering = None
+        else:
+            new_self.values = np.percentile(new_self.values, q, axis)
+            new_self.data = np.percentile(new_self.data, q, axis)
+        #new_self.data = None
+        new_self.op_history.append({
+            "name": "percentile",
+            "args": (axis,),
+            "prev_shape": self.shape,
+            "collapsed_instances": axis == 0
+        })
+        return new_self
+
+def group_features(shap_values, feature_map):
+    # TODOsomeday: support and deal with clusterings
+    reverse_map = {}
+    for name in feature_map:
+        reverse_map[feature_map[name]] = reverse_map.get(feature_map[name], []) + [name]
+
+    curr_names = shap_values.feature_names
+    sv_new = copy.deepcopy(shap_values)
+    found = {}
+    i = 0
+    rank1 = len(shap_values.shape) == 1
+    for name in curr_names:
+        new_name = feature_map.get(name, name)
+        if new_name in found:
+            continue
+        found[new_name] = True
+
+        new_name = feature_map.get(name, name)
+        cols_to_sum = reverse_map.get(new_name, [new_name])
+        old_inds = [curr_names.index(v) for v in cols_to_sum]
+
+        if rank1:
+            sv_new.values[i] = shap_values.values[old_inds].sum()
+            sv_new.data[i] = shap_values.data[old_inds].sum()
+        else:
+            sv_new.values[:,i] = shap_values.values[:,old_inds].sum(1)
+            sv_new.data[:,i] = shap_values.data[:,old_inds].sum(1)
+        sv_new.feature_names[i] = new_name
+        i += 1
+
+    return Explanation(
+        sv_new.values[:i] if rank1 else sv_new.values[:,:i],
+        base_values = sv_new.base_values,
+        data = sv_new.data[:i] if rank1 else sv_new.data[:,:i],
+        display_data = None if sv_new.display_data is None else (sv_new.display_data[:i] if rank1 else sv_new.display_data[:,:i]),
+        instance_names = None,
+        feature_names = None if sv_new.feature_names is None else sv_new.feature_names[:i],
+        output_names = None,
+        output_indexes = None,
+        lower_bounds = None,
+        upper_bounds = None,
+        error_std = None,
+        main_effects = None,
+        hierarchical_values = None,
+        clustering = None
+    )
+
+def compute_output_dims(values, base_values, data, output_names):
+    """ Uses the passed data to infer which dimensions correspond to the model's output.
+    """
+    values_shape = _compute_shape(values)
+
+    # input shape matches the data shape
+    if data is not None:
+        data_shape = _compute_shape(data)
+
+    # if we are not given any data we assume it would be the same shape as the given values
+    else:
+        data_shape = values_shape
+
+    # output shape is known from the base values or output names
+    if output_names is not None:
+        output_shape = _compute_shape(output_names)
+
+        # if our output_names are per sample then we need to drop the sample dimension here
+        if values_shape[-len(output_shape):] != output_shape and \
+           values_shape[-len(output_shape)+1:] == output_shape[1:] and values_shape[0] == output_shape[0]:
+            output_shape = output_shape[1:]
+
+    elif base_values is not None:
+        output_shape = _compute_shape(base_values)[1:]
+    else:
+        output_shape = tuple()
+
+    interaction_order = len(values_shape) - len(data_shape) - len(output_shape)
+    output_dims = range(len(data_shape) + interaction_order, len(values_shape))
+    return tuple(output_dims)
+
+def is_1d(val):
+    return not (isinstance(val[0], list) or isinstance(val[0], np.ndarray))
+
+class Op:
+    pass
+
+class Percentile(Op):
+    def __init__(self, percentile):
+        self.percentile = percentile
+
+    def add_repr(self, s, verbose=False):
+        return "percentile("+s+", "+str(self.percentile)+")"
+
+def _first_item(x):
+    for item in x:
+        return item
+    return None
+
+def _compute_shape(x):
+    if not hasattr(x, "__len__") or isinstance(x, str):
+        return tuple()
+    elif not scipy.sparse.issparse(x) and len(x) > 0 and isinstance(_first_item(x), str):
+        return (None,)
+    else:
+        if isinstance(x, dict):
+            return (len(x),) + _compute_shape(x[next(iter(x))])
+
+        # 2D arrays we just take their shape as-is
+        if len(getattr(x, "shape", tuple())) > 1:
+            return x.shape
+
+        # 1D arrays we need to look inside
+        if len(x) == 0:
+            return (0,)
+        elif len(x) == 1:
+            return (1,) + _compute_shape(_first_item(x))
+        else:
+            first_shape = _compute_shape(_first_item(x))
+            if first_shape == tuple():
+                return (len(x),)
+            else: # we have an array of arrays...
+                matches = np.ones(len(first_shape), dtype=bool)
+                for i in range(1, len(x)):
+                    shape = _compute_shape(x[i])
+                    assert len(shape) == len(first_shape), "Arrays in Explanation objects must have consistent inner dimensions!"
+                    for j in range(0, len(shape)):
+                        matches[j] &= shape[j] == first_shape[j]
+                return (len(x),) + tuple(first_shape[j] if match else None for j, match in enumerate(matches))
+
+class Cohorts:
+    def __init__(self, **kwargs):
+        self.cohorts = kwargs
+        for k in self.cohorts:
+            assert isinstance(self.cohorts[k], Explanation), "All the arguments to a Cohorts set must be Explanation objects!"
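+        # Illustrative usage (the keyword names are arbitrary cohort labels):
+        #   Cohorts(low_risk=shap_values[mask], high_risk=shap_values[~mask])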
+
+    def __getitem__(self, item):
+        new_cohorts = Cohorts()
+        for k in self.cohorts:
+            new_cohorts.cohorts[k] = self.cohorts[k].__getitem__(item)
+        return new_cohorts
+
+    def __getattr__(self, name):
+        new_cohorts = Cohorts()
+        for k in self.cohorts:
+            new_cohorts.cohorts[k] = getattr(self.cohorts[k], name)
+        return new_cohorts
+
+    def __call__(self, *args, **kwargs):
+        new_cohorts = Cohorts()
+        for k in self.cohorts:
+            new_cohorts.cohorts[k] = self.cohorts[k].__call__(*args, **kwargs)
+        return new_cohorts
+
+    def __repr__(self):
+        return f"<shap._explanation.Cohorts object with {len(self.cohorts)} cohorts of sizes: {[v.shape for v in self.cohorts.values()]}>"
+
+
+def _auto_cohorts(shap_values, max_cohorts):
+    """ This uses a DecisionTreeRegressor to build a group of cohorts with similar SHAP values.
+    """
+
+    # fit a decision tree that well separates the SHAP values
+    m = sklearn.tree.DecisionTreeRegressor(max_leaf_nodes=max_cohorts)
+    m.fit(shap_values.data, shap_values.values)
+
+    # group instances by their decision paths
+    paths = m.decision_path(shap_values.data).toarray()
+    path_names = []
+
+    # mark each instance with a path name
+    for i in range(shap_values.shape[0]):
+        name = ""
+        for j in range(len(paths[i])):
+            if paths[i,j] > 0:
+                feature = m.tree_.feature[j]
+                threshold = m.tree_.threshold[j]
+                val = shap_values.data[i,feature]
+                if feature >= 0:
+                    name += str(shap_values.feature_names[feature])
+                    if val < threshold:
+                        name += " < "
+                    else:
+                        name += " >= "
+                    name += str(threshold) + " & "
+        path_names.append(name[:-3]) # the -3 strips off the last unneeded ' & '
+    path_names = np.array(path_names)
+
+    # split the instances into cohorts by their path names
+    cohorts = {}
+    for name in np.unique(path_names):
+        cohorts[name] = shap_values[path_names == name]
+
+    return Cohorts(**cohorts)
+
+def list_wrap(x):
+    """ A helper to patch things since slicer doesn't handle arrays of arrays (it does handle lists of arrays)
+    """
+    if isinstance(x, np.ndarray) and len(x.shape) == 1 and isinstance(x[0], np.ndarray):
+        return [v for v in x]
+    else:
+        return x
diff --git a/lib/shap/_serializable.py b/lib/shap/_serializable.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9dbde3881d5ad3addebad7f721889192acfa97d
--- /dev/null
+++ b/lib/shap/_serializable.py
@@ -0,0 +1,203 @@
+
+import inspect
+import logging
+import pickle
+
+import cloudpickle
+import numpy as np
+
+log = logging.getLogger('shap')
+
+class Serializable:
+    """ This is the superclass of all serializable objects.
+    """
+
+    def save(self, out_file):
+        """ Save the model to the given file stream.
+        """
+        pickle.dump(type(self), out_file)
+
+    @classmethod
+    def load(cls, in_file, instantiate=True):
+        """ This is meant to be overridden by subclasses and called with super.
+
+        We return constructor argument values when not being instantiated. Since there are no
+        constructor arguments for the Serializable class we just return an empty dictionary.
+        """
+        if instantiate:
+            return cls._instantiated_load(in_file)
+        return {}
+
+    @classmethod
+    def _instantiated_load(cls, in_file, **kwargs):
+        """ This is meant to be overridden by subclasses and called with super.
+
+        We return constructor argument values (we have no values to load in this abstract class).
+        """
+        obj_type = pickle.load(in_file)
+        if obj_type is None:
+            return None
+
+        if not inspect.isclass(obj_type) or (not issubclass(obj_type, cls) and (obj_type is not cls)):
+            raise Exception(f"Invalid object type loaded from file. {obj_type} is not a subclass of {cls}.")
+
+        # here we call the constructor with all the arguments we have loaded
+        constructor_args = obj_type.load(in_file, instantiate=False, **kwargs)
+        used_args = inspect.getfullargspec(obj_type.__init__)[0]
+        return obj_type(**{k: constructor_args[k] for k in constructor_args if k in used_args})
+
+
+class Serializer:
+    """ Save data items to an output stream.
+    """
+    def __init__(self, out_stream, block_name, version):
+        self.out_stream = out_stream
+        self.block_name = block_name
+        self.block_version = version
+        self.serializer_version = 0 # update this when the serializer changes
+
+    def __enter__(self):
+        log.debug("serializer_version = %d", self.serializer_version)
+        pickle.dump(self.serializer_version, self.out_stream)
+        log.debug("block_name = %s", self.block_name)
+        pickle.dump(self.block_name, self.out_stream)
+        log.debug("block_version = %d", self.block_version)
+        pickle.dump(self.block_version, self.out_stream)
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        log.debug("END_BLOCK___")
+        pickle.dump("END_BLOCK___", self.out_stream)
+
+    def save(self, name, value, encoder="auto"):
+        """ Dump a data item to the current output stream.
+        """
+        log.debug("name = %s", name)
+        pickle.dump(name, self.out_stream)
+        if encoder is None or encoder is False:
+            log.debug("encoder_name = %s", "no_encoder")
+            pickle.dump("no_encoder", self.out_stream)
+        elif callable(encoder):
+            log.debug("encoder_name = %s", "custom_encoder")
+            pickle.dump("custom_encoder", self.out_stream)
+            encoder(value, self.out_stream)
+        elif encoder == ".save" or (isinstance(value, Serializable) and encoder == "auto"):
+            log.debug("encoder_name = %s", "serializable.save")
+            pickle.dump("serializable.save", self.out_stream)
+            if len(inspect.getfullargspec(value.save)[0]) == 3: # backward compat for MLflow, can remove 4/1/2021
+                value.save(self.out_stream, value)
+            else:
+                value.save(self.out_stream)
+        elif encoder == "auto":
+            if isinstance(value, (int, float, str)):
+                log.debug("encoder_name = %s", "pickle.dump")
+                pickle.dump("pickle.dump", self.out_stream)
+                pickle.dump(value, self.out_stream)
+            else:
+                log.debug("encoder_name = %s", "cloudpickle.dump")
+                pickle.dump("cloudpickle.dump", self.out_stream)
+                cloudpickle.dump(value, self.out_stream)
+        else:
+            raise ValueError(f"Unknown encoder type '{encoder}' given for serialization!")
+        log.debug("value = %s", str(value))
+
+class Deserializer:
+    """ Load data items from an input stream.
+    """
+
+    def __init__(self, in_stream, block_name, min_version, max_version):
+        self.in_stream = in_stream
+        self.block_name = block_name
+        self.block_min_version = min_version
+        self.block_max_version = max_version
+
+        # update these when the serializer changes
+        self.serializer_min_version = 0
+        self.serializer_max_version = 0
+
+    def __enter__(self):
+
+        # confirm the serializer version
+        serializer_version = pickle.load(self.in_stream)
+        log.debug("serializer_version = %d", serializer_version)
+        if serializer_version < self.serializer_min_version:
+            raise ValueError(
+                f"The file being loaded was saved with a serializer version of {serializer_version}, " + \
+                f"but the current deserializer in SHAP requires at least version {self.serializer_min_version}."
+            )
+        if serializer_version > self.serializer_max_version:
+            raise ValueError(
+                f"The file being loaded was saved with a serializer version of {serializer_version}, " + \
+                f"but the current deserializer in SHAP only supports up to version {self.serializer_max_version}."
+            )
+
+        # confirm the block name
+        block_name = pickle.load(self.in_stream)
+        log.debug("block_name = %s", block_name)
+        if block_name != self.block_name:
+            raise ValueError(
+                f"The next data block in the file being loaded was supposed to be {self.block_name}, " + \
+                f"but the next block found was {block_name}."
+            )
+
+        # confirm the block version
+        block_version = pickle.load(self.in_stream)
+        log.debug("block_version = %d", block_version)
+        if block_version < self.block_min_version:
+            raise ValueError(
+                f"The file being loaded was saved with a block version of {block_version}, " + \
+                f"but the current deserializer in SHAP requires at least version {self.block_min_version}."
+            )
+        if block_version > self.block_max_version:
+            raise ValueError(
+                f"The file being loaded was saved with a block version of {block_version}, " + \
+                f"but the current deserializer in SHAP only supports up to version {self.block_max_version}."
+            )
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        # confirm the block end token
+        for _ in range(100):
+            end_token = pickle.load(self.in_stream)
+            log.debug("end_token = %s", end_token)
+            if end_token == "END_BLOCK___":
+                return
+            self._load_data_value()
+        raise ValueError(
+            f"The data block end token was not found for the block {self.block_name}."
+        )
+
+    def load(self, name, decoder=None):
+        """ Load a data item from the current input stream.
+        """
+        # confirm the block name
+        loaded_name = pickle.load(self.in_stream)
+        log.debug("loaded_name = %s", loaded_name)
+        if loaded_name != name:
+            raise ValueError(
+                f"The next data item in the file being loaded was supposed to be {name}, " + \
+                f"but the next block found was {loaded_name}."
+            ) # We should eventually add support for skipping over unused data items in old formats...
+
+        value = self._load_data_value(decoder)
+        log.debug("value = %s", str(value))
+        return value
+
+    def _load_data_value(self, decoder=None):
+        encoder_name = pickle.load(self.in_stream)
+        log.debug("encoder_name = %s", encoder_name)
+        if encoder_name == "custom_encoder" or callable(decoder):
+            assert callable(decoder), "You must provide a callable custom decoder for custom-encoded data items!"
+            return decoder(self.in_stream)
+        if encoder_name == "no_encoder":
+            return None
+        if encoder_name == "serializable.save":
+            return Serializable.load(self.in_stream)
+        if encoder_name == "numpy.save":
+            return np.load(self.in_stream)
+        if encoder_name == "pickle.dump":
+            return pickle.load(self.in_stream)
+        if encoder_name == "cloudpickle.dump":
+            return cloudpickle.load(self.in_stream)
+
+        raise ValueError(f"Unsupported encoder type found: {encoder_name}")
diff --git a/lib/shap/_version.py b/lib/shap/_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1324f87db53df2a3c353bed6a1f3de7f411afff
--- /dev/null
+++ b/lib/shap/_version.py
@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.44.1'
+__version_tuple__ = version_tuple = (0, 44, 1)
diff --git a/lib/shap/actions/__init__.py b/lib/shap/actions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2b449ca842bab7e94add7dd4d2a3d386644293b
--- /dev/null
+++ b/lib/shap/actions/__init__.py
@@ -0,0 +1,3 @@
+from ._action import Action
+
+__all__ = ["Action"]
diff --git a/lib/shap/actions/_action.py b/lib/shap/actions/_action.py
new file mode 100644
index 0000000000000000000000000000000000000000..6339e0c105e607226f62a64748ef2c8c124b77bc
--- /dev/null
+++ b/lib/shap/actions/_action.py
@@ -0,0 +1,8 @@
+class Action:
+    """ Abstract action class.
+    """
+    def __lt__(self, other_action):
+        return self.cost < other_action.cost
+
+    def __repr__(self):
+        return f"<Action of cost {self.cost}>"
diff --git a/lib/shap/actions/_optimizer.py b/lib/shap/actions/_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..752af3ff286b6717cf5c1d673cec3ea53583281b
--- /dev/null
+++ b/lib/shap/actions/_optimizer.py
@@ -0,0 +1,92 @@
+import copy
+import queue
+import warnings
+
+from ..utils._exceptions import ConvergenceError, InvalidAction
+from ._action import Action
+
+
+class ActionOptimizer:
+    def __init__(self, model, actions):
+        self.model = model
+        warnings.warn(
+            "Note that ActionOptimizer is still in an alpha state and is subject to API changes."
+        )
+        # actions go into mutually exclusive groups
+        self.action_groups = []
+        for group in actions:
+
+            if issubclass(type(group), Action):
+                group._group_index = len(self.action_groups)
+                group._grouped_index = 0
+                self.action_groups.append([copy.copy(group)])
+            elif issubclass(type(group), list):
+                group = sorted([copy.copy(v) for v in group], key=lambda a: a.cost)
+                for i, v in enumerate(group):
+                    v._group_index = len(self.action_groups)
+                    v._grouped_index = i
+                self.action_groups.append(group)
+            else:
+                raise InvalidAction(
+                    "A passed action was not an Action or list of actions!"
+                )
+
+    def __call__(self, *args, max_evals=10000):
+
+        # init our queue with all the least costly actions
+        q = queue.PriorityQueue()
+        for i in range(len(self.action_groups)):
+            group = self.action_groups[i]
+            q.put((group[0].cost, [group[0]]))
+
+        nevals = 0
+        while not q.empty():
+
+            # see if we have exceeded our runtime budget
+            nevals += 1
+            if nevals > max_evals:
+                raise ConvergenceError(
+                    f"Failed to find a solution with max_evals={max_evals}! Try reducing the number of actions or increasing max_evals."
+                )
+
+            # get the next cheapest set of actions we can do
+            cost, actions = q.get()
+
+            # apply those actions
+            args_tmp = copy.deepcopy(args)
+            for a in actions:
+                a(*args_tmp)
+
+            # if the model is now satisfied we are done!!
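+            # (the priority queue pops action sets in order of total cost, so with
+            # non-negative costs the first satisfying set found is also a cheapest one)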
+            v = self.model(*args_tmp)
+            if v:
+                return actions
+
+            # if not then we add all possible follow-on actions to our queue
+            else:
+                for i in range(len(self.action_groups)):
+                    group = self.action_groups[i]
+
+                    # look to see if we already have an action from this group, if so we need to
+                    # move to a more expensive action in the same group
+                    next_ind = 0
+                    prev_in_group = -1
+                    for j, a in enumerate(actions):
+                        if a._group_index == i:
+                            next_ind = max(next_ind, a._grouped_index + 1)
+                            prev_in_group = j
+
+                    # we are adding a new action type
+                    if prev_in_group == -1:
+                        new_actions = actions + [group[next_ind]]
+                    # we are moving from one action to a more expensive one in the same group
+                    elif next_ind < len(group):
+                        new_actions = copy.copy(actions)
+                        new_actions[prev_in_group] = group[next_ind]
+                    # we don't have a more expensive action left in this group
+                    else:
+                        new_actions = None
+
+                    # add the new option to our queue
+                    if new_actions is not None:
+                        q.put((sum([a.cost for a in new_actions]), new_actions))
diff --git a/lib/shap/benchmark/__init__.py b/lib/shap/benchmark/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1669d7d9244e7f91d53c0798a6050f55ff03f4f
--- /dev/null
+++ b/lib/shap/benchmark/__init__.py
@@ -0,0 +1,9 @@
+from ._compute import ComputeTime
+from ._explanation_error import ExplanationError
+from ._result import BenchmarkResult
+from ._sequential import SequentialMasker
+
+# from . import framework
+# from .. import datasets
+
+__all__ = ["ComputeTime", "ExplanationError", "BenchmarkResult", "SequentialMasker"]
diff --git a/lib/shap/benchmark/_compute.py b/lib/shap/benchmark/_compute.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab46ca5d195dc833f040d84dae3f1b5daca36ee8
--- /dev/null
+++ b/lib/shap/benchmark/_compute.py
@@ -0,0 +1,9 @@
+from ._result import BenchmarkResult
+
+
+class ComputeTime:
+    """ Extracts a runtime benchmark result from the passed Explanation.
+    """
+
+    def __call__(self, explanation, name):
+        return BenchmarkResult("compute time", name, value=explanation.compute_time / explanation.shape[0])
diff --git a/lib/shap/benchmark/_explanation_error.py b/lib/shap/benchmark/_explanation_error.py
new file mode 100644
index 0000000000000000000000000000000000000000..d325adcfe5abaea6bd0efa72f8dab23ebab8b157
--- /dev/null
+++ b/lib/shap/benchmark/_explanation_error.py
@@ -0,0 +1,180 @@
+import time
+
+import numpy as np
+from tqdm.auto import tqdm
+
+from shap import Explanation, links
+from shap.maskers import FixedComposite, Image, Text
+from shap.utils import MaskedModel, partition_tree_shuffle
+from shap.utils._exceptions import DimensionError
+
+from ._result import BenchmarkResult
+
+
+class ExplanationError:
+    """ A measure of the explanation error relative to a model's actual output.
+
+    This benchmark metric measures the discrepancy between the output of the model predicted by an
+    attribution explanation vs. the actual output of the model. This discrepancy is measured over
+    many masking patterns drawn from permutations of the input features.
+
+    For explanations (like Shapley values) that explain the difference between one alternative and another
+    (for example a current sample and typical background feature values) there is possible explanation error
+    for every pattern of mixing foreground and background, or in other words every possible masking pattern.
+    In this class we compute the standard deviation over these explanation errors where masking patterns
+    are drawn from prefixes of random feature permutations. This seems natural, and aligns with Shapley value
+    computations, but of course you could choose to summarize explanation errors in other ways as well.
+    """
+
+    def __init__(self, masker, model, *model_args, batch_size=500, num_permutations=10, link=links.identity, linearize_link=True, seed=38923):
+        """ Build a new explanation error benchmarker with the given masker, model, and model args.
+
+        Parameters
+        ----------
+        masker : function or shap.Masker
+            The masker defines how we hide features during the perturbation process.
+
+        model : function or shap.Model
+            The model we want to evaluate explanations against.
+
+        model_args : ...
+            The list of arguments we will give to the model that we will have explained. When we later call this benchmark
+            object we should pass explanations that have been computed on this same data.
+
+        batch_size : int
+            The maximum batch size we should use when calling the model. For some large NLP models this needs to be set
+            lower (at say 1) to avoid running out of GPU memory.
+
+        num_permutations : int
+            How many permutations we will use to estimate the average explanation error for each sample. If you are running
+            this benchmark on a large dataset with many samples then you can reduce this value since the final result is
+            averaged over samples as well and the averages of both directly combine to reduce variance. So for 10k samples
+            num_permutations=1 is appropriate.
+
+        link : function
+            Allows for a non-linear link function to be used to bridge between the model output space and the explanation
+            space.
+
+        linearize_link : bool
+            Non-linear links can destroy additive separation in generalized linear models, so by linearizing the link we can
+            retain additive separation. See upcoming paper/doc for details.
+        """
+
+        self.masker = masker
+        self.model = model
+        self.model_args = model_args
+        self.num_permutations = num_permutations
+        self.link = link
+        self.linearize_link = linearize_link
+        self.batch_size = batch_size
+        self.seed = seed
+
+        # user must give valid masker
+        underlying_masker = masker.masker if isinstance(masker, FixedComposite) else masker
+        if isinstance(underlying_masker, Text):
+            self.data_type = "text"
+        elif isinstance(underlying_masker, Image):
+            self.data_type = "image"
+        else:
+            self.data_type = "tabular"
+
+    def __call__(self, explanation, name, step_fraction=0.01, indices=[], silent=False):
+        """ Run this benchmark on the given explanation.
+        """
+
+        if isinstance(explanation, np.ndarray):
+            attributions = explanation
+        elif isinstance(explanation, Explanation):
+            attributions = explanation.values
+        else:
+            raise ValueError("The passed explanation must be either of type numpy.ndarray or shap.Explanation!")
+
+        if len(attributions) != len(self.model_args[0]):
+            emsg = (
+                "The explanation passed must have the same number of rows as "
+                "the self.model_args that were passed!"
+ ) + raise DimensionError(emsg) + + # it is important that we choose the same permutations for the different explanations we are comparing + # so as to avoid needless noise + old_seed = np.random.seed() + np.random.seed(self.seed) + + pbar = None + start_time = time.time() + svals = [] + mask_vals = [] + + for i, args in enumerate(zip(*self.model_args)): + + if len(args[0].shape) != len(attributions[i].shape): + raise ValueError("The passed explanation must have the same dim as the model_args and must not have a vector output!") + + feature_size = np.prod(attributions[i].shape) + sample_attributions = attributions[i].flatten() + + # compute any custom clustering for this row + row_clustering = None + if getattr(self.masker, "clustering", None) is not None: + if isinstance(self.masker.clustering, np.ndarray): + row_clustering = self.masker.clustering + elif callable(self.masker.clustering): + row_clustering = self.masker.clustering(*args) + else: + raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the ExplanationError benchmark!") + + masked_model = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *args) + + total_values = None + for _ in range(self.num_permutations): + masks = [] + mask = np.zeros(feature_size, dtype=bool) + masks.append(mask.copy()) + ordered_inds = np.arange(feature_size) + + # shuffle the indexes so we get a random permutation ordering + if row_clustering is not None: + inds_mask = np.ones(feature_size, dtype=bool) + partition_tree_shuffle(ordered_inds, inds_mask, row_clustering) + else: + np.random.shuffle(ordered_inds) + + increment = max(1, int(feature_size * step_fraction)) + for j in range(0, feature_size, increment): + mask[ordered_inds[np.arange(j, min(feature_size, j+increment))]] = True + masks.append(mask.copy()) + mask_vals.append(masks) + + values = [] + masks_arr = np.array(masks) + for j in range(0, len(masks_arr), self.batch_size): + values.append(masked_model(masks_arr[j:j + self.batch_size])) + values = np.concatenate(values) + base_value = values[0] + for j, v in enumerate(values): + values[j] = (v - (base_value + np.sum(sample_attributions[masks_arr[j]])))**2 + + if total_values is None: + total_values = values + else: + total_values += values + total_values /= self.num_permutations + + svals.append(total_values) + + if pbar is None and time.time() - start_time > 5: + pbar = tqdm(total=len(self.model_args[0]), disable=silent, leave=False, desc=f"ExplanationError for {name}") + pbar.update(i+1) + if pbar is not None: + pbar.update(1) + + if pbar is not None: + pbar.close() + + svals = np.array(svals) + + # reset the random seed so we don't mess up the caller + np.random.seed(old_seed) + + return BenchmarkResult("explanation error", name, value=np.sqrt(np.sum(total_values)/len(total_values))) diff --git a/lib/shap/benchmark/_result.py b/lib/shap/benchmark/_result.py new file mode 100644 index 0000000000000000000000000000000000000000..0c31e0f0fbd2fca82a56486ee707cc222daf487f --- /dev/null +++ b/lib/shap/benchmark/_result.py @@ -0,0 +1,34 @@ +import numpy as np +import sklearn + +sign_defaults = { + "keep positive": 1, + "keep negative": -1, + "remove positive": -1, + "remove negative": 1, + "compute time": -1, + "keep absolute": -1, # the absolute signs are defaults that make sense when scoring losses + "remove absolute": 1, + "explanation error": -1 +} + +class BenchmarkResult: + """ The result of a benchmark run. 
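+
+    Holds the metric name, the method name, and either a scalar value or a
+    performance curve (curve_x/curve_y, with an optional standard deviation band).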
+ """ + + def __init__(self, metric, method, value=None, curve_x=None, curve_y=None, curve_y_std=None, value_sign=None): + self.metric = metric + self.method = method + self.value = value + self.curve_x = curve_x + self.curve_y = curve_y + self.curve_y_std = curve_y_std + self.value_sign = value_sign + if self.value_sign is None and self.metric in sign_defaults: + self.value_sign = sign_defaults[self.metric] + if self.value is None: + self.value = sklearn.metrics.auc(curve_x, (np.array(curve_y) - curve_y[0])) + + @property + def full_name(self): + return self.method + " " + self.metric diff --git a/lib/shap/benchmark/_sequential.py b/lib/shap/benchmark/_sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..4c5eca2233518fceafc500575f5dbf74bc320304 --- /dev/null +++ b/lib/shap/benchmark/_sequential.py @@ -0,0 +1,332 @@ +import time + +import matplotlib.pyplot as pl +import numpy as np +import pandas as pd +import sklearn +from tqdm.auto import tqdm + +from shap import Explanation, links +from shap.maskers import FixedComposite, Image, Text +from shap.utils import MaskedModel + +from ._result import BenchmarkResult + + +class SequentialMasker: + def __init__(self, mask_type, sort_order, masker, model, *model_args, batch_size=500): + + for arg in model_args: + if isinstance(arg, pd.DataFrame): + raise TypeError("DataFrame arguments dont iterate correctly, pass numpy arrays instead!") + + # convert any DataFrames to numpy arrays + # self.model_arg_cols = [] + # self.model_args = [] + # self.has_df = False + # for arg in model_args: + # if isinstance(arg, pd.DataFrame): + # self.model_arg_cols.append(arg.columns) + # self.model_args.append(arg.values) + # self.has_df = True + # else: + # self.model_arg_cols.append(None) + # self.model_args.append(arg) + + # if self.has_df: + # given_model = model + # def new_model(*args): + # df_args = [] + # for i, arg in enumerate(args): + # if self.model_arg_cols[i] is not None: + # df_args.append(pd.DataFrame(arg, columns=self.model_arg_cols[i])) + # else: + # df_args.append(arg) + # return given_model(*df_args) + # model = new_model + + self.inner = SequentialPerturbation( + model, masker, sort_order, mask_type + ) + self.model_args = model_args + self.batch_size = batch_size + + def __call__(self, explanation, name, **kwargs): + return self.inner(name, explanation, *self.model_args, batch_size=self.batch_size, **kwargs) + +class SequentialPerturbation: + def __init__(self, model, masker, sort_order, perturbation, linearize_link=False): + # self.f = lambda masked, x, index: model.predict(masked) + self.model = model if callable(model) else model.predict + self.masker = masker + self.sort_order = sort_order + self.perturbation = perturbation + self.linearize_link = linearize_link + + # define our sort order + if self.sort_order == "positive": + self.sort_order_map = lambda x: np.argsort(-x) + elif self.sort_order == "negative": + self.sort_order_map = lambda x: np.argsort(x) + elif self.sort_order == "absolute": + self.sort_order_map = lambda x: np.argsort(-abs(x)) + else: + raise ValueError("sort_order must be either \"positive\", \"negative\", or \"absolute\"!") + + # user must give valid masker + underlying_masker = masker.masker if isinstance(masker, FixedComposite) else masker + if isinstance(underlying_masker, Text): + self.data_type = "text" + elif isinstance(underlying_masker, Image): + self.data_type = "image" + else: + self.data_type = "tabular" + #raise ValueError("masker must be for \"tabular\", \"text\", or 
\"image\"!") + + self.score_values = [] + self.score_aucs = [] + self.labels = [] + + def __call__(self, name, explanation, *model_args, percent=0.01, indices=[], y=None, label=None, silent=False, debug_mode=False, batch_size=10): + # if explainer is already the attributions + if isinstance(explanation, np.ndarray): + attributions = explanation + elif isinstance(explanation, Explanation): + attributions = explanation.values + else: + raise ValueError("The passed explanation must be either of type numpy.ndarray or shap.Explanation!") + + assert len(attributions) == len(model_args[0]), "The explanation passed must have the same number of rows as the model_args that were passed!" + + if label is None: + label = "Score %d" % len(self.score_values) + + # convert dataframes + # if isinstance(X, (pd.Series, pd.DataFrame)): + # X = X.values + + # convert all single-sample vectors to matrices + # if not hasattr(attributions[0], "__len__"): + # attributions = np.array([attributions]) + # if not hasattr(X[0], "__len__") and self.data_type == "tabular": + # X = np.array([X]) + + pbar = None + start_time = time.time() + svals = [] + mask_vals = [] + + for i, args in enumerate(zip(*model_args)): + # if self.data_type == "image": + # x_shape, y_shape = attributions[i].shape[0], attributions[i].shape[1] + # feature_size = np.prod([x_shape, y_shape]) + # sample_attributions = attributions[i].mean(2).reshape(feature_size, -1) + # data = X[i].flatten() + # mask_shape = X[i].shape + # else: + feature_size = np.prod(attributions[i].shape) + sample_attributions = attributions[i].flatten() + # data = X[i] + # mask_shape = feature_size + + self.masked_model = MaskedModel(self.model, self.masker, links.identity, self.linearize_link, *args) + + masks = [] + + mask = np.ones(feature_size, dtype=bool) * (self.perturbation == "remove") + masks.append(mask.copy()) + + ordered_inds = self.sort_order_map(sample_attributions) + increment = max(1,int(feature_size*percent)) + for j in range(0, feature_size, increment): + oind_list = [ordered_inds[t] for t in range(j, min(feature_size, j+increment))] + + for oind in oind_list: + if not ((self.sort_order == "positive" and sample_attributions[oind] <= 0) or \ + (self.sort_order == "negative" and sample_attributions[oind] >= 0)): + mask[oind] = self.perturbation == "keep" + + masks.append(mask.copy()) + + mask_vals.append(masks) + + # mask_size = len(range(0, feature_size, increment)) + 1 + values = [] + masks_arr = np.array(masks) + for j in range(0, len(masks_arr), batch_size): + values.append(self.masked_model(masks_arr[j:j + batch_size])) + values = np.concatenate(values) + + svals.append(values) + + if pbar is None and time.time() - start_time > 5: + pbar = tqdm(total=len(model_args[0]), disable=silent, leave=False, desc="SequentialMasker") + pbar.update(i+1) + if pbar is not None: + pbar.update(1) + + if pbar is not None: + pbar.close() + + self.score_values.append(np.array(svals)) + + # if self.sort_order == "negative": + # curve_sign = -1 + # else: + curve_sign = 1 + + self.labels.append(label) + + xs = np.linspace(0, 1, 100) + curves = np.zeros((len(self.score_values[-1]), len(xs))) + for j in range(len(self.score_values[-1])): + xp = np.linspace(0, 1, len(self.score_values[-1][j])) + yp = self.score_values[-1][j] + curves[j,:] = np.interp(xs, xp, yp) + ys = curves.mean(0) + std = curves.std(0) / np.sqrt(curves.shape[0]) + auc = sklearn.metrics.auc(np.linspace(0, 1, len(ys)), curve_sign*(ys-ys[0])) + + if not debug_mode: + return BenchmarkResult(self.perturbation + " " 
+ def score(self, explanation, X, percent=0.01, y=None, label=None, silent=False, debug_mode=False): + ''' + Will be deprecated once MaskedModel support is complete. + ''' + # if explainer is already the attributions + if isinstance(explanation, np.ndarray): + attributions = explanation + elif isinstance(explanation, Explanation): + attributions = explanation.values + else: + raise ValueError("The passed explanation must be either of type numpy.ndarray or shap.Explanation!") + + if label is None: + label = "Score %d" % len(self.score_values) + + # convert dataframes + if isinstance(X, (pd.Series, pd.DataFrame)): + X = X.values + + # convert all single-sample vectors to matrices + if not hasattr(attributions[0], "__len__"): + attributions = np.array([attributions]) + if not hasattr(X[0], "__len__") and self.data_type == "tabular": + X = np.array([X]) + + pbar = None + start_time = time.time() + svals = [] + mask_vals = [] + + for i in range(len(X)): + if self.data_type == "image": + x_shape, y_shape = attributions[i].shape[0], attributions[i].shape[1] + feature_size = np.prod([x_shape, y_shape]) + sample_attributions = attributions[i].mean(2).reshape(feature_size, -1) + else: + feature_size = attributions[i].shape[0] + sample_attributions = attributions[i] + + if len(attributions[i].shape) == 1 or self.data_type == "tabular": + output_size = 1 + else: + output_size = attributions[i].shape[-1] + + for k in range(output_size): + if self.data_type == "image": + mask_shape = X[i].shape + else: + mask_shape = feature_size + + mask = np.ones(mask_shape, dtype=bool) * (self.perturbation == "remove") + masks = [mask.copy()] + + values = np.zeros(feature_size+1) + # masked, data = self.masker(mask, X[i]) + masked = self.masker(mask, X[i]) + data = None + curr_val = self.f(masked, data, k).mean(0) + + values[0] = curr_val + + if output_size != 1: + test_attributions = sample_attributions[:,k] + else: + test_attributions = sample_attributions + + ordered_inds = self.sort_order_map(test_attributions) + increment = max(1, int(feature_size * percent)) + for j in range(0, feature_size, increment): + oind_list = [ordered_inds[t] for t in range(j, min(feature_size, j+increment))] + + for oind in oind_list: + if not ((self.sort_order == "positive" and test_attributions[oind] <= 0) or \ + (self.sort_order == "negative" and test_attributions[oind] >= 0)): + if self.data_type == "image": + xoind, yoind = oind // attributions[i].shape[1], oind % attributions[i].shape[1] + mask[xoind][yoind] = self.perturbation == "keep" + else: + mask[oind] = self.perturbation == "keep" + + masks.append(mask.copy()) + # masked, data = self.masker(mask, X[i]) + masked = self.masker(mask, X[i]) + curr_val = self.f(masked, data, k).mean(0) + + for t in range(j, min(feature_size, j+increment)): + values[t+1] = curr_val + + svals.append(values) + mask_vals.append(masks) + + if pbar is None and time.time() - start_time > 5: + pbar = tqdm(total=len(X), disable=silent, leave=False) + pbar.update(i+1) + if pbar is not None: + pbar.update(1) + + if pbar is not None: + pbar.close() + + self.score_values.append(np.array(svals)) + + if self.sort_order == "negative": + curve_sign = -1 + else: + curve_sign = 1 + + self.labels.append(label) + + xs = np.linspace(0, 1, 100) + curves = np.zeros((len(self.score_values[-1]), len(xs))) + for j in range(len(self.score_values[-1])): + xp = np.linspace(0, 1, len(self.score_values[-1][j])) + yp = self.score_values[-1][j] + curves[j,:] = np.interp(xs, xp, yp) + ys = curves.mean(0) + + if debug_mode: + aucs = [] + for j in range(len(self.score_values[-1])): + curve = curves[j,:] + auc = sklearn.metrics.auc(np.linspace(0, 1, len(curve)), curve_sign*(curve-curve[0])) + aucs.append(auc) + return mask_vals, curves, aucs + else: + auc = sklearn.metrics.auc(np.linspace(0, 1, len(ys)), curve_sign*(ys-ys[0])) + return xs, ys, auc + + def plot(self, xs, ys, auc): + pl.plot(xs, ys, label="AUC %0.4f" % auc) + pl.legend() + xlabel = "Percent Unmasked" if self.perturbation == "keep" else "Percent Masked" + pl.xlabel(xlabel) + pl.ylabel("Model Output") + pl.show()
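For orientation, a minimal end-to-end run might look as follows. This is a sketch only: it assumes the constructor shown earlier in this file takes (model, masker, sort_order, perturbation), as framework.py below instantiates it, and the dataset and model choices are illustrative, not part of this module.

import shap
import xgboost

# illustrative setup (assumed, not part of this module)
X, y = shap.datasets.diabetes()
model = xgboost.XGBRegressor().fit(X, y)
explanation = shap.TreeExplainer(model)(X)

# assumed constructor signature: (model, masker, sort_order, perturbation)
sp = SequentialPerturbation(model.predict, shap.maskers.Independent(X),
                            "positive", "remove")
result = sp("remove positive", explanation, X.values)  # BenchmarkResult with curve_x/curve_y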
diff --git a/lib/shap/benchmark/experiments.py b/lib/shap/benchmark/experiments.py new file mode 100644 index 0000000000000000000000000000000000000000..42d2527673596208246ca19420c9f0cf79c3b04f --- /dev/null +++ b/lib/shap/benchmark/experiments.py @@ -0,0 +1,414 @@ +import copy +import itertools +import os +import pickle +import random +import subprocess +import sys +import time +from multiprocessing import Pool + +from .. import __version__, datasets +from . import metrics, models + +try: + from queue import Queue +except ImportError: + from Queue import Queue +from threading import Lock, Thread + +regression_metrics = [ + "local_accuracy", + "consistency_guarantees", + "keep_positive_mask", + "keep_positive_resample", + #"keep_positive_impute", + "keep_negative_mask", + "keep_negative_resample", + #"keep_negative_impute", + "keep_absolute_mask__r2", + "keep_absolute_resample__r2", + #"keep_absolute_impute__r2", + "remove_positive_mask", + "remove_positive_resample", + #"remove_positive_impute", + "remove_negative_mask", + "remove_negative_resample", + #"remove_negative_impute", + "remove_absolute_mask__r2", + "remove_absolute_resample__r2", + #"remove_absolute_impute__r2" + "runtime", +] + +binary_classification_metrics = [ + "local_accuracy", + "consistency_guarantees", + "keep_positive_mask", + "keep_positive_resample", + #"keep_positive_impute", + "keep_negative_mask", + "keep_negative_resample", + #"keep_negative_impute", + "keep_absolute_mask__roc_auc", + "keep_absolute_resample__roc_auc", + #"keep_absolute_impute__roc_auc", + "remove_positive_mask", + "remove_positive_resample", + #"remove_positive_impute", + "remove_negative_mask", + "remove_negative_resample", + #"remove_negative_impute", + "remove_absolute_mask__roc_auc", + "remove_absolute_resample__roc_auc", + #"remove_absolute_impute__roc_auc" + "runtime", +] + +human_metrics = [ + "human_and_00", + "human_and_01", + "human_and_11", + "human_or_00", + "human_or_01", + "human_or_11", + "human_xor_00", + "human_xor_01", + "human_xor_11", + "human_sum_00", + "human_sum_01", + "human_sum_11" +] + +linear_regress_methods = [ + "linear_shap_corr", + "linear_shap_ind", + "coef", + "random", + "kernel_shap_1000_meanref", + #"kernel_shap_100_meanref", + #"sampling_shap_10000", + "sampling_shap_1000", + "lime_tabular_regression_1000" + #"sampling_shap_100" +] + +linear_classify_methods = [ + # NEED LIME + "linear_shap_corr", + "linear_shap_ind", + "coef", + "random", + "kernel_shap_1000_meanref", + #"kernel_shap_100_meanref", + #"sampling_shap_10000", + "sampling_shap_1000", + #"lime_tabular_regression_1000" + #"sampling_shap_100" +] + +tree_regress_methods = [ + # NEED tree_shap_ind + # NEED split_count?
+ "tree_shap_tree_path_dependent", + "tree_shap_independent_200", + "saabas", + "random", + "tree_gain", + "kernel_shap_1000_meanref", + "mean_abs_tree_shap", + #"kernel_shap_100_meanref", + #"sampling_shap_10000", + "sampling_shap_1000", + "lime_tabular_regression_1000", + "maple" + #"sampling_shap_100" +] + +rf_regress_methods = [ # methods that only support random forest models + "tree_maple" +] + +tree_classify_methods = [ + # NEED tree_shap_ind + # NEED split_count? + "tree_shap_tree_path_dependent", + "tree_shap_independent_200", + "saabas", + "random", + "tree_gain", + "kernel_shap_1000_meanref", + "mean_abs_tree_shap", + #"kernel_shap_100_meanref", + #"sampling_shap_10000", + "sampling_shap_1000", + "lime_tabular_classification_1000", + "maple" + #"sampling_shap_100" +] + +deep_regress_methods = [ + "deep_shap", + "expected_gradients", + "random", + "kernel_shap_1000_meanref", + "sampling_shap_1000", + #"lime_tabular_regression_1000" +] + +deep_classify_methods = [ + "deep_shap", + "expected_gradients", + "random", + "kernel_shap_1000_meanref", + "sampling_shap_1000", + #"lime_tabular_regression_1000" +] + +_experiments = [] +_experiments += [["corrgroups60", "lasso", m, s] for s in regression_metrics for m in linear_regress_methods] +_experiments += [["corrgroups60", "ridge", m, s] for s in regression_metrics for m in linear_regress_methods] +_experiments += [["corrgroups60", "decision_tree", m, s] for s in regression_metrics for m in tree_regress_methods] +_experiments += [["corrgroups60", "random_forest", m, s] for s in regression_metrics for m in (tree_regress_methods + rf_regress_methods)] +_experiments += [["corrgroups60", "gbm", m, s] for s in regression_metrics for m in tree_regress_methods] +_experiments += [["corrgroups60", "ffnn", m, s] for s in regression_metrics for m in deep_regress_methods] + +_experiments += [["independentlinear60", "lasso", m, s] for s in regression_metrics for m in linear_regress_methods] +_experiments += [["independentlinear60", "ridge", m, s] for s in regression_metrics for m in linear_regress_methods] +_experiments += [["independentlinear60", "decision_tree", m, s] for s in regression_metrics for m in tree_regress_methods] +_experiments += [["independentlinear60", "random_forest", m, s] for s in regression_metrics for m in (tree_regress_methods + rf_regress_methods)] +_experiments += [["independentlinear60", "gbm", m, s] for s in regression_metrics for m in tree_regress_methods] +_experiments += [["independentlinear60", "ffnn", m, s] for s in regression_metrics for m in deep_regress_methods] + +_experiments += [["cric", "lasso", m, s] for s in binary_classification_metrics for m in linear_classify_methods] +_experiments += [["cric", "ridge", m, s] for s in binary_classification_metrics for m in linear_classify_methods] +_experiments += [["cric", "decision_tree", m, s] for s in binary_classification_metrics for m in tree_classify_methods] +_experiments += [["cric", "random_forest", m, s] for s in binary_classification_metrics for m in tree_classify_methods] +_experiments += [["cric", "gbm", m, s] for s in binary_classification_metrics for m in tree_classify_methods] +_experiments += [["cric", "ffnn", m, s] for s in binary_classification_metrics for m in deep_classify_methods] + +_experiments += [["human", "decision_tree", m, s] for s in human_metrics for m in tree_regress_methods] + + +def experiments(dataset=None, model=None, method=None, metric=None): + for experiment in _experiments: + if dataset is not None and dataset != experiment[0]: + 
continue + if model is not None and model != experiment[1]: + continue + if method is not None and method != experiment[2]: + continue + if metric is not None and metric != experiment[3]: + continue + yield experiment + +def run_experiment(experiment, use_cache=True, cache_dir="/tmp"): + dataset_name, model_name, method_name, metric_name = experiment + + # see if we have a cached version + cache_id = __gen_cache_id(experiment) + cache_file = os.path.join(cache_dir, cache_id + ".pickle") + if use_cache and os.path.isfile(cache_file): + with open(cache_file, "rb") as f: + #print(cache_id.replace("__", " ") + " ...loaded from cache.") + return pickle.load(f) + + # compute the scores + print(cache_id.replace("__", " ", 4) + " ...") + sys.stdout.flush() + start = time.time() + X, y = getattr(datasets, dataset_name)() + score = getattr(metrics, metric_name)( + X, y, + getattr(models, dataset_name+"__"+model_name), + method_name + ) + print("...took %f seconds.\n" % (time.time() - start)) + + # cache the scores + with open(cache_file, "wb") as f: + pickle.dump(score, f) + + return score + + +def run_experiments_helper(args): + experiment, cache_dir = args + return run_experiment(experiment, cache_dir=cache_dir) + +def run_experiments(dataset=None, model=None, method=None, metric=None, cache_dir="/tmp", nworkers=1): + experiments_arr = list(experiments(dataset=dataset, model=model, method=method, metric=metric)) + if nworkers == 1: + out = list(map(run_experiments_helper, zip(experiments_arr, itertools.repeat(cache_dir)))) + else: + with Pool(nworkers) as pool: + out = pool.map(run_experiments_helper, zip(experiments_arr, itertools.repeat(cache_dir))) + return list(zip(experiments_arr, out)) + + +nexperiments = 0 +total_sent = 0 +total_done = 0 +total_failed = 0 +host_records = {} +worker_lock = Lock() +ssh_conn_per_min_limit = 0 # set as an argument to run_remote_experiments +def __thread_worker(q, host): + global total_sent, total_done + hostname, python_binary = host.split(":") + while True: + + # make sure we are not sending too many ssh connections to the host + # (if we send too many connections ssh throttling will lock us out) + while True: + all_clear = False + + worker_lock.acquire() + try: + if hostname not in host_records: + host_records[hostname] = [] + + if len(host_records[hostname]) < ssh_conn_per_min_limit: + all_clear = True + elif time.time() - host_records[hostname][-ssh_conn_per_min_limit] > 61: + all_clear = True + finally: + worker_lock.release() + + # if we are clear to send a new ssh connection then break + if all_clear: + break + + # if we are not clear then we sleep and try again + time.sleep(5) + + experiment = q.get() + + # if we are not loading from the cache then we note that we have called the host + cache_dir = "/tmp" + cache_file = os.path.join(cache_dir, __gen_cache_id(experiment) + ".pickle") + if not os.path.isfile(cache_file): + worker_lock.acquire() + try: + host_records[hostname].append(time.time()) + finally: + worker_lock.release() + + # record how many we have sent off for execution + worker_lock.acquire() + try: + total_sent += 1 + __print_status() + finally: + worker_lock.release() + + __run_remote_experiment(experiment, hostname, cache_dir=cache_dir, python_binary=python_binary) + + # record how many are finished + worker_lock.acquire() + try: + total_done += 1 + __print_status() + finally: + worker_lock.release() + + q.task_done() + +def __print_status(): + print("Benchmark task %d of %d done (%d failed, %d running)" % (total_done, nexperiments, total_failed, total_sent - total_done), end="\r") + sys.stdout.flush()
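As a usage note, the local driver above composes like this; the dataset and model names must be ones registered in the experiment lists above ("cric" and "gbm" are taken from _experiments):

# run every cric + gbm experiment across 4 worker processes, caching in /tmp
results = run_experiments(dataset="cric", model="gbm", nworkers=4)
for (dataset, model, method, metric), score in results:
    print(dataset, model, method, metric, score)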
+ +def run_remote_experiments(experiments, thread_hosts, rate_limit=10): + """ Use ssh to run the experiments on remote machines in parallel. + + Parameters + ---------- + experiments : iterable + Output of shap.benchmark.experiments(...). + + thread_hosts : list of strings + Each host has the format "host_name:path_to_python_binary" and can appear multiple times + in the list (one for each parallel execution you want on that machine). + + rate_limit : int + How many ssh connections we make per minute to each host (to avoid throttling issues). + """ + + global ssh_conn_per_min_limit + ssh_conn_per_min_limit = rate_limit + + # first we kill any remaining workers from previous runs + # note we don't check_call because pkill kills our ssh call as well + thread_hosts = copy.copy(thread_hosts) + random.shuffle(thread_hosts) + for host in set(thread_hosts): + hostname, _ = host.split(":") + try: + subprocess.run(["ssh", hostname, "pkill -f shap.benchmark.run_experiment"], timeout=15) + except subprocess.TimeoutExpired: + print("Failed to connect to", hostname, "after 15 seconds! Exiting.") + return + + experiments = copy.copy(list(experiments)) + random.shuffle(experiments) # this way all the hard experiments don't get put on one machine + global nexperiments, total_sent, total_done, total_failed, host_records + nexperiments = len(experiments) + total_sent = 0 + total_done = 0 + total_failed = 0 + host_records = {} + + q = Queue() + + for host in thread_hosts: + worker = Thread(target=__thread_worker, args=(q, host)) + worker.daemon = True + worker.start() + + for experiment in experiments: + q.put(experiment) + + q.join() + +def __run_remote_experiment(experiment, remote, cache_dir="/tmp", python_binary="python"): + global total_failed + dataset_name, model_name, method_name, metric_name = experiment + + # see if we have a cached version + cache_id = __gen_cache_id(experiment) + cache_file = os.path.join(cache_dir, cache_id + ".pickle") + if os.path.isfile(cache_file): + with open(cache_file, "rb") as f: + return pickle.load(f) + + # this is just so we don't dump everything at once on a machine + time.sleep(random.uniform(0, 5)) + + # run the benchmark on the remote machine + #start = time.time() + cmd = "CUDA_VISIBLE_DEVICES=\"\" "+python_binary+" -c \"import shap; shap.benchmark.run_experiment(['{}', '{}', '{}', '{}'], cache_dir='{}')\" &> {}/{}.output".format( + dataset_name, model_name, method_name, metric_name, cache_dir, cache_dir, cache_id + ) + try: + subprocess.check_output(["ssh", remote, cmd]) + except subprocess.CalledProcessError as e: + print("The following command failed on %s:" % remote, file=sys.stderr) + print(cmd, file=sys.stderr) + total_failed += 1 + print(e) + return + + # copy the results back + subprocess.check_output(["scp", remote+":"+cache_file, cache_file]) + + if os.path.isfile(cache_file): + with open(cache_file, "rb") as f: + #print(cache_id.replace("__", " ") + " ...loaded from remote after %f seconds" % (time.time() - start)) + return pickle.load(f) + else: + raise FileNotFoundError("Remote benchmark call finished but no local file was found!") + +def __gen_cache_id(experiment): + dataset_name, model_name, method_name, metric_name = experiment + return "v" + "__".join([__version__, dataset_name, model_name, method_name, metric_name])
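A sketch of driving the remote runner above; the host names here are hypothetical, and each "host_name:path_to_python_binary" entry may repeat to get more parallel slots on that machine, per the docstring:

hosts = [
    "gpu-box-1:/opt/conda/bin/python",   # two slots on gpu-box-1 (hypothetical host)
    "gpu-box-1:/opt/conda/bin/python",
    "gpu-box-2:/usr/bin/python3",        # one slot on gpu-box-2 (hypothetical host)
]
run_remote_experiments(experiments(dataset="corrgroups60"), hosts, rate_limit=10)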
diff --git a/lib/shap/benchmark/framework.py b/lib/shap/benchmark/framework.py new file mode 100644 index 0000000000000000000000000000000000000000..3fe1497514aaf10592228ad879f9a3241b5faf0d --- /dev/null +++ b/lib/shap/benchmark/framework.py @@ -0,0 +1,113 @@ +import itertools as it + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from . import perturbation + + +def update(model, attributions, X, y, masker, sort_order, perturbation_method, scores): + metric = perturbation_method + ' ' + sort_order + sp = perturbation.SequentialPerturbation(model, masker, sort_order, perturbation_method) + xs, ys, auc = sp.model_score(attributions, X, y=y) + scores['metrics'].append(metric) + scores['values'][metric] = [xs, ys, auc] + +def get_benchmark(model, attributions, X, y, masker, metrics): + # convert dataframes + if isinstance(X, (pd.Series, pd.DataFrame)): + X = X.values + if isinstance(masker, (pd.Series, pd.DataFrame)): + masker = masker.values + + # record scores per metric + scores = {'metrics': list(), 'values': dict()} + for sort_order, perturbation_method in list(it.product(metrics['sort_order'], metrics['perturbation'])): + update(model, attributions, X, y, masker, sort_order, perturbation_method, scores) + + return scores + +def get_metrics(benchmarks, selection): + # select metrics to plot using selection function + explainer_metrics = set() + for explainer in benchmarks: + scores = benchmarks[explainer] + if len(explainer_metrics) == 0: + explainer_metrics = set(scores['metrics']) + else: + explainer_metrics = selection(explainer_metrics, set(scores['metrics'])) + + return list(explainer_metrics) + +def trend_plot(benchmarks): + explainer_metrics = get_metrics(benchmarks, lambda x, y: x.union(y)) + + # plot all curves if metric exists + for metric in explainer_metrics: + plt.clf() + + for explainer in benchmarks: + scores = benchmarks[explainer] + if metric in scores['values']: + x, y, auc = scores['values'][metric] + plt.plot(x, y, label=f'{round(auc, 3)} - {explainer}') + + xlabel = 'Percent Unmasked' if 'keep' in metric else 'Percent Masked' + + plt.ylabel('Model Output') + plt.xlabel(xlabel) + plt.title(metric) + plt.legend() + plt.show() + +def compare_plot(benchmarks): + explainer_metrics = get_metrics(benchmarks, lambda x, y: x.intersection(y)) + explainers = list(benchmarks.keys()) + num_explainers = len(explainers) + num_metrics = len(explainer_metrics) + + # dummy start to evenly distribute explainers on the left + # can later be replaced by boolean metrics + aucs = dict() + for i in range(num_explainers): + explainer = explainers[i] + aucs[explainer] = [i/(num_explainers-1)] + + # normalize per metric + for metric in explainer_metrics: + max_auc, min_auc = -float('inf'), float('inf') + + for explainer in explainers: + scores = benchmarks[explainer] + _, _, auc = scores['values'][metric] + min_auc = min(auc, min_auc) + max_auc = max(auc, max_auc) + + for explainer in explainers: + scores = benchmarks[explainer] + _, _, auc = scores['values'][metric] + aucs[explainer].append((auc-min_auc)/(max_auc-min_auc)) + + # plot common curves + ax = plt.gca() + for explainer in explainers: + plt.plot(np.linspace(0, 1, len(explainer_metrics)+1), aucs[explainer], '--o') + + ax.tick_params(which='major', axis='both', labelsize=8) + + ax.set_yticks([i/(num_explainers-1) for i in range(0, num_explainers)]) + ax.set_yticklabels(explainers, rotation=0) + + ax.set_xticks(np.linspace(0, 1, num_metrics+1)) + ax.set_xticklabels([' '] + explainer_metrics, rotation=45, ha='right') + + plt.grid(which='major', axis='x', linestyle='--') + plt.tight_layout() + plt.ylabel('Relative Performance of Each Explanation Method') + plt.xlabel('Evaluation Metrics') + plt.title('Explanation Method Performance Across Metrics') + plt.show()
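A sketch of how get_benchmark and the two plots compose. The fitted model, the precomputed attribution arrays (perm_attrs, tree_attrs), and the reuse of X itself as the masker background are assumptions for illustration:

metrics = {"sort_order": ["positive", "negative"], "perturbation": ["keep", "remove"]}
benchmarks = {
    "permutation": get_benchmark(model.predict, perm_attrs, X, y, X, metrics),
    "tree": get_benchmark(model.predict, tree_attrs, X, y, X, metrics),
}
trend_plot(benchmarks)     # one curve per explainer for each metric
compare_plot(benchmarks)   # normalized AUCs across the shared metrics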
diff --git a/lib/shap/benchmark/measures.py b/lib/shap/benchmark/measures.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a0fe291efd5c1cfab4fb9d5542a670e6ef103a --- /dev/null +++ b/lib/shap/benchmark/measures.py @@ -0,0 +1,424 @@ +import warnings + +import numpy as np +import pandas as pd +import sklearn.utils +from tqdm.auto import tqdm + +_remove_cache = {} +def remove_retrain(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is retrained for each test sample with the important features set to a constant. + + If you want to know how important a set of features is you can ask how the model would be + different if those features had never existed. To determine this we can mask those features + across the entire training and test datasets, then retrain the model. If we compare the + output of this retrained model to the original model we can see the effect produced by knowing + the features we masked. Since for individualized explanation methods each test sample has a + different set of most important features we need to retrain the model for every test sample + to get the change in model performance when a specified fraction of the most important features + are withheld. + """ + + warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!") + + # see if we match the last cached call + global _remove_cache + args = (X_train, y_train, X_test, y_test, model_generator, metric) + cache_match = False + if "args" in _remove_cache: + if all(a is b for a,b in zip(_remove_cache["args"], args)) and np.all(_remove_cache["attr_test"] == attr_test): + cache_match = True + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # this is the model we will retrain many times + model_masked = model_generator() + + # mask nmask top features and re-train the model for each test explanation + X_train_tmp = np.zeros(X_train.shape) + X_test_tmp = np.zeros(X_test.shape) + yp_masked_test = np.zeros(y_test.shape) + tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6 + last_nmask = _remove_cache.get("nmask", None) + last_yp_masked_test = _remove_cache.get("yp_masked_test", None) + for i in tqdm(range(len(y_test)), "Retraining for the 'remove' metric"): + if cache_match and last_nmask[i] == nmask[i]: + yp_masked_test[i] = last_yp_masked_test[i] + elif nmask[i] == 0: + yp_masked_test[i] = trained_model.predict(X_test[i:i+1])[0] + else: + # mask out the most important features for this test instance + X_train_tmp[:] = X_train + X_test_tmp[:] = X_test + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_train_tmp[:,ordering[:nmask[i]]] = X_train[:,ordering[:nmask[i]]].mean() + X_test_tmp[i,ordering[:nmask[i]]] = X_train[:,ordering[:nmask[i]]].mean() + + # retrain the model and make a prediction + model_masked.fit(X_train_tmp, y_train) + yp_masked_test[i] = model_masked.predict(X_test_tmp[i:i+1])[0] + + # save our results so the next call to us can be faster when there is redundancy + _remove_cache["nmask"] = nmask + _remove_cache["yp_masked_test"] = yp_masked_test + _remove_cache["attr_test"] = attr_test + _remove_cache["args"] = args + + return metric(y_test, yp_masked_test)
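Every measure below reuses the same ordering idiom: add tiny fixed-seed noise to break attribution ties deterministically, argsort descending, then overwrite the top-nmask features with their training means. In isolation, with toy numbers (const_rand is defined at the bottom of this file):

import numpy as np

attr = np.array([0.3, 0.3, -0.1, 0.8])               # per-feature attributions
ordering = np.argsort(-attr + const_rand(4) * 1e-6)  # most important first, ties broken
x = np.array([1.0, 2.0, 3.0, 4.0])
train_mean = np.zeros(4)
nmask = 2
x[ordering[:nmask]] = train_mean[ordering[:nmask]]   # mask the two most important features
print(x)   # feature 3 and one of the tied features 0/1 are now 0.0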
+def remove_mask(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ Each test sample is masked by setting the important features to a constant. + """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # mask nmask top features for each test explanation + X_test_tmp = X_test.copy() + tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6 + mean_vals = X_train.mean(0) + for i in range(len(y_test)): + if nmask[i] > 0: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_test_tmp[i,ordering[:nmask[i]]] = mean_vals[ordering[:nmask[i]]] + + yp_masked_test = trained_model.predict(X_test_tmp) + + return metric(y_test, yp_masked_test) + +def remove_impute(nmask, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is reevaluated for each test sample with the important features set to an imputed value. + + Note that the imputation is done using a multivariate normality assumption on the dataset. This depends on + being able to estimate the full data covariance matrix (and inverse) accurately. So X_train.shape[0] should + be significantly bigger than X_train.shape[1]. + """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # impute the nmask top features for each test explanation + C = np.cov(X_train.T) + C += np.eye(C.shape[0]) * 1e-6 + X_test_tmp = X_test.copy() + yp_masked_test = np.zeros(y_test.shape) + tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6 + mean_vals = X_train.mean(0) + for i in range(len(y_test)): + if nmask[i] > 0: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + observe_inds = ordering[nmask[i]:] + impute_inds = ordering[:nmask[i]] + + # impute missing data assuming it follows a multivariate normal distribution + Coo_inv = np.linalg.inv(C[observe_inds,:][:,observe_inds]) + Cio = C[impute_inds,:][:,observe_inds] + impute = mean_vals[impute_inds] + Cio @ Coo_inv @ (X_test[i, observe_inds] - mean_vals[observe_inds]) + + X_test_tmp[i, impute_inds] = impute + + yp_masked_test = trained_model.predict(X_test_tmp) + + return metric(y_test, yp_masked_test)
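The imputation above is the standard conditional-Gaussian formula: for observed indices o and imputed indices i, it fills x_i with mu_i + C_io C_oo^{-1} (x_o - mu_o). A self-contained check on synthetic data (all names here are local to the example):

import numpy as np

rng = np.random.default_rng(0)
cov = np.array([[1.0, 0.8, 0.0], [0.8, 1.0, 0.0], [0.0, 0.0, 1.0]])
X_train = rng.multivariate_normal(np.zeros(3), cov, size=5000)
C = np.cov(X_train.T) + np.eye(3) * 1e-6
mu = X_train.mean(0)

x = np.array([2.0, 0.0, -1.0])            # impute feature 1 from features 0 and 2
obs, imp = np.array([0, 2]), np.array([1])
Coo_inv = np.linalg.inv(C[np.ix_(obs, obs)])
Cio = C[np.ix_(imp, obs)]
x[imp] = mu[imp] + Cio @ Coo_inv @ (x[obs] - mu[obs])
print(x[1])   # approximately 0.8 * 2.0 = 1.6, since features 0 and 1 correlate at 0.8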
+ """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # how many samples to take + nsamples = 100 + + # keep nkeep top features for each test explanation + N,M = X_test.shape + X_test_tmp = np.tile(X_test, [1, nsamples]).reshape(nsamples * N, M) + tie_breaking_noise = const_rand(M) * 1e-6 + inds = sklearn.utils.resample(np.arange(N), n_samples=nsamples, random_state=random_state) + for i in range(N): + if nmask[i] > 0: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_test_tmp[i*nsamples:(i+1)*nsamples, ordering[:nmask[i]]] = X_train[inds, :][:, ordering[:nmask[i]]] + + yp_masked_test = trained_model.predict(X_test_tmp) + yp_masked_test = np.reshape(yp_masked_test, (N, nsamples)).mean(1) # take the mean output over all samples + + return metric(y_test, yp_masked_test) + +def batch_remove_retrain(nmask_train, nmask_test, X_train, y_train, X_test, y_test, attr_train, attr_test, model_generator, metric): + """ An approximation of holdout that only retraines the model once. + + This is also called ROAR (RemOve And Retrain) in work by Google. It is much more computationally + efficient that the holdout method because it masks the most important features in every sample + and then retrains the model once, instead of retraining the model for every test sample like + the holdout metric. + """ + + warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!") + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # mask nmask top features for each explanation + X_train_tmp = X_train.copy() + X_train_mean = X_train.mean(0) + tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6 + for i in range(len(y_train)): + if nmask_train[i] > 0: + ordering = np.argsort(-attr_train[i, :] + tie_breaking_noise) + X_train_tmp[i, ordering[:nmask_train[i]]] = X_train_mean[ordering[:nmask_train[i]]] + X_test_tmp = X_test.copy() + for i in range(len(y_test)): + if nmask_test[i] > 0: + ordering = np.argsort(-attr_test[i, :] + tie_breaking_noise) + X_test_tmp[i, ordering[:nmask_test[i]]] = X_train_mean[ordering[:nmask_test[i]]] + + # train the model with all the given features masked + model_masked = model_generator() + model_masked.fit(X_train_tmp, y_train) + yp_test_masked = model_masked.predict(X_test_tmp) + + return metric(y_test, yp_test_masked) + +_keep_cache = {} +def keep_retrain(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is retrained for each test sample with the non-important features set to a constant. + + If you want to know how important a set of features is you can ask how the model would be + different if only those features had existed. To determine this we can mask the other features + across the entire training and test datasets, then retrain the model. If we apply compare the + output of this retrained model to the original model we can see the effect produced by only + knowning the important features. Since for individualized explanation methods each test sample + has a different set of most important features we need to retrain the model for every test sample + to get the change in model performance when a specified fraction of the most important features + are retained. 
+ """ + + warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!") + + # see if we match the last cached call + global _keep_cache + args = (X_train, y_train, X_test, y_test, model_generator, metric) + cache_match = False + if "args" in _keep_cache: + if all(a is b for a,b in zip(_keep_cache["args"], args)) and np.all(_keep_cache["attr_test"] == attr_test): + cache_match = True + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # this is the model we will retrain many times + model_masked = model_generator() + + # keep nkeep top features and re-train the model for each test explanation + X_train_tmp = np.zeros(X_train.shape) + X_test_tmp = np.zeros(X_test.shape) + yp_masked_test = np.zeros(y_test.shape) + tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6 + last_nkeep = _keep_cache.get("nkeep", None) + last_yp_masked_test = _keep_cache.get("yp_masked_test", None) + for i in tqdm(range(len(y_test)), "Retraining for the 'keep' metric"): + if cache_match and last_nkeep[i] == nkeep[i]: + yp_masked_test[i] = last_yp_masked_test[i] + elif nkeep[i] == attr_test.shape[1]: + yp_masked_test[i] = trained_model.predict(X_test[i:i+1])[0] + else: + + # mask out the most important features for this test instance + X_train_tmp[:] = X_train + X_test_tmp[:] = X_test + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_train_tmp[:,ordering[nkeep[i]:]] = X_train[:,ordering[nkeep[i]:]].mean() + X_test_tmp[i,ordering[nkeep[i]:]] = X_train[:,ordering[nkeep[i]:]].mean() + + # retrain the model and make a prediction + model_masked.fit(X_train_tmp, y_train) + yp_masked_test[i] = model_masked.predict(X_test_tmp[i:i+1])[0] + + # save our results so the next call to us can be faster when there is redundancy + _keep_cache["nkeep"] = nkeep + _keep_cache["yp_masked_test"] = yp_masked_test + _keep_cache["attr_test"] = attr_test + _keep_cache["args"] = args + + return metric(y_test, yp_masked_test) + +def keep_mask(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is reevaluated for each test sample with the non-important features set to their mean. + """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # keep nkeep top features for each test explanation + X_test_tmp = X_test.copy() + yp_masked_test = np.zeros(y_test.shape) + tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6 + mean_vals = X_train.mean(0) + for i in range(len(y_test)): + if nkeep[i] < X_test.shape[1]: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_test_tmp[i,ordering[nkeep[i]:]] = mean_vals[ordering[nkeep[i]:]] + + yp_masked_test = trained_model.predict(X_test_tmp) + + return metric(y_test, yp_masked_test) + +def keep_impute(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is reevaluated for each test sample with the non-important features set to an imputed value. + + Note that the imputation is done using a multivariate normality assumption on the dataset. This depends on + being able to estimate the full data covariance matrix (and inverse) accuractly. So X_train.shape[0] should + be significantly bigger than X_train.shape[1]. 
+ """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # keep nkeep top features for each test explanation + C = np.cov(X_train.T) + C += np.eye(C.shape[0]) * 1e-6 + X_test_tmp = X_test.copy() + yp_masked_test = np.zeros(y_test.shape) + tie_breaking_noise = const_rand(X_train.shape[1], random_state) * 1e-6 + mean_vals = X_train.mean(0) + for i in range(len(y_test)): + if nkeep[i] < X_test.shape[1]: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + observe_inds = ordering[:nkeep[i]] + impute_inds = ordering[nkeep[i]:] + + # impute missing data assuming it follows a multivariate normal distribution + Coo_inv = np.linalg.inv(C[observe_inds,:][:,observe_inds]) + Cio = C[impute_inds,:][:,observe_inds] + impute = mean_vals[impute_inds] + Cio @ Coo_inv @ (X_test[i, observe_inds] - mean_vals[observe_inds]) + + X_test_tmp[i, impute_inds] = impute + + yp_masked_test = trained_model.predict(X_test_tmp) + + return metric(y_test, yp_masked_test) + +def keep_resample(nkeep, X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model, random_state): + """ The model is reevaluated for each test sample with the non-important features set to resample background values. + """ # why broken? overwriting? + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # how many samples to take + nsamples = 100 + + # keep nkeep top features for each test explanation + N,M = X_test.shape + X_test_tmp = np.tile(X_test, [1, nsamples]).reshape(nsamples * N, M) + tie_breaking_noise = const_rand(M) * 1e-6 + inds = sklearn.utils.resample(np.arange(N), n_samples=nsamples, random_state=random_state) + for i in range(N): + if nkeep[i] < M: + ordering = np.argsort(-attr_test[i,:] + tie_breaking_noise) + X_test_tmp[i*nsamples:(i+1)*nsamples, ordering[nkeep[i]:]] = X_train[inds, :][:, ordering[nkeep[i]:]] + + yp_masked_test = trained_model.predict(X_test_tmp) + yp_masked_test = np.reshape(yp_masked_test, (N, nsamples)).mean(1) # take the mean output over all samples + + return metric(y_test, yp_masked_test) + +def batch_keep_retrain(nkeep_train, nkeep_test, X_train, y_train, X_test, y_test, attr_train, attr_test, model_generator, metric): + """ An approximation of keep that only retraines the model once. + + This is also called KAR (Keep And Retrain) in work by Google. It is much more computationally + efficient that the keep method because it masks the unimportant features in every sample + and then retrains the model once, instead of retraining the model for every test sample like + the keep metric. 
+ """ + + warnings.warn("The retrain based measures can incorrectly evaluate models in some cases!") + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # mask nkeep top features for each explanation + X_train_tmp = X_train.copy() + X_train_mean = X_train.mean(0) + tie_breaking_noise = const_rand(X_train.shape[1]) * 1e-6 + for i in range(len(y_train)): + if nkeep_train[i] < X_train.shape[1]: + ordering = np.argsort(-attr_train[i, :] + tie_breaking_noise) + X_train_tmp[i, ordering[nkeep_train[i]:]] = X_train_mean[ordering[nkeep_train[i]:]] + X_test_tmp = X_test.copy() + for i in range(len(y_test)): + if nkeep_test[i] < X_test.shape[1]: + ordering = np.argsort(-attr_test[i, :] + tie_breaking_noise) + X_test_tmp[i, ordering[nkeep_test[i]:]] = X_train_mean[ordering[nkeep_test[i]:]] + + # train the model with all the features not given masked + model_masked = model_generator() + model_masked.fit(X_train_tmp, y_train) + yp_test_masked = model_masked.predict(X_test_tmp) + + return metric(y_test, yp_test_masked) + +def local_accuracy(X_train, y_train, X_test, y_test, attr_test, model_generator, metric, trained_model): + """ The how well do the features plus a constant base rate sum up to the model output. + """ + + X_train, X_test = to_array(X_train, X_test) + + # how many features to mask + assert X_train.shape[1] == X_test.shape[1] + + # keep nkeep top features and re-train the model for each test explanation + yp_test = trained_model.predict(X_test) + + return metric(yp_test, strip_list(attr_test).sum(1)) + +def to_array(*args): + return [a.values if isinstance(a, pd.DataFrame) else a for a in args] + +def const_rand(size, seed=23980): + """ Generate a random array with a fixed seed. + """ + old_seed = np.random.seed() + np.random.seed(seed) + out = np.random.rand(size) + np.random.seed(old_seed) + return out + +def const_shuffle(arr, seed=23980): + """ Shuffle an array in-place with a fixed seed. + """ + old_seed = np.random.seed() + np.random.seed(seed) + np.random.shuffle(arr) + np.random.seed(old_seed) + +def strip_list(attrs): + """ This assumes that if you have a list of outputs you just want the second one (the second class is the '1' class). + """ + if isinstance(attrs, list): + return attrs[1] + else: + return attrs diff --git a/lib/shap/benchmark/methods.py b/lib/shap/benchmark/methods.py new file mode 100644 index 0000000000000000000000000000000000000000..f52bd3fa9c6e171d21c277ee9e90d394802c90c6 --- /dev/null +++ b/lib/shap/benchmark/methods.py @@ -0,0 +1,148 @@ +import numpy as np +import sklearn + +from .. import ( + DeepExplainer, + GradientExplainer, + KernelExplainer, + LinearExplainer, + SamplingExplainer, + TreeExplainer, + kmeans, +) +from ..explainers import other +from .models import KerasWrap + + +def linear_shap_corr(model, data): + """ Linear SHAP (corr 1000) + """ + return LinearExplainer(model, data, feature_dependence="correlation", nsamples=1000).shap_values + +def linear_shap_ind(model, data): + """ Linear SHAP (ind) + """ + return LinearExplainer(model, data, feature_dependence="independent").shap_values + +def coef(model, data): + """ Coefficients + """ + return other.CoefficentExplainer(model).attributions + +def random(model, data): + """ Random + color = #777777 + linestyle = solid + """ + return other.RandomExplainer().attributions + +def kernel_shap_1000_meanref(model, data): + """ Kernel SHAP 1000 mean ref. 
+ color = red_blue_circle(0.5) + linestyle = solid + """ + return lambda X: KernelExplainer(model.predict, kmeans(data, 1)).shap_values(X, nsamples=1000, l1_reg=0) + +def sampling_shap_1000(model, data): + """ IME 1000 + color = red_blue_circle(0.5) + linestyle = dashed + """ + return lambda X: SamplingExplainer(model.predict, data).shap_values(X, nsamples=1000) + +def tree_shap_tree_path_dependent(model, data): + """ TreeExplainer + color = red_blue_circle(0) + linestyle = solid + """ + return TreeExplainer(model, feature_dependence="tree_path_dependent").shap_values + +def tree_shap_independent_200(model, data): + """ TreeExplainer (independent) + color = red_blue_circle(0) + linestyle = dashed + """ + data_subsample = sklearn.utils.resample(data, replace=False, n_samples=min(200, data.shape[0]), random_state=0) + return TreeExplainer(model, data_subsample, feature_dependence="independent").shap_values + +def mean_abs_tree_shap(model, data): + """ mean(|TreeExplainer|) + color = red_blue_circle(0.25) + linestyle = solid + """ + def f(X): + v = TreeExplainer(model).shap_values(X) + if isinstance(v, list): + return [np.tile(np.abs(sv).mean(0), (X.shape[0], 1)) for sv in v] + else: + return np.tile(np.abs(v).mean(0), (X.shape[0], 1)) + return f + +def saabas(model, data): + """ Saabas + color = red_blue_circle(0) + linestyle = dotted + """ + return lambda X: TreeExplainer(model).shap_values(X, approximate=True) + +def tree_gain(model, data): + """ Gain/Gini Importance + color = red_blue_circle(0.25) + linestyle = dotted + """ + return other.TreeGainExplainer(model).attributions + +def lime_tabular_regression_1000(model, data): + """ LIME Tabular 1000 + color = red_blue_circle(0.75) + """ + return lambda X: other.LimeTabularExplainer(model.predict, data, mode="regression").attributions(X, nsamples=1000) + +def lime_tabular_classification_1000(model, data): + """ LIME Tabular 1000 + color = red_blue_circle(0.75) + """ + return lambda X: other.LimeTabularExplainer(model.predict_proba, data, mode="classification").attributions(X, nsamples=1000)[1] + +def maple(model, data): + """ MAPLE + color = red_blue_circle(0.6) + """ + return lambda X: other.MapleExplainer(model.predict, data).attributions(X, multiply_by_input=False) + +def tree_maple(model, data): + """ Tree MAPLE + color = red_blue_circle(0.6) + linestyle = dashed + """ + return lambda X: other.TreeMapleExplainer(model, data).attributions(X, multiply_by_input=False) + +def deep_shap(model, data): + """ Deep SHAP (DeepLIFT) + """ + if isinstance(model, KerasWrap): + model = model.model + explainer = DeepExplainer(model, kmeans(data, 1).data) + def f(X): + phi = explainer.shap_values(X) + if isinstance(phi, list) and len(phi) == 1: + return phi[0] + else: + return phi + + return f + +def expected_gradients(model, data): + """ Expected Gradients + """ + if isinstance(model, KerasWrap): + model = model.model + explainer = GradientExplainer(model, data) + def f(X): + phi = explainer.shap_values(X) + if isinstance(phi, list) and len(phi) == 1: + return phi[0] + else: + return phi + + return f diff --git a/lib/shap/benchmark/metrics.py b/lib/shap/benchmark/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..1ff8db7a31d236c4b62e2d90ad067fa70b484b09 --- /dev/null +++ b/lib/shap/benchmark/metrics.py @@ -0,0 +1,824 @@ +import hashlib +import os +import time + +import numpy as np +import sklearn + +from .. import __version__ +from . 
import measures, methods + +try: + import dill as pickle +except Exception: + pass + +try: + from sklearn.model_selection import train_test_split +except Exception: + from sklearn.cross_validation import train_test_split + + +def runtime(X, y, model_generator, method_name): + """ Runtime (sec / 1k samples) + transform = "negate_log" + sort_order = 2 + """ + + old_state = np.random.get_state() + np.random.seed(3293) + + # average the method scores over several train/test splits + method_reps = [] + for i in range(3): + X_train, X_test, y_train, _ = train_test_split(__toarray(X), y, test_size=100, random_state=i) + + # define the model we are going to explain + model = model_generator() + model.fit(X_train, y_train) + + # evaluate each method + start = time.time() + explainer = getattr(methods, method_name)(model, X_train) + build_time = time.time() - start + + start = time.time() + explainer(X_test) + explain_time = time.time() - start + + # we always normalize the explain time as though we were explaining 1000 samples + # even if to reduce the runtime of the benchmark we do less (like just 100) + method_reps.append(build_time + explain_time * 1000.0 / X_test.shape[0]) + np.random.set_state(old_state) + + return None, np.mean(method_reps) + +def local_accuracy(X, y, model_generator, method_name): + """ Local Accuracy + transform = "identity" + sort_order = 0 + """ + + def score_map(true, pred): + """ Computes local accuracy as the normalized standard deviation of numerical scores. + """ + return np.std(pred - true) / (np.std(true) + 1e-6) + + def score_function(X_train, X_test, y_train, y_test, attr_function, trained_model, random_state): + return measures.local_accuracy( + X_train, y_train, X_test, y_test, attr_function(X_test), + model_generator, score_map, trained_model + ) + return None, __score_method(X, y, None, model_generator, score_function, method_name) + +def consistency_guarantees(X, y, model_generator, method_name): + """ Consistency Guarantees + transform = "identity" + sort_order = 1 + """ + + # 1.0 - perfect consistency + # 0.8 - guarantees depend on sampling + # 0.6 - guarantees depend on approximation + # 0.0 - no guarantees + guarantees = { + "linear_shap_corr": 1.0, + "linear_shap_ind": 1.0, + "coef": 0.0, + "kernel_shap_1000_meanref": 0.8, + "sampling_shap_1000": 0.8, + "random": 0.0, + "saabas": 0.0, + "tree_gain": 0.0, + "tree_shap_tree_path_dependent": 1.0, + "tree_shap_independent_200": 1.0, + "mean_abs_tree_shap": 1.0, + "lime_tabular_regression_1000": 0.8, + "lime_tabular_classification_1000": 0.8, + "maple": 0.8, + "tree_maple": 0.8, + "deep_shap": 0.6, + "expected_gradients": 0.6 + } + + return None, guarantees[method_name] + +def __mean_pred(true, pred): + """ A trivial metric that is just the mean of the model output.
+ """ + return np.mean(pred) + +def keep_positive_mask(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Positive (mask) + xlabel = "Max fraction of features kept" + ylabel = "Mean model output" + transform = "identity" + sort_order = 4 + """ + return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def keep_negative_mask(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Negative (mask) + xlabel = "Max fraction of features kept" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 5 + """ + return __run_measure(measures.keep_mask, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def keep_absolute_mask__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (mask) + xlabel = "Max fraction of features kept" + ylabel = "R^2" + transform = "identity" + sort_order = 6 + """ + return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def keep_absolute_mask__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (mask) + xlabel = "Max fraction of features kept" + ylabel = "ROC AUC" + transform = "identity" + sort_order = 6 + """ + return __run_measure(measures.keep_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score) + +def remove_positive_mask(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Positive (mask) + xlabel = "Max fraction of features removed" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 7 + """ + return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def remove_negative_mask(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Negative (mask) + xlabel = "Max fraction of features removed" + ylabel = "Mean model output" + transform = "identity" + sort_order = 8 + """ + return __run_measure(measures.remove_mask, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def remove_absolute_mask__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (mask) + xlabel = "Max fraction of features removed" + ylabel = "1 - R^2" + transform = "one_minus" + sort_order = 9 + """ + return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def remove_absolute_mask__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (mask) + xlabel = "Max fraction of features removed" + ylabel = "1 - ROC AUC" + transform = "one_minus" + sort_order = 9 + """ + return __run_measure(measures.remove_mask, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score) + +def keep_positive_resample(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Positive (resample) + xlabel = "Max fraction of features kept" + ylabel = "Mean model output" + transform = "identity" + sort_order = 10 + """ + return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def keep_negative_resample(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Negative (resample) + xlabel = "Max fraction of features kept" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 11 + """ + return __run_measure(measures.keep_resample, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def 
keep_absolute_resample__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (resample) + xlabel = "Max fraction of features kept" + ylabel = "R^2" + transform = "identity" + sort_order = 12 + """ + return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def keep_absolute_resample__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (resample) + xlabel = "Max fraction of features kept" + ylabel = "ROC AUC" + transform = "identity" + sort_order = 12 + """ + return __run_measure(measures.keep_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score) + +def remove_positive_resample(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Positive (resample) + xlabel = "Max fraction of features removed" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 13 + """ + return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def remove_negative_resample(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Negative (resample) + xlabel = "Max fraction of features removed" + ylabel = "Mean model output" + transform = "identity" + sort_order = 14 + """ + return __run_measure(measures.remove_resample, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def remove_absolute_resample__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (resample) + xlabel = "Max fraction of features removed" + ylabel = "1 - R^2" + transform = "one_minus" + sort_order = 15 + """ + return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def remove_absolute_resample__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (resample) + xlabel = "Max fraction of features removed" + ylabel = "1 - ROC AUC" + transform = "one_minus" + sort_order = 15 + """ + return __run_measure(measures.remove_resample, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score) + +def keep_positive_impute(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Positive (impute) + xlabel = "Max fraction of features kept" + ylabel = "Mean model output" + transform = "identity" + sort_order = 16 + """ + return __run_measure(measures.keep_impute, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def keep_negative_impute(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Negative (impute) + xlabel = "Max fraction of features kept" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 17 + """ + return __run_measure(measures.keep_impute, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def keep_absolute_impute__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (impute) + xlabel = "Max fraction of features kept" + ylabel = "R^2" + transform = "identity" + sort_order = 18 + """ + return __run_measure(measures.keep_impute, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def keep_absolute_impute__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Absolute (impute) + xlabel = "Max fraction of features kept" + ylabel = "ROC AUC" + transform = "identity" + sort_order = 19 + """ + return __run_measure(measures.keep_impute, X, y, model_generator, method_name, 0, num_fcounts,
sklearn.metrics.roc_auc_score) + +def remove_positive_impute(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Positive (impute) + xlabel = "Max fraction of features removed" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 7 + """ + return __run_measure(measures.remove_impute, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def remove_negative_impute(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Negative (impute) + xlabel = "Max fraction of features removed" + ylabel = "Mean model output" + transform = "identity" + sort_order = 8 + """ + return __run_measure(measures.remove_impute, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def remove_absolute_impute__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (impute) + xlabel = "Max fraction of features removed" + ylabel = "1 - R^2" + transform = "one_minus" + sort_order = 9 + """ + return __run_measure(measures.remove_impute, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.r2_score) + +def remove_absolute_impute__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Absolute (impute) + xlabel = "Max fraction of features removed" + ylabel = "1 - ROC AUC" + transform = "one_minus" + sort_order = 9 + """ + return __run_measure(measures.remove_impute, X, y, model_generator, method_name, 0, num_fcounts, sklearn.metrics.roc_auc_score) + +def keep_positive_retrain(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Positive (retrain) + xlabel = "Max fraction of features kept" + ylabel = "Mean model output" + transform = "identity" + sort_order = 6 + """ + return __run_measure(measures.keep_retrain, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def keep_negative_retrain(X, y, model_generator, method_name, num_fcounts=11): + """ Keep Negative (retrain) + xlabel = "Max fraction of features kept" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 7 + """ + return __run_measure(measures.keep_retrain, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def remove_positive_retrain(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Positive (retrain) + xlabel = "Max fraction of features removed" + ylabel = "Negative mean model output" + transform = "negate" + sort_order = 11 + """ + return __run_measure(measures.remove_retrain, X, y, model_generator, method_name, 1, num_fcounts, __mean_pred) + +def remove_negative_retrain(X, y, model_generator, method_name, num_fcounts=11): + """ Remove Negative (retrain) + xlabel = "Max fraction of features removed" + ylabel = "Mean model output" + transform = "identity" + sort_order = 12 + """ + return __run_measure(measures.remove_retrain, X, y, model_generator, method_name, -1, num_fcounts, __mean_pred) + +def __run_measure(measure, X, y, model_generator, method_name, attribution_sign, num_fcounts, summary_function): + + def score_function(fcount, X_train, X_test, y_train, y_test, attr_function, trained_model, random_state): + if attribution_sign == 0: + A = np.abs(__strip_list(attr_function(X_test))) + else: + A = attribution_sign * __strip_list(attr_function(X_test)) + nmask = np.ones(len(y_test)) * fcount + nmask = np.minimum(nmask, np.array(A >= 0).sum(1)).astype(int) + return measure( + nmask, X_train, y_train, X_test, y_test, A, + model_generator, summary_function, trained_model, random_state + ) + fcounts = __intlogspace(0, X.shape[1], num_fcounts) +
return fcounts, __score_method(X, y, fcounts, model_generator, score_function, method_name) + +def batch_remove_absolute_retrain__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Batch Remove Absolute (retrain) + xlabel = "Fraction of features removed" + ylabel = "1 - R^2" + transform = "one_minus" + sort_order = 13 + """ + return __run_batch_abs_metric(measures.batch_remove_retrain, X, y, model_generator, method_name, sklearn.metrics.r2_score, num_fcounts) + +def batch_keep_absolute_retrain__r2(X, y, model_generator, method_name, num_fcounts=11): + """ Batch Keep Absolute (retrain) + xlabel = "Fraction of features kept" + ylabel = "R^2" + transform = "identity" + sort_order = 13 + """ + return __run_batch_abs_metric(measures.batch_keep_retrain, X, y, model_generator, method_name, sklearn.metrics.r2_score, num_fcounts) + +def batch_remove_absolute_retrain__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Batch Remove Absolute (retrain) + xlabel = "Fraction of features removed" + ylabel = "1 - ROC AUC" + transform = "one_minus" + sort_order = 13 + """ + return __run_batch_abs_metric(measures.batch_remove_retrain, X, y, model_generator, method_name, sklearn.metrics.roc_auc_score, num_fcounts) + +def batch_keep_absolute_retrain__roc_auc(X, y, model_generator, method_name, num_fcounts=11): + """ Batch Keep Absolute (retrain) + xlabel = "Fraction of features kept" + ylabel = "ROC AUC" + transform = "identity" + sort_order = 13 + """ + return __run_batch_abs_metric(measures.batch_keep_retrain, X, y, model_generator, method_name, sklearn.metrics.roc_auc_score, num_fcounts) + +def __run_batch_abs_metric(metric, X, y, model_generator, method_name, loss, num_fcounts): + def score_function(fcount, X_train, X_test, y_train, y_test, attr_function, trained_model): + A_train = np.abs(__strip_list(attr_function(X_train))) + nkeep_train = (np.ones(len(y_train)) * fcount).astype(int) + #nkeep_train = np.minimum(nkeep_train, np.array(A_train > 0).sum(1)).astype(int) + A_test = np.abs(__strip_list(attr_function(X_test))) + nkeep_test = (np.ones(len(y_test)) * fcount).astype(int) + #nkeep_test = np.minimum(nkeep_test, np.array(A_test >= 0).sum(1)).astype(int) + return metric( + nkeep_train, nkeep_test, X_train, y_train, X_test, y_test, A_train, A_test, + model_generator, loss + ) + fcounts = __intlogspace(0, X.shape[1], num_fcounts) + return fcounts, __score_method(X, y, fcounts, model_generator, score_function, method_name) + +_attribution_cache = {} +def __score_method(X, y, fcounts, model_generator, score_function, method_name, nreps=10, test_size=100, cache_dir="/tmp"): + """ Test an explanation method. 
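+
+    Trains the model from model_generator on nreps train/test splits (fitted
+    models are cached in cache_dir), computes attributions for each split with
+    the named method (also cached), evaluates score_function at every feature
+    count in fcounts (or once if fcounts is None), and averages over the splits.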
+ """ + + try: + pickle + except NameError: + raise ImportError("The 'dill' package could not be loaded and is needed for the benchmark!") + + old_seed = np.random.seed() + np.random.seed(3293) + + # average the method scores over several train/test splits + method_reps = [] + + data_hash = hashlib.sha256(__toarray(X).flatten()).hexdigest() + hashlib.sha256(__toarray(y)).hexdigest() + for i in range(nreps): + X_train, X_test, y_train, y_test = train_test_split(__toarray(X), y, test_size=test_size, random_state=i) + + # define the model we are going to explain, caching so we onlu build it once + model_id = "model_cache__v" + "__".join([__version__, data_hash, model_generator.__name__])+".pickle" + cache_file = os.path.join(cache_dir, model_id + ".pickle") + if os.path.isfile(cache_file): + with open(cache_file, "rb") as f: + model = pickle.load(f) + else: + model = model_generator() + model.fit(X_train, y_train) + with open(cache_file, "wb") as f: + pickle.dump(model, f) + + attr_key = "_".join([model_generator.__name__, method_name, str(test_size), str(nreps), str(i), data_hash]) + def score(attr_function): + def cached_attr_function(X_inner): + if attr_key not in _attribution_cache: + _attribution_cache[attr_key] = attr_function(X_inner) + return _attribution_cache[attr_key] + + #cached_attr_function = lambda X: __check_cache(attr_function, X) + if fcounts is None: + return score_function(X_train, X_test, y_train, y_test, cached_attr_function, model, i) + else: + scores = [] + for f in fcounts: + scores.append(score_function(f, X_train, X_test, y_train, y_test, cached_attr_function, model, i)) + return np.array(scores) + + # evaluate the method (only building the attribution function if we need to) + if attr_key not in _attribution_cache: + method_reps.append(score(getattr(methods, method_name)(model, X_train))) + else: + method_reps.append(score(None)) + + np.random.seed(old_seed) + return np.array(method_reps).mean(0) + + +# used to memoize explainer functions so we don't waste time re-explaining the same object +__cache0 = None +__cache_X0 = None +__cache_f0 = None +__cache1 = None +__cache_X1 = None +__cache_f1 = None +def __check_cache(f, X): + global __cache0, __cache_X0, __cache_f0 + global __cache1, __cache_X1, __cache_f1 + if X is __cache_X0 and f is __cache_f0: + return __cache0 + elif X is __cache_X1 and f is __cache_f1: + return __cache1 + else: + __cache_f1 = __cache_f0 + __cache_X1 = __cache_X0 + __cache1 = __cache0 + __cache_f0 = f + __cache_X0 = X + __cache0 = f(X) + return __cache0 + +def __intlogspace(start, end, count): + return np.unique(np.round(start + (end-start) * (np.logspace(0, 1, count, endpoint=True) - 1) / 9).astype(int)) + +def __toarray(X): + """ Converts DataFrames to numpy arrays. + """ + if hasattr(X, "values"): + X = X.values + return X + +def __strip_list(attrs): + """ This assumes that if you have a list of outputs you just want the second one (the second class). 
+ """ + if isinstance(attrs, list): + return attrs[1] + else: + return attrs + +def _fit_human(model_generator, val00, val01, val11): + # force the model to fit a function with almost entirely zero background + N = 1000000 + M = 3 + X = np.zeros((N,M)) + X.shape + y = np.ones(N) * val00 + X[0:1000, 0] = 1 + y[0:1000] = val01 + for i in range(0,1000000,1000): + X[i, 1] = 1 + y[i] = val01 + y[0] = val11 + model = model_generator() + model.fit(X, y) + return model + +def _human_and(X, model_generator, method_name, fever, cough): + assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!" + + # these are from the sickness_score mturk user study experiment + X_test = np.zeros((100,3)) + if not fever and not cough: + human_consensus = np.array([0., 0., 0.]) + X_test[0,:] = np.array([[0., 0., 1.]]) + elif not fever and cough: + human_consensus = np.array([0., 2., 0.]) + X_test[0,:] = np.array([[0., 1., 1.]]) + elif fever and cough: + human_consensus = np.array([5., 5., 0.]) + X_test[0,:] = np.array([[1., 1., 1.]]) + + # force the model to fit an XOR function with almost entirely zero background + model = _fit_human(model_generator, 0, 2, 10) + + attr_function = getattr(methods, method_name)(model, X) + methods_attrs = attr_function(X_test) + return "human", (human_consensus, methods_attrs[0,:]) + +def human_and_00(X, y, model_generator, method_name): + """ AND (false/false) + + This tests how well a feature attribution method agrees with human intuition + for an AND operation combined with linear effects. This metric deals + specifically with the question of credit allocation for the following function + when all three inputs are true: + if fever: +2 points + if cough: +2 points + if fever and cough: +6 points + + transform = "identity" + sort_order = 0 + """ + return _human_and(X, model_generator, method_name, False, False) + +def human_and_01(X, y, model_generator, method_name): + """ AND (false/true) + + This tests how well a feature attribution method agrees with human intuition + for an AND operation combined with linear effects. This metric deals + specifically with the question of credit allocation for the following function + when all three inputs are true: + if fever: +2 points + if cough: +2 points + if fever and cough: +6 points + + transform = "identity" + sort_order = 1 + """ + return _human_and(X, model_generator, method_name, False, True) + +def human_and_11(X, y, model_generator, method_name): + """ AND (true/true) + + This tests how well a feature attribution method agrees with human intuition + for an AND operation combined with linear effects. This metric deals + specifically with the question of credit allocation for the following function + when all three inputs are true: + if fever: +2 points + if cough: +2 points + if fever and cough: +6 points + + transform = "identity" + sort_order = 2 + """ + return _human_and(X, model_generator, method_name, True, True) + + +def _human_or(X, model_generator, method_name, fever, cough): + assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!" 
+
+    # these are from the sickness_score mturk user study experiment
+    X_test = np.zeros((100,3))
+    if not fever and not cough:
+        human_consensus = np.array([0., 0., 0.])
+        X_test[0,:] = np.array([[0., 0., 1.]])
+    elif not fever and cough:
+        human_consensus = np.array([0., 8., 0.])
+        X_test[0,:] = np.array([[0., 1., 1.]])
+    elif fever and cough:
+        human_consensus = np.array([5., 5., 0.])
+        X_test[0,:] = np.array([[1., 1., 1.]])
+
+    # force the model to fit an OR function with almost entirely zero background
+    model = _fit_human(model_generator, 0, 8, 10)
+
+    attr_function = getattr(methods, method_name)(model, X)
+    methods_attrs = attr_function(X_test)
+    return "human", (human_consensus, methods_attrs[0,:])
+
+def human_or_00(X, y, model_generator, method_name):
+    """ OR (false/false)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an OR operation combined with linear effects. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both false:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough: +6 points
+
+    transform = "identity"
+    sort_order = 0
+    """
+    return _human_or(X, model_generator, method_name, False, False)
+
+def human_or_01(X, y, model_generator, method_name):
+    """ OR (false/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an OR operation combined with linear effects. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever is false and cough is true:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough: +6 points
+
+    transform = "identity"
+    sort_order = 1
+    """
+    return _human_or(X, model_generator, method_name, False, True)
+
+def human_or_11(X, y, model_generator, method_name):
+    """ OR (true/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an OR operation combined with linear effects. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both true:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough: +6 points
+
+    transform = "identity"
+    sort_order = 2
+    """
+    return _human_or(X, model_generator, method_name, True, True)
+
+
+def _human_xor(X, model_generator, method_name, fever, cough):
+    assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
+
+    # these are from the sickness_score mturk user study experiment
+    X_test = np.zeros((100,3))
+    if not fever and not cough:
+        human_consensus = np.array([0., 0., 0.])
+        X_test[0,:] = np.array([[0., 0., 1.]])
+    elif not fever and cough:
+        human_consensus = np.array([0., 8., 0.])
+        X_test[0,:] = np.array([[0., 1., 1.]])
+    elif fever and cough:
+        human_consensus = np.array([2., 2., 0.])
+        X_test[0,:] = np.array([[1., 1., 1.]])
+
+    # force the model to fit an XOR function with almost entirely zero background
+    model = _fit_human(model_generator, 0, 8, 4)
+
+    attr_function = getattr(methods, method_name)(model, X)
+    methods_attrs = attr_function(X_test)
+    return "human", (human_consensus, methods_attrs[0,:])
+
+def human_xor_00(X, y, model_generator, method_name):
+    """ XOR (false/false)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an eXclusive OR operation combined with linear effects.
This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both false:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough but not both: +6 points
+
+    transform = "identity"
+    sort_order = 3
+    """
+    return _human_xor(X, model_generator, method_name, False, False)
+
+def human_xor_01(X, y, model_generator, method_name):
+    """ XOR (false/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an eXclusive OR operation combined with linear effects. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever is false and cough is true:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough but not both: +6 points
+
+    transform = "identity"
+    sort_order = 4
+    """
+    return _human_xor(X, model_generator, method_name, False, True)
+
+def human_xor_11(X, y, model_generator, method_name):
+    """ XOR (true/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for an eXclusive OR operation combined with linear effects. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both true:
+    if fever: +2 points
+    if cough: +2 points
+    if fever or cough but not both: +6 points
+
+    transform = "identity"
+    sort_order = 5
+    """
+    return _human_xor(X, model_generator, method_name, True, True)
+
+
+def _human_sum(X, model_generator, method_name, fever, cough):
+    assert np.abs(X).max() == 0, "Human agreement metrics are only for use with the human_agreement dataset!"
+
+    # these are from the sickness_score mturk user study experiment
+    X_test = np.zeros((100,3))
+    if not fever and not cough:
+        human_consensus = np.array([0., 0., 0.])
+        X_test[0,:] = np.array([[0., 0., 1.]])
+    elif not fever and cough:
+        human_consensus = np.array([0., 2., 0.])
+        X_test[0,:] = np.array([[0., 1., 1.]])
+    elif fever and cough:
+        human_consensus = np.array([2., 2., 0.])
+        X_test[0,:] = np.array([[1., 1., 1.]])
+
+    # force the model to fit a SUM function with almost entirely zero background
+    model = _fit_human(model_generator, 0, 2, 4)
+
+    attr_function = getattr(methods, method_name)(model, X)
+    methods_attrs = attr_function(X_test)
+    return "human", (human_consensus, methods_attrs[0,:])
+
+def human_sum_00(X, y, model_generator, method_name):
+    """ SUM (false/false)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for a SUM operation. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both false:
+    if fever: +2 points
+    if cough: +2 points
+
+    transform = "identity"
+    sort_order = 0
+    """
+    return _human_sum(X, model_generator, method_name, False, False)
+
+def human_sum_01(X, y, model_generator, method_name):
+    """ SUM (false/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for a SUM operation. This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever is false and cough is true:
+    if fever: +2 points
+    if cough: +2 points
+
+    transform = "identity"
+    sort_order = 1
+    """
+    return _human_sum(X, model_generator, method_name, False, True)
+
+def human_sum_11(X, y, model_generator, method_name):
+    """ SUM (true/true)
+
+    This tests how well a feature attribution method agrees with human intuition
+    for a SUM operation.
This metric deals
+    specifically with the question of credit allocation for the following function
+    when fever and cough are both true:
+    if fever: +2 points
+    if cough: +2 points
+
+    transform = "identity"
+    sort_order = 2
+    """
+    return _human_sum(X, model_generator, method_name, True, True)
diff --git a/lib/shap/benchmark/models.py b/lib/shap/benchmark/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c0a886f49712b9ffcb5120d170a5a568d238999
--- /dev/null
+++ b/lib/shap/benchmark/models.py
@@ -0,0 +1,230 @@
+import numpy as np
+import sklearn
+import sklearn.ensemble
+import sklearn.linear_model
+import sklearn.tree
+from sklearn.preprocessing import StandardScaler
+
+
+class KerasWrap:
+    """ A wrapper that allows us to set parameters in the constructor and do a reset before fitting.
+    """
+    def __init__(self, model, epochs, flatten_output=False):
+        self.model = model
+        self.epochs = epochs
+        self.flatten_output = flatten_output
+        self.init_weights = None
+        self.scaler = StandardScaler()
+
+    def fit(self, X, y, verbose=0):
+        if self.init_weights is None:
+            self.init_weights = self.model.get_weights()
+        else:
+            self.model.set_weights(self.init_weights)
+        # train on the same standardized inputs that predict() will see
+        X = self.scaler.fit_transform(X)
+        return self.model.fit(X, y, epochs=self.epochs, verbose=verbose)
+
+    def predict(self, X):
+        X = self.scaler.transform(X)
+        if self.flatten_output:
+            return self.model.predict(X).flatten()
+        else:
+            return self.model.predict(X)
+
+
+# These models are all tuned for the corrgroups60 dataset
+
+def corrgroups60__lasso():
+    """ Lasso Regression
+    """
+    return sklearn.linear_model.Lasso(alpha=0.1)
+
+def corrgroups60__ridge():
+    """ Ridge Regression
+    """
+    return sklearn.linear_model.Ridge(alpha=1.0)
+
+def corrgroups60__decision_tree():
+    """ Decision Tree
+    """
+
+    # max_depth was chosen to minimise test error
+    return sklearn.tree.DecisionTreeRegressor(random_state=0, max_depth=6)
+
+def corrgroups60__random_forest():
+    """ Random Forest
+    """
+    return sklearn.ensemble.RandomForestRegressor(100, random_state=0)
+
+def corrgroups60__gbm():
+    """ Gradient Boosted Trees
+    """
+    import xgboost
+
+    # max_depth and learning_rate were fixed then n_estimators was chosen using a train/test split
+    return xgboost.XGBRegressor(max_depth=6, n_estimators=50, learning_rate=0.1, n_jobs=8, random_state=0)
+
+def corrgroups60__ffnn():
+    """ 4-Layer Neural Network
+    """
+    from tensorflow.keras.layers import Dense
+    from tensorflow.keras.models import Sequential
+
+    model = Sequential()
+    model.add(Dense(32, activation='relu', input_dim=60))
+    model.add(Dense(20, activation='relu'))
+    model.add(Dense(20, activation='relu'))
+    model.add(Dense(1))
+
+    model.compile(optimizer='adam',
+                  loss='mean_squared_error',
+                  metrics=['mean_squared_error'])
+
+    return KerasWrap(model, 30, flatten_output=True)
+
+
+def independentlinear60__lasso():
+    """ Lasso Regression
+    """
+    return sklearn.linear_model.Lasso(alpha=0.1)
+
+def independentlinear60__ridge():
+    """ Ridge Regression
+    """
+    return sklearn.linear_model.Ridge(alpha=1.0)
+
+def independentlinear60__decision_tree():
+    """ Decision Tree
+    """
+
+    # max_depth was chosen to minimise test error
+    return sklearn.tree.DecisionTreeRegressor(random_state=0, max_depth=4)
+
+def independentlinear60__random_forest():
+    """ Random Forest
+    """
+    return sklearn.ensemble.RandomForestRegressor(100, random_state=0)
+
+def independentlinear60__gbm():
+    """ Gradient Boosted Trees
+    """
+    import xgboost
+
+    # max_depth and learning_rate were fixed then n_estimators was chosen using a train/test split
+    return xgboost.XGBRegressor(max_depth=6, n_estimators=100, learning_rate=0.1, n_jobs=8, random_state=0)
+
+def independentlinear60__ffnn():
+    """ 4-Layer Neural Network
+    """
+    from tensorflow.keras.layers import Dense
+    from tensorflow.keras.models import Sequential
+
+    model = Sequential()
+    model.add(Dense(32, activation='relu', input_dim=60))
+    model.add(Dense(20, activation='relu'))
+    model.add(Dense(20, activation='relu'))
+    model.add(Dense(1))
+
+    model.compile(optimizer='adam',
+                  loss='mean_squared_error',
+                  metrics=['mean_squared_error'])
+
+    return KerasWrap(model, 30, flatten_output=True)
+
+
+def cric__lasso():
+    """ Lasso Regression
+    """
+    # liblinear is needed to support the l1 penalty
+    model = sklearn.linear_model.LogisticRegression(penalty="l1", C=0.002, solver="liblinear")
+
+    # we want to explain the raw probability outputs of the model
+    model.predict = lambda X: model.predict_proba(X)[:,1]
+
+    return model
+
+def cric__ridge():
+    """ Ridge Regression
+    """
+    model = sklearn.linear_model.LogisticRegression(penalty="l2")
+
+    # we want to explain the raw probability outputs of the model
+    model.predict = lambda X: model.predict_proba(X)[:,1]
+
+    return model
+
+def cric__decision_tree():
+    """ Decision Tree
+    """
+    model = sklearn.tree.DecisionTreeClassifier(random_state=0, max_depth=4)
+
+    # we want to explain the raw probability outputs of the tree
+    model.predict = lambda X: model.predict_proba(X)[:,1]
+
+    return model
+
+def cric__random_forest():
+    """ Random Forest
+    """
+    model = sklearn.ensemble.RandomForestClassifier(100, random_state=0)
+
+    # we want to explain the raw probability outputs of the trees
+    model.predict = lambda X: model.predict_proba(X)[:,1]
+
+    return model
+
+def cric__gbm():
+    """ Gradient Boosted Trees
+    """
+    import xgboost
+
+    # max_depth and subsample match the params used for the full cric data in the paper
+    # learning_rate was set a bit higher to allow for faster runtimes
+    # n_estimators was chosen based on a train/test split of the data
+    model = xgboost.XGBClassifier(max_depth=5, n_estimators=400, learning_rate=0.01, subsample=0.2, n_jobs=8, random_state=0)
+
+    # we want to explain the margin, not the transformed probability outputs
+    model.__orig_predict = model.predict
+    model.predict = lambda X: model.__orig_predict(X, output_margin=True)
+
+    return model
+
+def cric__ffnn():
+    """ 4-Layer Neural Network
+    """
+    from tensorflow.keras.layers import Dense, Dropout
+    from tensorflow.keras.models import Sequential
+
+    model = Sequential()
+    model.add(Dense(10, activation='relu', input_dim=336))
+    model.add(Dropout(0.5))
+    model.add(Dense(10, activation='relu'))
+    model.add(Dropout(0.5))
+    model.add(Dense(1, activation='sigmoid'))
+
+    model.compile(optimizer='adam',
+                  loss='binary_crossentropy',
+                  metrics=['accuracy'])
+
+    return KerasWrap(model, 30, flatten_output=True)
+
+
+def human__decision_tree():
+    """ Decision Tree
+    """
+
+    # build data
+    N = 1000000
+    M = 3
+    X = np.zeros((N,M))
+    y = np.zeros(N)
+    X[0, 0] = 1
+    y[0] = 8
+    X[1, 1] = 1
+    y[1] = 8
+    X[2, 0:2] = 1
+    y[2] = 4
+
+    # fit model
+    xor_model = sklearn.tree.DecisionTreeRegressor(max_depth=2)
+    xor_model.fit(X, y)
+
+    return xor_model
diff --git a/lib/shap/benchmark/plots.py b/lib/shap/benchmark/plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..56bb204b756f8d978b708eea53a5899ea4de52e4
--- /dev/null
+++ b/lib/shap/benchmark/plots.py
@@ -0,0 +1,566 @@
+import base64
+import io
+import os
+
+import numpy as np
+import sklearn
+from matplotlib.colors import LinearSegmentedColormap
+
+from ..
import __version__ +from ..plots import colors +from . import methods, metrics, models +from .experiments import run_experiments + +try: + import matplotlib + import matplotlib.pyplot as pl + from IPython.display import HTML +except ImportError: + pass + + +metadata = { + # "runtime": { + # "title": "Runtime", + # "sort_order": 1 + # }, + # "local_accuracy": { + # "title": "Local Accuracy", + # "sort_order": 2 + # }, + # "consistency_guarantees": { + # "title": "Consistency Guarantees", + # "sort_order": 3 + # }, + # "keep_positive_mask": { + # "title": "Keep Positive (mask)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Mean model output", + # "sort_order": 4 + # }, + # "keep_negative_mask": { + # "title": "Keep Negative (mask)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Negative mean model output", + # "sort_order": 5 + # }, + # "keep_absolute_mask__r2": { + # "title": "Keep Absolute (mask)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "R^2", + # "sort_order": 6 + # }, + # "keep_absolute_mask__roc_auc": { + # "title": "Keep Absolute (mask)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "ROC AUC", + # "sort_order": 6 + # }, + # "remove_positive_mask": { + # "title": "Remove Positive (mask)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "Negative mean model output", + # "sort_order": 7 + # }, + # "remove_negative_mask": { + # "title": "Remove Negative (mask)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "Mean model output", + # "sort_order": 8 + # }, + # "remove_absolute_mask__r2": { + # "title": "Remove Absolute (mask)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "1 - R^2", + # "sort_order": 9 + # }, + # "remove_absolute_mask__roc_auc": { + # "title": "Remove Absolute (mask)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "1 - ROC AUC", + # "sort_order": 9 + # }, + # "keep_positive_resample": { + # "title": "Keep Positive (resample)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Mean model output", + # "sort_order": 10 + # }, + # "keep_negative_resample": { + # "title": "Keep Negative (resample)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Negative mean model output", + # "sort_order": 11 + # }, + # "keep_absolute_resample__r2": { + # "title": "Keep Absolute (resample)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "R^2", + # "sort_order": 12 + # }, + # "keep_absolute_resample__roc_auc": { + # "title": "Keep Absolute (resample)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "ROC AUC", + # "sort_order": 12 + # }, + # "remove_positive_resample": { + # "title": "Remove Positive (resample)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "Negative mean model output", + # "sort_order": 13 + # }, + # "remove_negative_resample": { + # "title": "Remove Negative (resample)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "Mean model output", + # "sort_order": 14 + # }, + # "remove_absolute_resample__r2": { + # "title": "Remove Absolute (resample)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "1 - R^2", + # "sort_order": 15 + # }, + # "remove_absolute_resample__roc_auc": { + # "title": "Remove Absolute (resample)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "1 - ROC AUC", + # "sort_order": 15 + # }, + # "remove_positive_retrain": { + # "title": "Remove Positive (retrain)", + # "xlabel": "Max fraction of features 
removed", + # "ylabel": "Negative mean model output", + # "sort_order": 11 + # }, + # "remove_negative_retrain": { + # "title": "Remove Negative (retrain)", + # "xlabel": "Max fraction of features removed", + # "ylabel": "Mean model output", + # "sort_order": 12 + # }, + # "keep_positive_retrain": { + # "title": "Keep Positive (retrain)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Mean model output", + # "sort_order": 6 + # }, + # "keep_negative_retrain": { + # "title": "Keep Negative (retrain)", + # "xlabel": "Max fraction of features kept", + # "ylabel": "Negative mean model output", + # "sort_order": 7 + # }, + # "batch_remove_absolute__r2": { + # "title": "Batch Remove Absolute", + # "xlabel": "Fraction of features removed", + # "ylabel": "1 - R^2", + # "sort_order": 13 + # }, + # "batch_keep_absolute__r2": { + # "title": "Batch Keep Absolute", + # "xlabel": "Fraction of features kept", + # "ylabel": "R^2", + # "sort_order": 8 + # }, + # "batch_remove_absolute__roc_auc": { + # "title": "Batch Remove Absolute", + # "xlabel": "Fraction of features removed", + # "ylabel": "1 - ROC AUC", + # "sort_order": 13 + # }, + # "batch_keep_absolute__roc_auc": { + # "title": "Batch Keep Absolute", + # "xlabel": "Fraction of features kept", + # "ylabel": "ROC AUC", + # "sort_order": 8 + # }, + + # "linear_shap_corr": { + # "title": "Linear SHAP (corr)" + # }, + # "linear_shap_ind": { + # "title": "Linear SHAP (ind)" + # }, + # "coef": { + # "title": "Coefficients" + # }, + # "random": { + # "title": "Random" + # }, + # "kernel_shap_1000_meanref": { + # "title": "Kernel SHAP 1000 mean ref." + # }, + # "sampling_shap_1000": { + # "title": "Sampling SHAP 1000" + # }, + # "tree_shap_tree_path_dependent": { + # "title": "Tree SHAP" + # }, + # "saabas": { + # "title": "Saabas" + # }, + # "tree_gain": { + # "title": "Gain/Gini Importance" + # }, + # "mean_abs_tree_shap": { + # "title": "mean(|Tree SHAP|)" + # }, + # "lasso_regression": { + # "title": "Lasso Regression" + # }, + # "ridge_regression": { + # "title": "Ridge Regression" + # }, + # "gbm_regression": { + # "title": "Gradient Boosting Regression" + # } +} + +benchmark_color_map = { + "tree_shap": "#1E88E5", + "deep_shap": "#1E88E5", + "linear_shap_corr": "#1E88E5", + "linear_shap_ind": "#ff0d57", + "coef": "#13B755", + "random": "#999999", + "const_random": "#666666", + "kernel_shap_1000_meanref": "#7C52FF" +} + +# negated_metrics = [ +# "runtime", +# "remove_positive_retrain", +# "remove_positive_mask", +# "remove_positive_resample", +# "keep_negative_retrain", +# "keep_negative_mask", +# "keep_negative_resample" +# ] + +# one_minus_metrics = [ +# "remove_absolute_mask__r2", +# "remove_absolute_mask__roc_auc", +# "remove_absolute_resample__r2", +# "remove_absolute_resample__roc_auc" +# ] + +def get_method_color(method): + for line in getattr(methods, method).__doc__.split("\n"): + line = line.strip() + if line.startswith("color = "): + v = line.split("=")[1].strip() + if v.startswith("red_blue_circle("): + return colors.red_blue_circle(float(v[16:-1])) + else: + return v + return "#000000" + +def get_method_linestyle(method): + for line in getattr(methods, method).__doc__.split("\n"): + line = line.strip() + if line.startswith("linestyle = "): + return line.split("=")[1].strip() + return "solid" + +def get_metric_attr(metric, attr): + for line in getattr(metrics, metric).__doc__.split("\n"): + line = line.strip() + + # string + prefix = attr+" = \"" + suffix = "\"" + if line.startswith(prefix) and line.endswith(suffix): + 
return line[len(prefix):-len(suffix)] + + # number + prefix = attr+" = " + if line.startswith(prefix): + return float(line[len(prefix):]) + return "" + +def plot_curve(dataset, model, metric, cmap=benchmark_color_map): + experiments = run_experiments(dataset=dataset, model=model, metric=metric) + pl.figure() + method_arr = [] + for (name,(fcounts,scores)) in experiments: + _,_,method,_ = name + transform = get_metric_attr(metric, "transform") + if transform == "negate": + scores = -scores + elif transform == "one_minus": + scores = 1 - scores + auc = sklearn.metrics.auc(fcounts, scores) / fcounts[-1] + method_arr.append((auc, method, scores)) + for (auc,method,scores) in sorted(method_arr): + method_title = getattr(methods, method).__doc__.split("\n")[0].strip() + label = f"{auc:6.3f} - " + method_title + pl.plot( + fcounts / fcounts[-1], scores, label=label, + color=get_method_color(method), linewidth=2, + linestyle=get_method_linestyle(method) + ) + metric_title = getattr(metrics, metric).__doc__.split("\n")[0].strip() + pl.xlabel(get_metric_attr(metric, "xlabel")) + pl.ylabel(get_metric_attr(metric, "ylabel")) + model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip() + pl.title(metric_title + " - " + model_title) + pl.gca().xaxis.set_ticks_position('bottom') + pl.gca().yaxis.set_ticks_position('left') + pl.gca().spines['right'].set_visible(False) + pl.gca().spines['top'].set_visible(False) + ahandles, alabels = pl.gca().get_legend_handles_labels() + pl.legend(reversed(ahandles), reversed(alabels)) + return pl.gcf() + +def plot_human(dataset, model, metric, cmap=benchmark_color_map): + experiments = run_experiments(dataset=dataset, model=model, metric=metric) + pl.figure() + method_arr = [] + for (name,(fcounts,scores)) in experiments: + _,_,method,_ = name + diff_sum = np.sum(np.abs(scores[1] - scores[0])) + method_arr.append((diff_sum, method, scores[0], scores[1])) + + inds = np.arange(3) # the x locations for the groups + inc_width = (1.0 / len(method_arr)) * 0.8 + width = inc_width * 0.9 + pl.bar(inds, method_arr[0][2], width, label="Human Consensus", color="black", edgecolor="white") + i = 1 + line_style_to_hatch = { + "dashed": "///", + "dotted": "..." 
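+        # bar charts have no line styles, so dashed/dotted methods are drawn
+        # with hatch patterns instead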
+ } + for (diff_sum, method, _, methods_attrs) in sorted(method_arr): + method_title = getattr(methods, method).__doc__.split("\n")[0].strip() + label = f"{diff_sum:.2f} - " + method_title + pl.bar( + inds + inc_width * i, methods_attrs.flatten(), width, label=label, edgecolor="white", + color=get_method_color(method), hatch=line_style_to_hatch.get(get_method_linestyle(method), None) + ) + i += 1 + metric_title = getattr(metrics, metric).__doc__.split("\n")[0].strip() + pl.xlabel("Features in the model") + pl.ylabel("Feature attribution value") + model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip() + pl.title(metric_title + " - " + model_title) + pl.gca().xaxis.set_ticks_position('bottom') + pl.gca().yaxis.set_ticks_position('left') + pl.gca().spines['right'].set_visible(False) + pl.gca().spines['top'].set_visible(False) + ahandles, alabels = pl.gca().get_legend_handles_labels() + #pl.legend(ahandles, alabels) + pl.xticks(np.array([0, 1, 2, 3]) - (inc_width + width)/2, ["", "", "", ""]) + + pl.gca().xaxis.set_minor_locator(matplotlib.ticker.FixedLocator([0.4, 1.4, 2.4])) + pl.gca().xaxis.set_minor_formatter(matplotlib.ticker.FixedFormatter(["Fever", "Cough", "Headache"])) + pl.gca().tick_params(which='minor', length=0) + + pl.axhline(0, color="#aaaaaa", linewidth=0.5) + + box = pl.gca().get_position() + pl.gca().set_position([ + box.x0, box.y0 + box.height * 0.3, + box.width, box.height * 0.7 + ]) + + # Put a legend below current axis + pl.gca().legend(ahandles, alabels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2) + + return pl.gcf() + +def _human_score_map(human_consensus, methods_attrs): + """ Converts human agreement differences to numerical scores for coloring. + """ + + v = 1 - min(np.sum(np.abs(methods_attrs - human_consensus)) / (np.abs(human_consensus).sum() + 1), 1.0) + return v + +def make_grid(scores, dataset, model, normalize=True, transform=True): + color_vals = {} + metric_sort_order = {} + for (_,_,method,metric),(fcounts,score) in filter(lambda x: x[0][0] == dataset and x[0][1] == model, scores): + metric_sort_order[metric] = get_metric_attr(metric, "sort_order") + if metric not in color_vals: + color_vals[metric] = {} + + if transform: + transform_type = get_metric_attr(metric, "transform") + if transform_type == "negate": + score = -score + elif transform_type == "one_minus": + score = 1 - score + elif transform_type == "negate_log": + score = -np.log10(score) + + if fcounts is None: + color_vals[metric][method] = score + elif fcounts == "human": + color_vals[metric][method] = _human_score_map(*score) + else: + auc = sklearn.metrics.auc(fcounts, score) / fcounts[-1] + color_vals[metric][method] = auc + # print(metric_sort_order) + # col_keys = sorted(list(color_vals.keys()), key=lambda v: metric_sort_order[v]) + # print(col_keys) + col_keys = list(color_vals.keys()) + row_keys = list({v for k in col_keys for v in color_vals[k].keys()}) + + data = -28567 * np.ones((len(row_keys), len(col_keys))) + + for i in range(len(row_keys)): + for j in range(len(col_keys)): + data[i,j] = color_vals[col_keys[j]][row_keys[i]] + + assert np.sum(data == -28567) == 0, "There are missing data values!" 
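+
+    # normalize each metric (column) to [0, 1] so metrics with different output
+    # scales can share a single color scale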
+    if normalize:
+        data = (data - data.min(0)) / (data.max(0) - data.min(0) + 1e-8)
+
+    # sort by performance
+    inds = np.argsort(-data.mean(1))
+    row_keys = [row_keys[i] for i in inds]
+    data = data[inds,:]
+
+    return row_keys, col_keys, data
+
+
+
+red_blue_solid = LinearSegmentedColormap('red_blue_solid', {
+    'red': ((0.0, 198./255, 198./255),
+            (1.0, 5./255, 5./255)),
+
+    'green': ((0.0, 34./255, 34./255),
+              (1.0, 198./255, 198./255)),
+
+    'blue': ((0.0, 5./255, 5./255),
+             (1.0, 24./255, 24./255)),
+
+    'alpha': ((0.0, 1, 1),
+              (1.0, 1, 1))
+})
+def plot_grids(dataset, model_names, out_dir=None):
+
+    if out_dir is not None:
+        os.mkdir(out_dir)
+
+    scores = []
+    for model in model_names:
+        scores.extend(run_experiments(dataset=dataset, model=model))
+
+    prefix = ""
+    out = "" # background: rgb(30, 136, 229)
+
+    # out += "<div style='font-size: 24px; text-align: center; padding: 20px'>SHAP Benchmark</div>\n"
+    # out += "<div style='height: 1px; background: #ddd'></div>\n"
+    #out += "<div style='height: 7px; background: rgb(30, 136, 229)'></div>"
+
+    out += "<div style='height: 10px'></div>\n" # box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19);
+    out += "<table style='border-width: 1px; font-size: 14px; margin-left: 40px'>\n"
\n" + for ind,model in enumerate(model_names): + row_keys, col_keys, data = make_grid(scores, dataset, model) +# print(data) +# print(colors.red_blue_solid(0.)) +# print(colors.red_blue_solid(1.)) +# return + for metric in col_keys: + save_plot = False + if metric.startswith("human_"): + plot_human(dataset, model, metric) + save_plot = True + elif metric not in ["local_accuracy", "runtime", "consistency_guarantees"]: + plot_curve(dataset, model, metric) + save_plot = True + + if save_plot: + buf = io.BytesIO() + pl.gcf().set_size_inches(1200.0/175,1000.0/175) + pl.savefig(buf, format='png', dpi=175) + if out_dir is not None: + pl.savefig(f"{out_dir}/plot_{dataset}_{model}_{metric}.pdf", format='pdf') + pl.close() + buf.seek(0) + data_uri = base64.b64encode(buf.read()).decode('utf-8').replace('\n', '') + plot_id = "plot__"+dataset+"__"+model+"__"+metric + prefix += f"" + + model_title = getattr(models, dataset+"__"+model).__doc__.split("\n")[0].strip() + + if ind == 0: + out += "" + for j in range(data.shape[1]): + metric_title = getattr(metrics, col_keys[j]).__doc__.split("\n")[0].strip() + out += "" + out += "\n" + out += "
" + metric_title + "
\n" + out += "\n" + out += "\n" % (data.shape[1], model_title) + for i in range(data.shape[0]): + out += "" +# if i == 0: +# out += "" % (data.shape[0], model_name) + method_title = getattr(methods, row_keys[i]).__doc__.split("\n")[0].strip() + out += "\n" + for j in range(data.shape[1]): + plot_id = "plot__"+dataset+"__"+model+"__"+col_keys[j] + out += "\n" + out += "\n" # + + out += "" % (data.shape[1] + 1) + out += "
%s
%s
" + method_title + "" % plot_id + #out += "
" + out += "
" + #out += "
" + out += "
" + + out += "
\n" + out += "
SHAP Benchmark v"+__version__+"
\n" +# select { +# margin: 50px; +# width: 150px; +# padding: 5px 35px 5px 5px; +# font-size: 16px; +# border: 1px solid #ccc; +# height: 34px; +# -webkit-appearance: none; +# -moz-appearance: none; +# appearance: none; +# background: url(http://www.stackoverflow.com/favicon.ico) 96% / 15% no-repeat #eee; +# } + #out += "
Dataset:
\n" + + out += "\n" + #out += "" + #out += "
CRIC
\n" + out += "
\n" + + # output the legend + out += "\n" + out += "\n" + legend_size = 21 + for i in range(legend_size-9): + out += "" + out += "" + out += "\n" # + out += "\n" + out += "
Higher score
" + val = (legend_size-i-1) / (legend_size-1) + out += "
" + out += "
Lower score
\n" + + if out_dir is not None: + with open(out_dir + "/index.html", "w") as f: + f.write("
") + f.write(prefix) + f.write(out) + f.write("
") + else: + return HTML(prefix + out) diff --git a/lib/shap/cext/_cext.cc b/lib/shap/cext/_cext.cc new file mode 100644 index 0000000000000000000000000000000000000000..4d8cf379bd1e2d8bf500eaf6544ae1264160586a --- /dev/null +++ b/lib/shap/cext/_cext.cc @@ -0,0 +1,560 @@ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +#include +#include +#include "tree_shap.h" +#include + +static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args); +static PyObject *_cext_dense_tree_predict(PyObject *self, PyObject *args); +static PyObject *_cext_dense_tree_update_weights(PyObject *self, PyObject *args); +static PyObject *_cext_dense_tree_saabas(PyObject *self, PyObject *args); +static PyObject *_cext_compute_expectations(PyObject *self, PyObject *args); + +static PyMethodDef module_methods[] = { + {"dense_tree_shap", _cext_dense_tree_shap, METH_VARARGS, "C implementation of Tree SHAP for dense."}, + {"dense_tree_predict", _cext_dense_tree_predict, METH_VARARGS, "C implementation of tree predictions."}, + {"dense_tree_update_weights", _cext_dense_tree_update_weights, METH_VARARGS, "C implementation of tree node weight compuatations."}, + {"dense_tree_saabas", _cext_dense_tree_saabas, METH_VARARGS, "C implementation of Saabas (rough fast approximation to Tree SHAP)."}, + {"compute_expectations", _cext_compute_expectations, METH_VARARGS, "Compute expectations of internal nodes."}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_cext", + "This module provides an interface for a fast Tree SHAP implementation.", + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC PyInit__cext(void) +#else +PyMODINIT_FUNC init_cext(void) +#endif +{ + #if PY_MAJOR_VERSION >= 3 + PyObject *module = PyModule_Create(&moduledef); + if (!module) return NULL; + #else + PyObject *module = Py_InitModule("_cext", module_methods); + if (!module) return; + #endif + + /* Load `numpy` functionality. */ + import_array(); + + #if PY_MAJOR_VERSION >= 3 + return module; + #endif +} + +static PyObject *_cext_compute_expectations(PyObject *self, PyObject *args) +{ + PyObject *children_left_obj; + PyObject *children_right_obj; + PyObject *node_sample_weight_obj; + PyObject *values_obj; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple( + args, "OOOO", &children_left_obj, &children_right_obj, &node_sample_weight_obj, &values_obj + )) return NULL; + + /* Interpret the input objects as numpy arrays. */ + PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *node_sample_weight_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weight_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY); + + /* If that didn't work, throw an exception. 
*/ + if (children_left_array == NULL || children_right_array == NULL || + values_array == NULL || node_sample_weight_array == NULL) { + Py_XDECREF(children_left_array); + Py_XDECREF(children_right_array); + //PyArray_ResolveWritebackIfCopy(values_array); + Py_XDECREF(values_array); + Py_XDECREF(node_sample_weight_array); + return NULL; + } + + TreeEnsemble tree; + + // number of outputs + tree.num_outputs = PyArray_DIM(values_array, 1); + + /* Get pointers to the data as C-types. */ + tree.children_left = (int*)PyArray_DATA(children_left_array); + tree.children_right = (int*)PyArray_DATA(children_right_array); + tree.values = (tfloat*)PyArray_DATA(values_array); + tree.node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weight_array); + + const int max_depth = compute_expectations(tree); + + // clean up the created python objects + Py_XDECREF(children_left_array); + Py_XDECREF(children_right_array); + //PyArray_ResolveWritebackIfCopy(values_array); + Py_XDECREF(values_array); + Py_XDECREF(node_sample_weight_array); + + PyObject *ret = Py_BuildValue("i", max_depth); + return ret; +} + + +static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args) +{ + PyObject *children_left_obj; + PyObject *children_right_obj; + PyObject *children_default_obj; + PyObject *features_obj; + PyObject *thresholds_obj; + PyObject *values_obj; + PyObject *node_sample_weights_obj; + int max_depth; + PyObject *X_obj; + PyObject *X_missing_obj; + PyObject *y_obj; + PyObject *R_obj; + PyObject *R_missing_obj; + int tree_limit; + PyObject *out_contribs_obj; + int feature_dependence; + int model_output; + PyObject *base_offset_obj; + bool interactions; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple( + args, "OOOOOOOiOOOOOiOOiib", &children_left_obj, &children_right_obj, &children_default_obj, + &features_obj, &thresholds_obj, &values_obj, &node_sample_weights_obj, + &max_depth, &X_obj, &X_missing_obj, &y_obj, &R_obj, &R_missing_obj, &tree_limit, &base_offset_obj, + &out_contribs_obj, &feature_dependence, &model_output, &interactions + )) return NULL; + + /* Interpret the input objects as numpy arrays. 
*/ + PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *node_sample_weights_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weights_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY); + PyArrayObject *y_array = NULL; + if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *R_array = NULL; + if (R_obj != Py_None) R_array = (PyArrayObject*)PyArray_FROM_OTF(R_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *R_missing_array = NULL; + if (R_missing_obj != Py_None) R_missing_array = (PyArrayObject*)PyArray_FROM_OTF(R_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY); + PyArrayObject *out_contribs_array = (PyArrayObject*)PyArray_FROM_OTF(out_contribs_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY); + PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY); + + /* If that didn't work, throw an exception. Note that R and y are optional. 
*/
+    if (children_left_array == NULL || children_right_array == NULL ||
+        children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
+        values_array == NULL || node_sample_weights_array == NULL || X_array == NULL ||
+        X_missing_array == NULL || out_contribs_array == NULL) {
+        Py_XDECREF(children_left_array);
+        Py_XDECREF(children_right_array);
+        Py_XDECREF(children_default_array);
+        Py_XDECREF(features_array);
+        Py_XDECREF(thresholds_array);
+        Py_XDECREF(values_array);
+        Py_XDECREF(node_sample_weights_array);
+        Py_XDECREF(X_array);
+        Py_XDECREF(X_missing_array);
+        if (y_array != NULL) Py_XDECREF(y_array);
+        if (R_array != NULL) Py_XDECREF(R_array);
+        if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
+        //PyArray_ResolveWritebackIfCopy(out_contribs_array);
+        Py_XDECREF(out_contribs_array);
+        Py_XDECREF(base_offset_array);
+        return NULL;
+    }
+
+    const unsigned num_X = PyArray_DIM(X_array, 0);
+    const unsigned M = PyArray_DIM(X_array, 1);
+    const unsigned max_nodes = PyArray_DIM(values_array, 1);
+    const unsigned num_outputs = PyArray_DIM(values_array, 2);
+    unsigned num_R = 0;
+    if (R_array != NULL) num_R = PyArray_DIM(R_array, 0);
+
+    // Get pointers to the data as C-types
+    int *children_left = (int*)PyArray_DATA(children_left_array);
+    int *children_right = (int*)PyArray_DATA(children_right_array);
+    int *children_default = (int*)PyArray_DATA(children_default_array);
+    int *features = (int*)PyArray_DATA(features_array);
+    tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
+    tfloat *values = (tfloat*)PyArray_DATA(values_array);
+    tfloat *node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weights_array);
+    tfloat *X = (tfloat*)PyArray_DATA(X_array);
+    bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
+    tfloat *y = NULL;
+    if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
+    tfloat *R = NULL;
+    if (R_array != NULL) R = (tfloat*)PyArray_DATA(R_array);
+    bool *R_missing = NULL;
+    if (R_missing_array != NULL) R_missing = (bool*)PyArray_DATA(R_missing_array);
+    tfloat *out_contribs = (tfloat*)PyArray_DATA(out_contribs_array);
+    tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
+
+    // these are just wrapper objects for all the pointers and numbers associated with
+    // the ensemble tree model and the dataset we are explaining
+    TreeEnsemble trees = TreeEnsemble(
+        children_left, children_right, children_default, features, thresholds, values,
+        node_sample_weights, max_depth, tree_limit, base_offset,
+        max_nodes, num_outputs
+    );
+    ExplanationDataset data = ExplanationDataset(X, X_missing, y, R, R_missing, num_X, M, num_R);
+
+    dense_tree_shap(trees, data, out_contribs, feature_dependence, model_output, interactions);
+
+    // retrieve return value before python cleanup of objects
+    tfloat ret_value = (double)values[0];
+
+    // clean up the created python objects
+    Py_XDECREF(children_left_array);
+    Py_XDECREF(children_right_array);
+    Py_XDECREF(children_default_array);
+    Py_XDECREF(features_array);
+    Py_XDECREF(thresholds_array);
+    Py_XDECREF(values_array);
+    Py_XDECREF(node_sample_weights_array);
+    Py_XDECREF(X_array);
+    Py_XDECREF(X_missing_array);
+    if (y_array != NULL) Py_XDECREF(y_array);
+    if (R_array != NULL) Py_XDECREF(R_array);
+    if (R_missing_array != NULL) Py_XDECREF(R_missing_array);
+    //PyArray_ResolveWritebackIfCopy(out_contribs_array);
+    Py_XDECREF(out_contribs_array);
+    Py_XDECREF(base_offset_array);
+
+    /* Build the output tuple */
+    PyObject *ret = Py_BuildValue("d", ret_value);
+    return ret;
+}
+
+
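+/* Note: every PyArray_FROM_OTF call above returns a new reference (or NULL on
+   failure), which is why each exit path releases the arrays with Py_XDECREF;
+   the NPY_ARRAY_INOUT_ARRAY arrays (out_contribs, base_offset) are how results
+   are handed back to the caller's numpy buffers. */
+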
+static PyObject *_cext_dense_tree_predict(PyObject *self, PyObject *args)
+{
+    PyObject *children_left_obj;
+    PyObject *children_right_obj;
+    PyObject *children_default_obj;
+    PyObject *features_obj;
+    PyObject *thresholds_obj;
+    PyObject *values_obj;
+    int max_depth;
+    int tree_limit;
+    PyObject *base_offset_obj;
+    int model_output;
+    PyObject *X_obj;
+    PyObject *X_missing_obj;
+    PyObject *y_obj;
+    PyObject *out_pred_obj;
+
+    /* Parse the input tuple */
+    if (!PyArg_ParseTuple(
+        args, "OOOOOOiiOiOOOO", &children_left_obj, &children_right_obj, &children_default_obj,
+        &features_obj, &thresholds_obj, &values_obj, &max_depth, &tree_limit, &base_offset_obj, &model_output,
+        &X_obj, &X_missing_obj, &y_obj, &out_pred_obj
+    )) return NULL;
+
+    /* Interpret the input objects as numpy arrays. */
+    PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *y_array = NULL;
+    if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *out_pred_array = (PyArrayObject*)PyArray_FROM_OTF(out_pred_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+
+    /* If that didn't work, throw an exception. Note that R and y are optional. */
+    if (children_left_array == NULL || children_right_array == NULL ||
+        children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
+        values_array == NULL || X_array == NULL ||
+        X_missing_array == NULL || out_pred_array == NULL) {
+        Py_XDECREF(children_left_array);
+        Py_XDECREF(children_right_array);
+        Py_XDECREF(children_default_array);
+        Py_XDECREF(features_array);
+        Py_XDECREF(thresholds_array);
+        Py_XDECREF(values_array);
+        Py_XDECREF(base_offset_array);
+        Py_XDECREF(X_array);
+        Py_XDECREF(X_missing_array);
+        if (y_array != NULL) Py_XDECREF(y_array);
+        //PyArray_ResolveWritebackIfCopy(out_pred_array);
+        Py_XDECREF(out_pred_array);
+        return NULL;
+    }
+
+    const unsigned num_X = PyArray_DIM(X_array, 0);
+    const unsigned M = PyArray_DIM(X_array, 1);
+    const unsigned max_nodes = PyArray_DIM(values_array, 1);
+    const unsigned num_outputs = PyArray_DIM(values_array, 2);
+
+    const unsigned num_offsets = PyArray_DIM(base_offset_array, 0);
+    if (num_offsets != num_outputs) {
+        std::cerr << "The passed base_offset array does not have the same number of outputs as the values array: " << num_offsets << " vs.
" << num_outputs << std::endl; + return NULL; + } + + // Get pointers to the data as C-types + int *children_left = (int*)PyArray_DATA(children_left_array); + int *children_right = (int*)PyArray_DATA(children_right_array); + int *children_default = (int*)PyArray_DATA(children_default_array); + int *features = (int*)PyArray_DATA(features_array); + tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array); + tfloat *values = (tfloat*)PyArray_DATA(values_array); + tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array); + tfloat *X = (tfloat*)PyArray_DATA(X_array); + bool *X_missing = (bool*)PyArray_DATA(X_missing_array); + tfloat *y = NULL; + if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array); + tfloat *out_pred = (tfloat*)PyArray_DATA(out_pred_array); + + // these are just wrapper objects for all the pointers and numbers associated with + // the ensemble tree model and the dataset we are explaining + TreeEnsemble trees = TreeEnsemble( + children_left, children_right, children_default, features, thresholds, values, + NULL, max_depth, tree_limit, base_offset, + max_nodes, num_outputs + ); + ExplanationDataset data = ExplanationDataset(X, X_missing, y, NULL, NULL, num_X, M, 0); + + dense_tree_predict(out_pred, trees, data, model_output); + + // clean up the created python objects + Py_XDECREF(children_left_array); + Py_XDECREF(children_right_array); + Py_XDECREF(children_default_array); + Py_XDECREF(features_array); + Py_XDECREF(thresholds_array); + Py_XDECREF(values_array); + Py_XDECREF(base_offset_array); + Py_XDECREF(X_array); + Py_XDECREF(X_missing_array); + if (y_array != NULL) Py_XDECREF(y_array); + //PyArray_ResolveWritebackIfCopy(out_pred_array); + Py_XDECREF(out_pred_array); + + /* Build the output tuple */ + PyObject *ret = Py_BuildValue("d", (double)values[0]); + return ret; +} + + +static PyObject *_cext_dense_tree_update_weights(PyObject *self, PyObject *args) +{ + PyObject *children_left_obj; + PyObject *children_right_obj; + PyObject *children_default_obj; + PyObject *features_obj; + PyObject *thresholds_obj; + PyObject *values_obj; + int tree_limit; + PyObject *node_sample_weight_obj; + PyObject *X_obj; + PyObject *X_missing_obj; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple( + args, "OOOOOOiOOO", &children_left_obj, &children_right_obj, &children_default_obj, + &features_obj, &thresholds_obj, &values_obj, &tree_limit, &node_sample_weight_obj, &X_obj, &X_missing_obj + )) return NULL; + + /* Interpret the input objects as numpy arrays. 
*/ + PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY); + PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *node_sample_weight_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weight_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY); + PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY); + + /* If that didn't work, throw an exception. */ + if (children_left_array == NULL || children_right_array == NULL || + children_default_array == NULL || features_array == NULL || thresholds_array == NULL || + values_array == NULL || node_sample_weight_array == NULL || X_array == NULL || + X_missing_array == NULL) { + Py_XDECREF(children_left_array); + Py_XDECREF(children_right_array); + Py_XDECREF(children_default_array); + Py_XDECREF(features_array); + Py_XDECREF(thresholds_array); + Py_XDECREF(values_array); + //PyArray_ResolveWritebackIfCopy(node_sample_weight_array); + Py_XDECREF(node_sample_weight_array); + Py_XDECREF(X_array); + Py_XDECREF(X_missing_array); + std::cerr << "Found a NULL input array in _cext_dense_tree_update_weights!\n"; + return NULL; + } + + const unsigned num_X = PyArray_DIM(X_array, 0); + const unsigned M = PyArray_DIM(X_array, 1); + const unsigned max_nodes = PyArray_DIM(values_array, 1); + + // Get pointers to the data as C-types + int *children_left = (int*)PyArray_DATA(children_left_array); + int *children_right = (int*)PyArray_DATA(children_right_array); + int *children_default = (int*)PyArray_DATA(children_default_array); + int *features = (int*)PyArray_DATA(features_array); + tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array); + tfloat *values = (tfloat*)PyArray_DATA(values_array); + tfloat *node_sample_weight = (tfloat*)PyArray_DATA(node_sample_weight_array); + tfloat *X = (tfloat*)PyArray_DATA(X_array); + bool *X_missing = (bool*)PyArray_DATA(X_missing_array); + + // these are just wrapper objects for all the pointers and numbers associated with + // the ensemble tree model and the dataset we are explaining + TreeEnsemble trees = TreeEnsemble( + children_left, children_right, children_default, features, thresholds, values, + node_sample_weight, 0, tree_limit, 0, max_nodes, 0 + ); + ExplanationDataset data = ExplanationDataset(X, X_missing, NULL, NULL, NULL, num_X, M, 0); + + dense_tree_update_weights(trees, data); + + // clean up the created python objects + Py_XDECREF(children_left_array); + Py_XDECREF(children_right_array); + Py_XDECREF(children_default_array); + Py_XDECREF(features_array); + Py_XDECREF(thresholds_array); + Py_XDECREF(values_array); + // PyArray_ResolveWritebackIfCopy(node_sample_weight_array); + Py_XDECREF(node_sample_weight_array); + Py_XDECREF(X_array); + Py_XDECREF(X_missing_array); + + /* Build the output tuple */ + PyObject *ret = Py_BuildValue("d", 1); + return ret; +} + + 
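+
+/* Saabas values are computed from a single pass down each tree: the change in
+   the node expected value at every split is credited to the split feature.
+   This is fast but only a rough approximation of the Shapley values that
+   dense_tree_shap computes. */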
+static PyObject *_cext_dense_tree_saabas(PyObject *self, PyObject *args)
+{
+    PyObject *children_left_obj;
+    PyObject *children_right_obj;
+    PyObject *children_default_obj;
+    PyObject *features_obj;
+    PyObject *thresholds_obj;
+    PyObject *values_obj;
+    int max_depth;
+    int tree_limit;
+    PyObject *base_offset_obj;
+    int model_output;
+    PyObject *X_obj;
+    PyObject *X_missing_obj;
+    PyObject *y_obj;
+    PyObject *out_pred_obj;
+
+    /* Parse the input tuple */
+    if (!PyArg_ParseTuple(
+        args, "OOOOOOiiOiOOOO", &children_left_obj, &children_right_obj, &children_default_obj,
+        &features_obj, &thresholds_obj, &values_obj, &max_depth, &tree_limit, &base_offset_obj, &model_output,
+        &X_obj, &X_missing_obj, &y_obj, &out_pred_obj
+    )) return NULL;
+
+    /* Interpret the input objects as numpy arrays. */
+    PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *y_array = NULL;
+    if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *out_pred_array = (PyArrayObject*)PyArray_FROM_OTF(out_pred_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+
+    /* If that didn't work, throw an exception. Note that y is optional here. */
+    if (children_left_array == NULL || children_right_array == NULL ||
+        children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
+        values_array == NULL || base_offset_array == NULL || X_array == NULL ||
+        X_missing_array == NULL || out_pred_array == NULL) {
+        Py_XDECREF(children_left_array);
+        Py_XDECREF(children_right_array);
+        Py_XDECREF(children_default_array);
+        Py_XDECREF(features_array);
+        Py_XDECREF(thresholds_array);
+        Py_XDECREF(values_array);
+        Py_XDECREF(base_offset_array);
+        Py_XDECREF(X_array);
+        Py_XDECREF(X_missing_array);
+        Py_XDECREF(y_array);
+        //PyArray_ResolveWritebackIfCopy(out_pred_array);
+        Py_XDECREF(out_pred_array);
+        PyErr_SetString(PyExc_ValueError, "Found a NULL input array in _cext_dense_tree_saabas!");
+        return NULL;
+    }
+
+    const unsigned num_X = PyArray_DIM(X_array, 0);
+    const unsigned M = PyArray_DIM(X_array, 1);
+    const unsigned max_nodes = PyArray_DIM(values_array, 1);
+    const unsigned num_outputs = PyArray_DIM(values_array, 2);
+
+    // Get pointers to the data as C-types
+    int *children_left = (int*)PyArray_DATA(children_left_array);
+    int *children_right = (int*)PyArray_DATA(children_right_array);
+    int *children_default = (int*)PyArray_DATA(children_default_array);
+    int *features = (int*)PyArray_DATA(features_array);
+    tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
+    tfloat *values = (tfloat*)PyArray_DATA(values_array);
+    tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
+    tfloat *X = (tfloat*)PyArray_DATA(X_array);
+    bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
+    tfloat *y = NULL;
+    if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
+    tfloat *out_pred = (tfloat*)PyArray_DATA(out_pred_array);
+
+    // these are just wrapper objects for all the pointers and numbers associated with
+    // the ensemble tree model and the dataset we are explaining
+    TreeEnsemble trees = TreeEnsemble(
+        children_left, children_right, children_default, features, thresholds, values,
+        NULL, max_depth, tree_limit, base_offset,
+        max_nodes, num_outputs
+    );
+    ExplanationDataset data = ExplanationDataset(X, X_missing, y, NULL, NULL, num_X, M, 0);
+
+    dense_tree_saabas(out_pred, trees, data);
+
+    // retrieve the return value before the numpy arrays (and the memory behind
+    // `values`) are released below
+    const double ret_value = (double)values[0];
+
+    // clean up the created python objects
+    Py_XDECREF(children_left_array);
+    Py_XDECREF(children_right_array);
+    Py_XDECREF(children_default_array);
+    Py_XDECREF(features_array);
+    Py_XDECREF(thresholds_array);
+    Py_XDECREF(values_array);
+    Py_XDECREF(base_offset_array);
+    Py_XDECREF(X_array);
+    Py_XDECREF(X_missing_array);
+    Py_XDECREF(y_array);
+    //PyArray_ResolveWritebackIfCopy(out_pred_array);
+    Py_XDECREF(out_pred_array);
+
+    /* Build the output value */
+    PyObject *ret = Py_BuildValue("d", ret_value);
+    return ret;
+}
diff --git a/lib/shap/cext/_cext_gpu.cc b/lib/shap/cext/_cext_gpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..94eec5c24862d03938af3125ce96dd5746de64c6
--- /dev/null
+++ b/lib/shap/cext/_cext_gpu.cc
@@ -0,0 +1,187 @@
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+
+#include <Python.h>
+#include <numpy/arrayobject.h>
+#include "tree_shap.h"
+#include <iostream>
+
+static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args);
+
+static PyMethodDef module_methods[] = {
+    {"dense_tree_shap", _cext_dense_tree_shap, METH_VARARGS, "C implementation of Tree SHAP for dense."},
+    {NULL, NULL, 0, NULL}
+};
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_cext_gpu",
+    "This module provides an interface for a fast Tree SHAP implementation.",
+    -1,
+    module_methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC PyInit__cext_gpu(void)
+#else
+PyMODINIT_FUNC init_cext_gpu(void)
+#endif
+{
+    #if PY_MAJOR_VERSION >= 3
+    PyObject *module = PyModule_Create(&moduledef);
+    if (!module) return NULL;
+    #else
+    PyObject *module = Py_InitModule("_cext_gpu", module_methods);
+    if (!module) return;
+    #endif
+
+    /* Load `numpy` functionality. */
+    import_array();
+
+    #if PY_MAJOR_VERSION >= 3
+    return module;
+    #endif
+}
+
+void dense_tree_shap_gpu(const TreeEnsemble& trees, const ExplanationDataset &data, tfloat *out_contribs,
+                         const int feature_dependence, unsigned model_transform, bool interactions);
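As a reading aid (editorial note, not part of the patch), the parse format string used by the wrapper below decomposes as:

/* "OOOOOOOiOOOOOiOOiib"
 *  O x 7  children_left .. node_sample_weights  (numpy arrays)
 *  i      max_depth
 *  O x 5  X, X_missing, y, R, R_missing         (y/R/R_missing may be None)
 *  i      tree_limit
 *  O      base_offset
 *  O      out_contribs                          (filled in place)
 *  i, i   feature_dependence, model_output
 *  b      interactions                          (written as an unsigned char)
 */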
+
+static PyObject *_cext_dense_tree_shap(PyObject *self, PyObject *args)
+{
+    PyObject *children_left_obj;
+    PyObject *children_right_obj;
+    PyObject *children_default_obj;
+    PyObject *features_obj;
+    PyObject *thresholds_obj;
+    PyObject *values_obj;
+    PyObject *node_sample_weights_obj;
+    int max_depth;
+    PyObject *X_obj;
+    PyObject *X_missing_obj;
+    PyObject *y_obj;
+    PyObject *R_obj;
+    PyObject *R_missing_obj;
+    int tree_limit;
+    PyObject *out_contribs_obj;
+    int feature_dependence;
+    int model_output;
+    PyObject *base_offset_obj;
+    unsigned char interactions; /* the "b" format fills an unsigned char, not a bool */
+
+    /* Parse the input tuple */
+    if (!PyArg_ParseTuple(
+        args, "OOOOOOOiOOOOOiOOiib", &children_left_obj, &children_right_obj, &children_default_obj,
+        &features_obj, &thresholds_obj, &values_obj, &node_sample_weights_obj,
+        &max_depth, &X_obj, &X_missing_obj, &y_obj, &R_obj, &R_missing_obj, &tree_limit, &base_offset_obj,
+        &out_contribs_obj, &feature_dependence, &model_output, &interactions
+    )) return NULL;
+
+    /* Interpret the input objects as numpy arrays. */
+    PyArrayObject *children_left_array = (PyArrayObject*)PyArray_FROM_OTF(children_left_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_right_array = (PyArrayObject*)PyArray_FROM_OTF(children_right_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *children_default_array = (PyArrayObject*)PyArray_FROM_OTF(children_default_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *features_array = (PyArrayObject*)PyArray_FROM_OTF(features_obj, NPY_INT, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *thresholds_array = (PyArrayObject*)PyArray_FROM_OTF(thresholds_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *values_array = (PyArrayObject*)PyArray_FROM_OTF(values_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *node_sample_weights_array = (PyArrayObject*)PyArray_FROM_OTF(node_sample_weights_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_array = (PyArrayObject*)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *X_missing_array = (PyArrayObject*)PyArray_FROM_OTF(X_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *y_array = NULL;
+    if (y_obj != Py_None) y_array = (PyArrayObject*)PyArray_FROM_OTF(y_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *R_array = NULL;
+    if (R_obj != Py_None) R_array = (PyArrayObject*)PyArray_FROM_OTF(R_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *R_missing_array = NULL;
+    if (R_missing_obj != Py_None) R_missing_array = (PyArrayObject*)PyArray_FROM_OTF(R_missing_obj, NPY_BOOL, NPY_ARRAY_IN_ARRAY);
+    PyArrayObject *out_contribs_array = (PyArrayObject*)PyArray_FROM_OTF(out_contribs_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+    PyArrayObject *base_offset_array = (PyArrayObject*)PyArray_FROM_OTF(base_offset_obj, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
+
+    /* If that didn't work, throw an exception. Note that y, R and R_missing are optional. */
+    if (children_left_array == NULL || children_right_array == NULL ||
+        children_default_array == NULL || features_array == NULL || thresholds_array == NULL ||
+        values_array == NULL || node_sample_weights_array == NULL || X_array == NULL ||
+        X_missing_array == NULL || out_contribs_array == NULL || base_offset_array == NULL) {
+        Py_XDECREF(children_left_array);
+        Py_XDECREF(children_right_array);
+        Py_XDECREF(children_default_array);
+        Py_XDECREF(features_array);
+        Py_XDECREF(thresholds_array);
+        Py_XDECREF(values_array);
+        Py_XDECREF(node_sample_weights_array);
+        Py_XDECREF(X_array);
+        Py_XDECREF(X_missing_array);
+        Py_XDECREF(y_array);
+        Py_XDECREF(R_array);
+        Py_XDECREF(R_missing_array);
+        //PyArray_ResolveWritebackIfCopy(out_contribs_array);
+        Py_XDECREF(out_contribs_array);
+        Py_XDECREF(base_offset_array);
+        PyErr_SetString(PyExc_ValueError, "Found a NULL input array in _cext_dense_tree_shap!");
+        return NULL;
+    }
+
+    const unsigned num_X = PyArray_DIM(X_array, 0);
+    const unsigned M = PyArray_DIM(X_array, 1);
+    const unsigned max_nodes = PyArray_DIM(values_array, 1);
+    const unsigned num_outputs = PyArray_DIM(values_array, 2);
+    unsigned num_R = 0;
+    if (R_array != NULL) num_R = PyArray_DIM(R_array, 0);
+
+    // Get pointers to the data as C-types
+    int *children_left = (int*)PyArray_DATA(children_left_array);
+    int *children_right = (int*)PyArray_DATA(children_right_array);
+    int *children_default = (int*)PyArray_DATA(children_default_array);
+    int *features = (int*)PyArray_DATA(features_array);
+    tfloat *thresholds = (tfloat*)PyArray_DATA(thresholds_array);
+    tfloat *values = (tfloat*)PyArray_DATA(values_array);
+    tfloat *node_sample_weights = (tfloat*)PyArray_DATA(node_sample_weights_array);
+    tfloat *X = (tfloat*)PyArray_DATA(X_array);
+    bool *X_missing = (bool*)PyArray_DATA(X_missing_array);
+    tfloat *y = NULL;
+    if (y_array != NULL) y = (tfloat*)PyArray_DATA(y_array);
+    tfloat *R = NULL;
+    if (R_array != NULL) R = (tfloat*)PyArray_DATA(R_array);
+    bool *R_missing = NULL;
+    if (R_missing_array != NULL) R_missing = (bool*)PyArray_DATA(R_missing_array);
+    tfloat *out_contribs = (tfloat*)PyArray_DATA(out_contribs_array);
+    tfloat *base_offset = (tfloat*)PyArray_DATA(base_offset_array);
+
+    // these are just wrapper objects for all the pointers and numbers associated with
+    // the ensemble tree model and the dataset we are explaining
+    TreeEnsemble trees = TreeEnsemble(
+        children_left, children_right, children_default, features, thresholds, values,
+        node_sample_weights, max_depth, tree_limit, base_offset,
+        max_nodes, num_outputs
+    );
+    ExplanationDataset data = ExplanationDataset(X, X_missing, y, R, R_missing, num_X, M, num_R);
+
+    dense_tree_shap_gpu(trees, data, out_contribs, feature_dependence, model_output, interactions);
+
+    // retrieve the return value before the python cleanup of objects
+    const double ret_value = (double)values[0];
+
+    // clean up the created python objects
+    Py_XDECREF(children_left_array);
+    Py_XDECREF(children_right_array);
+    Py_XDECREF(children_default_array);
+    Py_XDECREF(features_array);
+    Py_XDECREF(thresholds_array);
+    Py_XDECREF(values_array);
+    Py_XDECREF(node_sample_weights_array);
+    Py_XDECREF(X_array);
+    Py_XDECREF(X_missing_array);
+    Py_XDECREF(y_array);
+    Py_XDECREF(R_array);
+    Py_XDECREF(R_missing_array);
+    //PyArray_ResolveWritebackIfCopy(out_contribs_array);
+    Py_XDECREF(out_contribs_array);
+    Py_XDECREF(base_offset_array);
+
+    /* Build the output value */
+    PyObject *ret = Py_BuildValue("d", ret_value);
+    return ret;
+}
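The commented-out PyArray_ResolveWritebackIfCopy calls above matter when NumPy hands back a temporary copy for an NPY_ARRAY_INOUT_ARRAY request: writes land in the copy and are lost unless flushed back. A conservative cleanup sketch (editorial; assumes NumPy >= 1.14, where the call exists and is a no-op when no copy was made):

/* flush any write-back copy to the caller's array, then drop the reference */
if (out_contribs_array != NULL) {
    PyArray_ResolveWritebackIfCopy(out_contribs_array);
    Py_DECREF(out_contribs_array);
}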
diff --git a/lib/shap/cext/_cext_gpu.cu b/lib/shap/cext/_cext_gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bdfe49f269b92a92912cafe34c5a667a061279a8
--- /dev/null
+++ b/lib/shap/cext/_cext_gpu.cu
@@ -0,0 +1,353 @@
+#include <iostream>
+
+#include "gpu_treeshap.h"
+#include "tree_shap.h"
+
+const float inf = std::numeric_limits<tfloat>::infinity();
+
+struct ShapSplitCondition {
+  ShapSplitCondition() = default;
+  ShapSplitCondition(tfloat feature_lower_bound, tfloat feature_upper_bound,
+                     bool is_missing_branch)
+      : feature_lower_bound(feature_lower_bound),
+        feature_upper_bound(feature_upper_bound),
+        is_missing_branch(is_missing_branch) {
+    assert(feature_lower_bound <= feature_upper_bound);
+  }
+
+  /*! Feature values > lower and <= upper flow down this path. */
+  tfloat feature_lower_bound;
+  tfloat feature_upper_bound;
+  /*! Do missing values flow down this path? */
+  bool is_missing_branch;
+
+  // Does this instance flow down this path?
+  __host__ __device__ bool EvaluateSplit(float x) const {
+    // is nan
+    if (isnan(x)) {
+      return is_missing_branch;
+    }
+    return x > feature_lower_bound && x <= feature_upper_bound;
+  }
+
+  // Combine two split conditions on the same feature
+  __host__ __device__ void
+  Merge(const ShapSplitCondition &other) {  // Combine duplicate features
+    feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
+    feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
+    is_missing_branch = is_missing_branch && other.is_missing_branch;
+  }
+};
+
+// Inspired by: https://en.cppreference.com/w/cpp/iterator/size
+// Limited implementation of std::size for arrays
+template <class T, std::size_t N>
+constexpr size_t array_size(const T (&array)[N]) noexcept
+{
+  return N;
+}
+
+void RecurseTree(
+    unsigned pos, const TreeEnsemble &tree,
+    std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *tmp_path,
+    std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> *paths,
+    size_t *path_idx, int num_outputs) {
+  if (tree.is_leaf(pos)) {
+    for (auto j = 0ull; j < num_outputs; j++) {
+      auto v = tree.values[pos * num_outputs + j];
+      if (v == 0.0) {
+        // The tree has no output for this class, don't bother adding the path
+        continue;
+      }
+      // Go back over path, setting v, path_idx
+      for (auto &e : *tmp_path) {
+        e.v = v;
+        e.group = j;
+        e.path_idx = *path_idx;
+      }
+
+      paths->insert(paths->end(), tmp_path->begin(), tmp_path->end());
+      // Increment path index
+      (*path_idx)++;
+    }
+    return;
+  }
+
+  // Add the left split to the path
+  unsigned left_child = tree.children_left[pos];
+  double left_zero_fraction =
+      tree.node_sample_weights[left_child] / tree.node_sample_weights[pos];
+  // Encode the range of feature values that flow down this path
+  tmp_path->emplace_back(0, tree.features[pos], 0,
+                         ShapSplitCondition{-inf, tree.thresholds[pos], false},
+                         left_zero_fraction, 0.0f);
+
+  RecurseTree(left_child, tree, tmp_path, paths, path_idx, num_outputs);
+
+  // Replace it with the right split and recurse down that branch
+  tmp_path->back() = gpu_treeshap::PathElement<ShapSplitCondition>(
+      0, tree.features[pos], 0,
+      ShapSplitCondition{tree.thresholds[pos], inf, false},
+      1.0 - left_zero_fraction, 0.0f);
+
+  RecurseTree(tree.children_right[pos], tree, tmp_path, paths, path_idx,
+              num_outputs);
+
+  tmp_path->pop_back();
+}
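To make the recursion concrete, a hand-worked editorial sketch of what ExtractPaths below yields for a single-output stump that splits on feature 2 at threshold 0.5, with 70% of the training weight going left (leaf values 1.5 and -0.3 are assumed for illustration):

// Left leaf (value 1.5):   unique path_idx 0
//   {feature -1, bounds (-inf, inf),  zero_fraction 1.0, v 1.5}   <- root element
//   {feature  2, bounds (-inf, 0.5],  zero_fraction 0.7, v 1.5}
// Right leaf (value -0.3): unique path_idx 1
//   {feature -1, bounds (-inf, inf),  zero_fraction 1.0, v -0.3}
//   {feature  2, bounds (0.5, inf),   zero_fraction 0.3, v -0.3}
// Every element of a unique path carries the leaf value v; zero_fraction is
// the probability of taking that branch when the feature is not in the
// active set.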
+
+std::vector<gpu_treeshap::PathElement<ShapSplitCondition>>
+ExtractPaths(const TreeEnsemble &trees) {
+  std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> paths;
+  size_t path_idx = 0;
+  for (auto i = 0; i < trees.tree_limit; i++) {
+    TreeEnsemble tree;
+    trees.get_tree(tree, i);
+    std::vector<gpu_treeshap::PathElement<ShapSplitCondition>> tmp_path;
+    tmp_path.reserve(tree.max_depth);
+    tmp_path.emplace_back(0, -1, 0, ShapSplitCondition{-inf, inf, false}, 1.0,
+                          0.0f);
+    RecurseTree(0, tree, &tmp_path, &paths, &path_idx, tree.num_outputs);
+  }
+  return paths;
+}
+
+class DeviceExplanationDataset {
+  thrust::device_vector<tfloat> data;
+  thrust::device_vector<bool> missing;
+  size_t num_features;
+  size_t num_rows;
+
+ public:
+  DeviceExplanationDataset(const ExplanationDataset &host_data,
+                           bool background_dataset = false) {
+    num_features = host_data.M;
+    if (background_dataset) {
+      num_rows = host_data.num_R;
+      data = thrust::device_vector<tfloat>(
+          host_data.R, host_data.R + host_data.num_R * host_data.M);
+      missing = thrust::device_vector<bool>(host_data.R_missing,
+                                            host_data.R_missing +
+                                                host_data.num_R * host_data.M);
+    } else {
+      num_rows = host_data.num_X;
+      data = thrust::device_vector<tfloat>(
+          host_data.X, host_data.X + host_data.num_X * host_data.M);
+      missing = thrust::device_vector<bool>(host_data.X_missing,
+                                            host_data.X_missing +
+                                                host_data.num_X * host_data.M);
+    }
+  }
+
+  class DenseDatasetWrapper {
+    const tfloat *data;
+    const bool *missing;
+    int num_rows;
+    int num_cols;
+
+   public:
+    DenseDatasetWrapper() = default;
+    DenseDatasetWrapper(const tfloat *data, const bool *missing, int num_rows,
+                        int num_cols)
+        : data(data), missing(missing), num_rows(num_rows), num_cols(num_cols) {
+    }
+    __device__ tfloat GetElement(size_t row_idx, size_t col_idx) const {
+      auto idx = row_idx * num_cols + col_idx;
+      if (missing[idx]) {
+        return std::numeric_limits<tfloat>::quiet_NaN();
+      }
+      return data[idx];
+    }
+    __host__ __device__ size_t NumRows() const { return num_rows; }
+    __host__ __device__ size_t NumCols() const { return num_cols; }
+  };
+
+  DenseDatasetWrapper GetDeviceAccessor() {
+    return DenseDatasetWrapper(data.data().get(), missing.data().get(),
+                               num_rows, num_features);
+  }
+};
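DeviceExplanationDataset adapts the host arrays to the access contract gpu_treeshap expects: a trivially copyable object exposing NumRows()/NumCols()/GetElement() on the device, with NaN signalling a missing value. A minimal alternative wrapper (editorial sketch; assumes row-major device data with NaNs already written in place) would satisfy the same contract:

struct RawDeviceDataset {
  const tfloat *data;  // device pointer, row-major, NaN = missing
  size_t rows, cols;
  __device__ tfloat GetElement(size_t r, size_t c) const {
    return data[r * cols + c];
  }
  __host__ __device__ size_t NumRows() const { return rows; }
  __host__ __device__ size_t NumCols() const { return cols; }
};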
+
+inline void dense_tree_path_dependent_gpu(
+    const TreeEnsemble &trees, const ExplanationDataset &data,
+    tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
+  auto paths = ExtractPaths(trees);
+  DeviceExplanationDataset device_data(data);
+  DeviceExplanationDataset::DenseDatasetWrapper X =
+      device_data.GetDeviceAccessor();
+
+  thrust::device_vector<double> phis((X.NumCols() + 1) * X.NumRows() *
+                                     trees.num_outputs);
+  gpu_treeshap::GPUTreeShap(X, paths.begin(), paths.end(), trees.num_outputs,
+                            phis.begin(), phis.end());
+  // Add the base offset term to bias
+  thrust::device_vector<tfloat> base_offset(
+      trees.base_offset, trees.base_offset + trees.num_outputs);
+  auto counting = thrust::make_counting_iterator(size_t(0));
+  auto d_phis = phis.data().get();
+  auto d_base_offset = base_offset.data().get();
+  size_t num_groups = trees.num_outputs;
+  thrust::for_each(counting, counting + X.NumRows() * trees.num_outputs,
+                   [=] __device__(size_t idx) {
+                     size_t row_idx = idx / num_groups;
+                     size_t group = idx % num_groups;
+                     auto phi_idx = gpu_treeshap::IndexPhi(
+                         row_idx, num_groups, group, X.NumCols(), X.NumCols());
+                     d_phis[phi_idx] += d_base_offset[group];
+                   });
+
+  // Shap uses a slightly different layout for multiclass
+  thrust::device_vector<double> transposed_phis(phis.size());
+  auto d_transposed_phis = transposed_phis.data();
+  thrust::for_each(
+      counting, counting + phis.size(), [=] __device__(size_t idx) {
+        size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
+        size_t old_idx[array_size(old_shape)];
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
+        // Define new tensor format, switch num_groups axis to end
+        size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
+        size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
+        size_t transposed_idx =
+            gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
+        d_transposed_phis[transposed_idx] = d_phis[idx];
+      });
+  thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
+}
+
+inline void
+dense_tree_independent_gpu(const TreeEnsemble &trees,
+                           const ExplanationDataset &data, tfloat *out_contribs,
+                           tfloat transform(const tfloat, const tfloat)) {
+  auto paths = ExtractPaths(trees);
+  DeviceExplanationDataset device_data(data);
+  DeviceExplanationDataset::DenseDatasetWrapper X =
+      device_data.GetDeviceAccessor();
+  DeviceExplanationDataset background_device_data(data, true);
+  DeviceExplanationDataset::DenseDatasetWrapper R =
+      background_device_data.GetDeviceAccessor();
+
+  thrust::device_vector<double> phis((X.NumCols() + 1) * X.NumRows() *
+                                     trees.num_outputs);
+  gpu_treeshap::GPUTreeShapInterventional(X, R, paths.begin(), paths.end(),
+                                          trees.num_outputs, phis.begin(),
+                                          phis.end());
+  // Add the base offset term to bias
+  thrust::device_vector<tfloat> base_offset(
+      trees.base_offset, trees.base_offset + trees.num_outputs);
+  auto counting = thrust::make_counting_iterator(size_t(0));
+  auto d_phis = phis.data().get();
+  auto d_base_offset = base_offset.data().get();
+  size_t num_groups = trees.num_outputs;
+  thrust::for_each(counting, counting + X.NumRows() * trees.num_outputs,
+                   [=] __device__(size_t idx) {
+                     size_t row_idx = idx / num_groups;
+                     size_t group = idx % num_groups;
+                     auto phi_idx = gpu_treeshap::IndexPhi(
+                         row_idx, num_groups, group, X.NumCols(), X.NumCols());
+                     d_phis[phi_idx] += d_base_offset[group];
+                   });
+
+  // Shap uses a slightly different layout for multiclass
+  thrust::device_vector<double> transposed_phis(phis.size());
+  auto d_transposed_phis = transposed_phis.data();
+  thrust::for_each(
+      counting, counting + phis.size(), [=] __device__(size_t idx) {
+        size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1)};
+        size_t old_idx[array_size(old_shape)];
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
+        // Define new tensor format, switch num_groups axis to end
+        size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), num_groups};
+        size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[1]};
+        size_t transposed_idx =
+            gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
+        d_transposed_phis[transposed_idx] = d_phis[idx];
+      });
+  thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
+}
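The transpose step in both functions above converts gpu_treeshap's (row, group, feature) output layout into the (row, feature, group) layout shap expects. A worked editorial example with 2 rows, 3 outputs and 2 features (so NumCols()+1 = 3 slots per row, including the bias column):

// IndexPhi(row=1, num_groups=3, group=2, num_columns=2, column_idx=0)
//   = (1*3 + 2) * (2+1) + 0 = 15        // position in the kernel output
// After the transpose, the same value lands at
// TensorIdxToFlatIdx({2,3,3}, {1,0,2}) = 1*9 + 0*3 + 2 = 11
//   i.e. phis[row=1][feature=0][group=2] in the layout shap consumes.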
+
+inline void dense_tree_path_dependent_interactions_gpu(
+    const TreeEnsemble &trees, const ExplanationDataset &data,
+    tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
+  auto paths = ExtractPaths(trees);
+  DeviceExplanationDataset device_data(data);
+  DeviceExplanationDataset::DenseDatasetWrapper X =
+      device_data.GetDeviceAccessor();
+
+  thrust::device_vector<double> phis((X.NumCols() + 1) * (X.NumCols() + 1) *
+                                     X.NumRows() * trees.num_outputs);
+  gpu_treeshap::GPUTreeShapInteractions(X, paths.begin(), paths.end(),
+                                        trees.num_outputs, phis.begin(),
+                                        phis.end());
+  // Add the base offset term to bias
+  thrust::device_vector<tfloat> base_offset(
+      trees.base_offset, trees.base_offset + trees.num_outputs);
+  auto counting = thrust::make_counting_iterator(size_t(0));
+  auto d_phis = phis.data().get();
+  auto d_base_offset = base_offset.data().get();
+  size_t num_groups = trees.num_outputs;
+  thrust::for_each(counting, counting + X.NumRows() * num_groups,
+                   [=] __device__(size_t idx) {
+                     size_t row_idx = idx / num_groups;
+                     size_t group = idx % num_groups;
+                     auto phi_idx = gpu_treeshap::IndexPhiInteractions(
+                         row_idx, num_groups, group, X.NumCols(), X.NumCols(),
+                         X.NumCols());
+                     d_phis[phi_idx] += d_base_offset[group];
+                   });
+  // Shap uses a slightly different layout for multiclass
+  thrust::device_vector<double> transposed_phis(phis.size());
+  auto d_transposed_phis = transposed_phis.data();
+  thrust::for_each(
+      counting, counting + phis.size(), [=] __device__(size_t idx) {
+        size_t old_shape[] = {X.NumRows(), num_groups, (X.NumCols() + 1),
+                              (X.NumCols() + 1)};
+        size_t old_idx[array_size(old_shape)];
+        gpu_treeshap::FlatIdxToTensorIdx(idx, old_shape, old_idx);
+        // Define new tensor format, switch num_groups axis to end
+        size_t new_shape[] = {X.NumRows(), (X.NumCols() + 1), (X.NumCols() + 1),
+                              num_groups};
+        size_t new_idx[] = {old_idx[0], old_idx[2], old_idx[3], old_idx[1]};
+        size_t transposed_idx =
+            gpu_treeshap::TensorIdxToFlatIdx(new_shape, new_idx);
+        d_transposed_phis[transposed_idx] = d_phis[idx];
+      });
+  thrust::copy(transposed_phis.begin(), transposed_phis.end(), out_contribs);
+}
+
+void dense_tree_shap_gpu(const TreeEnsemble &trees,
+                         const ExplanationDataset &data, tfloat *out_contribs,
+                         const int feature_dependence, unsigned model_transform,
+                         bool interactions) {
+  // see what transform (if any) we have
+  transform_f transform = get_transform(model_transform);
+
+  // dispatch to the correct algorithm handler
+  switch (feature_dependence) {
+    case FEATURE_DEPENDENCE::independent:
+      if (interactions) {
+        std::cerr << "FEATURE_DEPENDENCE::independent with interactions not yet "
+                     "supported\n";
+      } else {
+        dense_tree_independent_gpu(trees, data, out_contribs, transform);
+      }
+      return;
+
+    case FEATURE_DEPENDENCE::tree_path_dependent:
+      if (interactions) {
+        dense_tree_path_dependent_interactions_gpu(trees, data, out_contribs,
+                                                   transform);
+      } else {
+        dense_tree_path_dependent_gpu(trees, data, out_contribs, transform);
+      }
+      return;
+
+    case FEATURE_DEPENDENCE::global_path_dependent:
+      std::cerr << "FEATURE_DEPENDENCE::global_path_dependent not supported\n";
+      return;
+    default:
+      std::cerr << "Unknown feature dependence option\n";
+      return;
+  }
+}
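For orientation, a minimal host-side sketch of how this entry point is driven. The buffer names are placeholders, and the MODEL_TRANSFORM::identity constant is assumed to come from tree_shap.h alongside FEATURE_DEPENDENCE; none of this is part of the patch:

// after filling the model arrays and the dataset buffers:
TreeEnsemble trees(children_left, children_right, children_default, features,
                   thresholds, values, node_sample_weights, max_depth,
                   tree_limit, base_offset, max_nodes, num_outputs);
ExplanationDataset data(X, X_missing, /*y=*/NULL, /*R=*/NULL, /*R_missing=*/NULL,
                        num_X, M, /*num_R=*/0);
std::vector<tfloat> contribs(num_X * (M + 1) * num_outputs, 0);
dense_tree_shap_gpu(trees, data, contribs.data(),
                    FEATURE_DEPENDENCE::tree_path_dependent,
                    MODEL_TRANSFORM::identity, /*interactions=*/false);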
diff --git a/lib/shap/cext/gpu_treeshap.h b/lib/shap/cext/gpu_treeshap.h
new file mode 100644
index 0000000000000000000000000000000000000000..1666f153f0380762bb2bc84dc8eff59e5a8fae2b
--- /dev/null
+++ b/lib/shap/cext/gpu_treeshap.h
@@ -0,0 +1,1535 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <thrust/copy.h>
+#include <thrust/device_allocator.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/discard_iterator.h>
+#if (CUDART_VERSION >= 11000)
+#include <cub/cub.cuh>
+#else
+// Hack to get cub device reduce on older toolkits
+#include <thrust/system/cuda/detail/cub/device/device_reduce.cuh>
+using namespace thrust::cuda_cub;
+#endif
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace gpu_treeshap {
+
+struct XgboostSplitCondition {
+  XgboostSplitCondition() = default;
+  XgboostSplitCondition(float feature_lower_bound, float feature_upper_bound,
+                        bool is_missing_branch)
+      : feature_lower_bound(feature_lower_bound),
+        feature_upper_bound(feature_upper_bound),
+        is_missing_branch(is_missing_branch) {
+    assert(feature_lower_bound <= feature_upper_bound);
+  }
+
+  /*! Feature values >= lower and < upper flow down this path. */
+  float feature_lower_bound;
+  float feature_upper_bound;
+  /*! Do missing values flow down this path? */
+  bool is_missing_branch;
+
+  // Does this instance flow down this path?
+  __host__ __device__ bool EvaluateSplit(float x) const {
+    // is nan
+    if (isnan(x)) {
+      return is_missing_branch;
+    }
+    return x >= feature_lower_bound && x < feature_upper_bound;
+  }
+
+  // Combine two split conditions on the same feature
+  __host__ __device__ void Merge(
+      const XgboostSplitCondition& other) {  // Combine duplicate features
+    feature_lower_bound = max(feature_lower_bound, other.feature_lower_bound);
+    feature_upper_bound = min(feature_upper_bound, other.feature_upper_bound);
+    is_missing_branch = is_missing_branch && other.is_missing_branch;
+  }
+};
+
+/*!
+ * An element of a unique path through a decision tree. Can implement various
+ * types of splits via the templated SplitConditionT. Some decision tree
+ * implementations may wish to use double precision or single precision, some
+ * may use < or <= as the threshold, missing values can be handled differently,
+ * categoricals may be supported.
+ *
+ * \tparam SplitConditionT A split condition implementing the methods
+ * EvaluateSplit and Merge.
+ */
+template <typename SplitConditionT>
+struct PathElement {
+  using split_type = SplitConditionT;
+  __host__ __device__ PathElement(size_t path_idx, int64_t feature_idx,
+                                  int group, SplitConditionT split_condition,
+                                  double zero_fraction, float v)
+      : path_idx(path_idx),
+        feature_idx(feature_idx),
+        group(group),
+        split_condition(split_condition),
+        zero_fraction(zero_fraction),
+        v(v) {}
+
+  PathElement() = default;
+  __host__ __device__ bool IsRoot() const { return feature_idx == -1; }
+
+  template <typename DatasetT>
+  __host__ __device__ bool EvaluateSplit(DatasetT X, size_t row_idx) const {
+    if (this->IsRoot()) {
+      return true;
+    }
+    return split_condition.EvaluateSplit(X.GetElement(row_idx, feature_idx));
+  }
+
+  /*! Unique path index. */
+  size_t path_idx;
+  /*! Feature of this split, -1 indicates bias term. */
+  int64_t feature_idx;
+  /*! Indicates class for multiclass problems. */
+  int group;
+  SplitConditionT split_condition;
+  /*! Probability of following this path when feature_idx is not in the active
+   * set.
*/ + double zero_fraction; + float v; // Leaf weight at the end of the path +}; + +// Helper function that accepts an index into a flat contiguous array and the +// dimensions of a tensor and returns the indices with respect to the tensor +template +__device__ void FlatIdxToTensorIdx(T flat_idx, const T (&shape)[N], + T (&out_idx)[N]) { + T current_size = shape[0]; + for (auto i = 1ull; i < N; i++) { + current_size *= shape[i]; + } + for (auto i = 0ull; i < N; i++) { + current_size /= shape[i]; + out_idx[i] = flat_idx / current_size; + flat_idx -= current_size * out_idx[i]; + } +} + +// Given a shape and coordinates into a tensor, return the index into the +// backing storage one-dimensional array +template +__device__ T TensorIdxToFlatIdx(const T (&shape)[N], const T (&tensor_idx)[N]) { + T current_size = shape[0]; + for (auto i = 1ull; i < N; i++) { + current_size *= shape[i]; + } + T idx = 0; + for (auto i = 0ull; i < N; i++) { + current_size /= shape[i]; + idx += tensor_idx[i] * current_size; + } + return idx; +} + +// Maps values to the phi array according to row, group and column +__host__ __device__ inline size_t IndexPhi(size_t row_idx, size_t num_groups, + size_t group, size_t num_columns, + size_t column_idx) { + return (row_idx * num_groups + group) * (num_columns + 1) + column_idx; +} + +__host__ __device__ inline size_t IndexPhiInteractions(size_t row_idx, + size_t num_groups, + size_t group, + size_t num_columns, + size_t i, size_t j) { + size_t matrix_size = (num_columns + 1) * (num_columns + 1); + size_t matrix_offset = (row_idx * num_groups + group) * matrix_size; + return matrix_offset + i * (num_columns + 1) + j; +} + +namespace detail { + +// Shorthand for creating a device vector with an appropriate allocator type +template +using RebindVector = + thrust::device_vector::other>; + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) +__device__ __forceinline__ double atomicAddDouble(double* address, double val) { + return atomicAdd(address, val); +} +#else // In device code and CUDA < 600 +__device__ __forceinline__ double atomicAddDouble(double* address, + double val) { // NOLINT + unsigned long long int* address_as_ull = // NOLINT + (unsigned long long int*)address; // NOLINT + unsigned long long int old = *address_as_ull, assumed; // NOLINT + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != + // NaN) + } while (assumed != old); + + return __longlong_as_double(old); +} +#endif + +__forceinline__ __device__ unsigned int lanemask32_lt() { + unsigned int lanemask32_lt; + asm volatile("mov.u32 %0, %%lanemask_lt;" : "=r"(lanemask32_lt)); + return (lanemask32_lt); +} + +// Like a coalesced group, except we can make the assumption that all threads in +// a group are next to each other. This makes shuffle operations much cheaper. 
+class ContiguousGroup { + public: + __device__ ContiguousGroup(uint32_t mask) : mask_(mask) {} + + __device__ uint32_t size() const { return __popc(mask_); } + __device__ uint32_t thread_rank() const { + return __popc(mask_ & lanemask32_lt()); + } + template + __device__ T shfl(T val, uint32_t src) const { + return __shfl_sync(mask_, val, src + __ffs(mask_) - 1); + } + template + __device__ T shfl_up(T val, uint32_t delta) const { + return __shfl_up_sync(mask_, val, delta); + } + __device__ uint32_t ballot(int predicate) const { + return __ballot_sync(mask_, predicate) >> (__ffs(mask_) - 1); + } + + template + __device__ T reduce(T val, OpT op) { + for (int i = 1; i < this->size(); i *= 2) { + T shfl = shfl_up(val, i); + if (static_cast(thread_rank()) - i >= 0) { + val = op(val, shfl); + } + } + return shfl(val, size() - 1); + } + uint32_t mask_; +}; + +// Separate the active threads by labels +// This functionality is available in cuda 11.0 on cc >=7.0 +// We reimplement for backwards compatibility +// Assumes partitions are contiguous +inline __device__ ContiguousGroup active_labeled_partition(uint32_t mask, + int label) { +#if __CUDA_ARCH__ >= 700 + uint32_t subgroup_mask = __match_any_sync(mask, label); +#else + uint32_t subgroup_mask = 0; + for (int i = 0; i < 32;) { + int current_label = __shfl_sync(mask, label, i); + uint32_t ballot = __ballot_sync(mask, label == current_label); + if (label == current_label) { + subgroup_mask = ballot; + } + uint32_t completed_mask = + (1 << (32 - __clz(ballot))) - 1; // Threads that have finished + // Find the start of the next group, mask off completed threads from active + // threads Then use ffs - 1 to find the position of the next group + int next_i = __ffs(mask & ~completed_mask) - 1; + if (next_i == -1) break; // -1 indicates all finished + assert(next_i > i); // Prevent infinite loops when the constraints not met + i = next_i; + } +#endif + return ContiguousGroup(subgroup_mask); +} + +// Group of threads where each thread holds a path element +class GroupPath { + protected: + const ContiguousGroup& g_; + // These are combined so we can communicate them in a single 64 bit shuffle + // instruction + float zero_one_fraction_[2]; + float pweight_; + int unique_depth_; + + public: + __device__ GroupPath(const ContiguousGroup& g, float zero_fraction, + float one_fraction) + : g_(g), + zero_one_fraction_{zero_fraction, one_fraction}, + pweight_(g.thread_rank() == 0 ? 
1.0f : 0.0f), + unique_depth_(0) {} + + // Cooperatively extend the path with a group of threads + // Each thread maintains pweight for its path element in register + __device__ void Extend() { + unique_depth_++; + + // Broadcast the zero and one fraction from the newly added path element + // Combine 2 shuffle operations into 64 bit word + const size_t rank = g_.thread_rank(); + const float inv_unique_depth = + __fdividef(1.0f, static_cast(unique_depth_ + 1)); + uint64_t res = g_.shfl(*reinterpret_cast(&zero_one_fraction_), + unique_depth_); + const float new_zero_fraction = reinterpret_cast(&res)[0]; + const float new_one_fraction = reinterpret_cast(&res)[1]; + float left_pweight = g_.shfl_up(pweight_, 1); + + // pweight of threads with rank < unique_depth_ is 0 + // We use max(x,0) to avoid using a branch + // pweight_ *= + // new_zero_fraction * max(unique_depth_ - rank, 0llu) * inv_unique_depth; + pweight_ = __fmul_rn( + __fmul_rn(pweight_, new_zero_fraction), + __fmul_rn(max(unique_depth_ - rank, size_t(0)), inv_unique_depth)); + + // pweight_ += new_one_fraction * left_pweight * rank * inv_unique_depth; + pweight_ = __fmaf_rn(__fmul_rn(new_one_fraction, left_pweight), + __fmul_rn(rank, inv_unique_depth), pweight_); + } + + // Each thread unwinds the path for its feature and returns the sum + __device__ float UnwoundPathSum() { + float next_one_portion = g_.shfl(pweight_, unique_depth_); + float total = 0.0f; + const float zero_frac_div_unique_depth = __fdividef( + zero_one_fraction_[0], static_cast(unique_depth_ + 1)); + for (int i = unique_depth_ - 1; i >= 0; i--) { + float ith_pweight = g_.shfl(pweight_, i); + float precomputed = + __fmul_rn((unique_depth_ - i), zero_frac_div_unique_depth); + const float tmp = + __fdividef(__fmul_rn(next_one_portion, unique_depth_ + 1), i + 1); + total = __fmaf_rn(tmp, zero_one_fraction_[1], total); + next_one_portion = __fmaf_rn(-tmp, precomputed, ith_pweight); + float numerator = + __fmul_rn(__fsub_rn(1.0f, zero_one_fraction_[1]), ith_pweight); + if (precomputed > 0.0f) { + total += __fdividef(numerator, precomputed); + } + } + + return total; + } +}; + +// Has different permutation weightings to the above +// Used in Taylor Shapley interaction index +class TaylorGroupPath : GroupPath { + public: + __device__ TaylorGroupPath(const ContiguousGroup& g, float zero_fraction, + float one_fraction) + : GroupPath(g, zero_fraction, one_fraction) {} + + // Extend the path is normal, all reweighting can happen in UnwoundPathSum + __device__ void Extend() { GroupPath::Extend(); } + + // Each thread unwinds the path for its feature and returns the sum + // We use a different permutation weighting for Taylor interactions + // As if the total number of features was one larger + __device__ float UnwoundPathSum() { + float one_fraction = zero_one_fraction_[1]; + float zero_fraction = zero_one_fraction_[0]; + float next_one_portion = g_.shfl(pweight_, unique_depth_) / + static_cast(unique_depth_ + 2); + + float total = 0.0f; + for (int i = unique_depth_ - 1; i >= 0; i--) { + float ith_pweight = + g_.shfl(pweight_, i) * (static_cast(unique_depth_ - i + 1) / + static_cast(unique_depth_ + 2)); + if (one_fraction > 0.0f) { + const float tmp = + next_one_portion * (unique_depth_ + 2) / ((i + 1) * one_fraction); + + total += tmp; + next_one_portion = + ith_pweight - tmp * zero_fraction * + ((unique_depth_ - i + 1) / + static_cast(unique_depth_ + 2)); + } else if (zero_fraction > 0.0f) { + total += + (ith_pweight / zero_fraction) / + ((unique_depth_ - i + 1) / 
static_cast(unique_depth_ + 2)); + } + } + + return 2 * total; + } +}; + +template +__device__ float ComputePhi(const PathElement& e, + size_t row_idx, const DatasetT& X, + const ContiguousGroup& group, float zero_fraction) { + float one_fraction = + e.EvaluateSplit(X, row_idx); + GroupPath path(group, zero_fraction, one_fraction); + size_t unique_path_length = group.size(); + + // Extend the path + for (auto unique_depth = 1ull; unique_depth < unique_path_length; + unique_depth++) { + path.Extend(); + } + + float sum = path.UnwoundPathSum(); + return sum * (one_fraction - zero_fraction) * e.v; +} + +inline __host__ __device__ size_t DivRoundUp(size_t a, size_t b) { + return (a + b - 1) / b; +} + +template +void __device__ +ConfigureThread(const DatasetT& X, const size_t bins_per_row, + const PathElement* path_elements, + const size_t* bin_segments, size_t* start_row, size_t* end_row, + PathElement* e, bool* thread_active) { + // Partition work + // Each warp processes a set of training instances applied to a path + size_t tid = kBlockSize * blockIdx.x + threadIdx.x; + const size_t warp_size = 32; + size_t warp_rank = tid / warp_size; + if (warp_rank >= bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp)) { + *thread_active = false; + return; + } + size_t bin_idx = warp_rank % bins_per_row; + size_t bank = warp_rank / bins_per_row; + size_t path_start = bin_segments[bin_idx]; + size_t path_end = bin_segments[bin_idx + 1]; + uint32_t thread_rank = threadIdx.x % warp_size; + if (thread_rank >= path_end - path_start) { + *thread_active = false; + } else { + *e = path_elements[path_start + thread_rank]; + *start_row = bank * kRowsPerWarp; + *end_row = min((bank + 1) * kRowsPerWarp, X.NumRows()); + *thread_active = true; + } +} + +#define GPUTREESHAP_MAX_THREADS_PER_BLOCK 256 +#define FULL_MASK 0xffffffff + +template +__global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK) + ShapKernel(DatasetT X, size_t bins_per_row, + const PathElement* path_elements, + const size_t* bin_segments, size_t num_groups, double* phis) { + // Use shared memory for structs, otherwise nvcc puts in local memory + __shared__ DatasetT s_X; + s_X = X; + __shared__ PathElement s_elements[kBlockSize]; + PathElement& e = s_elements[threadIdx.x]; + + size_t start_row, end_row; + bool thread_active; + ConfigureThread( + s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e, + &thread_active); + uint32_t mask = __ballot_sync(FULL_MASK, thread_active); + if (!thread_active) return; + + float zero_fraction = e.zero_fraction; + auto labelled_group = active_labeled_partition(mask, e.path_idx); + + for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) { + float phi = ComputePhi(e, row_idx, X, labelled_group, zero_fraction); + + if (!e.IsRoot()) { + atomicAddDouble(&phis[IndexPhi(row_idx, num_groups, e.group, X.NumCols(), + e.feature_idx)], + phi); + } + } +} + +template +void ComputeShap( + DatasetT X, + const thrust::device_vector& bin_segments, + const thrust::device_vector, PathAllocatorT>& + path_elements, + size_t num_groups, double* phis) { + size_t bins_per_row = bin_segments.size() - 1; + const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK; + const int warps_per_block = kBlockThreads / 32; + const int kRowsPerWarp = 1024; + size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp); + + const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block); + + ShapKernel + <<>>( + X, bins_per_row, path_elements.data().get(), + bin_segments.data().get(), 
num_groups, phis); +} + +template +__device__ float ComputePhiCondition(const PathElement& e, + size_t row_idx, const DatasetT& X, + const ContiguousGroup& group, + int64_t condition_feature) { + float one_fraction = e.EvaluateSplit(X, row_idx); + PathT path(group, e.zero_fraction, one_fraction); + size_t unique_path_length = group.size(); + float condition_on_fraction = 1.0f; + float condition_off_fraction = 1.0f; + + // Extend the path + for (auto i = 1ull; i < unique_path_length; i++) { + bool is_condition_feature = + group.shfl(e.feature_idx, i) == condition_feature; + float o_i = group.shfl(one_fraction, i); + float z_i = group.shfl(e.zero_fraction, i); + + if (is_condition_feature) { + condition_on_fraction = o_i; + condition_off_fraction = z_i; + } else { + path.Extend(); + } + } + float sum = path.UnwoundPathSum(); + if (e.feature_idx == condition_feature) { + return 0.0f; + } + float phi = sum * (one_fraction - e.zero_fraction) * e.v; + return phi * (condition_on_fraction - condition_off_fraction) * 0.5f; +} + +// If there is a feature in the path we are conditioning on, swap it to the end +// of the path +template +inline __device__ void SwapConditionedElement( + PathElement** e, PathElement* s_elements, + uint32_t condition_rank, const ContiguousGroup& group) { + auto last_rank = group.size() - 1; + auto this_rank = group.thread_rank(); + if (this_rank == last_rank) { + *e = &s_elements[(threadIdx.x - this_rank) + condition_rank]; + } else if (this_rank == condition_rank) { + *e = &s_elements[(threadIdx.x - this_rank) + last_rank]; + } +} + +template +__global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK) + ShapInteractionsKernel(DatasetT X, size_t bins_per_row, + const PathElement* path_elements, + const size_t* bin_segments, size_t num_groups, + double* phis_interactions) { + // Use shared memory for structs, otherwise nvcc puts in local memory + __shared__ DatasetT s_X; + s_X = X; + __shared__ PathElement s_elements[kBlockSize]; + PathElement* e = &s_elements[threadIdx.x]; + + size_t start_row, end_row; + bool thread_active; + ConfigureThread( + s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e, + &thread_active); + uint32_t mask = __ballot_sync(FULL_MASK, thread_active); + if (!thread_active) return; + + auto labelled_group = active_labeled_partition(mask, e->path_idx); + + for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) { + float phi = ComputePhi(*e, row_idx, X, labelled_group, e->zero_fraction); + if (!e->IsRoot()) { + auto phi_offset = + IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(), + e->feature_idx, e->feature_idx); + atomicAddDouble(phis_interactions + phi_offset, phi); + } + + for (auto condition_rank = 1ull; condition_rank < labelled_group.size(); + condition_rank++) { + e = &s_elements[threadIdx.x]; + int64_t condition_feature = + labelled_group.shfl(e->feature_idx, condition_rank); + SwapConditionedElement(&e, s_elements, condition_rank, labelled_group); + float x = ComputePhiCondition(*e, row_idx, X, labelled_group, + condition_feature); + if (!e->IsRoot()) { + auto phi_offset = + IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(), + e->feature_idx, condition_feature); + atomicAddDouble(phis_interactions + phi_offset, x); + // Subtract effect from diagonal + auto phi_diag = + IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(), + e->feature_idx, e->feature_idx); + atomicAddDouble(phis_interactions + phi_diag, -x); + } + } + } +} + +template +void ComputeShapInteractions( 
+ DatasetT X, + const thrust::device_vector& bin_segments, + const thrust::device_vector, PathAllocatorT>& + path_elements, + size_t num_groups, double* phis) { + size_t bins_per_row = bin_segments.size() - 1; + const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK; + const int warps_per_block = kBlockThreads / 32; + const int kRowsPerWarp = 100; + size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp); + + const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block); + + ShapInteractionsKernel + <<>>( + X, bins_per_row, path_elements.data().get(), + bin_segments.data().get(), num_groups, phis); +} + +template +__global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK) + ShapTaylorInteractionsKernel( + DatasetT X, size_t bins_per_row, + const PathElement* path_elements, + const size_t* bin_segments, size_t num_groups, + double* phis_interactions) { + // Use shared memory for structs, otherwise nvcc puts in local memory + __shared__ DatasetT s_X; + if (threadIdx.x == 0) { + s_X = X; + } + __syncthreads(); + __shared__ PathElement s_elements[kBlockSize]; + PathElement* e = &s_elements[threadIdx.x]; + + size_t start_row, end_row; + bool thread_active; + ConfigureThread( + s_X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, e, + &thread_active); + uint32_t mask = __ballot_sync(FULL_MASK, thread_active); + if (!thread_active) return; + + auto labelled_group = active_labeled_partition(mask, e->path_idx); + + for (int64_t row_idx = start_row; row_idx < end_row; row_idx++) { + for (auto condition_rank = 1ull; condition_rank < labelled_group.size(); + condition_rank++) { + e = &s_elements[threadIdx.x]; + // Compute the diagonal terms + // TODO(Rory): this can be more efficient + float reduce_input = + e->IsRoot() || labelled_group.thread_rank() == condition_rank + ? 
1.0f + : e->zero_fraction; + float reduce = + labelled_group.reduce(reduce_input, thrust::multiplies()); + if (labelled_group.thread_rank() == condition_rank) { + float one_fraction = e->split_condition.EvaluateSplit( + X.GetElement(row_idx, e->feature_idx)); + auto phi_offset = + IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(), + e->feature_idx, e->feature_idx); + atomicAddDouble(phis_interactions + phi_offset, + reduce * (one_fraction - e->zero_fraction) * e->v); + } + + int64_t condition_feature = + labelled_group.shfl(e->feature_idx, condition_rank); + + SwapConditionedElement(&e, s_elements, condition_rank, labelled_group); + + float x = ComputePhiCondition( + *e, row_idx, X, labelled_group, condition_feature); + if (!e->IsRoot()) { + auto phi_offset = + IndexPhiInteractions(row_idx, num_groups, e->group, X.NumCols(), + e->feature_idx, condition_feature); + atomicAddDouble(phis_interactions + phi_offset, x); + } + } + } +} + +template +void ComputeShapTaylorInteractions( + DatasetT X, + const thrust::device_vector& bin_segments, + const thrust::device_vector, PathAllocatorT>& + path_elements, + size_t num_groups, double* phis) { + size_t bins_per_row = bin_segments.size() - 1; + const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK; + const int warps_per_block = kBlockThreads / 32; + const int kRowsPerWarp = 100; + size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp); + + const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block); + + ShapTaylorInteractionsKernel + <<>>( + X, bins_per_row, path_elements.data().get(), + bin_segments.data().get(), num_groups, phis); +} + + +inline __host__ __device__ int64_t Factorial(int64_t x) { + int64_t y = 1; + for (auto i = 2; i <= x; i++) { + y *= i; + } + return y; +} + +// Compute factorials in log space using lgamma to avoid overflow +inline __host__ __device__ double W(double s, double n) { + assert(n - s - 1 >= 0); + return exp(lgamma(s + 1) - lgamma(n + 1) + lgamma(n - s)); +} + +template +__global__ void __launch_bounds__(GPUTREESHAP_MAX_THREADS_PER_BLOCK) + ShapInterventionalKernel(DatasetT X, DatasetT R, size_t bins_per_row, + const PathElement* path_elements, + const size_t* bin_segments, size_t num_groups, + double* phis) { + // Cache W coefficients + __shared__ float s_W[33][33]; + for (int i = threadIdx.x; i < 33 * 33; i += kBlockSize) { + auto s = i % 33; + auto n = i / 33; + if (n - s - 1 >= 0) { + s_W[s][n] = W(s, n); + } else { + s_W[s][n] = 0.0; + } + } + + __syncthreads(); + + __shared__ PathElement s_elements[kBlockSize]; + PathElement& e = s_elements[threadIdx.x]; + + size_t start_row, end_row; + bool thread_active; + ConfigureThread( + X, bins_per_row, path_elements, bin_segments, &start_row, &end_row, &e, + &thread_active); + + uint32_t mask = __ballot_sync(FULL_MASK, thread_active); + if (!thread_active) return; + + auto labelled_group = active_labeled_partition(mask, e.path_idx); + + for (int64_t x_idx = start_row; x_idx < end_row; x_idx++) { + float result = 0.0f; + bool x_cond = e.EvaluateSplit(X, x_idx); + uint32_t x_ballot = labelled_group.ballot(x_cond); + for (int64_t r_idx = 0; r_idx < R.NumRows(); r_idx++) { + bool r_cond = e.EvaluateSplit(R, r_idx); + uint32_t r_ballot = labelled_group.ballot(r_cond); + assert(!e.IsRoot() || + (x_cond == r_cond)); // These should be the same for the root + uint32_t s = __popc(x_ballot & ~r_ballot); + uint32_t n = __popc(x_ballot ^ r_ballot); + float tmp = 0.0f; + // Theorem 1 + if (x_cond && !r_cond) { + tmp += s_W[s - 1][n]; 
+ } + tmp -= s_W[s][n] * (r_cond && !x_cond); + + // No foreground samples make it to this leaf, increment bias + if (e.IsRoot() && s == 0) { + tmp += 1.0f; + } + // If neither foreground or background go down this path, ignore this path + bool reached_leaf = !labelled_group.ballot(!x_cond && !r_cond); + tmp *= reached_leaf; + result += tmp; + } + + if (result != 0.0) { + result /= R.NumRows(); + + // Root writes bias + auto feature = e.IsRoot() ? X.NumCols() : e.feature_idx; + atomicAddDouble( + &phis[IndexPhi(x_idx, num_groups, e.group, X.NumCols(), feature)], + result * e.v); + } + } +} + +template +void ComputeShapInterventional( + DatasetT X, DatasetT R, + const thrust::device_vector& bin_segments, + const thrust::device_vector, PathAllocatorT>& + path_elements, + size_t num_groups, double* phis) { + size_t bins_per_row = bin_segments.size() - 1; + const int kBlockThreads = GPUTREESHAP_MAX_THREADS_PER_BLOCK; + const int warps_per_block = kBlockThreads / 32; + const int kRowsPerWarp = 100; + size_t warps_needed = bins_per_row * DivRoundUp(X.NumRows(), kRowsPerWarp); + + const uint32_t grid_size = DivRoundUp(warps_needed, warps_per_block); + + ShapInterventionalKernel + <<>>( + X, R, bins_per_row, path_elements.data().get(), + bin_segments.data().get(), num_groups, phis); +} + +template +void GetBinSegments(const PathVectorT& paths, const SizeVectorT& bin_map, + SizeVectorT* bin_segments) { + DeviceAllocatorT alloc; + size_t num_bins = + thrust::reduce(thrust::cuda::par(alloc), bin_map.begin(), bin_map.end(), + size_t(0), thrust::maximum()) + + 1; + bin_segments->resize(num_bins + 1, 0); + auto counting = thrust::make_counting_iterator(0llu); + auto d_paths = paths.data().get(); + auto d_bin_segments = bin_segments->data().get(); + auto d_bin_map = bin_map.data(); + thrust::for_each_n(counting, paths.size(), [=] __device__(size_t idx) { + auto path_idx = d_paths[idx].path_idx; + atomicAdd(reinterpret_cast(d_bin_segments) + // NOLINT + d_bin_map[path_idx], + 1); + }); + thrust::exclusive_scan(thrust::cuda::par(alloc), bin_segments->begin(), + bin_segments->end(), bin_segments->begin()); +} + +struct DeduplicateKeyTransformOp { + template + __device__ thrust::pair operator()( + const PathElement& e) { + return {e.path_idx, e.feature_idx}; + } +}; + +inline void CheckCuda(cudaError_t err) { + if (err != cudaSuccess) { + throw thrust::system_error(err, thrust::cuda_category()); + } +} + +template +class DiscardOverload : public thrust::discard_iterator { + public: + using value_type = Return; // NOLINT +}; + +template +void DeduplicatePaths(PathVectorT* device_paths, + PathVectorT* deduplicated_paths) { + DeviceAllocatorT alloc; + // Sort by feature + thrust::sort(thrust::cuda::par(alloc), device_paths->begin(), + device_paths->end(), + [=] __device__(const PathElement& a, + const PathElement& b) { + if (a.path_idx < b.path_idx) return true; + if (b.path_idx < a.path_idx) return false; + + if (a.feature_idx < b.feature_idx) return true; + if (b.feature_idx < a.feature_idx) return false; + return false; + }); + + deduplicated_paths->resize(device_paths->size()); + + using Pair = thrust::pair; + auto key_transform = thrust::make_transform_iterator( + device_paths->begin(), DeduplicateKeyTransformOp()); + + thrust::device_vector d_num_runs_out(1); + size_t* h_num_runs_out; + CheckCuda(cudaMallocHost(&h_num_runs_out, sizeof(size_t))); + + auto combine = [] __device__(PathElement a, + PathElement b) { + // Combine duplicate features + a.split_condition.Merge(b.split_condition); + a.zero_fraction 
*= b.zero_fraction; + return a; + }; // NOLINT + size_t temp_size = 0; + CheckCuda(cub::DeviceReduce::ReduceByKey( + nullptr, temp_size, key_transform, DiscardOverload(), + device_paths->begin(), deduplicated_paths->begin(), + d_num_runs_out.begin(), combine, device_paths->size())); + using TempAlloc = RebindVector; + TempAlloc tmp(temp_size); + CheckCuda(cub::DeviceReduce::ReduceByKey( + tmp.data().get(), temp_size, key_transform, DiscardOverload(), + device_paths->begin(), deduplicated_paths->begin(), + d_num_runs_out.begin(), combine, device_paths->size())); + + CheckCuda(cudaMemcpy(h_num_runs_out, d_num_runs_out.data().get(), + sizeof(size_t), cudaMemcpyDeviceToHost)); + deduplicated_paths->resize(*h_num_runs_out); + CheckCuda(cudaFreeHost(h_num_runs_out)); +} + +template +void SortPaths(PathVectorT* paths, const SizeVectorT& bin_map) { + auto d_bin_map = bin_map.data(); + DeviceAllocatorT alloc; + thrust::sort(thrust::cuda::par(alloc), paths->begin(), paths->end(), + [=] __device__(const PathElement& a, + const PathElement& b) { + size_t a_bin = d_bin_map[a.path_idx]; + size_t b_bin = d_bin_map[b.path_idx]; + if (a_bin < b_bin) return true; + if (b_bin < a_bin) return false; + + if (a.path_idx < b.path_idx) return true; + if (b.path_idx < a.path_idx) return false; + + if (a.feature_idx < b.feature_idx) return true; + if (b.feature_idx < a.feature_idx) return false; + return false; + }); +} + +using kv = std::pair; + +struct BFDCompare { + bool operator()(const kv& lhs, const kv& rhs) const { + if (lhs.second == rhs.second) { + return lhs.first < rhs.first; + } + return lhs.second < rhs.second; + } +}; + +// Best Fit Decreasing bin packing +// Efficient O(nlogn) implementation with balanced tree using std::set +template +std::vector BFDBinPacking(const IntVectorT& counts, + int bin_limit = 32) { + thrust::host_vector counts_host(counts); + std::vector path_lengths(counts_host.size()); + for (auto i = 0ull; i < counts_host.size(); i++) { + path_lengths[i] = {i, counts_host[i]}; + } + + std::sort(path_lengths.begin(), path_lengths.end(), + [&](const kv& a, const kv& b) { + std::greater<> op; + return op(a.second, b.second); + }); + + // map unique_id -> bin + std::vector bin_map(counts_host.size()); + std::set bin_capacities; + bin_capacities.insert({bin_capacities.size(), bin_limit}); + for (auto pair : path_lengths) { + int new_size = pair.second; + auto itr = bin_capacities.lower_bound({0, new_size}); + // Does not fit in any bin + if (itr == bin_capacities.end()) { + size_t new_bin_idx = bin_capacities.size(); + bin_capacities.insert({new_bin_idx, bin_limit - new_size}); + bin_map[pair.first] = new_bin_idx; + } else { + kv entry = *itr; + entry.second -= new_size; + bin_map[pair.first] = entry.first; + bin_capacities.erase(itr); + bin_capacities.insert(entry); + } + } + + return bin_map; +} + +// First Fit Decreasing bin packing +// Inefficient O(n^2) implementation +template +std::vector FFDBinPacking(const IntVectorT& counts, + int bin_limit = 32) { + thrust::host_vector counts_host(counts); + std::vector path_lengths(counts_host.size()); + for (auto i = 0ull; i < counts_host.size(); i++) { + path_lengths[i] = {i, counts_host[i]}; + } + std::sort(path_lengths.begin(), path_lengths.end(), + [&](const kv& a, const kv& b) { + std::greater<> op; + return op(a.second, b.second); + }); + + // map unique_id -> bin + std::vector bin_map(counts_host.size()); + std::vector bin_capacities(path_lengths.size(), bin_limit); + for (auto pair : path_lengths) { + int new_size = pair.second; + for 
(auto j = 0ull; j < bin_capacities.size(); j++) { + int& capacity = bin_capacities[j]; + + if (capacity >= new_size) { + capacity -= new_size; + bin_map[pair.first] = j; + break; + } + } + } + + return bin_map; +} + +// Next Fit bin packing +// O(n) implementation +template +std::vector NFBinPacking(const IntVectorT& counts, int bin_limit = 32) { + thrust::host_vector counts_host(counts); + std::vector bin_map(counts_host.size()); + size_t current_bin = 0; + int current_capacity = bin_limit; + for (auto i = 0ull; i < counts_host.size(); i++) { + int new_size = counts_host[i]; + size_t path_idx = i; + if (new_size <= current_capacity) { + current_capacity -= new_size; + bin_map[path_idx] = current_bin; + } else { + current_capacity = bin_limit - new_size; + bin_map[path_idx] = ++current_bin; + } + } + return bin_map; +} + +template +void GetPathLengths(const PathVectorT& device_paths, + LengthVectorT* path_lengths) { + path_lengths->resize( + static_cast>(device_paths.back()).path_idx + + 1, + 0); + auto counting = thrust::make_counting_iterator(0llu); + auto d_paths = device_paths.data().get(); + auto d_lengths = path_lengths->data().get(); + thrust::for_each_n(counting, device_paths.size(), [=] __device__(size_t idx) { + auto path_idx = d_paths[idx].path_idx; + atomicAdd(d_lengths + path_idx, 1ull); + }); +} + +struct PathTooLongOp { + __device__ size_t operator()(size_t length) { return length > 32; } +}; + +template +struct IncorrectVOp { + const PathElement* paths; + __device__ size_t operator()(size_t idx) { + auto a = paths[idx - 1]; + auto b = paths[idx]; + return a.path_idx == b.path_idx && a.v != b.v; + } +}; + +template +void ValidatePaths(const PathVectorT& device_paths, + const LengthVectorT& path_lengths) { + DeviceAllocatorT alloc; + PathTooLongOp too_long_op; + auto invalid_length = + thrust::any_of(thrust::cuda::par(alloc), path_lengths.begin(), + path_lengths.end(), too_long_op); + + if (invalid_length) { + throw std::invalid_argument("Tree depth must be < 32"); + } + + IncorrectVOp incorrect_v_op{device_paths.data().get()}; + auto counting = thrust::counting_iterator(0); + auto incorrect_v = + thrust::any_of(thrust::cuda::par(alloc), counting + 1, + counting + device_paths.size(), incorrect_v_op); + + if (incorrect_v) { + throw std::invalid_argument( + "Leaf value v should be the same across a single path"); + } +} + +template +void PreprocessPaths(PathVectorT* device_paths, PathVectorT* deduplicated_paths, + SizeVectorT* bin_segments) { + // Sort paths by length and feature + detail::DeduplicatePaths( + device_paths, deduplicated_paths); + using int_vector = RebindVector; + int_vector path_lengths; + detail::GetPathLengths(*deduplicated_paths, + &path_lengths); + SizeVectorT device_bin_map = detail::BFDBinPacking(path_lengths); + ValidatePaths(*deduplicated_paths, + path_lengths); + detail::SortPaths(deduplicated_paths, device_bin_map); + detail::GetBinSegments( + *deduplicated_paths, device_bin_map, bin_segments); +} + +struct PathIdxTransformOp { + template + __device__ size_t operator()(const PathElement& e) { + return e.path_idx; + } +}; + +struct GroupIdxTransformOp { + template + __device__ size_t operator()(const PathElement& e) { + return e.group; + } +}; + +struct BiasTransformOp { + template + __device__ double operator()(const PathElement& e) { + return e.zero_fraction * e.v; + } +}; + +// While it is possible to compute bias in the primary kernel, we do it here +// using double precision to avoid numerical stability issues +template +void ComputeBias(const 
PathVectorT& device_paths, DoubleVectorT* bias) { + using double_vector = thrust::device_vector< + double, typename DeviceAllocatorT::template rebind::other>; + PathVectorT sorted_paths(device_paths); + DeviceAllocatorT alloc; + // Make sure groups are contiguous + thrust::sort(thrust::cuda::par(alloc), sorted_paths.begin(), + sorted_paths.end(), + [=] __device__(const PathElement& a, + const PathElement& b) { + if (a.group < b.group) return true; + if (b.group < a.group) return false; + + if (a.path_idx < b.path_idx) return true; + if (b.path_idx < a.path_idx) return false; + + return false; + }); + // Combine zero fraction for all paths + auto path_key = thrust::make_transform_iterator(sorted_paths.begin(), + PathIdxTransformOp()); + PathVectorT combined(sorted_paths.size()); + auto combined_out = thrust::reduce_by_key( + thrust::cuda ::par(alloc), path_key, path_key + sorted_paths.size(), + sorted_paths.begin(), thrust::make_discard_iterator(), combined.begin(), + thrust::equal_to(), + [=] __device__(PathElement a, + const PathElement& b) { + a.zero_fraction *= b.zero_fraction; + return a; + }); + size_t num_paths = combined_out.second - combined.begin(); + // Combine bias for each path, over each group + using size_vector = thrust::device_vector< + size_t, typename DeviceAllocatorT::template rebind::other>; + size_vector keys_out(num_paths); + double_vector values_out(num_paths); + auto group_key = + thrust::make_transform_iterator(combined.begin(), GroupIdxTransformOp()); + auto values = + thrust::make_transform_iterator(combined.begin(), BiasTransformOp()); + + auto out_itr = thrust::reduce_by_key(thrust::cuda::par(alloc), group_key, + group_key + num_paths, values, + keys_out.begin(), values_out.begin()); + + // Write result + size_t n = out_itr.first - keys_out.begin(); + auto counting = thrust::make_counting_iterator(0llu); + auto d_keys_out = keys_out.data().get(); + auto d_values_out = values_out.data().get(); + auto d_bias = bias->data().get(); + thrust::for_each_n(counting, n, [=] __device__(size_t idx) { + d_bias[d_keys_out[idx]] = d_values_out[idx]; + }); +} + +}; // namespace detail + +/*! + * Compute feature contributions on the GPU given a set of unique paths through + * a tree ensemble and a dataset. Uses device memory proportional to the tree + * ensemble size. + * + * \exception std::invalid_argument Thrown when an invalid argument error + * condition occurs. \tparam PathIteratorT Thrust type iterator, may be + * thrust::device_ptr for device memory, or stl iterator/raw pointer for host + * memory. \tparam PhiIteratorT Thrust type iterator, may be + * thrust::device_ptr for device memory, or stl iterator/raw pointer for host + * memory. Value type must be floating point. \tparam DatasetT User-specified + * dataset container. \tparam DeviceAllocatorT Optional thrust style + * allocator. + * + * \param X Thin wrapper over a dataset allocated in device memory. X + * should be trivially copyable as a kernel parameter (i.e. contain only + * pointers to actual data) and must implement the methods + * NumRows()/NumCols()/GetElement(size_t row_idx, size_t col_idx) as __device__ + * functions. GetElement may return NaN where the feature value is missing. + * \param begin Iterator to paths, where separate paths are delineated by + * PathElement.path_idx. Each unique path should contain 1 + * root with feature_idx = -1 and zero_fraction = 1.0. The ordering of path + * elements inside a unique path does not matter - the result will be the same. 
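+ * As an illustration only (hypothetical values): a single-split tree with two
+ * leaves could be encoded as two unique paths, where path 0 consists of a root
+ * element {path_idx = 0, feature_idx = -1, zero_fraction = 1.0, v = leaf_0}
+ * followed by a split element {path_idx = 0, feature_idx = 2,
+ * zero_fraction = 0.5, v = leaf_0}, and path 1 likewise ends in leaf_1; note
+ * that v is the same leaf value for every element of a given path.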
+ * Paths may contain duplicate features. See the PathElement class for more + * information. \param end Path end iterator. \param num_groups Number + * of output groups. In multiclass classification the algorithm outputs feature + * contributions per output class. \param phis_begin Begin iterator for output + * phis. \param phis_end End iterator for output phis. + */ +template , + typename DatasetT, typename PathIteratorT, typename PhiIteratorT> +void GPUTreeShap(DatasetT X, PathIteratorT begin, PathIteratorT end, + size_t num_groups, PhiIteratorT phis_begin, + PhiIteratorT phis_end) { + if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return; + + if (size_t(phis_end - phis_begin) < + X.NumRows() * (X.NumCols() + 1) * num_groups) { + throw std::invalid_argument( + "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * " + "num_groups"); + } + + using size_vector = detail::RebindVector; + using double_vector = detail::RebindVector; + using path_vector = detail::RebindVector< + typename std::iterator_traits::value_type, + DeviceAllocatorT>; + using split_condition = + typename std::iterator_traits::value_type::split_type; + + // Compute the global bias + double_vector temp_phi(phis_end - phis_begin, 0.0); + path_vector device_paths(begin, end); + double_vector bias(num_groups, 0.0); + detail::ComputeBias(device_paths, &bias); + auto d_bias = bias.data().get(); + auto d_temp_phi = temp_phi.data().get(); + thrust::for_each_n(thrust::make_counting_iterator(0llu), + X.NumRows() * num_groups, [=] __device__(size_t idx) { + size_t group = idx % num_groups; + size_t row_idx = idx / num_groups; + d_temp_phi[IndexPhi(row_idx, num_groups, group, + X.NumCols(), X.NumCols())] += + d_bias[group]; + }); + + path_vector deduplicated_paths; + size_vector device_bin_segments; + detail::PreprocessPaths( + &device_paths, &deduplicated_paths, &device_bin_segments); + + detail::ComputeShap(X, device_bin_segments, deduplicated_paths, num_groups, + temp_phi.data().get()); + thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin); +} + +/*! + * Compute feature interaction contributions on the GPU given a set of unique + * paths through a tree ensemble and a dataset. Uses device memory + * proportional to the tree ensemble size. + * + * \exception std::invalid_argument Thrown when an invalid argument error + * condition occurs. + * \tparam DeviceAllocatorT Optional thrust style allocator. + * \tparam DatasetT User-specified dataset container. + * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr + * for device memory, or stl iterator/raw pointer for + * host memory. + * \tparam PhiIteratorT Thrust type iterator, may be thrust::device_ptr + * for device memory, or stl iterator/raw pointer for + * host memory. Value type must be floating point. + * + * \param X Thin wrapper over a dataset allocated in device memory. X + * should be trivially copyable as a kernel parameter (i.e. + * contain only pointers to actual data) and must implement + * the methods NumRows()/NumCols()/GetElement(size_t row_idx, + * size_t col_idx) as __device__ functions. GetElement may + * return NaN where the feature value is missing. + * \param begin Iterator to paths, where separate paths are delineated by + * PathElement.path_idx. Each unique path should contain 1 + * root with feature_idx = -1 and zero_fraction = 1.0. The + * ordering of path elements inside a unique path does not + * matter - the result will be the same. Paths may contain + * duplicate features. 
See the PathElement class for more + * information. + * \param end Path end iterator. + * \param num_groups Number of output groups. In multiclass classification the + * algorithm outputs feature contributions per output class. + * \param phis_begin Begin iterator for output phis. + * \param phis_end End iterator for output phis. + */ +template , + typename DatasetT, typename PathIteratorT, typename PhiIteratorT> +void GPUTreeShapInteractions(DatasetT X, PathIteratorT begin, PathIteratorT end, + size_t num_groups, PhiIteratorT phis_begin, + PhiIteratorT phis_end) { + if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return; + if (size_t(phis_end - phis_begin) < + X.NumRows() * (X.NumCols() + 1) * (X.NumCols() + 1) * num_groups) { + throw std::invalid_argument( + "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * " + "(X.NumCols() + 1) * " + "num_groups"); + } + + using size_vector = detail::RebindVector; + using double_vector = detail::RebindVector; + using path_vector = detail::RebindVector< + typename std::iterator_traits::value_type, + DeviceAllocatorT>; + using split_condition = + typename std::iterator_traits::value_type::split_type; + + // Compute the global bias + double_vector temp_phi(phis_end - phis_begin, 0.0); + path_vector device_paths(begin, end); + double_vector bias(num_groups, 0.0); + detail::ComputeBias(device_paths, &bias); + auto d_bias = bias.data().get(); + auto d_temp_phi = temp_phi.data().get(); + thrust::for_each_n( + thrust::make_counting_iterator(0llu), X.NumRows() * num_groups, + [=] __device__(size_t idx) { + size_t group = idx % num_groups; + size_t row_idx = idx / num_groups; + d_temp_phi[IndexPhiInteractions(row_idx, num_groups, group, X.NumCols(), + X.NumCols(), X.NumCols())] += + d_bias[group]; + }); + + path_vector deduplicated_paths; + size_vector device_bin_segments; + detail::PreprocessPaths( + &device_paths, &deduplicated_paths, &device_bin_segments); + + detail::ComputeShapInteractions(X, device_bin_segments, deduplicated_paths, + num_groups, temp_phi.data().get()); + thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin); +} + +/*! + * Compute feature interaction contributions using the Shapley Taylor index on + * the GPU, given a set of unique paths through a tree ensemble and a dataset. + * Uses device memory proportional to the tree ensemble size. + * + * \exception std::invalid_argument Thrown when an invalid argument error + * condition occurs. + * \tparam PhiIteratorT Thrust type iterator, may be thrust::device_ptr + * for device memory, or stl iterator/raw pointer for + * host memory. Value type must be floating point. + * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr + * for device memory, or stl iterator/raw pointer for + * host memory. + * \tparam DatasetT User-specified dataset container. + * \tparam DeviceAllocatorT Optional thrust style allocator. + * + * \param X Thin wrapper over a dataset allocated in device memory. X + * should be trivially copyable as a kernel parameter (i.e. + * contain only pointers to actual data) and must implement + * the methods NumRows()/NumCols()/GetElement(size_t row_idx, + * size_t col_idx) as __device__ functions. GetElement may + * return NaN where the feature value is missing. + * \param begin Iterator to paths, where separate paths are delineated by + * PathElement.path_idx. Each unique path should contain 1 + * root with feature_idx = -1 and zero_fraction = 1.0. 
The + * ordering of path elements inside a unique path does not + * matter - the result will be the same. Paths may contain + * duplicate features. See the PathElement class for more + * information. + * \param end Path end iterator. + * \param num_groups Number of output groups. In multiclass classification the + * algorithm outputs feature contributions per output class. + * \param phis_begin Begin iterator for output phis. + * \param phis_end End iterator for output phis. + */ +template , + typename DatasetT, typename PathIteratorT, typename PhiIteratorT> +void GPUTreeShapTaylorInteractions(DatasetT X, PathIteratorT begin, + PathIteratorT end, size_t num_groups, + PhiIteratorT phis_begin, + PhiIteratorT phis_end) { + using phis_type = typename std::iterator_traits::value_type; + static_assert(std::is_floating_point::value, + "Phis type must be floating point"); + + if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return; + + if (size_t(phis_end - phis_begin) < + X.NumRows() * (X.NumCols() + 1) * (X.NumCols() + 1) * num_groups) { + throw std::invalid_argument( + "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * " + "(X.NumCols() + 1) * " + "num_groups"); + } + + using size_vector = detail::RebindVector; + using double_vector = detail::RebindVector; + using path_vector = detail::RebindVector< + typename std::iterator_traits::value_type, + DeviceAllocatorT>; + using split_condition = + typename std::iterator_traits::value_type::split_type; + + // Compute the global bias + double_vector temp_phi(phis_end - phis_begin, 0.0); + path_vector device_paths(begin, end); + double_vector bias(num_groups, 0.0); + detail::ComputeBias(device_paths, &bias); + auto d_bias = bias.data().get(); + auto d_temp_phi = temp_phi.data().get(); + thrust::for_each_n( + thrust::make_counting_iterator(0llu), X.NumRows() * num_groups, + [=] __device__(size_t idx) { + size_t group = idx % num_groups; + size_t row_idx = idx / num_groups; + d_temp_phi[IndexPhiInteractions(row_idx, num_groups, group, X.NumCols(), + X.NumCols(), X.NumCols())] += + d_bias[group]; + }); + + path_vector deduplicated_paths; + size_vector device_bin_segments; + detail::PreprocessPaths( + &device_paths, &deduplicated_paths, &device_bin_segments); + + detail::ComputeShapTaylorInteractions(X, device_bin_segments, + deduplicated_paths, num_groups, + temp_phi.data().get()); + thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin); +} + +/*! + * Compute feature contributions on the GPU given a set of unique paths through a tree ensemble + * and a dataset. Uses device memory proportional to the tree ensemble size. This variant + * implements the interventional tree shap algorithm described here: + * https://drafts.distill.pub/HughChen/its_blog/ + * + * It requires a background dataset R. + * + * \exception std::invalid_argument Thrown when an invalid argument error condition occurs. + * \tparam DeviceAllocatorT Optional thrust style allocator. + * \tparam DatasetT User-specified dataset container. + * \tparam PathIteratorT Thrust type iterator, may be thrust::device_ptr for device memory, or + * stl iterator/raw pointer for host memory. + * + * \param X Thin wrapper over a dataset allocated in device memory. X should be trivially + * copyable as a kernel parameter (i.e. contain only pointers to actual data) and + * must implement the methods NumRows()/NumCols()/GetElement(size_t row_idx, + * size_t col_idx) as __device__ functions. GetElement may return NaN where the + * feature value is missing. 
+ * \param R Background dataset.
+ * \param begin Iterator to paths, where separate paths are delineated by
+ *                   PathElement.path_idx. Each unique path should contain 1 root with feature_idx =
+ *                   -1 and zero_fraction = 1.0. The ordering of path elements inside a unique path
+ *                   does not matter - the result will be the same. Paths may contain duplicate
+ *                   features. See the PathElement class for more information.
+ * \param end Path end iterator.
+ * \param num_groups Number of output groups. In multiclass classification the algorithm outputs
+ *                   feature contributions per output class.
+ * \param phis_begin Begin iterator for output phis.
+ * \param phis_end End iterator for output phis.
+ */
+template <typename DeviceAllocatorT = thrust::device_allocator<int>,
+          typename DatasetT, typename PathIteratorT, typename PhiIteratorT>
+void GPUTreeShapInterventional(DatasetT X, DatasetT R, PathIteratorT begin,
+                               PathIteratorT end, size_t num_groups,
+                               PhiIteratorT phis_begin, PhiIteratorT phis_end) {
+  if (X.NumRows() == 0 || X.NumCols() == 0 || end - begin <= 0) return;
+
+  if (size_t(phis_end - phis_begin) <
+      X.NumRows() * (X.NumCols() + 1) * num_groups) {
+    throw std::invalid_argument(
+        "phis_out must be at least of size X.NumRows() * (X.NumCols() + 1) * "
+        "num_groups");
+  }
+
+  using size_vector = detail::RebindVector<size_t, DeviceAllocatorT>;
+  using double_vector = detail::RebindVector<double, DeviceAllocatorT>;
+  using path_vector = detail::RebindVector<
+      typename std::iterator_traits<PathIteratorT>::value_type,
+      DeviceAllocatorT>;
+  using split_condition =
+      typename std::iterator_traits<PathIteratorT>::value_type::split_type;
+
+  double_vector temp_phi(phis_end - phis_begin, 0.0);
+  path_vector device_paths(begin, end);
+
+  path_vector deduplicated_paths;
+  size_vector device_bin_segments;
+  detail::PreprocessPaths<DeviceAllocatorT, split_condition>(
+      &device_paths, &deduplicated_paths, &device_bin_segments);
+  detail::ComputeShapInterventional(X, R, device_bin_segments,
+                                    deduplicated_paths, num_groups,
+                                    temp_phi.data().get());
+  thrust::copy(temp_phi.begin(), temp_phi.end(), phis_begin);
+}
+}  // namespace gpu_treeshap
diff --git a/lib/shap/cext/tree_shap.h b/lib/shap/cext/tree_shap.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb5eef3c567f36397f48a75b16136010910d2d76
--- /dev/null
+++ b/lib/shap/cext/tree_shap.h
@@ -0,0 +1,1460 @@
+/**
+ * Fast recursive computation of SHAP values in trees.
+ * See https://arxiv.org/abs/1802.03888 for details.
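+ *
+ * The path-dependent algorithm implemented below runs in O(T L D^2) time for
+ * an ensemble of T trees with at most L leaves and maximum depth D, rather
+ * than the exponential cost of evaluating the Shapley value summation
+ * directly.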
+ *
+ * Scott Lundberg, 2018 (independent algorithm courtesy of Hugh Chen 2018)
+ */
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <cmath>
+#include <ctime>
+#include <Python.h>
+#if defined(_WIN32) || defined(WIN32)
+    #include <malloc.h>
+#elif defined(__MVS__)
+    #include <stdlib.h>
+#else
+    #include <alloca.h>
+#endif
+using namespace std;
+
+typedef double tfloat;
+typedef tfloat (* transform_f)(const tfloat margin, const tfloat y);
+
+namespace FEATURE_DEPENDENCE {
+    const unsigned independent = 0;
+    const unsigned tree_path_dependent = 1;
+    const unsigned global_path_dependent = 2;
+}
+
+struct TreeEnsemble {
+    int *children_left;
+    int *children_right;
+    int *children_default;
+    int *features;
+    tfloat *thresholds;
+    tfloat *values;
+    tfloat *node_sample_weights;
+    unsigned max_depth;
+    unsigned tree_limit;
+    tfloat *base_offset;
+    unsigned max_nodes;
+    unsigned num_outputs;
+
+    TreeEnsemble() {}
+    TreeEnsemble(int *children_left, int *children_right, int *children_default, int *features,
+                 tfloat *thresholds, tfloat *values, tfloat *node_sample_weights,
+                 unsigned max_depth, unsigned tree_limit, tfloat *base_offset,
+                 unsigned max_nodes, unsigned num_outputs) :
+        children_left(children_left), children_right(children_right),
+        children_default(children_default), features(features), thresholds(thresholds),
+        values(values), node_sample_weights(node_sample_weights),
+        max_depth(max_depth), tree_limit(tree_limit),
+        base_offset(base_offset), max_nodes(max_nodes), num_outputs(num_outputs) {}
+
+    void get_tree(TreeEnsemble &tree, const unsigned i) const {
+        const unsigned d = i * max_nodes;
+
+        tree.children_left = children_left + d;
+        tree.children_right = children_right + d;
+        tree.children_default = children_default + d;
+        tree.features = features + d;
+        tree.thresholds = thresholds + d;
+        tree.values = values + d * num_outputs;
+        tree.node_sample_weights = node_sample_weights + d;
+        tree.max_depth = max_depth;
+        tree.tree_limit = 1;
+        tree.base_offset = base_offset;
+        tree.max_nodes = max_nodes;
+        tree.num_outputs = num_outputs;
+    }
+
+    bool is_leaf(unsigned pos) const {
+        return children_left[pos] < 0;
+    }
+
+    void allocate(unsigned tree_limit_in, unsigned max_nodes_in, unsigned num_outputs_in) {
+        tree_limit = tree_limit_in;
+        max_nodes = max_nodes_in;
+        num_outputs = num_outputs_in;
+        children_left = new int[tree_limit * max_nodes];
+        children_right = new int[tree_limit * max_nodes];
+        children_default = new int[tree_limit * max_nodes];
+        features = new int[tree_limit * max_nodes];
+        thresholds = new tfloat[tree_limit * max_nodes];
+        values = new tfloat[tree_limit * max_nodes * num_outputs];
+        node_sample_weights = new tfloat[tree_limit * max_nodes];
+    }
+
+    void free() {
+        delete[] children_left;
+        delete[] children_right;
+        delete[] children_default;
+        delete[] features;
+        delete[] thresholds;
+        delete[] values;
+        delete[] node_sample_weights;
+    }
+};
+
+struct ExplanationDataset {
+    tfloat *X;
+    bool *X_missing;
+    tfloat *y;
+    tfloat *R;
+    bool *R_missing;
+    unsigned num_X;
+    unsigned M;
+    unsigned num_R;
+
+    ExplanationDataset() {}
+    ExplanationDataset(tfloat *X, bool *X_missing, tfloat *y, tfloat *R, bool *R_missing, unsigned num_X,
+                       unsigned M, unsigned num_R) :
+        X(X), X_missing(X_missing), y(y), R(R), R_missing(R_missing), num_X(num_X), M(M), num_R(num_R) {}
+
+    void get_x_instance(ExplanationDataset &instance, const unsigned i) const {
+        instance.M = M;
+        instance.X = X + i * M;
+        instance.X_missing = X_missing + i * M;
+        instance.num_X = 1;
+    }
+};
+
+
+// data we keep about our decision path
+// note that pweight is included for
convenience and is not tied with the other attributes +// the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them +struct PathElement { + int feature_index; + tfloat zero_fraction; + tfloat one_fraction; + tfloat pweight; + PathElement() {} + PathElement(int i, tfloat z, tfloat o, tfloat w) : + feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {} +}; + +inline tfloat logistic_transform(const tfloat margin, const tfloat y) { + return 1 / (1 + exp(-margin)); +} + +inline tfloat logistic_nlogloss_transform(const tfloat margin, const tfloat y) { + return log(1 + exp(margin)) - y * margin; // y is in {0, 1} +} + +inline tfloat squared_loss_transform(const tfloat margin, const tfloat y) { + return (margin - y) * (margin - y); +} + +namespace MODEL_TRANSFORM { + const unsigned identity = 0; + const unsigned logistic = 1; + const unsigned logistic_nlogloss = 2; + const unsigned squared_loss = 3; +} + +inline transform_f get_transform(unsigned model_transform) { + transform_f transform = NULL; + switch (model_transform) { + case MODEL_TRANSFORM::logistic: + transform = logistic_transform; + break; + + case MODEL_TRANSFORM::logistic_nlogloss: + transform = logistic_nlogloss_transform; + break; + + case MODEL_TRANSFORM::squared_loss: + transform = squared_loss_transform; + break; + } + + return transform; +} + +inline tfloat *tree_predict(unsigned i, const TreeEnsemble &trees, const tfloat *x, const bool *x_missing) { + const unsigned offset = i * trees.max_nodes; + unsigned node = 0; + while (true) { + const unsigned pos = offset + node; + const unsigned feature = trees.features[pos]; + + // we hit a leaf so return a pointer to the values + if (trees.is_leaf(pos)) { + return trees.values + pos * trees.num_outputs; + } + + // otherwise we are at an internal node and need to recurse + if (x_missing[feature]) { + node = trees.children_default[pos]; + } else if (x[feature] <= trees.thresholds[pos]) { + node = trees.children_left[pos]; + } else { + node = trees.children_right[pos]; + } + } +} + +inline void dense_tree_predict(tfloat *out, const TreeEnsemble &trees, const ExplanationDataset &data, unsigned model_transform) { + tfloat *row_out = out; + const tfloat *x = data.X; + const bool *x_missing = data.X_missing; + + // see what transform (if any) we have + transform_f transform = get_transform(model_transform); + + for (unsigned i = 0; i < data.num_X; ++i) { + + // add the base offset + for (unsigned k = 0; k < trees.num_outputs; ++k) { + row_out[k] += trees.base_offset[k]; + } + + // add the leaf values from each tree + for (unsigned j = 0; j < trees.tree_limit; ++j) { + const tfloat *leaf_value = tree_predict(j, trees, x, x_missing); + + for (unsigned k = 0; k < trees.num_outputs; ++k) { + row_out[k] += leaf_value[k]; + } + } + + // apply any needed transform + if (transform != NULL) { + const tfloat y_i = data.y == NULL ? 
0 : data.y[i]; + for (unsigned k = 0; k < trees.num_outputs; ++k) { + row_out[k] = transform(row_out[k], y_i); + } + } + + x += data.M; + x_missing += data.M; + row_out += trees.num_outputs; + } +} + +inline void tree_update_weights(unsigned i, TreeEnsemble &trees, const tfloat *x, const bool *x_missing) { + const unsigned offset = i * trees.max_nodes; + unsigned node = 0; + while (true) { + const unsigned pos = offset + node; + const unsigned feature = trees.features[pos]; + + // Record that a sample passed through this node + trees.node_sample_weights[pos] += 1.0; + + // we hit a leaf so return a pointer to the values + if (trees.children_left[pos] < 0) break; + + // otherwise we are at an internal node and need to recurse + if (x_missing[feature]) { + node = trees.children_default[pos]; + } else if (x[feature] <= trees.thresholds[pos]) { + node = trees.children_left[pos]; + } else { + node = trees.children_right[pos]; + } + } +} + +inline void dense_tree_update_weights(TreeEnsemble &trees, const ExplanationDataset &data) { + const tfloat *x = data.X; + const bool *x_missing = data.X_missing; + + for (unsigned i = 0; i < data.num_X; ++i) { + + // add the leaf values from each tree + for (unsigned j = 0; j < trees.tree_limit; ++j) { + tree_update_weights(j, trees, x, x_missing); + } + + x += data.M; + x_missing += data.M; + } +} + +inline void tree_saabas(tfloat *out, const TreeEnsemble &tree, const ExplanationDataset &data) { + unsigned curr_node = 0; + unsigned next_node = 0; + while (true) { + + // we hit a leaf and are done + if (tree.children_left[curr_node] < 0) return; + + // otherwise we are at an internal node and need to recurse + const unsigned feature = tree.features[curr_node]; + if (data.X_missing[feature]) { + next_node = tree.children_default[curr_node]; + } else if (data.X[feature] <= tree.thresholds[curr_node]) { + next_node = tree.children_left[curr_node]; + } else { + next_node = tree.children_right[curr_node]; + } + + // assign credit to this feature as the difference in values at the current node vs. the next node + for (unsigned i = 0; i < tree.num_outputs; ++i) { + out[feature * tree.num_outputs + i] += tree.values[next_node * tree.num_outputs + i] - tree.values[curr_node * tree.num_outputs + i]; + } + + curr_node = next_node; + } +} + +/** + * This runs Tree SHAP with a per tree path conditional dependence assumption. 
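+ *
+ * Note: each tree is explained with the greedy Saabas attribution implemented
+ * in tree_saabas above, so the summed result is the fast Saabas approximation
+ * to the path-dependent SHAP values rather than the exact computation.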
+ */ +inline void dense_tree_saabas(tfloat *out_contribs, const TreeEnsemble& trees, const ExplanationDataset &data) { + tfloat *instance_out_contribs; + TreeEnsemble tree; + ExplanationDataset instance; + + // build explanation for each sample + for (unsigned i = 0; i < data.num_X; ++i) { + instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs; + data.get_x_instance(instance, i); + + // aggregate the effect of explaining each tree + // (this works because of the linearity property of Shapley values) + for (unsigned j = 0; j < trees.tree_limit; ++j) { + trees.get_tree(tree, j); + tree_saabas(instance_out_contribs, tree, instance); + } + + // apply the base offset to the bias term + for (unsigned j = 0; j < trees.num_outputs; ++j) { + instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j]; + } + } +} + + +// extend our decision path with a fraction of one and zero extensions +inline void extend_path(PathElement *unique_path, unsigned unique_depth, + tfloat zero_fraction, tfloat one_fraction, int feature_index) { + unique_path[unique_depth].feature_index = feature_index; + unique_path[unique_depth].zero_fraction = zero_fraction; + unique_path[unique_depth].one_fraction = one_fraction; + unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f); + for (int i = unique_depth - 1; i >= 0; i--) { + unique_path[i + 1].pweight += one_fraction * unique_path[i].pweight * (i + 1) + / static_cast(unique_depth + 1); + unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i) + / static_cast(unique_depth + 1); + } +} + +// undo a previous extension of the decision path +inline void unwind_path(PathElement *unique_path, unsigned unique_depth, unsigned path_index) { + const tfloat one_fraction = unique_path[path_index].one_fraction; + const tfloat zero_fraction = unique_path[path_index].zero_fraction; + tfloat next_one_portion = unique_path[unique_depth].pweight; + + for (int i = unique_depth - 1; i >= 0; --i) { + if (one_fraction != 0) { + const tfloat tmp = unique_path[i].pweight; + unique_path[i].pweight = next_one_portion * (unique_depth + 1) + / static_cast((i + 1) * one_fraction); + next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i) + / static_cast(unique_depth + 1); + } else { + unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1)) + / static_cast(zero_fraction * (unique_depth - i)); + } + } + + for (unsigned i = path_index; i < unique_depth; ++i) { + unique_path[i].feature_index = unique_path[i+1].feature_index; + unique_path[i].zero_fraction = unique_path[i+1].zero_fraction; + unique_path[i].one_fraction = unique_path[i+1].one_fraction; + } +} + +// determine what the total permutation weight would be if +// we unwound a previous extension in the decision path +inline tfloat unwound_path_sum(const PathElement *unique_path, unsigned unique_depth, + unsigned path_index) { + const tfloat one_fraction = unique_path[path_index].one_fraction; + const tfloat zero_fraction = unique_path[path_index].zero_fraction; + tfloat next_one_portion = unique_path[unique_depth].pweight; + tfloat total = 0; + + if (one_fraction != 0) { + for (int i = unique_depth - 1; i >= 0; --i) { + const tfloat tmp = next_one_portion / static_cast((i + 1) * one_fraction); + total += tmp; + next_one_portion = unique_path[i].pweight - tmp * zero_fraction * (unique_depth - i); + } + } else { + for (int i = unique_depth - 1; i >= 0; --i) { + total += unique_path[i].pweight / (zero_fraction * 
(unique_depth - i)); + } + } + return total * (unique_depth + 1); +} + +// recursive computation of SHAP values for a decision tree +inline void tree_shap_recursive(const unsigned num_outputs, const int *children_left, + const int *children_right, + const int *children_default, const int *features, + const tfloat *thresholds, const tfloat *values, + const tfloat *node_sample_weight, + const tfloat *x, const bool *x_missing, tfloat *phi, + unsigned node_index, unsigned unique_depth, + PathElement *parent_unique_path, tfloat parent_zero_fraction, + tfloat parent_one_fraction, int parent_feature_index, + int condition, unsigned condition_feature, + tfloat condition_fraction) { + + // stop if we have no weight coming down to us + if (condition_fraction == 0) return; + + // extend the unique path + PathElement *unique_path = parent_unique_path + unique_depth + 1; + std::copy(parent_unique_path, parent_unique_path + unique_depth + 1, unique_path); + + if (condition == 0 || condition_feature != static_cast(parent_feature_index)) { + extend_path(unique_path, unique_depth, parent_zero_fraction, + parent_one_fraction, parent_feature_index); + } + const unsigned split_index = features[node_index]; + + // leaf node + if (children_right[node_index] < 0) { + for (unsigned i = 1; i <= unique_depth; ++i) { + const tfloat w = unwound_path_sum(unique_path, unique_depth, i); + const PathElement &el = unique_path[i]; + const unsigned phi_offset = el.feature_index * num_outputs; + const unsigned values_offset = node_index * num_outputs; + const tfloat scale = w * (el.one_fraction - el.zero_fraction) * condition_fraction; + for (unsigned j = 0; j < num_outputs; ++j) { + phi[phi_offset + j] += scale * values[values_offset + j]; + } + } + + // internal node + } else { + // find which branch is "hot" (meaning x would follow it) + unsigned hot_index = 0; + if (x_missing[split_index]) { + hot_index = children_default[node_index]; + } else if (x[split_index] <= thresholds[node_index]) { + hot_index = children_left[node_index]; + } else { + hot_index = children_right[node_index]; + } + const unsigned cold_index = (static_cast(hot_index) == children_left[node_index] ? 
+ children_right[node_index] : children_left[node_index]); + const tfloat w = node_sample_weight[node_index]; + const tfloat hot_zero_fraction = node_sample_weight[hot_index] / w; + const tfloat cold_zero_fraction = node_sample_weight[cold_index] / w; + tfloat incoming_zero_fraction = 1; + tfloat incoming_one_fraction = 1; + + // see if we have already split on this feature, + // if so we undo that split so we can redo it for this node + unsigned path_index = 0; + for (; path_index <= unique_depth; ++path_index) { + if (static_cast(unique_path[path_index].feature_index) == split_index) break; + } + if (path_index != unique_depth + 1) { + incoming_zero_fraction = unique_path[path_index].zero_fraction; + incoming_one_fraction = unique_path[path_index].one_fraction; + unwind_path(unique_path, unique_depth, path_index); + unique_depth -= 1; + } + + // divide up the condition_fraction among the recursive calls + tfloat hot_condition_fraction = condition_fraction; + tfloat cold_condition_fraction = condition_fraction; + if (condition > 0 && split_index == condition_feature) { + cold_condition_fraction = 0; + unique_depth -= 1; + } else if (condition < 0 && split_index == condition_feature) { + hot_condition_fraction *= hot_zero_fraction; + cold_condition_fraction *= cold_zero_fraction; + unique_depth -= 1; + } + + tree_shap_recursive( + num_outputs, children_left, children_right, children_default, features, thresholds, values, + node_sample_weight, x, x_missing, phi, hot_index, unique_depth + 1, unique_path, + hot_zero_fraction * incoming_zero_fraction, incoming_one_fraction, + split_index, condition, condition_feature, hot_condition_fraction + ); + + tree_shap_recursive( + num_outputs, children_left, children_right, children_default, features, thresholds, values, + node_sample_weight, x, x_missing, phi, cold_index, unique_depth + 1, unique_path, + cold_zero_fraction * incoming_zero_fraction, 0, + split_index, condition, condition_feature, cold_condition_fraction + ); + } +} + +inline int compute_expectations(TreeEnsemble &tree, int i = 0, int depth = 0) { + unsigned max_depth = 0; + + if (tree.children_right[i] >= 0) { + const unsigned li = tree.children_left[i]; + const unsigned ri = tree.children_right[i]; + const unsigned depth_left = compute_expectations(tree, li, depth + 1); + const unsigned depth_right = compute_expectations(tree, ri, depth + 1); + const tfloat left_weight = tree.node_sample_weights[li]; + const tfloat right_weight = tree.node_sample_weights[ri]; + const unsigned li_offset = li * tree.num_outputs; + const unsigned ri_offset = ri * tree.num_outputs; + const unsigned i_offset = i * tree.num_outputs; + for (unsigned j = 0; j < tree.num_outputs; ++j) { + if ((left_weight == 0) && (right_weight == 0)) { + tree.values[i_offset + j] = 0.0; + } else { + const tfloat v = (left_weight * tree.values[li_offset + j] + right_weight * tree.values[ri_offset + j]) / (left_weight + right_weight); + tree.values[i_offset + j] = v; + } + } + max_depth = std::max(depth_left, depth_right) + 1; + } + + if (depth == 0) tree.max_depth = max_depth; + + return max_depth; +} + +inline void tree_shap(const TreeEnsemble& tree, const ExplanationDataset &data, + tfloat *out_contribs, int condition, unsigned condition_feature) { + + // update the reference value with the expected value of the tree's predictions + if (condition == 0) { + for (unsigned j = 0; j < tree.num_outputs; ++j) { + out_contribs[data.M * tree.num_outputs + j] += tree.values[j]; + } + } + + // Pre-allocate space for the unique path 
data + const unsigned maxd = tree.max_depth + 2; // need a bit more space than the max depth + PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2]; + + tree_shap_recursive( + tree.num_outputs, tree.children_left, tree.children_right, tree.children_default, + tree.features, tree.thresholds, tree.values, tree.node_sample_weights, data.X, + data.X_missing, out_contribs, 0, 0, unique_path_data, 1, 1, -1, condition, + condition_feature, 1 + ); + + delete[] unique_path_data; +} + + +inline unsigned build_merged_tree_recursive(TreeEnsemble &out_tree, const TreeEnsemble &trees, + const tfloat *data, const bool *data_missing, int *data_inds, + const unsigned num_background_data_inds, unsigned num_data_inds, + unsigned M, unsigned row = 0, unsigned i = 0, unsigned pos = 0, + tfloat *leaf_value = NULL) { + //tfloat new_leaf_value[trees.num_outputs]; + tfloat *new_leaf_value = (tfloat *) alloca(sizeof(tfloat) * trees.num_outputs); // allocate on the stack + unsigned row_offset = row * trees.max_nodes; + + // we have hit a terminal leaf!!! + if (trees.children_left[row_offset + i] < 0 && row + 1 == trees.tree_limit) { + + // create the leaf node + const tfloat *vals = trees.values + (row * trees.max_nodes + i) * trees.num_outputs; + if (leaf_value == NULL) { + for (unsigned j = 0; j < trees.num_outputs; ++j) { + out_tree.values[pos * trees.num_outputs + j] = vals[j]; + } + } else { + for (unsigned j = 0; j < trees.num_outputs; ++j) { + out_tree.values[pos * trees.num_outputs + j] = leaf_value[j] + vals[j]; + } + } + out_tree.children_left[pos] = -1; + out_tree.children_right[pos] = -1; + out_tree.children_default[pos] = -1; + out_tree.features[pos] = -1; + out_tree.thresholds[pos] = 0; + out_tree.node_sample_weights[pos] = num_background_data_inds; + + return pos; + } + + // we hit an intermediate leaf (so just add the value to our accumulator and move to the next tree) + if (trees.children_left[row_offset + i] < 0) { + + // accumulate the value of this original leaf so it will land on all eventual terminal leaves + const tfloat *vals = trees.values + (row * trees.max_nodes + i) * trees.num_outputs; + if (leaf_value == NULL) { + for (unsigned j = 0; j < trees.num_outputs; ++j) { + new_leaf_value[j] = vals[j]; + } + } else { + for (unsigned j = 0; j < trees.num_outputs; ++j) { + new_leaf_value[j] = leaf_value[j] + vals[j]; + } + } + leaf_value = new_leaf_value; + + // move forward to the next tree + row += 1; + row_offset += trees.max_nodes; + i = 0; + } + + // split the data inds by this node's threshold + const tfloat t = trees.thresholds[row_offset + i]; + const int f = trees.features[row_offset + i]; + const bool right_default = trees.children_default[row_offset + i] == trees.children_right[row_offset + i]; + int low_ptr = 0; + int high_ptr = num_data_inds - 1; + unsigned num_left_background_data_inds = 0; + int low_data_ind; + while (low_ptr <= high_ptr) { + low_data_ind = data_inds[low_ptr]; + const int data_ind = std::abs(low_data_ind) * M + f; + const bool is_missing = data_missing[data_ind]; + if ((!is_missing && data[data_ind] > t) || (right_default && is_missing)) { + data_inds[low_ptr] = data_inds[high_ptr]; + data_inds[high_ptr] = low_data_ind; + high_ptr -= 1; + } else { + if (low_data_ind >= 0) ++num_left_background_data_inds; // negative data_inds are not background samples + low_ptr += 1; + } + } + int *left_data_inds = data_inds; + const unsigned num_left_data_inds = low_ptr; + int *right_data_inds = data_inds + low_ptr; + const unsigned num_right_data_inds = 
num_data_inds - num_left_data_inds; + const unsigned num_right_background_data_inds = num_background_data_inds - num_left_background_data_inds; + + // all the data went right, so we skip creating this node and just recurse right + if (num_left_data_inds == 0) { + return build_merged_tree_recursive( + out_tree, trees, data, data_missing, data_inds, + num_background_data_inds, num_data_inds, M, row, + trees.children_right[row_offset + i], pos, leaf_value + ); + + // all the data went left, so we skip creating this node and just recurse left + } else if (num_right_data_inds == 0) { + return build_merged_tree_recursive( + out_tree, trees, data, data_missing, data_inds, + num_background_data_inds, num_data_inds, M, row, + trees.children_left[row_offset + i], pos, leaf_value + ); + + // data went both ways so we create this node and recurse down both paths + } else { + + // build the left subtree + const unsigned new_pos = build_merged_tree_recursive( + out_tree, trees, data, data_missing, left_data_inds, + num_left_background_data_inds, num_left_data_inds, M, row, + trees.children_left[row_offset + i], pos + 1, leaf_value + ); + + // fill in the data for this node + out_tree.children_left[pos] = pos + 1; + out_tree.children_right[pos] = new_pos + 1; + if (trees.children_left[row_offset + i] == trees.children_default[row_offset + i]) { + out_tree.children_default[pos] = pos + 1; + } else { + out_tree.children_default[pos] = new_pos + 1; + } + + out_tree.features[pos] = trees.features[row_offset + i]; + out_tree.thresholds[pos] = trees.thresholds[row_offset + i]; + out_tree.node_sample_weights[pos] = num_background_data_inds; + + // build the right subtree + return build_merged_tree_recursive( + out_tree, trees, data, data_missing, right_data_inds, + num_right_background_data_inds, num_right_data_inds, M, row, + trees.children_right[row_offset + i], new_pos + 1, leaf_value + ); + } +} + + +inline void build_merged_tree(TreeEnsemble &out_tree, const ExplanationDataset &data, const TreeEnsemble &trees) { + + // create a joint data matrix from both X and R matrices + tfloat *joined_data = new tfloat[(data.num_X + data.num_R) * data.M]; + std::copy(data.X, data.X + data.num_X * data.M, joined_data); + std::copy(data.R, data.R + data.num_R * data.M, joined_data + data.num_X * data.M); + bool *joined_data_missing = new bool[(data.num_X + data.num_R) * data.M]; + std::copy(data.X_missing, data.X_missing + data.num_X * data.M, joined_data_missing); + std::copy(data.R_missing, data.R_missing + data.num_R * data.M, joined_data_missing + data.num_X * data.M); + + // create an starting array of data indexes we will recursively sort + int *data_inds = new int[data.num_X + data.num_R]; + for (unsigned i = 0; i < data.num_X; ++i) data_inds[i] = i; + for (unsigned i = data.num_X; i < data.num_X + data.num_R; ++i) { + data_inds[i] = -i; // a negative index means it won't be recorded as a background sample + } + + build_merged_tree_recursive( + out_tree, trees, joined_data, joined_data_missing, data_inds, data.num_R, + data.num_X + data.num_R, data.M + ); + + delete[] joined_data; + delete[] joined_data_missing; + delete[] data_inds; +} + + +// Independent Tree SHAP functions below here +// ------------------------------------------ +struct Node { + short cl, cr, cd, pnode, feat, pfeat; // uint_16 + float thres, value; + char from_flag; +}; + +#define FROM_NEITHER 0 +#define FROM_X_NOT_R 1 +#define FROM_R_NOT_X 2 + +// https://www.geeksforgeeks.org/space-and-time-efficient-binomial-coefficient/ +inline int 
bin_coeff(int n, int k) { + int res = 1; + if (k > n - k) + k = n - k; + for (int i = 0; i < k; ++i) { + res *= (n - i); + res /= (i + 1); + } + return res; +} + +// note this only handles single output models, so multi-output models get explained using multiple passes +inline void tree_shap_indep(const unsigned max_depth, const unsigned num_feats, + const unsigned num_nodes, const tfloat *x, + const bool *x_missing, const tfloat *r, + const bool *r_missing, tfloat *out_contribs, + float *pos_lst, float *neg_lst, signed short *feat_hist, + float *memoized_weights, int *node_stack, Node *mytree) { + +// const bool DEBUG = true; +// ofstream myfile; +// if (DEBUG) { +// myfile.open ("/homes/gws/hughchen/shap/out.txt",fstream::app); +// myfile << "Entering tree_shap_indep\n"; +// } + int ns_ctr = 0; + std::fill_n(feat_hist, num_feats, 0); + short node = 0, feat, cl, cr, cd, pnode, pfeat = -1; + short next_xnode = -1, next_rnode = -1; + short next_node = -1, from_child = -1; + float thres, pos_x = 0, neg_x = 0, pos_r = 0, neg_r = 0; + char from_flag; + unsigned M = 0, N = 0; + + Node curr_node = mytree[node]; + feat = curr_node.feat; + thres = curr_node.thres; + cl = curr_node.cl; + cr = curr_node.cr; + cd = curr_node.cd; + + // short circuit when this is a stump tree (with no splits) + if (cl < 0) { + out_contribs[num_feats] += curr_node.value; + return; + } + +// if (DEBUG) { +// myfile << "\nNode: " << node << "\n"; +// myfile << "x[feat]: " << x[feat] << ", r[feat]: " << r[feat] << "\n"; +// myfile << "thres: " << thres << "\n"; +// } + + if (x_missing[feat]) { + next_xnode = cd; + } else if (x[feat] > thres) { + next_xnode = cr; + } else if (x[feat] <= thres) { + next_xnode = cl; + } + + if (r_missing[feat]) { + next_rnode = cd; + } else if (r[feat] > thres) { + next_rnode = cr; + } else if (r[feat] <= thres) { + next_rnode = cl; + } + + if (next_xnode != next_rnode) { + mytree[next_xnode].from_flag = FROM_X_NOT_R; + mytree[next_rnode].from_flag = FROM_R_NOT_X; + } else { + mytree[next_xnode].from_flag = FROM_NEITHER; + } + + // Check if x and r go the same way + if (next_xnode == next_rnode) { + next_node = next_xnode; + } + + // If not, go left + if (next_node < 0) { + next_node = cl; + if (next_rnode == next_node) { // rpath + N = N+1; + feat_hist[feat] -= 1; + } else if (next_xnode == next_node) { // xpath + M = M+1; + N = N+1; + feat_hist[feat] += 1; + } + } + node_stack[ns_ctr] = node; + ns_ctr += 1; + while (true) { + node = next_node; + curr_node = mytree[node]; + feat = curr_node.feat; + thres = curr_node.thres; + cl = curr_node.cl; + cr = curr_node.cr; + cd = curr_node.cd; + pnode = curr_node.pnode; + pfeat = curr_node.pfeat; + from_flag = curr_node.from_flag; + + + +// if (DEBUG) { +// myfile << "\nNode: " << node << "\n"; +// myfile << "N: " << N << ", M: " << M << "\n"; +// myfile << "from_flag==FROM_X_NOT_R: " << (from_flag==FROM_X_NOT_R) << "\n"; +// myfile << "from_flag==FROM_R_NOT_X: " << (from_flag==FROM_R_NOT_X) << "\n"; +// myfile << "from_flag==FROM_NEITHER: " << (from_flag==FROM_NEITHER) << "\n"; +// myfile << "feat_hist[feat]: " << feat_hist[feat] << "\n"; +// } + + // At a leaf + if (cl < 0) { + // if (DEBUG) { + // myfile << "At a leaf\n"; + // } + + if (M == 0) { + out_contribs[num_feats] += mytree[node].value; + } + + // Currently assuming a single output + if (N != 0) { + if (M != 0) { + pos_lst[node] = mytree[node].value * memoized_weights[N + max_depth * (M-1)]; + } + if (M != N) { + neg_lst[node] = -mytree[node].value * memoized_weights[N + max_depth * M]; + 
} + } +// if (DEBUG) { +// myfile << "pos_lst[node]: " << pos_lst[node] << "\n"; +// myfile << "neg_lst[node]: " << neg_lst[node] << "\n"; +// } + // Pop from node_stack + ns_ctr -= 1; + next_node = node_stack[ns_ctr]; + from_child = node; + // Unwind + if (feat_hist[pfeat] > 0) { + feat_hist[pfeat] -= 1; + } else if (feat_hist[pfeat] < 0) { + feat_hist[pfeat] += 1; + } + if (feat_hist[pfeat] == 0) { + if (from_flag == FROM_X_NOT_R) { + N = N-1; + M = M-1; + } else if (from_flag == FROM_R_NOT_X) { + N = N-1; + } + } + continue; + } + + const bool x_right = x[feat] > thres; + const bool r_right = r[feat] > thres; + + if (x_missing[feat]) { + next_xnode = cd; + } else if (x_right) { + next_xnode = cr; + } else if (!x_right) { + next_xnode = cl; + } + + if (r_missing[feat]) { + next_rnode = cd; + } else if (r_right) { + next_rnode = cr; + } else if (!r_right) { + next_rnode = cl; + } + + if (next_xnode >= 0) { + if (next_xnode != next_rnode) { + mytree[next_xnode].from_flag = FROM_X_NOT_R; + mytree[next_rnode].from_flag = FROM_R_NOT_X; + } else { + mytree[next_xnode].from_flag = FROM_NEITHER; + } + } + + // Arriving at node from parent + if (from_child == -1) { + // if (DEBUG) { + // myfile << "Arriving at node from parent\n"; + // } + node_stack[ns_ctr] = node; + ns_ctr += 1; + next_node = -1; + + // if (DEBUG) { + // myfile << "feat_hist[feat]" << feat_hist[feat] << "\n"; + // } + // Feature is set upstream + if (feat_hist[feat] > 0) { + next_node = next_xnode; + feat_hist[feat] += 1; + } else if (feat_hist[feat] < 0) { + next_node = next_rnode; + feat_hist[feat] -= 1; + } + + // x and r go the same way + if (next_node < 0) { + if (next_xnode == next_rnode) { + next_node = next_xnode; + } + } + + // Go down one path + if (next_node >= 0) { + continue; + } + + // Go down both paths, but go left first + next_node = cl; + if (next_rnode == next_node) { + N = N+1; + feat_hist[feat] -= 1; + } else if (next_xnode == next_node) { + M = M+1; + N = N+1; + feat_hist[feat] += 1; + } + from_child = -1; + continue; + } + + // Arriving at node from child + if (from_child != -1) { +// if (DEBUG) { +// myfile << "Arriving at node from child\n"; +// } + next_node = -1; + // Check if we should unroll immediately + if ((next_rnode == next_xnode) || (feat_hist[feat] != 0)) { + next_node = pnode; + } + + // Came from a single path, so unroll + if (next_node >= 0) { +// if (DEBUG) { +// myfile << "Came from a single path, so unroll\n"; +// } + // At the root node + if (node == 0) { + break; + } + // Update and unroll + pos_lst[node] = pos_lst[from_child]; + neg_lst[node] = neg_lst[from_child]; + +// if (DEBUG) { +// myfile << "pos_lst[node]: " << pos_lst[node] << "\n"; +// myfile << "neg_lst[node]: " << neg_lst[node] << "\n"; +// } + from_child = node; + ns_ctr -= 1; + + // Unwind + if (feat_hist[pfeat] > 0) { + feat_hist[pfeat] -= 1; + } else if (feat_hist[pfeat] < 0) { + feat_hist[pfeat] += 1; + } + if (feat_hist[pfeat] == 0) { + if (from_flag == FROM_X_NOT_R) { + N = N-1; + M = M-1; + } else if (from_flag == FROM_R_NOT_X) { + N = N-1; + } + } + continue; + // Go right - Arriving from the left child + } else if (from_child == cl) { +// if (DEBUG) { +// myfile << "Go right - Arriving from the left child\n"; +// } + node_stack[ns_ctr] = node; + ns_ctr += 1; + next_node = cr; + if (next_xnode == next_node) { + M = M+1; + N = N+1; + feat_hist[feat] += 1; + } else if (next_rnode == next_node) { + N = N+1; + feat_hist[feat] -= 1; + } + from_child = -1; + continue; + // Compute stuff and unroll - Arriving from the 
right child + } else if (from_child == cr) { +// if (DEBUG) { +// myfile << "Compute stuff and unroll - Arriving from the right child\n"; +// } + pos_x = 0; + neg_x = 0; + pos_r = 0; + neg_r = 0; + if ((next_xnode == cr) && (next_rnode == cl)) { + pos_x = pos_lst[cr]; + neg_x = neg_lst[cr]; + pos_r = pos_lst[cl]; + neg_r = neg_lst[cl]; + } else if ((next_xnode == cl) && (next_rnode == cr)) { + pos_x = pos_lst[cl]; + neg_x = neg_lst[cl]; + pos_r = pos_lst[cr]; + neg_r = neg_lst[cr]; + } + // out_contribs needs to have been initialized as all zeros + // if (pos_x + neg_r != 0) { + // std::cout << "val " << pos_x + neg_r << "\n"; + // } + out_contribs[feat] += pos_x + neg_r; + pos_lst[node] = pos_x + pos_r; + neg_lst[node] = neg_x + neg_r; + +// if (DEBUG) { +// myfile << "out_contribs[feat]: " << out_contribs[feat] << "\n"; +// myfile << "pos_lst[node]: " << pos_lst[node] << "\n"; +// myfile << "neg_lst[node]: " << neg_lst[node] << "\n"; +// } + + // Check if at root + if (node == 0) { + break; + } + + // Pop + ns_ctr -= 1; + next_node = node_stack[ns_ctr]; + from_child = node; + + // Unwind + if (feat_hist[pfeat] > 0) { + feat_hist[pfeat] -= 1; + } else if (feat_hist[pfeat] < 0) { + feat_hist[pfeat] += 1; + } + if (feat_hist[pfeat] == 0) { + if (from_flag == FROM_X_NOT_R) { + N = N-1; + M = M-1; + } else if (from_flag == FROM_R_NOT_X) { + N = N-1; + } + } + continue; + } + } + } + // if (DEBUG) { + // myfile.close(); + // } +} + + +inline void print_progress_bar(tfloat &last_print, tfloat start_time, unsigned i, unsigned total_count) { + const tfloat elapsed_seconds = difftime(time(NULL), start_time); + + if (elapsed_seconds > 10 && elapsed_seconds - last_print > 0.5) { + const tfloat fraction = static_cast(i) / total_count; + const double total_seconds = elapsed_seconds / fraction; + last_print = elapsed_seconds; + + PySys_WriteStderr( + "\r%3.0f%%|%.*s%.*s| %d/%d [%02d:%02d<%02d:%02d] ", + fraction * 100, int(0.5 + fraction*20), "===================", + 20-int(0.5 + fraction*20), " ", + i, total_count, + int(elapsed_seconds/60), int(elapsed_seconds) % 60, + int((total_seconds - elapsed_seconds)/60), int(total_seconds - elapsed_seconds) % 60 + ); + + // Get handle to python stderr file and flush it (https://mail.python.org/pipermail/python-list/2004-November/294912.html) + PyObject *pyStderr = PySys_GetObject("stderr"); + if (pyStderr) { + PyObject *result = PyObject_CallMethod(pyStderr, "flush", NULL); + Py_XDECREF(result); + } + } +} + +/** + * Runs Tree SHAP with feature independence assumptions on dense data. 
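+ *
+ * A minimal calling sketch (illustrative only; it assumes the TreeEnsemble
+ * arrays, max_depth, and the background set R/R_missing in the
+ * ExplanationDataset have already been populated, and that the output buffer
+ * starts zeroed):
+ *
+ *   std::vector<tfloat> contribs(data.num_X * (data.M + 1) * trees.num_outputs, 0);
+ *   dense_independent(trees, data, contribs.data(), NULL);  // NULL: no output transform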
+ */ +inline void dense_independent(const TreeEnsemble& trees, const ExplanationDataset &data, + tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) { + + // reformat the trees for faster access + Node *node_trees = new Node[trees.tree_limit * trees.max_nodes]; + for (unsigned i = 0; i < trees.tree_limit; ++i) { + Node *node_tree = node_trees + i * trees.max_nodes; + for (unsigned j = 0; j < trees.max_nodes; ++j) { + const unsigned en_ind = i * trees.max_nodes + j; + node_tree[j].cl = trees.children_left[en_ind]; + node_tree[j].cr = trees.children_right[en_ind]; + node_tree[j].cd = trees.children_default[en_ind]; + if (j == 0) { + node_tree[j].pnode = 0; + } + if (trees.children_left[en_ind] >= 0) { // relies on all unused entries having negative values in them + node_tree[trees.children_left[en_ind]].pnode = j; + node_tree[trees.children_left[en_ind]].pfeat = trees.features[en_ind]; + } + if (trees.children_right[en_ind] >= 0) { // relies on all unused entries having negative values in them + node_tree[trees.children_right[en_ind]].pnode = j; + node_tree[trees.children_right[en_ind]].pfeat = trees.features[en_ind]; + } + + node_tree[j].thres = trees.thresholds[en_ind]; + node_tree[j].feat = trees.features[en_ind]; + } + } + + // preallocate arrays needed by the algorithm + float *pos_lst = new float[trees.max_nodes]; + float *neg_lst = new float[trees.max_nodes]; + int *node_stack = new int[(unsigned) trees.max_depth]; + signed short *feat_hist = new signed short[data.M]; + tfloat *tmp_out_contribs = new tfloat[(data.M + 1)]; + + // precompute all the weight coefficients + float *memoized_weights = new float[(trees.max_depth+1) * (trees.max_depth+1)]; + for (unsigned n = 0; n <= trees.max_depth; ++n) { + for (unsigned m = 0; m <= trees.max_depth; ++m) { + memoized_weights[n + trees.max_depth * m] = 1.0 / (n * bin_coeff(n-1, m)); + } + } + + // compute the explanations for each sample + tfloat *instance_out_contribs; + tfloat rescale_factor = 1.0; + tfloat margin_x = 0; + tfloat margin_r = 0; + time_t start_time = time(NULL); + tfloat last_print = 0; + for (unsigned oind = 0; oind < trees.num_outputs; ++oind) { + // set the values in the reformatted tree to the current output index + for (unsigned i = 0; i < trees.tree_limit; ++i) { + Node *node_tree = node_trees + i * trees.max_nodes; + for (unsigned j = 0; j < trees.max_nodes; ++j) { + const unsigned en_ind = i * trees.max_nodes + j; + node_tree[j].value = trees.values[en_ind * trees.num_outputs + oind]; + } + } + + // loop over all the samples + for (unsigned i = 0; i < data.num_X; ++i) { + const tfloat *x = data.X + i * data.M; + const bool *x_missing = data.X_missing + i * data.M; + instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs; + const tfloat y_i = data.y == NULL ? 
0 : data.y[i]; + + print_progress_bar(last_print, start_time, oind * data.num_X + i, data.num_X * trees.num_outputs); + + // compute the model's margin output for x + if (transform != NULL) { + margin_x = trees.base_offset[oind]; + for (unsigned k = 0; k < trees.tree_limit; ++k) { + margin_x += tree_predict(k, trees, x, x_missing)[oind]; + } + } + + for (unsigned j = 0; j < data.num_R; ++j) { + const tfloat *r = data.R + j * data.M; + const bool *r_missing = data.R_missing + j * data.M; + std::fill_n(tmp_out_contribs, (data.M + 1), 0); + + // compute the model's margin output for r + if (transform != NULL) { + margin_r = trees.base_offset[oind]; + for (unsigned k = 0; k < trees.tree_limit; ++k) { + margin_r += tree_predict(k, trees, r, r_missing)[oind]; + } + } + + for (unsigned k = 0; k < trees.tree_limit; ++k) { + tree_shap_indep( + trees.max_depth, data.M, trees.max_nodes, x, x_missing, r, r_missing, + tmp_out_contribs, pos_lst, neg_lst, feat_hist, memoized_weights, + node_stack, node_trees + k * trees.max_nodes + ); + } + + // compute the rescale factor + if (transform != NULL) { + if (margin_x == margin_r) { + rescale_factor = 1.0; + } else { + rescale_factor = (*transform)(margin_x, y_i) - (*transform)(margin_r, y_i); + rescale_factor /= margin_x - margin_r; + } + } + + // add the effect of the current reference to our running total + // this is where we can do per reference scaling for non-linear transformations + for (unsigned k = 0; k < data.M; ++k) { + instance_out_contribs[k * trees.num_outputs + oind] += tmp_out_contribs[k] * rescale_factor; + } + + // Add the base offset + if (transform != NULL) { + instance_out_contribs[data.M * trees.num_outputs + oind] += (*transform)(trees.base_offset[oind] + tmp_out_contribs[data.M], 0); + } else { + instance_out_contribs[data.M * trees.num_outputs + oind] += trees.base_offset[oind] + tmp_out_contribs[data.M]; + } + } + + // average the results over all the references. + for (unsigned j = 0; j < (data.M + 1); ++j) { + instance_out_contribs[j * trees.num_outputs + oind] /= data.num_R; + } + + // apply the base offset to the bias term + // for (unsigned j = 0; j < trees.num_outputs; ++j) { + // instance_out_contribs[data.M * trees.num_outputs + j] += (*transform)(trees.base_offset[j], 0); + // } + } + } + + delete[] tmp_out_contribs; + delete[] node_trees; + delete[] pos_lst; + delete[] neg_lst; + delete[] node_stack; + delete[] feat_hist; + delete[] memoized_weights; +} + + +/** + * This runs Tree SHAP with a per tree path conditional dependence assumption. 
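+ *
+ * A note on why the per-tree loop below is sound: Shapley values are linear in
+ * the model, so for an ensemble f(x) = sum_t f_t(x) the attributions satisfy
+ * phi_i(f) = sum_t phi_i(f_t), letting each tree be explained on its own and
+ * the results accumulated into instance_out_contribs.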
+ */
+inline void dense_tree_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
+                                      tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
+    tfloat *instance_out_contribs;
+    TreeEnsemble tree;
+    ExplanationDataset instance;
+
+    // build explanation for each sample
+    for (unsigned i = 0; i < data.num_X; ++i) {
+        instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
+        data.get_x_instance(instance, i);
+
+        // aggregate the effect of explaining each tree
+        // (this works because of the linearity property of Shapley values)
+        for (unsigned j = 0; j < trees.tree_limit; ++j) {
+            trees.get_tree(tree, j);
+            tree_shap(tree, instance, instance_out_contribs, 0, 0);
+        }
+
+        // apply the base offset to the bias term
+        for (unsigned j = 0; j < trees.num_outputs; ++j) {
+            instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j];
+        }
+    }
+}
+
+// phi = np.zeros((self._current_X.shape[1] + 1, self._current_X.shape[1] + 1, self.n_outputs))
+// phi_diag = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
+// for t in range(self.tree_limit):
+//     self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_diag)
+//     for j in self.trees[t].unique_features:
+//         phi_on = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
+//         phi_off = np.zeros((self._current_X.shape[1] + 1, self.n_outputs))
+//         self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_on, 1, j)
+//         self.tree_shap(self.trees[t], self._current_X[i,:], self._current_x_missing, phi_off, -1, j)
+//         phi[j] += np.true_divide(np.subtract(phi_on,phi_off),2.0)
+//         phi_diag[j] -= np.sum(np.true_divide(np.subtract(phi_on,phi_off),2.0))
+// for j in range(self._current_X.shape[1]+1):
+//     phi[j][j] = phi_diag[j]
+// phi /= self.tree_limit
+// return phi
+
+inline void dense_tree_interactions_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
+                                                   tfloat *out_contribs,
+                                                   tfloat transform(const tfloat, const tfloat)) {
+
+    // build a list of all the unique features in each tree
+    const unsigned amount_of_unique_features = std::min(data.M, trees.max_nodes);
+    int *unique_features = new int[trees.tree_limit * amount_of_unique_features];
+    std::fill(unique_features, unique_features + trees.tree_limit * amount_of_unique_features, -1);
+    for (unsigned j = 0; j < trees.tree_limit; ++j) {
+        const int *features_row = trees.features + j * trees.max_nodes;
+        int *unique_features_row = unique_features + j * amount_of_unique_features;
+        for (unsigned k = 0; k < trees.max_nodes; ++k) {
+            for (unsigned l = 0; l < amount_of_unique_features; ++l) {
+                if (features_row[k] == unique_features_row[l]) break;
+                if (unique_features_row[l] < 0) {
+                    unique_features_row[l] = features_row[k];
+                    break;
+                }
+            }
+        }
+    }
+
+    // build an interaction explanation for each sample
+    tfloat *instance_out_contribs;
+    TreeEnsemble tree;
+    ExplanationDataset instance;
+    const unsigned contrib_row_size = (data.M + 1) * trees.num_outputs;
+    tfloat *diag_contribs = new tfloat[contrib_row_size];
+    tfloat *on_contribs = new tfloat[contrib_row_size];
+    tfloat *off_contribs = new tfloat[contrib_row_size];
+    for (unsigned i = 0; i < data.num_X; ++i) {
+        instance_out_contribs = out_contribs + i * (data.M + 1) * contrib_row_size;
+        data.get_x_instance(instance, i);
+
+        // aggregate the effect of explaining each tree
+        // (this works because of the linearity property of Shapley values)
+        std::fill(diag_contribs, diag_contribs + contrib_row_size, 0);
+        for (unsigned j = 0; j < trees.tree_limit; ++j) {
+            trees.get_tree(tree, j);
+            tree_shap(tree, instance, diag_contribs, 0, 0);
+
+            const int *unique_features_row = unique_features + j * amount_of_unique_features;
+            for (unsigned k = 0; k < amount_of_unique_features; ++k) {
+                const int ind = unique_features_row[k];
+                if (ind < 0) break; // < 0 means we have seen all the features for this tree
+
+                // compute the shap value with this feature held on and off
+                std::fill(on_contribs, on_contribs + contrib_row_size, 0);
+                std::fill(off_contribs, off_contribs + contrib_row_size, 0);
+                tree_shap(tree, instance, on_contribs, 1, ind);
+                tree_shap(tree, instance, off_contribs, -1, ind);
+
+                // save the difference between on and off as the interaction value
+                for (unsigned l = 0; l < contrib_row_size; ++l) {
+                    const tfloat val = (on_contribs[l] - off_contribs[l]) / 2;
+                    instance_out_contribs[ind * contrib_row_size + l] += val;
+                    diag_contribs[l] -= val;
+                }
+            }
+        }
+
+        // set the diagonal
+        for (unsigned j = 0; j < data.M + 1; ++j) {
+            const unsigned offset = j * contrib_row_size + j * trees.num_outputs;
+            for (unsigned k = 0; k < trees.num_outputs; ++k) {
+                instance_out_contribs[offset + k] = diag_contribs[j * trees.num_outputs + k];
+            }
+        }
+
+        // apply the base offset to the bias term
+        const unsigned last_ind = (data.M * (data.M + 1) + data.M) * trees.num_outputs;
+        for (unsigned j = 0; j < trees.num_outputs; ++j) {
+            instance_out_contribs[last_ind + j] += trees.base_offset[j];
+        }
+    }
+
+    delete[] diag_contribs;
+    delete[] on_contribs;
+    delete[] off_contribs;
+    delete[] unique_features;
+}
+
+/**
+ * This runs Tree SHAP with a global path conditional dependence assumption.
+ *
+ * By first merging all the trees in a tree ensemble into an equivalent single tree
+ * this method allows arbitrary marginal transformations and also ensures that all the
+ * evaluations of the model are consistent with some training data point.
+ */
+inline void dense_global_path_dependent(const TreeEnsemble& trees, const ExplanationDataset &data,
+                                        tfloat *out_contribs, tfloat transform(const tfloat, const tfloat)) {
+
+    // allocate space for our new merged tree (we save enough room to totally split all samples if need be)
+    TreeEnsemble merged_tree;
+    merged_tree.allocate(1, (data.num_X + data.num_R) * 2, trees.num_outputs);
+
+    // collapse the ensemble of trees into a single tree that has the same behavior
+    // for all the X and R samples in the dataset
+    build_merged_tree(merged_tree, data, trees);
+
+    // compute the expected value and depth of the new merged tree
+    compute_expectations(merged_tree);
+
+    // explain each sample using our new merged tree
+    ExplanationDataset instance;
+    tfloat *instance_out_contribs;
+    for (unsigned i = 0; i < data.num_X; ++i) {
+        instance_out_contribs = out_contribs + i * (data.M + 1) * trees.num_outputs;
+        data.get_x_instance(instance, i);
+
+        // since we now just have a single merged tree we can just use the tree_path_dependent algorithm
+        tree_shap(merged_tree, instance, instance_out_contribs, 0, 0);
+
+        // apply the base offset to the bias term
+        for (unsigned j = 0; j < trees.num_outputs; ++j) {
+            instance_out_contribs[data.M * trees.num_outputs + j] += trees.base_offset[j];
+        }
+    }
+
+    merged_tree.free();
+}
+
+
+/**
+ * The main method for computing Tree SHAP on models using dense data.
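+ *
+ * A hedged call sketch (the real caller lives in the Python binding layer, and
+ * the model_transform code 0 is assumed here to select the identity transform
+ * inside get_transform):
+ *
+ *     dense_tree_shap(trees, data, out_contribs,
+ *                     FEATURE_DEPENDENCE::tree_path_dependent, 0, false);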
+ */ +inline void dense_tree_shap(const TreeEnsemble& trees, const ExplanationDataset &data, tfloat *out_contribs, + const int feature_dependence, unsigned model_transform, bool interactions) { + + // see what transform (if any) we have + transform_f transform = get_transform(model_transform); + + // dispatch to the correct algorithm handler + switch (feature_dependence) { + case FEATURE_DEPENDENCE::independent: + if (interactions) { + std::cerr << "FEATURE_DEPENDENCE::independent does not support interactions!\n"; + } else dense_independent(trees, data, out_contribs, transform); + return; + + case FEATURE_DEPENDENCE::tree_path_dependent: + if (interactions) dense_tree_interactions_path_dependent(trees, data, out_contribs, transform); + else dense_tree_path_dependent(trees, data, out_contribs, transform); + return; + + case FEATURE_DEPENDENCE::global_path_dependent: + if (interactions) { + std::cerr << "FEATURE_DEPENDENCE::global_path_dependent does not support interactions!\n"; + } else dense_global_path_dependent(trees, data, out_contribs, transform); + return; + } +} diff --git a/lib/shap/datasets.py b/lib/shap/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..e33bce1f22ed58e8d26c233fea1fbf270b17cbbf --- /dev/null +++ b/lib/shap/datasets.py @@ -0,0 +1,309 @@ +import os +from urllib.request import urlretrieve + +import numpy as np +import pandas as pd +import sklearn.datasets + +import shap + +github_data_url = "https://github.com/shap/shap/raw/master/data/" + + +def imagenet50(display=False, resolution=224, n_points=None): + """ This is a set of 50 images representative of ImageNet images. + + This dataset was collected by randomly finding a working ImageNet link and then pasting the + original ImageNet image into Google image search restricted to images licensed for reuse. A + similar image (now with rights to reuse) was downloaded as a rough replacement for the original + ImageNet image. The point is to have a random sample of ImageNet for use as a background + distribution for explaining models trained on ImageNet data. + + Note that because the images are only rough replacements the labels might no longer be correct. + """ + + prefix = github_data_url + "imagenet50_" + X = np.load(cache(f"{prefix}{resolution}x{resolution}.npy")).astype(np.float32) + y = np.loadtxt(cache(f"{prefix}labels.csv")) + + if n_points is not None: + X = shap.utils.sample(X, n_points, random_state=0) + y = shap.utils.sample(y, n_points, random_state=0) + + return X, y + + +def california(display=False, n_points=None): + """ Return the california housing data in a nice package. """ + + d = sklearn.datasets.fetch_california_housing() + df = pd.DataFrame(data=d.data, columns=d.feature_names) + target = d.target + + if n_points is not None: + df = shap.utils.sample(df, n_points, random_state=0) + target = shap.utils.sample(target, n_points, random_state=0) + + return df, target + + +def linnerud(display=False, n_points=None): + """ Return the linnerud data in a nice package (multi-target regression). """ + + d = sklearn.datasets.load_linnerud() + X = pd.DataFrame(d.data, columns=d.feature_names) + y = pd.DataFrame(d.target, columns=d.target_names) + + if n_points is not None: + X = shap.utils.sample(X, n_points, random_state=0) + y = shap.utils.sample(y, n_points, random_state=0) + + return X, y + + +def imdb(display=False, n_points=None): + """ Return the classic IMDB sentiment analysis training data in a nice package. 
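+
+    A minimal usage sketch (downloads and caches the corpus on first use):
+
+        corpus, labels = shap.datasets.imdb(n_points=500)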
+
+    Full data is at: http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
+    Paper to cite when using the data is: http://www.aclweb.org/anthology/P11-1015
+    """
+
+    with open(cache(github_data_url + "imdb_train.txt"), encoding="utf-8") as f:
+        data = f.readlines()
+    y = np.ones(25000, dtype=bool)
+    y[:12500] = 0
+
+    if n_points is not None:
+        data = shap.utils.sample(data, n_points, random_state=0)
+        y = shap.utils.sample(y, n_points, random_state=0)
+
+    return data, y
+
+
+def communitiesandcrime(display=False, n_points=None):
+    """ Predict total number of non-violent crimes per 100K population.
+
+    This dataset is from the classic UCI Machine Learning repository:
+    https://archive.ics.uci.edu/ml/datasets/Communities+and+Crime+Unnormalized
+    """
+
+    raw_data = pd.read_csv(
+        cache(github_data_url + "CommViolPredUnnormalizedData.txt"),
+        na_values="?"
+    )
+
+    # find the indices where the total violent crimes are known
+    valid_inds = np.where(np.invert(np.isnan(raw_data.iloc[:,-2])))[0]
+
+    if n_points is not None:
+        valid_inds = shap.utils.sample(valid_inds, n_points, random_state=0)
+
+    y = np.array(raw_data.iloc[valid_inds,-2], dtype=float)
+
+    # extract the predictive features and remove columns with missing values
+    X = raw_data.iloc[valid_inds,5:-18]
+    valid_cols = np.where(np.isnan(X.values).sum(0) == 0)[0]
+    X = X.iloc[:,valid_cols]
+
+    return X, y
+
+
+def diabetes(display=False, n_points=None):
+    """ Return the diabetes data in a nice package. """
+
+    d = sklearn.datasets.load_diabetes()
+    df = pd.DataFrame(data=d.data, columns=d.feature_names)
+    target = d.target
+
+    if n_points is not None:
+        df = shap.utils.sample(df, n_points, random_state=0)
+        target = shap.utils.sample(target, n_points, random_state=0)
+
+    return df, target
+
+
+def iris(display=False, n_points=None):
+    """ Return the classic iris data in a nice package. """
+
+    d = sklearn.datasets.load_iris()
+    df = pd.DataFrame(data=d.data, columns=d.feature_names)
+    target = d.target
+
+    if n_points is not None:
+        df = shap.utils.sample(df, n_points, random_state=0)
+        target = shap.utils.sample(target, n_points, random_state=0)
+
+    if display:
+        return df, [d.target_names[v] for v in target]
+    return df, target
+
+
+def adult(display=False, n_points=None):
+    """ Return the Adult census data in a nice package.
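+
+    A minimal usage sketch (the default frame is integer-coded; pass
+    display=True for the human-readable columns):
+
+        X, y = shap.datasets.adult(n_points=1000)
+        X_display, y_display = shap.datasets.adult(display=True, n_points=1000)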
""" + dtypes = [ + ("Age", "float32"), ("Workclass", "category"), ("fnlwgt", "float32"), + ("Education", "category"), ("Education-Num", "float32"), ("Marital Status", "category"), + ("Occupation", "category"), ("Relationship", "category"), ("Race", "category"), + ("Sex", "category"), ("Capital Gain", "float32"), ("Capital Loss", "float32"), + ("Hours per week", "float32"), ("Country", "category"), ("Target", "category") + ] + raw_data = pd.read_csv( + cache(github_data_url + "adult.data"), + names=[d[0] for d in dtypes], + na_values="?", + dtype=dict(dtypes) + ) + + if n_points is not None: + raw_data = shap.utils.sample(raw_data, n_points, random_state=0) + + data = raw_data.drop(["Education"], axis=1) # redundant with Education-Num + filt_dtypes = list(filter(lambda x: x[0] not in ["Target", "Education"], dtypes)) + data["Target"] = data["Target"] == " >50K" + rcode = { + "Not-in-family": 0, + "Unmarried": 1, + "Other-relative": 2, + "Own-child": 3, + "Husband": 4, + "Wife": 5 + } + for k, dtype in filt_dtypes: + if dtype == "category": + if k == "Relationship": + data[k] = np.array([rcode[v.strip()] for v in data[k]]) + else: + data[k] = data[k].cat.codes + + if display: + return raw_data.drop(["Education", "Target", "fnlwgt"], axis=1), data["Target"].values + return data.drop(["Target", "fnlwgt"], axis=1), data["Target"].values + + +def nhanesi(display=False, n_points=None): + """ A nicely packaged version of NHANES I data with surivival times as labels. + """ + X = pd.read_csv(cache(github_data_url + "NHANESI_X.csv"), index_col=0) + y = pd.read_csv(cache(github_data_url + "NHANESI_y.csv"), index_col=0)["y"] + + if n_points is not None: + X = shap.utils.sample(X, n_points, random_state=0) + y = shap.utils.sample(y, n_points, random_state=0) + + if display: + X_display = X.copy() + # X_display["sex_isFemale"] = ["Female" if v else "Male" for v in X["sex_isFemale"]] + return X_display, np.array(y) + return X, np.array(y) + + +def corrgroups60(display=False, n_points=1_000): + """ Correlated Groups 60 + + A simulated dataset with tight correlations among distinct groups of features. + """ + + # set a constant seed + old_seed = np.random.seed() + np.random.seed(0) + + # generate dataset with known correlation + N, M = n_points, 60 + + # set one coefficient from each group of 3 to 1 + beta = np.zeros(M) + beta[0:30:3] = 1 + + # build a correlation matrix with groups of 3 tightly correlated features + C = np.eye(M) + for i in range(0,30,3): + C[i,i+1] = C[i+1,i] = 0.99 + C[i,i+2] = C[i+2,i] = 0.99 + C[i+1,i+2] = C[i+2,i+1] = 0.99 + def f(X): + return np.matmul(X, beta) + + # Make sure the sample correlation is a perfect match + X_start = np.random.randn(N, M) + X_centered = X_start - X_start.mean(0) + Sigma = np.matmul(X_centered.T, X_centered) / X_centered.shape[0] + W = np.linalg.cholesky(np.linalg.inv(Sigma)).T + X_white = np.matmul(X_centered, W.T) + assert np.linalg.norm(np.corrcoef(np.matmul(X_centered, W.T).T) - np.eye(M)) < 1e-6 # ensure this decorrelates the data + + # create the final data + X_final = np.matmul(X_white, np.linalg.cholesky(C).T) + X = X_final + y = f(X) + np.random.randn(N) * 1e-2 + + # restore the previous numpy random seed + np.random.seed(old_seed) + + return pd.DataFrame(X), y + + +def independentlinear60(display=False, n_points=1_000): + """ A simulated dataset with tight correlations among distinct groups of features. 
+    """
+
+    # save the global RNG state so it can be restored below, then set a constant seed
+    # (np.random.seed() returns None, so it cannot be used to capture the old state)
+    old_state = np.random.get_state()
+    np.random.seed(0)
+
+    # generate dataset with known correlation
+    N, M = n_points, 60
+
+    # set one coefficient from each group of 3 to 1
+    beta = np.zeros(M)
+    beta[0:30:3] = 1
+
+    def f(X):
+        return np.matmul(X, beta)
+
+    # Make sure the sample correlation is a perfect match
+    X_start = np.random.randn(N, M)
+    X = X_start - X_start.mean(0)
+    y = f(X) + np.random.randn(N) * 1e-2
+
+    # restore the previous numpy random state
+    np.random.set_state(old_state)
+
+    return pd.DataFrame(X), y
+
+
+def a1a(n_points=None):
+    """ A sparse dataset in scipy csr matrix format.
+    """
+    data, target = sklearn.datasets.load_svmlight_file(cache(github_data_url + 'a1a.svmlight'))
+
+    if n_points is not None:
+        data = shap.utils.sample(data, n_points, random_state=0)
+        target = shap.utils.sample(target, n_points, random_state=0)
+
+    return data, target
+
+
+def rank():
+    """ Ranking datasets from the LightGBM repository.
+    """
+    rank_data_url = 'https://raw.githubusercontent.com/Microsoft/LightGBM/master/examples/lambdarank/'
+    x_train, y_train = sklearn.datasets.load_svmlight_file(cache(rank_data_url + 'rank.train'))
+    x_test, y_test = sklearn.datasets.load_svmlight_file(cache(rank_data_url + 'rank.test'))
+    q_train = np.loadtxt(cache(rank_data_url + 'rank.train.query'))
+    q_test = np.loadtxt(cache(rank_data_url + 'rank.test.query'))
+
+    return x_train, y_train, x_test, y_test, q_train, q_test
+
+
+def cache(url, file_name=None):
+    """ Loads a file from the URL and caches it locally.
+    """
+    if file_name is None:
+        file_name = os.path.basename(url)
+    data_dir = os.path.join(os.path.dirname(__file__), "cached_data")
+    os.makedirs(data_dir, exist_ok=True)
+
+    file_path = os.path.join(data_dir, file_name)
+    if not os.path.isfile(file_path):
+        urlretrieve(url, file_path)
+
+    return file_path
diff --git a/lib/shap/explainers/__init__.py b/lib/shap/explainers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..de3a0edd4fbc26753e622730bd043f49d592ad23
--- /dev/null
+++ b/lib/shap/explainers/__init__.py
@@ -0,0 +1,38 @@
+from ._additive import AdditiveExplainer
+from ._deep import DeepExplainer
+from ._exact import ExactExplainer
+from ._gpu_tree import GPUTreeExplainer
+from ._gradient import GradientExplainer
+from ._kernel import KernelExplainer
+from ._linear import LinearExplainer
+from ._partition import PartitionExplainer
+from ._permutation import PermutationExplainer
+from ._sampling import SamplingExplainer
+from ._tree import TreeExplainer
+
+# Alternative legacy "short-form" aliases, which are kept here for backwards-compatibility
+Additive = AdditiveExplainer
+Deep = DeepExplainer
+Exact = ExactExplainer
+GPUTree = GPUTreeExplainer
+Gradient = GradientExplainer
+Kernel = KernelExplainer
+Linear = LinearExplainer
+Partition = PartitionExplainer
+Permutation = PermutationExplainer
+Sampling = SamplingExplainer
+Tree = TreeExplainer
+
+__all__ = [
+    "AdditiveExplainer",
+    "DeepExplainer",
+    "ExactExplainer",
+    "GPUTreeExplainer",
+    "GradientExplainer",
+    "KernelExplainer",
+    "LinearExplainer",
+    "PartitionExplainer",
+    "PermutationExplainer",
+    "SamplingExplainer",
+    "TreeExplainer",
+]
diff --git a/lib/shap/explainers/_additive.py b/lib/shap/explainers/_additive.py
new file mode 100644
index 0000000000000000000000000000000000000000..3eb5dc47e9c45cd2e0b731ae53a56006f413634a
--- /dev/null
+++ b/lib/shap/explainers/_additive.py
@@ -0,0 +1,187 @@
+import numpy as np
+
+from ..utils import MaskedModel, safe_isinstance
+from ._explainer import Explainer
+
+
+class AdditiveExplainer(Explainer):
+    """ Computes SHAP values for generalized additive models.
+
+    This assumes that the model only has first-order effects. Extending this to
+    second- and third-order effects is future work (if you apply this to those models right now
+    you will get incorrect answers that fail additivity).
+    """
+
+    def __init__(self, model, masker, link=None, feature_names=None, linearize_link=True):
+        """ Build an Additive explainer for the given model using the given masker object.
+
+        Parameters
+        ----------
+        model : function
+            A callable python object that executes the model given a set of input data samples.
+
+        masker : function or numpy.array or pandas.DataFrame
+            A callable python object used to "mask" out hidden features of the form `masker(mask, *fargs)`.
+            It takes a binary mask and an input sample and returns a matrix of masked samples. These
+            masked samples are evaluated using the model function and the outputs are then averaged.
+            As a shortcut for the standard masking used by SHAP you can pass a background data matrix
+            instead of a function and that matrix will be used for masking. To use a clustering
+            game structure you can pass a shap.maskers.Tabular(data, hclustering=\"correlation\") object, but
+            note that this structure information has no effect on the explanations of additive models.
+        """
+        super().__init__(model, masker, feature_names=feature_names, linearize_link=linearize_link)
+
+        if safe_isinstance(model, "interpret.glassbox.ExplainableBoostingClassifier"):
+            self.model = model.decision_function
+
+            if self.masker is None:
+                self._expected_value = model.intercept_
+                # num_features = len(model.additive_terms_)
+
+                # fm = MaskedModel(self.model, self.masker, self.link, np.zeros(num_features))
+                # masks = np.ones((1, num_features), dtype=bool)
+                # outputs = fm(masks)
+                # self.model(np.zeros(num_features))
+                # self._zero_offset = self.model(np.zeros(num_features))#model.intercept_#outputs[0]
+                # self._input_offsets = np.zeros(num_features) #* self._zero_offset
+                raise NotImplementedError("Masker not given and we don't yet support pulling the distribution centering directly from the EBM model!")
+            return
+
+        # here we need to compute the offsets ourselves because we can't pull them directly from a model we know about
+        assert safe_isinstance(self.masker, "shap.maskers.Independent"), "The Additive explainer only supports the Tabular masker at the moment!"
+
+        # pre-compute per-feature offsets
+        fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, np.zeros(self.masker.shape[1]))
+        masks = np.ones((self.masker.shape[1]+1, self.masker.shape[1]), dtype=bool)
+        for i in range(1, self.masker.shape[1]+1):
+            masks[i,i-1] = False
+        outputs = fm(masks)
+        self._zero_offset = outputs[0]
+        self._input_offsets = np.zeros(masker.shape[1])
+        for i in range(1, self.masker.shape[1]+1):
+            self._input_offsets[i-1] = outputs[i] - self._zero_offset
+
+        self._expected_value = self._input_offsets.sum() + self._zero_offset
+
+    def __call__(self, *args, max_evals=None, silent=False):
+        """ Explains the output of model(*args), where args represents one or more parallel iterable args.
+        """
+
+        # we entirely rely on the general call implementation, we override just to remove **kwargs
+        # from the function signature
+        return super().__call__(*args, max_evals=max_evals, silent=silent)
+
+    @staticmethod
+    def supports_model_with_masker(model, masker):
+        """ Determines if this explainer can handle the given model.
+ + This is an abstract static method meant to be implemented by each subclass. + """ + if safe_isinstance(model, "interpret.glassbox.ExplainableBoostingClassifier"): + if model.interactions != 0: + raise NotImplementedError("Need to add support for interaction effects!") + return True + + return False + + def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, silent): + """ Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes). + """ + + x = row_args[0] + inputs = np.zeros((len(x), len(x))) + for i in range(len(x)): + inputs[i,i] = x[i] + + phi = self.model(inputs) - self._zero_offset - self._input_offsets + + return { + "values": phi, + "expected_values": self._expected_value, + "mask_shapes": [a.shape for a in row_args], + "main_effects": phi, + "clustering": getattr(self.masker, "clustering", None) + } + +# class AdditiveExplainer(Explainer): +# """ Computes SHAP values for generalized additive models. + +# This assumes that the model only has first order effects. Extending this to +# 2nd and third order effects is future work (if you apply this to those models right now +# you will get incorrect answers that fail additivity). + +# Parameters +# ---------- +# model : function or ExplainableBoostingRegressor +# User supplied additive model either as either a function or a model object. + +# data : numpy.array, pandas.DataFrame +# The background dataset to use for computing conditional expectations. +# feature_perturbation : "interventional" +# Only the standard interventional SHAP values are supported by AdditiveExplainer right now. +# """ + +# def __init__(self, model, data, feature_perturbation="interventional"): +# if feature_perturbation != "interventional": +# raise Exception("Unsupported type of feature_perturbation provided: " + feature_perturbation) + +# if safe_isinstance(model, "interpret.glassbox.ebm.ebm.ExplainableBoostingRegressor"): +# self.f = model.predict +# elif callable(model): +# self.f = model +# else: +# raise ValueError("The passed model must be a recognized object or a function!") + +# # convert dataframes +# if isinstance(data, (pd.Series, pd.DataFrame)): +# data = data.values +# self.data = data + +# # compute the expected value of the model output +# self.expected_value = self.f(data).mean() + +# # pre-compute per-feature offsets +# tmp = np.zeros(data.shape) +# self._zero_offset = self.f(tmp).mean() +# self._feature_offset = np.zeros(data.shape[1]) +# for i in range(data.shape[1]): +# tmp[:,i] = data[:,i] +# self._feature_offset[i] = self.f(tmp).mean() - self._zero_offset +# tmp[:,i] = 0 + + +# def shap_values(self, X): +# """ Estimate the SHAP values for a set of samples. + +# Parameters +# ---------- +# X : numpy.array, pandas.DataFrame or scipy.csr_matrix +# A matrix of samples (# samples x # features) on which to explain the model's output. + +# Returns +# ------- +# For models with a single output this returns a matrix of SHAP values +# (# samples x # features). Each row sums to the difference between the model output for that +# sample and the expected value of the model output (which is stored as expected_value +# attribute of the explainer). +# """ + +# # convert dataframes +# if isinstance(X, (pd.Series, pd.DataFrame)): +# X = X.values + +# # assert isinstance(X, np.ndarray), "Unknown instance type: " + str(type(X)) +# assert len(X.shape) == 1 or len(X.shape) == 2, "Instance must have 1 or 2 dimensions!" 
+
+#         # convert dataframes
+#         if isinstance(X, (pd.Series, pd.DataFrame)):
+#             X = X.values
+
+#         phi = np.zeros(X.shape)
+#         tmp = np.zeros(X.shape)
+#         for i in range(X.shape[1]):
+#             tmp[:,i] = X[:,i]
+#             phi[:,i] = self.f(tmp) - self._zero_offset - self._feature_offset[i]
+#             tmp[:,i] = 0
+
+#         return phi
diff --git a/lib/shap/explainers/_deep/__init__.py b/lib/shap/explainers/_deep/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef84549b87e36d13dc65b68282de31b3661163fa
--- /dev/null
+++ b/lib/shap/explainers/_deep/__init__.py
@@ -0,0 +1,125 @@
+from .._explainer import Explainer
+from .deep_pytorch import PyTorchDeep
+from .deep_tf import TFDeep
+
+
+class DeepExplainer(Explainer):
+    """ Meant to approximate SHAP values for deep learning models.
+
+    This is an enhanced version of the DeepLIFT algorithm (Deep SHAP) where, similar to Kernel SHAP, we
+    approximate the conditional expectations of SHAP values using a selection of background samples.
+    Lundberg and Lee, NIPS 2017 showed that the per node attribution rules in DeepLIFT (Shrikumar,
+    Greenside, and Kundaje, arXiv 2017) can be chosen to approximate Shapley values. By integrating
+    over many background samples Deep estimates approximate SHAP values such that they sum
+    up to the difference between the expected model output on the passed background samples and the
+    current model output (f(x) - E[f(x)]).
+
+    Examples
+    --------
+    See :ref:`Deep Explainer Examples <deep_explainer_examples>`
+    """
+
+    def __init__(self, model, data, session=None, learning_phase_flags=None):
+        """ An explainer object for a differentiable model using a given background dataset.
+
+        Note that the complexity of the method scales linearly with the number of background data
+        samples. Passing the entire training dataset as `data` will give very accurate expected
+        values, but will be unreasonably expensive. The variance of the expectation estimates scales by
+        roughly 1/sqrt(N) for N background data samples. So 100 samples will give a good estimate,
+        and 1000 samples a very good estimate of the expected values.
+
+        Parameters
+        ----------
+        model : if framework == 'tensorflow', (input : [tf.Tensor], output : tf.Tensor)
+            A pair of TensorFlow tensors (or a list and a tensor) that specifies the input and
+            output of the model to be explained. Note that SHAP values are specific to a single
+            output value, so the output tf.Tensor should be a single dimensional output (,1).
+
+            if framework == 'pytorch', an nn.Module object (model), or a tuple (model, layer),
+            where both are nn.Module objects
+            The model is an nn.Module object which takes as input a tensor (or list of tensors) of
+            shape data, and returns a single dimensional output.
+            If the input is a tuple, the returned shap values will be for the input of the
+            layer argument. layer must be a layer in the model, i.e. model.conv2
+
+        data :
+            if framework == 'tensorflow': [numpy.array] or [pandas.DataFrame]
+            if framework == 'pytorch': [torch.tensor]
+            The background dataset to use for integrating out features. Deep integrates
+            over these samples. The data passed here must match the input tensors given in the
+            first argument. Note that since these samples are integrated over for each sample you
+            should only use something like 100 or 1000 random background samples, not the whole
+            training dataset.
+
+        if framework == 'tensorflow':
+
+        session : None or tensorflow.Session
+            The TensorFlow session that has the model we are explaining. If None is passed then
+            we do our best to find the right session, first looking for a keras session, then
+            falling back to the default TensorFlow session.
+
+        learning_phase_flags : None or list of tensors
+            If you have your own custom learning phase flags pass them here. When explaining a prediction
+            we need to ensure we are not in training mode, since this changes the behavior of ops like
+            batch norm or dropout. If None is passed then we look for tensors in the graph that look like
+            learning phase flags (this works for Keras models). Note that we assume all the flags should
+            have a value of False during predictions (and hence explanations).
+        """
+        # first, we need to find the framework
+        if type(model) is tuple:
+            a, b = model
+            try:
+                a.named_parameters()
+                framework = 'pytorch'
+            except Exception:
+                framework = 'tensorflow'
+        else:
+            try:
+                model.named_parameters()
+                framework = 'pytorch'
+            except Exception:
+                framework = 'tensorflow'
+
+        if framework == 'tensorflow':
+            self.explainer = TFDeep(model, data, session, learning_phase_flags)
+        elif framework == 'pytorch':
+            self.explainer = PyTorchDeep(model, data)
+
+        self.expected_value = self.explainer.expected_value
+        self.explainer.framework = framework
+
+    def shap_values(self, X, ranked_outputs=None, output_rank_order='max', check_additivity=True):
+        """ Return approximate SHAP values for the model applied to the data given by X.
+
+        Parameters
+        ----------
+        X : list,
+            if framework == 'tensorflow': numpy.array, or pandas.DataFrame
+            if framework == 'pytorch': torch.tensor
+            A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
+            explain the model's output.
+
+        ranked_outputs : None or int
+            If ranked_outputs is None then we explain all the outputs in a multi-output model. If
+            ranked_outputs is a positive integer then we only explain that many of the top model
+            outputs (where "top" is determined by output_rank_order). Note that this causes a pair
+            of values to be returned (shap_values, indexes), where shap_values is a list of numpy
+            arrays for each of the output ranks, and indexes is a matrix that indicates for each sample
+            which output indexes were chosen as "top".
+
+        output_rank_order : "max", "min", or "max_abs"
+            How to order the model outputs when using ranked_outputs, either by maximum, minimum, or
+            maximum absolute value.
+
+        Returns
+        -------
+        array or list
+            For models with a single output this returns a tensor of SHAP values with the same shape
+            as X. For a model with multiple outputs this returns a list of SHAP value tensors, each of
+            which are the same shape as X. If ranked_outputs is None then this list of tensors matches
+            the number of model outputs. If ranked_outputs is a positive integer a pair is returned
+            (shap_values, indexes), where shap_values is a list of tensors with a length of
+            ranked_outputs, and indexes is a matrix that indicates for each sample which output indexes
+            were chosen as "top".
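+
+        Examples
+        --------
+        A minimal sketch (assumes a trained differentiable `model` and a
+        `background` batch of roughly 100 samples; the names are illustrative)::
+
+            explainer = DeepExplainer(model, background)
+            shap_values = explainer.shap_values(X_test[:10])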
+ """ + return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity) diff --git a/lib/shap/explainers/_deep/deep_pytorch.py b/lib/shap/explainers/_deep/deep_pytorch.py new file mode 100644 index 0000000000000000000000000000000000000000..670e4ddb64ffde2e88cdefa7c391b1c63673f146 --- /dev/null +++ b/lib/shap/explainers/_deep/deep_pytorch.py @@ -0,0 +1,386 @@ +import warnings + +import numpy as np +from packaging import version + +from .._explainer import Explainer +from .deep_utils import _check_additivity + +torch = None + + +class PyTorchDeep(Explainer): + + def __init__(self, model, data): + # try and import pytorch + global torch + if torch is None: + import torch + if version.parse(torch.__version__) < version.parse("0.4"): + warnings.warn("Your PyTorch version is older than 0.4 and not supported.") + + # check if we have multiple inputs + self.multi_input = False + if isinstance(data, list): + self.multi_input = True + if not isinstance(data, list): + data = [data] + self.data = data + self.layer = None + self.input_handle = None + self.interim = False + self.interim_inputs_shape = None + self.expected_value = None # to keep the DeepExplainer base happy + if type(model) == tuple: + self.interim = True + model, layer = model + model = model.eval() + self.layer = layer + self.add_target_handle(self.layer) + + # if we are taking an interim layer, the 'data' is going to be the input + # of the interim layer; we will capture this using a forward hook + with torch.no_grad(): + _ = model(*data) + interim_inputs = self.layer.target_input + if type(interim_inputs) is tuple: + # this should always be true, but just to be safe + self.interim_inputs_shape = [i.shape for i in interim_inputs] + else: + self.interim_inputs_shape = [interim_inputs.shape] + self.target_handle.remove() + del self.layer.target_input + self.model = model.eval() + + self.multi_output = False + self.num_outputs = 1 + with torch.no_grad(): + outputs = model(*data) + + # also get the device everything is running on + self.device = outputs.device + if outputs.shape[1] > 1: + self.multi_output = True + self.num_outputs = outputs.shape[1] + self.expected_value = outputs.mean(0).cpu().numpy() + + def add_target_handle(self, layer): + input_handle = layer.register_forward_hook(get_target_input) + self.target_handle = input_handle + + def add_handles(self, model, forward_handle, backward_handle): + """ + Add handles to all non-container layers in the model. 
+ Recursively for non-container layers + """ + handles_list = [] + model_children = list(model.children()) + if model_children: + for child in model_children: + handles_list.extend(self.add_handles(child, forward_handle, backward_handle)) + else: # leaves + handles_list.append(model.register_forward_hook(forward_handle)) + handles_list.append(model.register_full_backward_hook(backward_handle)) + return handles_list + + def remove_attributes(self, model): + """ + Removes the x and y attributes which were added by the forward handles + Recursively searches for non-container layers + """ + for child in model.children(): + if 'nn.modules.container' in str(type(child)): + self.remove_attributes(child) + else: + try: + del child.x + except AttributeError: + pass + try: + del child.y + except AttributeError: + pass + + def gradient(self, idx, inputs): + self.model.zero_grad() + X = [x.requires_grad_() for x in inputs] + outputs = self.model(*X) + selected = [val for val in outputs[:, idx]] + grads = [] + if self.interim: + interim_inputs = self.layer.target_input + for idx, input in enumerate(interim_inputs): + grad = torch.autograd.grad(selected, input, + retain_graph=True if idx + 1 < len(interim_inputs) else None, + allow_unused=True)[0] + if grad is not None: + grad = grad.cpu().numpy() + else: + grad = torch.zeros_like(X[idx]).cpu().numpy() + grads.append(grad) + del self.layer.target_input + return grads, [i.detach().cpu().numpy() for i in interim_inputs] + else: + for idx, x in enumerate(X): + grad = torch.autograd.grad(selected, x, + retain_graph=True if idx + 1 < len(X) else None, + allow_unused=True)[0] + if grad is not None: + grad = grad.cpu().numpy() + else: + grad = torch.zeros_like(X[idx]).cpu().numpy() + grads.append(grad) + return grads + + def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True): + # X ~ self.model_input + # X_data ~ self.data + + # check if we have multiple inputs + if not self.multi_input: + assert not isinstance(X, list), "Expected a single tensor model input!" + X = [X] + else: + assert isinstance(X, list), "Expected a list of model inputs!" + + X = [x.detach().to(self.device) for x in X] + + model_output_values = None + + if ranked_outputs is not None and self.multi_output: + with torch.no_grad(): + model_output_values = self.model(*X) + # rank and determine the model outputs that we will explain + if output_rank_order == "max": + _, model_output_ranks = torch.sort(model_output_values, descending=True) + elif output_rank_order == "min": + _, model_output_ranks = torch.sort(model_output_values, descending=False) + elif output_rank_order == "max_abs": + _, model_output_ranks = torch.sort(torch.abs(model_output_values), descending=True) + else: + emsg = "output_rank_order must be max, min, or max_abs!" 
+ raise ValueError(emsg) + model_output_ranks = model_output_ranks[:, :ranked_outputs] + else: + model_output_ranks = (torch.ones((X[0].shape[0], self.num_outputs)).int() * + torch.arange(0, self.num_outputs).int()) + + # add the gradient handles + handles = self.add_handles(self.model, add_interim_values, deeplift_grad) + if self.interim: + self.add_target_handle(self.layer) + + # compute the attributions + output_phis = [] + for i in range(model_output_ranks.shape[1]): + phis = [] + if self.interim: + for k in range(len(self.interim_inputs_shape)): + phis.append(np.zeros((X[0].shape[0], ) + self.interim_inputs_shape[k][1: ])) + else: + for k in range(len(X)): + phis.append(np.zeros(X[k].shape)) + for j in range(X[0].shape[0]): + # tile the inputs to line up with the background data samples + tiled_X = [X[t][j:j + 1].repeat( + (self.data[t].shape[0],) + tuple([1 for k in range(len(X[t].shape) - 1)])) for t + in range(len(X))] + joint_x = [torch.cat((tiled_X[t], self.data[t]), dim=0) for t in range(len(X))] + # run attribution computation graph + feature_ind = model_output_ranks[j, i] + sample_phis = self.gradient(feature_ind, joint_x) + # assign the attributions to the right part of the output arrays + if self.interim: + sample_phis, output = sample_phis + x, data = [], [] + for k in range(len(output)): + x_temp, data_temp = np.split(output[k], 2) + x.append(x_temp) + data.append(data_temp) + for t in range(len(self.interim_inputs_shape)): + phis[t][j] = (sample_phis[t][self.data[t].shape[0]:] * (x[t] - data[t])).mean(0) + else: + for t in range(len(X)): + phis[t][j] = (torch.from_numpy(sample_phis[t][self.data[t].shape[0]:]).to(self.device) * (X[t][j: j + 1] - self.data[t])).cpu().detach().numpy().mean(0) + output_phis.append(phis[0] if not self.multi_input else phis) + # cleanup; remove all gradient handles + for handle in handles: + handle.remove() + self.remove_attributes(self.model) + if self.interim: + self.target_handle.remove() + + # check that the SHAP values sum up to the model output + if check_additivity: + if model_output_values is None: + with torch.no_grad(): + model_output_values = self.model(*X) + + _check_additivity(self, model_output_values.cpu(), output_phis) + + if not self.multi_output: + return output_phis[0] + elif ranked_outputs is not None: + return output_phis, model_output_ranks + else: + return output_phis + +# Module hooks + + +def deeplift_grad(module, grad_input, grad_output): + """The backward hook which computes the deeplift + gradient for an nn.Module + """ + # first, get the module type + module_type = module.__class__.__name__ + # first, check the module is supported + if module_type in op_handler: + if op_handler[module_type].__name__ not in ['passthrough', 'linear_1d']: + return op_handler[module_type](module, grad_input, grad_output) + else: + warnings.warn(f'unrecognized nn.Module: {module_type}') + return grad_input + + +def add_interim_values(module, input, output): + """The forward hook used to save interim tensors, detached + from the graph. 
Used to calculate the multipliers + """ + try: + del module.x + except AttributeError: + pass + try: + del module.y + except AttributeError: + pass + module_type = module.__class__.__name__ + if module_type in op_handler: + func_name = op_handler[module_type].__name__ + # First, check for cases where we don't need to save the x and y tensors + if func_name == 'passthrough': + pass + else: + # check only the 0th input varies + for i in range(len(input)): + if i != 0 and type(output) is tuple: + assert input[i] == output[i], "Only the 0th input may vary!" + # if a new method is added, it must be added here too. This ensures tensors + # are only saved if necessary + if func_name in ['maxpool', 'nonlinear_1d']: + # only save tensors if necessary + if type(input) is tuple: + setattr(module, 'x', torch.nn.Parameter(input[0].detach())) + else: + setattr(module, 'x', torch.nn.Parameter(input.detach())) + if type(output) is tuple: + setattr(module, 'y', torch.nn.Parameter(output[0].detach())) + else: + setattr(module, 'y', torch.nn.Parameter(output.detach())) + + +def get_target_input(module, input, output): + """A forward hook which saves the tensor - attached to its graph. + Used if we want to explain the interim outputs of a model + """ + try: + del module.target_input + except AttributeError: + pass + setattr(module, 'target_input', input) + + +def passthrough(module, grad_input, grad_output): + """No change made to gradients""" + return None + + +def maxpool(module, grad_input, grad_output): + pool_to_unpool = { + 'MaxPool1d': torch.nn.functional.max_unpool1d, + 'MaxPool2d': torch.nn.functional.max_unpool2d, + 'MaxPool3d': torch.nn.functional.max_unpool3d + } + pool_to_function = { + 'MaxPool1d': torch.nn.functional.max_pool1d, + 'MaxPool2d': torch.nn.functional.max_pool2d, + 'MaxPool3d': torch.nn.functional.max_pool3d + } + delta_in = module.x[: int(module.x.shape[0] / 2)] - module.x[int(module.x.shape[0] / 2):] + dup0 = [2] + [1 for i in delta_in.shape[1:]] + # we also need to check if the output is a tuple + y, ref_output = torch.chunk(module.y, 2) + cross_max = torch.max(y, ref_output) + diffs = torch.cat([cross_max - ref_output, y - cross_max], 0) + + # all of this just to unpool the outputs + with torch.no_grad(): + _, indices = pool_to_function[module.__class__.__name__]( + module.x, module.kernel_size, module.stride, module.padding, + module.dilation, module.ceil_mode, True) + xmax_pos, rmax_pos = torch.chunk(pool_to_unpool[module.__class__.__name__]( + grad_output[0] * diffs, indices, module.kernel_size, module.stride, + module.padding, list(module.x.shape)), 2) + + grad_input = [None for _ in grad_input] + grad_input[0] = torch.where(torch.abs(delta_in) < 1e-7, torch.zeros_like(delta_in), + (xmax_pos + rmax_pos) / delta_in).repeat(dup0) + + return tuple(grad_input) + + +def linear_1d(module, grad_input, grad_output): + """No change made to gradients.""" + return None + + +def nonlinear_1d(module, grad_input, grad_output): + delta_out = module.y[: int(module.y.shape[0] / 2)] - module.y[int(module.y.shape[0] / 2):] + + delta_in = module.x[: int(module.x.shape[0] / 2)] - module.x[int(module.x.shape[0] / 2):] + dup0 = [2] + [1 for i in delta_in.shape[1:]] + # handles numerical instabilities where delta_in is very small by + # just taking the gradient in those cases + grads = [None for _ in grad_input] + grads[0] = torch.where(torch.abs(delta_in.repeat(dup0)) < 1e-6, grad_input[0], + grad_output[0] * (delta_out / delta_in).repeat(dup0)) + return tuple(grads) + + +op_handler = {} + +# 
passthrough ops, where we make no change to the gradient +op_handler['Dropout3d'] = passthrough +op_handler['Dropout2d'] = passthrough +op_handler['Dropout'] = passthrough +op_handler['AlphaDropout'] = passthrough + +op_handler['Conv1d'] = linear_1d +op_handler['Conv2d'] = linear_1d +op_handler['Conv3d'] = linear_1d +op_handler['ConvTranspose1d'] = linear_1d +op_handler['ConvTranspose2d'] = linear_1d +op_handler['ConvTranspose3d'] = linear_1d +op_handler['Linear'] = linear_1d +op_handler['AvgPool1d'] = linear_1d +op_handler['AvgPool2d'] = linear_1d +op_handler['AvgPool3d'] = linear_1d +op_handler['AdaptiveAvgPool1d'] = linear_1d +op_handler['AdaptiveAvgPool2d'] = linear_1d +op_handler['AdaptiveAvgPool3d'] = linear_1d +op_handler['BatchNorm1d'] = linear_1d +op_handler['BatchNorm2d'] = linear_1d +op_handler['BatchNorm3d'] = linear_1d + +op_handler['LeakyReLU'] = nonlinear_1d +op_handler['ReLU'] = nonlinear_1d +op_handler['ELU'] = nonlinear_1d +op_handler['Sigmoid'] = nonlinear_1d +op_handler["Tanh"] = nonlinear_1d +op_handler["Softplus"] = nonlinear_1d +op_handler['Softmax'] = nonlinear_1d + +op_handler['MaxPool1d'] = maxpool +op_handler['MaxPool2d'] = maxpool +op_handler['MaxPool3d'] = maxpool diff --git a/lib/shap/explainers/_deep/deep_tf.py b/lib/shap/explainers/_deep/deep_tf.py new file mode 100644 index 0000000000000000000000000000000000000000..55c3d7db282e078d8a6d4d57d4f0d1aeccca992d --- /dev/null +++ b/lib/shap/explainers/_deep/deep_tf.py @@ -0,0 +1,763 @@ +import warnings + +import numpy as np +from packaging import version + +from ...utils._exceptions import DimensionError +from .._explainer import Explainer +from ..tf_utils import _get_graph, _get_model_inputs, _get_model_output, _get_session +from .deep_utils import _check_additivity + +tf = None +tf_ops = None +tf_backprop = None +tf_execute = None +tf_gradients_impl = None + +def custom_record_gradient(op_name, inputs, attrs, results): + """ This overrides tensorflow.python.eager.backprop._record_gradient. + + We need to override _record_gradient in order to get gradient backprop to + get called for ResourceGather operations. In order to make this work we + temporarily "lie" about the input type to prevent the node from getting + pruned from the gradient backprop process. We then reset the type directly + afterwards back to what it was (an integer type). + """ + reset_input = False + if op_name == "ResourceGather" and inputs[1].dtype == tf.int32: + inputs[1].__dict__["_dtype"] = tf.float32 + reset_input = True + try: + out = tf_backprop._record_gradient("shap_"+op_name, inputs, attrs, results) + except AttributeError: + out = tf_backprop.record_gradient("shap_"+op_name, inputs, attrs, results) + + if reset_input: + inputs[1].__dict__["_dtype"] = tf.int32 + + return out + +class TFDeep(Explainer): + """ + Using tf.gradients to implement the backpropagation was + inspired by the gradient-based implementation approach proposed by Ancona et al, ICLR 2018. Note + that this package does not currently use the reveal-cancel rule for ReLu units proposed in DeepLIFT. + """ + + def __init__(self, model, data, session=None, learning_phase_flags=None): + """ An explainer object for a deep model using a given background dataset. + + Note that the complexity of the method scales linearly with the number of background data + samples. Passing the entire training dataset as `data` will give very accurate expected + values, but will be computationally expensive. 
The variance of the expectation estimates scales by + roughly 1/sqrt(N) for N background data samples. So 100 samples will give a good estimate, + and 1000 samples a very good estimate of the expected values. + + Parameters + ---------- + model : tf.keras.Model or (input : [tf.Operation], output : tf.Operation) + A keras model object or a pair of TensorFlow operations (or a list and an op) that + specifies the input and output of the model to be explained. Note that SHAP values + are specific to a single output value, so you get an explanation for each element of + the output tensor (which must be a flat rank one vector). + + data : [numpy.array] or [pandas.DataFrame] or function + The background dataset to use for integrating out features. DeepExplainer integrates + over all these samples for each explanation. The data passed here must match the input + operations given to the model. If a function is supplied, it must be a function that + takes a particular input example and generates the background dataset for that example + session : None or tensorflow.Session + The TensorFlow session that has the model we are explaining. If None is passed then + we do our best to find the right session, first looking for a keras session, then + falling back to the default TensorFlow session. + + learning_phase_flags : None or list of tensors + If you have your own custom learning phase flags pass them here. When explaining a prediction + we need to ensure we are not in training mode, since this changes the behavior of ops like + batch norm or dropout. If None is passed then we look for tensors in the graph that look like + learning phase flags (this works for Keras models). Note that we assume all the flags should + have a value of False during predictions (and hence explanations). + + """ + # try to import tensorflow + global tf, tf_ops, tf_backprop, tf_execute, tf_gradients_impl + if tf is None: + from tensorflow.python.eager import backprop as tf_backprop + from tensorflow.python.eager import execute as tf_execute + from tensorflow.python.framework import ( + ops as tf_ops, + ) + from tensorflow.python.ops import ( + gradients_impl as tf_gradients_impl, + ) + if not hasattr(tf_gradients_impl, "_IsBackpropagatable"): + from tensorflow.python.ops import gradients_util as tf_gradients_impl + import tensorflow as tf + if version.parse(tf.__version__) < version.parse("1.4.0"): + warnings.warn("Your TensorFlow version is older than 1.4.0 and not supported.") + + if version.parse(tf.__version__) >= version.parse("2.4.0"): + warnings.warn("Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.") + + # determine the model inputs and outputs + self.model_inputs = _get_model_inputs(model) + self.model_output = _get_model_output(model) + assert not isinstance(self.model_output, list), "The model output to be explained must be a single tensor!" + assert len(self.model_output.shape) < 3, "The model output must be a vector or a single value!" 
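+        # a rank-1 output tensor (batch,) is treated below as a single output,
+        # while a rank-2 tensor (batch, n_outputs) yields one explanation per output column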
+ self.multi_output = True + if len(self.model_output.shape) == 1: + self.multi_output = False + + if tf.executing_eagerly(): + if isinstance(model, tuple) or isinstance(model, list): + assert len(model) == 2, "When a tuple is passed it must be of the form (inputs, outputs)" + from tensorflow.keras import Model + self.model = Model(model[0], model[1]) + else: + self.model = model + + # check if we have multiple inputs + self.multi_input = True + if not isinstance(self.model_inputs, list) or len(self.model_inputs) == 1: + self.multi_input = False + if not isinstance(self.model_inputs, list): + self.model_inputs = [self.model_inputs] + if not isinstance(data, list) and (hasattr(data, "__call__") is False): + data = [data] + self.data = data + + self._vinputs = {} # used to track what op inputs depends on the model inputs + self.orig_grads = {} + + if not tf.executing_eagerly(): + self.session = _get_session(session) + + self.graph = _get_graph(self) + + # if no learning phase flags were given we go looking for them + # ...this will catch the one that keras uses + # we need to find them since we want to make sure learning phase flags are set to False + if learning_phase_flags is None: + self.learning_phase_ops = [] + for op in self.graph.get_operations(): + if 'learning_phase' in op.name and op.type == "Const" and len(op.outputs[0].shape) == 0: + if op.outputs[0].dtype == tf.bool: + self.learning_phase_ops.append(op) + self.learning_phase_flags = [op.outputs[0] for op in self.learning_phase_ops] + else: + self.learning_phase_ops = [t.op for t in learning_phase_flags] + + # save the expected output of the model + # if self.data is a function, set self.expected_value to None + if (hasattr(self.data, '__call__')): + self.expected_value = None + else: + if self.data[0].shape[0] > 5000: + warnings.warn("You have provided over 5k background samples! For better performance consider using smaller random sample.") + if not tf.executing_eagerly(): + self.expected_value = self.run(self.model_output, self.model_inputs, self.data).mean(0) + else: + #if type(self.model)is tuple: + # self.fModel(cnn.inputs, cnn.get_layer(theNameYouWant).outputs) + self.expected_value = tf.reduce_mean(self.model(self.data), 0) + + if not tf.executing_eagerly(): + self._init_between_tensors(self.model_output.op, self.model_inputs) + + # make a blank array that will get lazily filled in with the SHAP value computation + # graphs for each output. 
Lazy is important since if there are 1000 outputs and we + # only explain the top 5 it would be a waste to build graphs for the other 995 + if not self.multi_output: + self.phi_symbolics = [None] + else: + noutputs = self.model_output.shape.as_list()[1] + if noutputs is not None: + self.phi_symbolics = [None for i in range(noutputs)] + else: + raise DimensionError("The model output tensor to be explained cannot have a static shape in dim 1 of None!") + + def _get_model_output(self, model): + if len(model.layers[-1]._inbound_nodes) == 0: + if len(model.outputs) > 1: + warnings.warn("Only one model output supported.") + return model.outputs[0] + else: + return model.layers[-1].output + + def _init_between_tensors(self, out_op, model_inputs): + # find all the operations in the graph between our inputs and outputs + tensor_blacklist = tensors_blocked_by_false(self.learning_phase_ops) # don't follow learning phase branches + dependence_breakers = [k for k in op_handlers if op_handlers[k] == break_dependence] + back_ops = backward_walk_ops( + [out_op], tensor_blacklist, + dependence_breakers + ) + start_ops = [] + for minput in model_inputs: + for op in minput.consumers(): + start_ops.append(op) + self.between_ops = forward_walk_ops( + start_ops, + tensor_blacklist, dependence_breakers, + within_ops=back_ops + ) + + # note all the tensors that are on the path between the inputs and the output + self.between_tensors = {} + for op in self.between_ops: + for t in op.outputs: + self.between_tensors[t.name] = True + for t in model_inputs: + self.between_tensors[t.name] = True + + # save what types are being used + self.used_types = {} + for op in self.between_ops: + self.used_types[op.type] = True + + def _variable_inputs(self, op): + """ Return which inputs of this operation are variable (i.e. depend on the model inputs). + """ + if op not in self._vinputs: + out = np.zeros(len(op.inputs), dtype=bool) + for i,t in enumerate(op.inputs): + out[i] = t.name in self.between_tensors + self._vinputs[op] = out + return self._vinputs[op] + + def phi_symbolic(self, i): + """ Get the SHAP value computation graph for a given model output. + """ + if self.phi_symbolics[i] is None: + + if not tf.executing_eagerly(): + def anon(): + out = self.model_output[:,i] if self.multi_output else self.model_output + return tf.gradients(out, self.model_inputs) + + self.phi_symbolics[i] = self.execute_with_overridden_gradients(anon) + else: + @tf.function + def grad_graph(shap_rAnD): + phase = tf.keras.backend.learning_phase() + tf.keras.backend.set_learning_phase(0) + + with tf.GradientTape(watch_accessed_variables=False) as tape: + tape.watch(shap_rAnD) + out = self.model(shap_rAnD) + if self.multi_output: + out = out[:,i] + + self._init_between_tensors(out.op, shap_rAnD) + x_grad = tape.gradient(out, shap_rAnD) + tf.keras.backend.set_learning_phase(phase) + return x_grad + + self.phi_symbolics[i] = grad_graph + + return self.phi_symbolics[i] + + def shap_values(self, X, ranked_outputs=None, output_rank_order="max", check_additivity=True): + # check if we have multiple inputs + if not self.multi_input: + if isinstance(X, list) and len(X) != 1: + raise ValueError("Expected a single tensor as model input!") + elif not isinstance(X, list): + X = [X] + else: + assert isinstance(X, list), "Expected a list of model inputs!" + assert len(self.model_inputs) == len(X), "Number of model inputs (%d) does not match the number given (%d)!" 
+
+        # rank and determine the model outputs that we will explain
+        if ranked_outputs is not None and self.multi_output:
+            if not tf.executing_eagerly():
+                model_output_values = self.run(self.model_output, self.model_inputs, X)
+            else:
+                model_output_values = self.model(X)
+
+            if output_rank_order == "max":
+                model_output_ranks = np.argsort(-model_output_values)
+            elif output_rank_order == "min":
+                model_output_ranks = np.argsort(model_output_values)
+            elif output_rank_order == "max_abs":
+                model_output_ranks = np.argsort(np.abs(model_output_values))
+            else:
+                emsg = "output_rank_order must be max, min, or max_abs!"
+                raise ValueError(emsg)
+            model_output_ranks = model_output_ranks[:, :ranked_outputs]
+        else:
+            model_output_ranks = np.tile(np.arange(len(self.phi_symbolics)), (X[0].shape[0], 1))
+
+        # compute the attributions
+        output_phis = []
+        for i in range(model_output_ranks.shape[1]):
+            phis = []
+            for k in range(len(X)):
+                phis.append(np.zeros(X[k].shape))
+            for j in range(X[0].shape[0]):
+                if hasattr(self.data, '__call__'):
+                    bg_data = self.data([X[t][j] for t in range(len(X))])
+                    if not isinstance(bg_data, list):
+                        bg_data = [bg_data]
+                else:
+                    bg_data = self.data
+
+                # tile the inputs to line up with the background data samples
+                tiled_X = [np.tile(X[t][j:j+1], (bg_data[t].shape[0],) + tuple([1 for k in range(len(X[t].shape)-1)])) for t in range(len(X))]
+
+                # we use the first sample for the current sample and the rest for the references
+                joint_input = [np.concatenate([tiled_X[t], bg_data[t]], 0) for t in range(len(X))]
+
+                # run attribution computation graph
+                feature_ind = model_output_ranks[j, i]
+                sample_phis = self.run(self.phi_symbolic(feature_ind), self.model_inputs, joint_input)
+
+                # assign the attributions to the right part of the output arrays
+                for t in range(len(X)):
+                    phis[t][j] = (sample_phis[t][bg_data[t].shape[0]:] * (X[t][j] - bg_data[t])).mean(0)
+
+            output_phis.append(phis[0] if not self.multi_input else phis)
+
+        # check that the SHAP values sum up to the model output
+        if check_additivity:
+            if not tf.executing_eagerly():
+                model_output = self.run(self.model_output, self.model_inputs, X)
+            else:
+                model_output = self.model(X)
+
+            _check_additivity(self, model_output, output_phis)
+
+        if not self.multi_output:
+            return output_phis[0]
+        elif ranked_outputs is not None:
+            return output_phis, model_output_ranks
+        else:
+            return output_phis
+
+    def run(self, out, model_inputs, X):
+        """ Runs the model while also setting the learning phase flags to False.
+        """
+        if not tf.executing_eagerly():
+            feed_dict = dict(zip(model_inputs, X))
+            for t in self.learning_phase_flags:
+                feed_dict[t] = False
+            return self.session.run(out, feed_dict)
+        else:
+            def anon():
+                tf_execute.record_gradient = custom_record_gradient
+
+                # build inputs that are correctly shaped, typed, and tf-wrapped
+                inputs = []
+                for i in range(len(X)):
+                    shape = list(self.model_inputs[i].shape)
+                    shape[0] = -1
+                    data = X[i].reshape(shape)
+                    v = tf.constant(data, dtype=self.model_inputs[i].dtype)
+                    inputs.append(v)
+                final_out = out(inputs)
+                try:
+                    tf_execute.record_gradient = tf_backprop._record_gradient
+                except AttributeError:
+                    tf_execute.record_gradient = tf_backprop.record_gradient
+
+                return final_out
+            return self.execute_with_overridden_gradients(anon)
+
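+    # Editorial note: in eager mode run() temporarily swaps TensorFlow's internal
+    # record_gradient hook for shap's custom_record_gradient, and in both modes
+    # execute_with_overridden_gradients() patches the gradient registry so that
+    # the op handlers below intercept gradient creation.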
+ """ + type_name = op.type[5:] if op.type.startswith("shap_") else op.type + out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefix before the lookup + return out + + def execute_with_overridden_gradients(self, f): + # replace the gradients for all the non-linear activations + # we do this by hacking our way into the registry (TODO: find a public API for this if it exists) + reg = tf_ops._gradient_registry._registry + ops_not_in_registry = ['TensorListReserve'] + # NOTE: location_tag taken from tensorflow source for None type ops + location_tag = ("UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN") + # TODO: unclear why some ops are not in the registry with TF 2.0 like TensorListReserve + for non_reg_ops in ops_not_in_registry: + reg[non_reg_ops] = {'type': None, 'location': location_tag} + for n in op_handlers: + if n in reg: + self.orig_grads[n] = reg[n]["type"] + reg["shap_"+n] = { + "type": self.custom_grad, + "location": reg[n]["location"] + } + reg[n]["type"] = self.custom_grad + + # In TensorFlow 1.10 they started pruning out nodes that they think can't be backpropped + # unfortunately that includes the index of embedding layers so we disable that check here + if hasattr(tf_gradients_impl, "_IsBackpropagatable"): + orig_IsBackpropagatable = tf_gradients_impl._IsBackpropagatable + tf_gradients_impl._IsBackpropagatable = lambda tensor: True + + # define the computation graph for the attribution values using a custom gradient-like computation + try: + out = f() + finally: + # reinstate the backpropagatable check + if hasattr(tf_gradients_impl, "_IsBackpropagatable"): + tf_gradients_impl._IsBackpropagatable = orig_IsBackpropagatable + + # restore the original gradient definitions + for n in op_handlers: + if n in reg: + del reg["shap_"+n] + reg[n]["type"] = self.orig_grads[n] + for non_reg_ops in ops_not_in_registry: + del reg[non_reg_ops] + if not tf.executing_eagerly(): + return out + else: + return [v.numpy() for v in out] + +def tensors_blocked_by_false(ops): + """ Follows a set of ops assuming their value is False and find blocked Switch paths. + + This is used to prune away parts of the model graph that are only used during the training + phase (like dropout, batch norm, etc.). 
+ """ + blocked = [] + def recurse(op): + if op.type == "Switch": + blocked.append(op.outputs[1]) # the true path is blocked since we assume the ops we trace are False + else: + for out in op.outputs: + for c in out.consumers(): + recurse(c) + for op in ops: + recurse(op) + + return blocked + +def backward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist): + found_ops = [] + op_stack = [op for op in start_ops] + while len(op_stack) > 0: + op = op_stack.pop() + if op.type not in op_type_blacklist and op not in found_ops: + found_ops.append(op) + for input in op.inputs: + if input not in tensor_blacklist: + op_stack.append(input.op) + return found_ops + +def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist, within_ops): + found_ops = [] + op_stack = [op for op in start_ops] + while len(op_stack) > 0: + op = op_stack.pop() + if op.type not in op_type_blacklist and op in within_ops and op not in found_ops: + found_ops.append(op) + for out in op.outputs: + if out not in tensor_blacklist: + for c in out.consumers(): + op_stack.append(c) + return found_ops + + +def softmax(explainer, op, *grads): + """ Just decompose softmax into its components and recurse, we can handle all of them :) + + We assume the 'axis' is the last dimension because the TF codebase swaps the 'axis' to + the last dimension before the softmax op if 'axis' is not already the last dimension. + We also don't subtract the max before tf.exp for numerical stability since that might + mess up the attributions and it seems like TensorFlow doesn't define softmax that way + (according to the docs) + """ + in0 = op.inputs[0] + in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name="in0_max") + in0_centered = in0 - in0_max + evals = tf.exp(in0_centered, name="custom_exp") + rsum = tf.reduce_sum(evals, axis=-1, keepdims=True) + div = evals / rsum + + # mark these as in-between the inputs and outputs + for op in [evals.op, rsum.op, div.op, in0_centered.op]: + for t in op.outputs: + if t.name not in explainer.between_tensors: + explainer.between_tensors[t.name] = False + + out = tf.gradients(div, in0_centered, grad_ys=grads[0])[0] + + # remove the names we just added + for op in [evals.op, rsum.op, div.op, in0_centered.op]: + for t in op.outputs: + if explainer.between_tensors[t.name] is False: + del explainer.between_tensors[t.name] + + # rescale to account for our shift by in0_max (which we did for numerical stability) + xin0,rin0 = tf.split(in0, 2) + xin0_centered,rin0_centered = tf.split(in0_centered, 2) + delta_in0 = xin0 - rin0 + dup0 = [2] + [1 for i in delta_in0.shape[1:]] + return tf.where( + tf.tile(tf.abs(delta_in0), dup0) < 1e-6, + out, + out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0) + ) + +def maxpool(explainer, op, *grads): + xin0,rin0 = tf.split(op.inputs[0], 2) + xout,rout = tf.split(op.outputs[0], 2) + delta_in0 = xin0 - rin0 + dup0 = [2] + [1 for i in delta_in0.shape[1:]] + cross_max = tf.maximum(xout, rout) + diffs = tf.concat([cross_max - rout, xout - cross_max], 0) + if op.type.startswith("shap_"): + op.type = op.type[5:] + xmax_pos,rmax_pos = tf.split(explainer.orig_grads[op.type](op, grads[0] * diffs), 2) + return tf.tile(tf.where( + tf.abs(delta_in0) < 1e-7, + tf.zeros_like(delta_in0), + (xmax_pos + rmax_pos) / delta_in0 + ), dup0) + +def gather(explainer, op, *grads): + #params = op.inputs[0] + indices = op.inputs[1] + #axis = op.inputs[2] + var = explainer._variable_inputs(op) + if var[1] and not var[0]: + assert len(indices.shape) == 2, "Only scalar indices supported 
+def maxpool(explainer, op, *grads):
+    xin0, rin0 = tf.split(op.inputs[0], 2)
+    xout, rout = tf.split(op.outputs[0], 2)
+    delta_in0 = xin0 - rin0
+    dup0 = [2] + [1 for i in delta_in0.shape[1:]]
+    cross_max = tf.maximum(xout, rout)
+    diffs = tf.concat([cross_max - rout, xout - cross_max], 0)
+    if op.type.startswith("shap_"):
+        op.type = op.type[5:]
+    xmax_pos, rmax_pos = tf.split(explainer.orig_grads[op.type](op, grads[0] * diffs), 2)
+    return tf.tile(tf.where(
+        tf.abs(delta_in0) < 1e-7,
+        tf.zeros_like(delta_in0),
+        (xmax_pos + rmax_pos) / delta_in0
+    ), dup0)
+
+
+def gather(explainer, op, *grads):
+    #params = op.inputs[0]
+    indices = op.inputs[1]
+    #axis = op.inputs[2]
+    var = explainer._variable_inputs(op)
+    if var[1] and not var[0]:
+        assert len(indices.shape) == 2, "Only scalar indices supported right now in GatherV2!"
+
+        xin1, rin1 = tf.split(tf.cast(op.inputs[1], tf.float32), 2)
+        xout, rout = tf.split(op.outputs[0], 2)
+        dup_in1 = [2] + [1 for i in xin1.shape[1:]]
+        dup_out = [2] + [1 for i in xout.shape[1:]]
+        delta_in1_t = tf.tile(xin1 - rin1, dup_in1)
+        out_sum = tf.reduce_sum(grads[0] * tf.tile(xout - rout, dup_out), list(range(len(indices.shape), len(grads[0].shape))))
+        if op.type == "ResourceGather":
+            return [None, tf.where(
+                tf.abs(delta_in1_t) < 1e-6,
+                tf.zeros_like(delta_in1_t),
+                out_sum / delta_in1_t
+            )]
+        return [None, tf.where(
+            tf.abs(delta_in1_t) < 1e-6,
+            tf.zeros_like(delta_in1_t),
+            out_sum / delta_in1_t
+        ), None]
+    elif var[0] and not var[1]:
+        if op.type.startswith("shap_"):
+            op.type = op.type[5:]
+        return [explainer.orig_grads[op.type](op, grads[0]), None]  # linear in this case
+    else:
+        raise ValueError("Axis not yet supported to be varying for gather op!")
+
+
+def linearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
+    def handler(explainer, op, *grads):
+        var = explainer._variable_inputs(op)
+        if var[input_ind0] and not var[input_ind1]:
+            return linearity_1d_handler(input_ind0, explainer, op, *grads)
+        elif var[input_ind1] and not var[input_ind0]:
+            return linearity_1d_handler(input_ind1, explainer, op, *grads)
+        elif var[input_ind0] and var[input_ind1]:
+            return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
+        else:
+            return [None for _ in op.inputs]  # no inputs vary, we must be hidden by a switch function
+    return handler
+
+
+def nonlinearity_1d_nonlinearity_2d(input_ind0, input_ind1, op_func):
+    def handler(explainer, op, *grads):
+        var = explainer._variable_inputs(op)
+        if var[input_ind0] and not var[input_ind1]:
+            return nonlinearity_1d_handler(input_ind0, explainer, op, *grads)
+        elif var[input_ind1] and not var[input_ind0]:
+            return nonlinearity_1d_handler(input_ind1, explainer, op, *grads)
+        elif var[input_ind0] and var[input_ind1]:
+            return nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads)
+        else:
+            return [None for _ in op.inputs]  # no inputs vary, we must be hidden by a switch function
+    return handler
+
+
+def nonlinearity_1d(input_ind):
+    def handler(explainer, op, *grads):
+        return nonlinearity_1d_handler(input_ind, explainer, op, *grads)
+    return handler
+
+
+def nonlinearity_1d_handler(input_ind, explainer, op, *grads):
+    # make sure only the given input varies
+    op_inputs = op.inputs
+    if op_inputs is None:
+        op_inputs = op.outputs[0].op.inputs
+
+    for i in range(len(op_inputs)):
+        if i != input_ind:
+            assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
+
+    xin0, rin0 = tf.split(op_inputs[input_ind], 2)
+    xout, rout = tf.split(op.outputs[input_ind], 2)
+    delta_in0 = xin0 - rin0
+    if delta_in0.shape is None:
+        dup0 = [2, 1]
+    else:
+        dup0 = [2] + [1 for i in delta_in0.shape[1:]]
+    out = [None for _ in op_inputs]
+    if op.type.startswith("shap_"):
+        op.type = op.type[5:]
+    orig_grad = explainer.orig_grads[op.type](op, grads[0])
+    out[input_ind] = tf.where(
+        tf.tile(tf.abs(delta_in0), dup0) < 1e-6,
+        orig_grad[input_ind] if len(op_inputs) > 1 else orig_grad,
+        grads[0] * tf.tile((xout - rout) / delta_in0, dup0)
+    )
+    return out
+
+
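+# Editorial sketch: when both inputs vary, the handler below applies the exact
+# two-player Shapley split of the op. Writing out11 = f(x0, x1), out00 = f(r0, r1),
+# out10 = f(x0, r1) and out01 = f(r0, x1), the returned multipliers are
+#
+#     phi0 = 0.5 * ((out11 - out01) + (out10 - out00)) / (x0 - r0)
+#     phi1 = 0.5 * ((out11 - out10) + (out01 - out00)) / (x1 - r1)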
+def nonlinearity_2d_handler(input_ind0, input_ind1, op_func, explainer, op, *grads):
+    if not (input_ind0 == 0 and input_ind1 == 1):
+        emsg = "TODO: Can't yet handle double inputs that are not first!"
+        raise Exception(emsg)
+    xout, rout = tf.split(op.outputs[0], 2)
+    in0 = op.inputs[input_ind0]
+    in1 = op.inputs[input_ind1]
+    xin0, rin0 = tf.split(in0, 2)
+    xin1, rin1 = tf.split(in1, 2)
+    delta_in0 = xin0 - rin0
+    delta_in1 = xin1 - rin1
+    dup0 = [2] + [1 for i in delta_in0.shape[1:]]
+    out10 = op_func(xin0, rin1)
+    out01 = op_func(rin0, xin1)
+    out11, out00 = xout, rout
+    out0 = 0.5 * (out11 - out01 + out10 - out00)
+    out0 = grads[0] * tf.tile(out0 / delta_in0, dup0)
+    out1 = 0.5 * (out11 - out10 + out01 - out00)
+    out1 = grads[0] * tf.tile(out1 / delta_in1, dup0)
+
+    # avoid divide-by-zero NaNs
+    out0 = tf.where(tf.abs(tf.tile(delta_in0, dup0)) < 1e-7, tf.zeros_like(out0), out0)
+    out1 = tf.where(tf.abs(tf.tile(delta_in1, dup0)) < 1e-7, tf.zeros_like(out1), out1)
+
+    # see if due to broadcasting our gradient shapes don't match our input shapes
+    if np.any(np.array(out1.shape) != np.array(in1.shape)):
+        broadcast_index = np.where(np.array(out1.shape) != np.array(in1.shape))[0][0]
+        out1 = tf.reduce_sum(out1, axis=broadcast_index, keepdims=True)
+    elif np.any(np.array(out0.shape) != np.array(in0.shape)):
+        broadcast_index = np.where(np.array(out0.shape) != np.array(in0.shape))[0][0]
+        out0 = tf.reduce_sum(out0, axis=broadcast_index, keepdims=True)
+
+    return [out0, out1]
+
+
+def linearity_1d(input_ind):
+    def handler(explainer, op, *grads):
+        return linearity_1d_handler(input_ind, explainer, op, *grads)
+    return handler
+
+
+def linearity_1d_handler(input_ind, explainer, op, *grads):
+    # make sure only the given input varies
+    for i in range(len(op.inputs)):
+        if i != input_ind:
+            assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
+    if op.type.startswith("shap_"):
+        op.type = op.type[5:]
+    return explainer.orig_grads[op.type](op, *grads)
+
+
+def linearity_with_excluded(input_inds):
+    def handler(explainer, op, *grads):
+        return linearity_with_excluded_handler(input_inds, explainer, op, *grads)
+    return handler
+
+
+def linearity_with_excluded_handler(input_inds, explainer, op, *grads):
+    # make sure the given inputs don't vary (negative indices are measured from the end of the list)
+    for i in range(len(op.inputs)):
+        if i in input_inds or i - len(op.inputs) in input_inds:
+            assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
+    if op.type.startswith("shap_"):
+        op.type = op.type[5:]
+    return explainer.orig_grads[op.type](op, *grads)
+
+
+def passthrough(explainer, op, *grads):
+    if op.type.startswith("shap_"):
+        op.type = op.type[5:]
+    return explainer.orig_grads[op.type](op, *grads)
+
+
+def break_dependence(explainer, op, *grads):
+    """ This function name is used to break attribution dependence in the graph traversal.
+
+    These operation types may be connected above input data values in the graph but their outputs
+    don't depend on the input values (for example they just depend on the shape).
+    """
+    return [None for _ in op.inputs]
+
+
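+# Editorial note: the table below maps raw TensorFlow op type names to attribution
+# handlers. A hypothetical extension for another elementwise nonlinearity would
+# reuse the generic factories, e.g.:
+#
+#     op_handlers["Softsign"] = nonlinearity_1d(0)  # hypothetical entry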
+ """ + return [None for _ in op.inputs] + + +op_handlers = {} + +# ops that are always linear +op_handlers["Identity"] = passthrough +op_handlers["StridedSlice"] = passthrough +op_handlers["Squeeze"] = passthrough +op_handlers["ExpandDims"] = passthrough +op_handlers["Pack"] = passthrough +op_handlers["BiasAdd"] = passthrough +op_handlers["Unpack"] = passthrough +op_handlers["Add"] = passthrough +op_handlers["Sub"] = passthrough +op_handlers["Merge"] = passthrough +op_handlers["Sum"] = passthrough +op_handlers["Mean"] = passthrough +op_handlers["Cast"] = passthrough +op_handlers["Transpose"] = passthrough +op_handlers["Enter"] = passthrough +op_handlers["Exit"] = passthrough +op_handlers["NextIteration"] = passthrough +op_handlers["Tile"] = passthrough +op_handlers["TensorArrayScatterV3"] = passthrough +op_handlers["TensorArrayReadV3"] = passthrough +op_handlers["TensorArrayWriteV3"] = passthrough + + +# ops that don't pass any attributions to their inputs +op_handlers["Shape"] = break_dependence +op_handlers["RandomUniform"] = break_dependence +op_handlers["ZerosLike"] = break_dependence +#op_handlers["StopGradient"] = break_dependence # this allows us to stop attributions when we want to (like softmax re-centering) + +# ops that are linear and only allow a single input to vary +op_handlers["Reshape"] = linearity_1d(0) +op_handlers["Pad"] = linearity_1d(0) +op_handlers["ReverseV2"] = linearity_1d(0) +op_handlers["ConcatV2"] = linearity_with_excluded([-1]) +op_handlers["Conv2D"] = linearity_1d(0) +op_handlers["Switch"] = linearity_1d(0) +op_handlers["AvgPool"] = linearity_1d(0) +op_handlers["FusedBatchNorm"] = linearity_1d(0) + +# ops that are nonlinear and only allow a single input to vary +op_handlers["Relu"] = nonlinearity_1d(0) +op_handlers["Elu"] = nonlinearity_1d(0) +op_handlers["Sigmoid"] = nonlinearity_1d(0) +op_handlers["Tanh"] = nonlinearity_1d(0) +op_handlers["Softplus"] = nonlinearity_1d(0) +op_handlers["Exp"] = nonlinearity_1d(0) +op_handlers["ClipByValue"] = nonlinearity_1d(0) +op_handlers["Rsqrt"] = nonlinearity_1d(0) +op_handlers["Square"] = nonlinearity_1d(0) +op_handlers["Max"] = nonlinearity_1d(0) + +# ops that are nonlinear and allow two inputs to vary +op_handlers["SquaredDifference"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y)) +op_handlers["Minimum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y)) +op_handlers["Maximum"] = nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y)) + +# ops that allow up to two inputs to vary are are linear when only one input varies +op_handlers["Mul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y) +op_handlers["RealDiv"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y) +op_handlers["MatMul"] = linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y)) + +# ops that need their own custom attribution functions +op_handlers["GatherV2"] = gather +op_handlers["ResourceGather"] = gather +op_handlers["MaxPool"] = maxpool +op_handlers["Softmax"] = softmax + + +# TODO items +# TensorArrayGatherV3 +# Max +# TensorArraySizeV3 +# Range diff --git a/lib/shap/explainers/_deep/deep_utils.py b/lib/shap/explainers/_deep/deep_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..701a7c45feb4ea79326312cc1bfe8d49769d810a --- /dev/null +++ b/lib/shap/explainers/_deep/deep_utils.py @@ -0,0 +1,23 @@ +import numpy as np + + +def _check_additivity(explainer, model_output_values, output_phis): + TOLERANCE = 1e-2 + + assert 
diff --git a/lib/shap/explainers/_deep/deep_utils.py b/lib/shap/explainers/_deep/deep_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..701a7c45feb4ea79326312cc1bfe8d49769d810a
--- /dev/null
+++ b/lib/shap/explainers/_deep/deep_utils.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+
+def _check_additivity(explainer, model_output_values, output_phis):
+    TOLERANCE = 1e-2
+
+    assert len(explainer.expected_value) == model_output_values.shape[1], "Length of expected values and model outputs does not match."
+
+    for t in range(len(explainer.expected_value)):
+        if not explainer.multi_input:
+            diffs = model_output_values[:, t] - explainer.expected_value[t] - output_phis[t].sum(axis=tuple(range(1, output_phis[t].ndim)))
+        else:
+            diffs = model_output_values[:, t] - explainer.expected_value[t]
+            for i in range(len(output_phis[t])):
+                diffs -= output_phis[t][i].sum(axis=tuple(range(1, output_phis[t][i].ndim)))
+
+        maxdiff = np.abs(diffs).max()
+
+        assert maxdiff < TOLERANCE, "The SHAP explanations do not sum up to the model's output! This is either because of a " \
+                                    "rounding error or because an operator in your computation graph was not fully supported. If " \
+                                    "the sum difference is significant compared to the scale of your model outputs, please post " \
+                                    f"as a github issue, with a reproducible example so we can debug it. Used framework: {explainer.framework} - Max. diff: {maxdiff} - Tolerance: {TOLERANCE}"
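+
+# Editorial sketch: "additivity" here means the attributions plus the baseline
+# reconstruct the model output, i.e. for each explained sample approximately
+#
+#     model(x) ~= explainer.expected_value + sum(phis for x)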
diff --git a/lib/shap/explainers/_exact.py b/lib/shap/explainers/_exact.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ade9a82112b713bc908896d2c30b606287fd4dd
--- /dev/null
+++ b/lib/shap/explainers/_exact.py
@@ -0,0 +1,366 @@
+import logging
+
+import numpy as np
+from numba import njit
+
+from .. import links
+from ..models import Model
+from ..utils import (
+    MaskedModel,
+    delta_minimization_order,
+    make_masks,
+    shapley_coefficients,
+)
+from ._explainer import Explainer
+
+log = logging.getLogger('shap')
+
+
+class ExactExplainer(Explainer):
+    """ Computes SHAP values via an optimized exact enumeration.
+
+    This works well for standard Shapley value maskers for models with fewer than ~15 features that vary
+    from the background per sample. It also works well for Owen values from hclustering structured
+    maskers when there are fewer than ~100 features that vary from the background per sample. This
+    explainer minimizes the number of function evaluations needed by ordering the masking sets to
+    minimize sequential differences. This is done using gray codes for standard Shapley values
+    and a greedy sorting method for hclustering structured maskers.
+    """
+
+    def __init__(self, model, masker, link=links.identity, linearize_link=True, feature_names=None):
+        """ Build an explainers.Exact object for the given model using the given masker object.
+
+        Parameters
+        ----------
+        model : function
+            A callable python object that executes the model given a set of input data samples.
+
+        masker : function or numpy.array or pandas.DataFrame
+            A callable python object used to "mask" out hidden features of the form `masker(mask, *fargs)`.
+            It takes a binary mask and an input sample and returns a matrix of masked samples. These
+            masked samples are evaluated using the model function and the outputs are then averaged.
+            As a shortcut for the standard masking used by SHAP you can pass a background data matrix
+            instead of a function and that matrix will be used for masking. To use a clustering
+            game structure you can pass a shap.maskers.TabularPartitions(data) object.
+
+        link : function
+            The link function used to map between the output units of the model and the SHAP value units. By
+            default it is shap.links.identity, but shap.links.logit can be useful so that expectations are
+            computed in probability units while explanations remain in the (more naturally additive) log-odds
+            units. For more details on how link functions work see any overview of link functions for
+            generalized linear models.
+
+        linearize_link : bool
+            If we use a non-linear link function to take expectations then models that are additive with respect to that
+            link function for a single background sample will no longer be additive when using a background masker with
+            many samples. This for example means that a linear logistic regression model would have interaction effects
+            that arise from the non-linear changes in expectation averaging. To retain the additivity of the model while
+            still respecting the link function we linearize the link function by default.
+        """  # TODO link to the link linearization paper when done
+        super().__init__(model, masker, link=link, linearize_link=linearize_link, feature_names=feature_names)
+
+        self.model = Model(model)
+
+        if getattr(masker, "clustering", None) is not None:
+            self._partition_masks, self._partition_masks_inds = partition_masks(masker.clustering)
+            self._partition_delta_indexes = partition_delta_indexes(masker.clustering, self._partition_masks)
+
+        self._gray_code_cache = {}  # used to avoid regenerating the same gray code patterns
+
+    def __call__(self, *args, max_evals=100000, main_effects=False, error_bounds=False, batch_size="auto", interactions=1, silent=False):
+        """ Explains the output of model(*args), where args represents one or more parallel iterators.
+        """
+
+        # we entirely rely on the general call implementation, we override just to remove **kwargs
+        # from the function signature
+        return super().__call__(
+            *args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
+            batch_size=batch_size, interactions=interactions, silent=silent
+        )
+
+    def _cached_gray_codes(self, n):
+        if n not in self._gray_code_cache:
+            self._gray_code_cache[n] = gray_code_indexes(n)
+        return self._gray_code_cache[n]
+
+    def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_size, outputs, interactions, silent):
+        """ Explains a single row and returns a dictionary of attribution results
+        (values, expected values, mask shapes, main effects, and clustering).
+        """
+
+        # build a masked version of the model for the current input sample
+        fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args)
+
+        # do the standard Shapley values
+        inds = None
+        if getattr(self.masker, "clustering", None) is None:
+
+            # see which elements we actually need to perturb
+            inds = fm.varying_inputs()
+
+            # make sure we have enough evals
+            if max_evals is not None and max_evals != "auto" and max_evals < 2**len(inds):
+                raise ValueError(
+                    f"It takes {2**len(inds)} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!"
+                )
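+
+            # Editorial note: the cost doubles with every varying feature, e.g. 10
+            # varying features already require 2**10 = 1024 masked evaluations.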
+
+            # generate the masks in gray code order (so that we change the inputs as little
+            # as possible while we iterate to minimize the need to re-eval when the inputs
+            # don't vary from the background)
+            delta_indexes = self._cached_gray_codes(len(inds))
+
+            # map to a larger mask that includes the invariant entries
+            extended_delta_indexes = np.zeros(2**len(inds), dtype=int)
+            for i in range(2**len(inds)):
+                if delta_indexes[i] == MaskedModel.delta_mask_noop_value:
+                    extended_delta_indexes[i] = delta_indexes[i]
+                else:
+                    extended_delta_indexes[i] = inds[delta_indexes[i]]
+
+            # run the model
+            outputs = fm(extended_delta_indexes, zero_index=0, batch_size=batch_size)
+
+            # Shapley values
+            # Care: Need to distinguish between `True` and `1`
+            if interactions is False or (interactions == 1 and interactions is not True):
+
+                # loop over all the outputs to update the rows
+                coeff = shapley_coefficients(len(inds))
+                row_values = np.zeros((len(fm),) + outputs.shape[1:])
+                mask = np.zeros(len(fm), dtype=bool)
+                _compute_grey_code_row_values(row_values, mask, inds, outputs, coeff, extended_delta_indexes, MaskedModel.delta_mask_noop_value)
+
+            # Shapley-Taylor interaction values
+            elif interactions is True or interactions == 2:
+
+                # loop over all the outputs to update the rows
+                coeff = shapley_coefficients(len(inds))
+                row_values = np.zeros((len(fm), len(fm)) + outputs.shape[1:])
+                mask = np.zeros(len(fm), dtype=bool)
+                _compute_grey_code_row_values_st(row_values, mask, inds, outputs, coeff, extended_delta_indexes, MaskedModel.delta_mask_noop_value)
+
+            elif interactions > 2:
+                raise NotImplementedError("Currently the Exact explainer does not support interactions higher than order 2!")
+
+        # do a partition tree constrained version of Shapley values
+        else:
+
+            # make sure we have enough evals
+            if max_evals is not None and max_evals != "auto" and max_evals < len(fm)**2:
+                raise ValueError(
+                    f"It takes {len(fm)**2} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!"
+                )
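+
+            # Editorial note: the clustering-constrained path scales quadratically
+            # instead, e.g. 100 features need at most 100**2 = 10000 evaluations.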
+
+            # generate the masks in a hclust order (so that we change the inputs as little
+            # as possible while we iterate to minimize the need to re-eval when the inputs
+            # don't vary from the background)
+            delta_indexes = self._partition_delta_indexes
+
+            # run the model
+            outputs = fm(delta_indexes, batch_size=batch_size)
+
+            # loop over each output feature
+            row_values = np.zeros((len(fm),) + outputs.shape[1:])
+            for i in range(len(fm)):
+                on_outputs = outputs[self._partition_masks_inds[i][1]]
+                off_outputs = outputs[self._partition_masks_inds[i][0]]
+                row_values[i] = (on_outputs - off_outputs).mean(0)
+
+        # compute the main effects if we need to
+        main_effect_values = None
+        if main_effects or interactions is True or interactions == 2:
+            if inds is None:
+                inds = np.arange(len(fm))
+            main_effect_values = fm.main_effects(inds)
+            if interactions is True or interactions == 2:
+                for i in range(len(fm)):
+                    row_values[i, i] = main_effect_values[i]
+
+        return {
+            "values": row_values,
+            "expected_values": outputs[0],
+            "mask_shapes": fm.mask_shapes,
+            "main_effects": main_effect_values if main_effects else None,
+            "clustering": getattr(self.masker, "clustering", None)
+        }
+
+
+@njit
+def _compute_grey_code_row_values(row_values, mask, inds, outputs, shapley_coeff, extended_delta_indexes, noop_code):
+    set_size = 0
+    M = len(inds)
+    for i in range(2**M):
+
+        # update the mask
+        delta_ind = extended_delta_indexes[i]
+        if delta_ind != noop_code:
+            mask[delta_ind] = ~mask[delta_ind]
+            if mask[delta_ind]:
+                set_size += 1
+            else:
+                set_size -= 1
+
+        # update the output row values
+        on_coeff = shapley_coeff[set_size-1]
+        if set_size < M:
+            off_coeff = shapley_coeff[set_size]
+        out = outputs[i]
+        for j in inds:
+            if mask[j]:
+                row_values[j] += out * on_coeff
+            else:
+                row_values[j] -= out * off_coeff
+
+
+@njit
+def _compute_grey_code_row_values_st(row_values, mask, inds, outputs, shapley_coeff, extended_delta_indexes, noop_code):
+    set_size = 0
+    M = len(inds)
+    for i in range(2**M):
+
+        # update the mask
+        delta_ind = extended_delta_indexes[i]
+        if delta_ind != noop_code:
+            mask[delta_ind] = ~mask[delta_ind]
+            if mask[delta_ind]:
+                set_size += 1
+            else:
+                set_size -= 1
+
+        # distribute the effect of this mask set over all the terms it impacts
+        out = outputs[i]
+        for j in range(M):
+            for k in range(j+1, M):
+                if not mask[j] and not mask[k]:
+                    delta = out * shapley_coeff[set_size]  # * 2
+                elif (not mask[j] and mask[k]) or (mask[j] and not mask[k]):
+                    delta = -out * shapley_coeff[set_size - 1]  # * 2
+                else:  # both true
+                    delta = out * shapley_coeff[set_size - 2]  # * 2
+                row_values[j, k] += delta
+                row_values[k, j] += delta
+
+
+def partition_delta_indexes(partition_tree, all_masks):
+    """ Return a delta-index encoded array of all the masks possible while following the given partition tree.
+    """
+
+    # convert the masks to delta index format
+    mask = np.zeros(all_masks.shape[1], dtype=bool)
+    delta_inds = []
+    for i in range(len(all_masks)):
+        inds = np.where(mask ^ all_masks[i, :])[0]
+
+        for j in inds[:-1]:
+            delta_inds.append(-j - 1)  # negative + (-1) means we have more inds still to change...
+        if len(inds) == 0:
+            delta_inds.append(MaskedModel.delta_mask_noop_value)
+        else:
+            delta_inds.extend(inds[-1:])
+        mask = all_masks[i, :]
+
+    return np.array(delta_inds)
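+
+# Editorial sketch: the delta-index encoding above stores, per step, which single
+# mask bit flips next. E.g. the mask sequence [000, 001, 011] would be encoded as
+# [noop, 2, 1] (0-based positions reading each mask left to right, with noop
+# meaning "no change"; multi-bit changes use the negative encoding shown above).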
+ """ + + M = partition_tree.shape[0] + 1 + mask_matrix = make_masks(partition_tree) + all_masks = [] + m00 = np.zeros(M, dtype=bool) + all_masks.append(m00) + all_masks.append(~m00) + #inds_stack = [0,1] + inds_lists = [[[], []] for i in range(M)] + _partition_masks_recurse(len(partition_tree)-1, m00, 0, 1, inds_lists, mask_matrix, partition_tree, M, all_masks) + + all_masks = np.array(all_masks) + + # we resort the clustering matrix to minimize the sequential difference between the masks + # this minimizes the number of model evaluations we need to run when the background sometimes + # matches the foreground. We seem to average about 1.5 feature changes per mask with this + # approach. This is not as clean as the grey code ordering, but a perfect 1 feature change + # ordering is not possible with a clustering tree + order = delta_minimization_order(all_masks) + inverse_order = np.arange(len(order))[np.argsort(order)] + + for inds_list0,inds_list1 in inds_lists: + for i in range(len(inds_list0)): + inds_list0[i] = inverse_order[inds_list0[i]] + inds_list1[i] = inverse_order[inds_list1[i]] + + # Care: inds_lists have different lengths, so partition_masks_inds is a "ragged" array. See GH #3063 + partition_masks = all_masks[order] + partition_masks_inds = [[np.array(on), np.array(off)] for on, off in inds_lists] + return partition_masks, partition_masks_inds + +# TODO: this should be a jit function... which would require preallocating the inds_lists (sizes are 2**depth of that ind) +# TODO: we could also probable avoid making the masks at all and just record the deltas if we want... +def _partition_masks_recurse(index, m00, ind00, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks): + if index < 0: + inds_lists[index + M][0].append(ind00) + inds_lists[index + M][1].append(ind11) + return + + # get our children indexes + left_index = int(partition_tree[index,0] - M) + right_index = int(partition_tree[index,1] - M) + + # build more refined masks + m10 = m00.copy() # we separate the copy from the add so as to not get converted to a matrix + m10[:] += mask_matrix[left_index+M, :] + m01 = m00.copy() + m01[:] += mask_matrix[right_index+M, :] + + # record the new masks we made + ind01 = len(all_masks) + all_masks.append(m01) + ind10 = len(all_masks) + all_masks.append(m10) + + # inds_stack.append(len(all_masks) - 2) + # inds_stack.append(len(all_masks) - 1) + + # recurse left and right with both 1 (True) and 0 (False) contexts + _partition_masks_recurse(left_index, m00, ind00, ind10, inds_lists, mask_matrix, partition_tree, M, all_masks) + _partition_masks_recurse(right_index, m10, ind10, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks) + _partition_masks_recurse(left_index, m01, ind01, ind11, inds_lists, mask_matrix, partition_tree, M, all_masks) + _partition_masks_recurse(right_index, m00, ind00, ind01, inds_lists, mask_matrix, partition_tree, M, all_masks) + + +def gray_code_masks(nbits): + """ Produces an array of all binary patterns of size nbits in gray code order. + + This is based on code from: http://code.activestate.com/recipes/576592-gray-code-generatoriterator/ + """ + out = np.zeros((2**nbits, nbits), dtype=bool) + li = np.zeros(nbits, dtype=bool) + + for term in range(2, (1<