Commit 8a04acd1 authored by Caroline DE POURTALES

preparing demo

parent 371c0b20
Showing changed files with 124 additions and 2088 deletions
......@@ -8,6 +8,7 @@ from dash import dcc, html
from callbacks import register_callbacks
from pages.application.application import Application, Model, View
from utils import extract_data
app = dash.Dash(external_stylesheets=[dbc.themes.LUX], suppress_callback_exceptions=True,
......@@ -20,9 +21,26 @@ models_data = open('data_retriever.json')
data = json.load(models_data)["data"]
# For home directory
page_home = dbc.Row([html.H3("Welcome")])
welcome_message = html.Div(html.Iframe(
src=app.get_asset_url("welcome.html"),
style={"height": "1067px", "width": "100%"},
))
page_home = dbc.Row([welcome_message])
# For course directory
page_course = dbc.Row([])
course_data_format = html.Div(html.Iframe(
src=app.get_asset_url("course_data_format.html"),
style={"height": "1067px", "width": "100%"},
))
course_decision_tree = html.Iframe(
src="assets/course_decision_tree.html",
style={"height": "1067px", "width": "100%"},
)
main_course = dcc.Tabs(children=[
dcc.Tab(label='Data format', children=[course_data_format]),
dcc.Tab(label='Course Decision Tree', children=[course_decision_tree])])
page_course = dbc.Row([main_course])
# For the application
names_models, dict_components, dic_solvers, dic_xtypes = extract_data(data)
model_application = Model(names_models, dict_components, dic_solvers, dic_xtypes)
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>What kind of model, data, or instance can I upload?</title>
</head>
<body>
<h1> What kind of model, data, or instance can I upload? </h1>
<h2> What is the option to add information on the model? </h2>
<h3> Why? </h3>
<p> There is a switch button; use it when you want to attach the CSV you trained your model on, or a feature mapping. This is useful when you didn't dump the feature names in your model's .pkl but still want them to show up, or when the values are categorical.
</p>
<h3> How? </h3>
<h2> What kind of model can I upload? </h2>
<p> You can only import .pkl models.</p>
<h2> What should the format of the instance be? </h2>
<p> You can either upload a .txt file containing the instance in the format feature1=value1,feature2=value2,... where feature1, feature2 are the names of the columns,
or upload a JSON file describing your instance.</p>
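<p> For example, with hypothetical column names, a .txt instance could contain the single line: </p>
<pre>age=35,workclass=Private,education=Bachelors,hours=40</pre>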
<p> Warning: if the feature names appear nowhere in your model or in the additional information, the instance must use exactly the format f1=...,f2=...</p>
<h1> What are the advanced parameters? </h1>
<p> These are specific to the kind of model you selected; they mainly control how the explanation is computed and which kind of explanation is produced.</p>
</body>
</html>
\ No newline at end of file
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>What kind of model, data, or instance can I upload?</title>
</head>
<body>
<h1> What library am I able to use on the platform? </h1>
<p> Only models from scikit-learn are allowed.</p>
</body>
</html>
\ No newline at end of file
/* NAVBAR */
.navbar-dark .navbar-brand {
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>What kind of model, data, or instance can I upload?</title>
</head>
<body>
<h1> Welcome </h1>
</body>
</html>
\ No newline at end of file
......@@ -28,14 +28,16 @@ def register_callbacks(page_home, page_course, page_application, app):
return active_link[0], active_link[1], active_link[2]
@app.callback(Output('solver_sat', 'options'),
Output('solver_sat', 'value'),
Output('explanation_type', 'options'),
Output('explanation_type', 'value'),
Input('ml_model_choice', 'value'),
prevent_initial_call=True
)
def update_ml_type_options(value_ml_model):
model_application = page_application.model
model_application.update_ml_model(value_ml_model)
return model_application.solvers, model_application.xtypes
return model_application.solvers, model_application.solvers[0], model_application.xtypes, [list(model_application.xtypes.keys())[0]]
@app.callback(
Output('pretrained_model_filename', 'children'),
......
......@@ -9,20 +9,6 @@
],
"xtypes" : {
"AXp": "Abductive Explanation", "CXp": "Contrastive explanation"}
},
{
"ml_type" : "NaiveBayes",
"component" : "NaiveBayesComponent",
"solvers" : [],
"xtypes" : {
"AXp": "Abductive Explanation", "CXp": "Contrastive explanation"}
},
{
"ml_type" : "RandomForest",
"component" : "RandomForestComponent",
"solvers" : ["LIME", "ANCHOR", "SHAP"],
"xtypes" : {"H": "Heuristic", "HV": "Heuristic and validation", "G": "Global"}
}
]
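For reference, each entry kept in data_retriever.json has the same shape as the removed entries above; a minimal sketch of a kept entry (the ml_type, component, and solver values here are illustrative assumptions; only the field names come from the file):
{
    "ml_type" : "DecisionTree",
    "component" : "DecisionTreeComponent",
    "solvers" : ["g3"],
    "xtypes" : {
        "AXp": "Abductive Explanation", "CXp": "Contrastive explanation"}
}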
......
#!/usr/bin/env python
#-*- coding:utf-8 -*-
# -*- coding:utf-8 -*-
##
## dtree.py
##
......@@ -9,7 +9,7 @@
##
#
#==============================================================================
# ==============================================================================
from __future__ import print_function
import collections
from functools import reduce
......@@ -25,6 +25,7 @@ except ImportError: # for Python3
from sklearn.tree import _tree
import numpy as np
class Node():
"""
Node class.
......@@ -38,8 +39,9 @@ class Node():
self.feat = feat
self.vals = vals
#
#==============================================================================
# ==============================================================================
class DecisionTree():
"""
Simple decision tree class.
......@@ -62,14 +64,14 @@ class DecisionTree():
self.feids = {}
self.fdoms = {}
self.fvmap = {}
self.feature_names = {f'f{i}' : feature_names[i] for i, f in enumerate(feature_names)}
self.feature_names = {f'f{i}': feature_names[i] for i, f in enumerate(feature_names)}
# OHE mapping
OHEMap = collections.namedtuple('OHEMap', ['dir', 'opp'])
self.ohmap = OHEMap(dir={}, opp={})
if from_dt:
self.from_dt(from_dt)
self.from_dt(from_dt)
if mapfile:
self.parse_mapping(mapfile)
......@@ -103,7 +105,7 @@ class DecisionTree():
self.terms = {}
for i in range(self.nof_terms):
nd, _, t = lines[i + 4].strip().split()
self.terms[int(nd)] = t #int(t)
self.terms[int(nd)] = t # int(t)
# finally, reading the nodes
self.nodes = collections.defaultdict(lambda: Node(feat='', vals={}))
......@@ -132,7 +134,7 @@ class DecisionTree():
# simplifying the features and their domains
self.feats = sorted(self.feats)
#self.feids = {f: i for i, f in enumerate(self.feats)}
# self.feids = {f: i for i, f in enumerate(self.feats)}
self.fdoms = {f: sorted(self.fdoms[f]) for f in self.fdoms}
# here we assume all features are present in the tree
......@@ -175,10 +177,10 @@ class DecisionTree():
for line in lines[1:]:
feat, val, real = line.split()
self.fvmap[tuple([feat, int(val)])] = '{0}{1}'.format(self.feature_names[feat], real)
#if feat not in self.feids:
# if feat not in self.feids:
# self.feids[feat] = len(self.feids)
#assert len(self.feids) == self.nof_feats
# assert len(self.feids) == self.nof_feats
def convert_to_multiedges(self):
"""
......@@ -324,34 +326,38 @@ class DecisionTree():
# returning the set of sets with no duplicates
return list(dict.fromkeys(sets))
def explain(self, inst, enum=1, pathlits=False, xtype = ["AXp"], solver='g3', htype='sorted'):
def explain(self, inst, enum=1, pathlits=False, xtype=["AXp"], solver='g3', htype='sorted'):
"""
Compute a given number of explanations.
"""
#contaiins all the elements for explanation
# contains all the elements for explanation
explanation_dic = {}
self.feids = {f'f{i}': i for i, f in enumerate(inst)}
inst = [(f'f{i}', int(inst[i][1])) for i,f in enumerate(inst)]
inst = [(f'f{i}', int(inst[i][1])) for i, f in enumerate(inst)]
path, term, depth = self.execute(inst, pathlits)
#decision path
decision_path_str = 'IF {0} THEN class={1}'.format(' AND '.join([self.fvmap[inst[self.feids[self.nodes[n].feat]]] for n in path]), term)
# decision path
decision_path_str = 'IF {0} THEN class={1}'.format(
' AND '.join([self.fvmap[inst[self.feids[self.nodes[n].feat]]] for n in path]), term)
explanation_dic["Decision path of instance : "] = decision_path_str
explanation_dic["Decision path length : "] = 'Path length is :'+ str(depth)
explanation_dic["Decision path length : "] = 'Path length is :' + str(depth)
if self.ohmap.dir:
f2v = {fv[0]: fv[1] for fv in inst}
# updating fvmap for printing ohe features
for fo, fis in self.ohmap.dir.items():
self.fvmap[tuple([fo, None])] = '(' + ' AND '.join([self.fvmap[tuple([fi, f2v[fi]])] for fi in fis]) + ')'
self.fvmap[tuple([fo, None])] = '(' + ' AND '.join(
[self.fvmap[tuple([fi, f2v[fi]])] for fi in fis]) + ')'
# computing the sets to hit
to_hit = self.prepare_sets(inst, term)
for type in xtype :
explanation_dic["List of path explanation(s)"] = []
explanation_dic["List of path contrastive explanation(s)"] = []
for type in xtype:
if type == "AXp":
explanation_dic.update(self.enumerate_abductive(to_hit, enum, solver, htype, term))
else :
else:
explanation_dic.update(self.enumerate_contrastive(to_hit, term))
return explanation_dic
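A minimal usage sketch of explain() (not part of the commit; the module path, file names, and instance are hypothetical, and the constructor arguments are inferred from the fields visible in this diff):
from pages.application.DecisionTree.utils.dtree import DecisionTree  # assumed path

dt = DecisionTree(from_dt='tree.dt', mapfile='tree.map',  # hypothetical files
                  feature_names=['age', 'income'])
inst = [('age', 1), ('income', 0)]  # explain() rewrites these to ('f0', 1), ('f1', 0)
expl = dt.explain(inst, enum=2, xtype=["AXp", "CXp"], solver='g3')
print(expl["Decision path of instance : "])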
......@@ -367,7 +373,8 @@ class DecisionTree():
expls = []
for i, expl in enumerate(hitman.enumerate(), 1):
list_expls.append([self.fvmap[p] for p in sorted(expl, key=lambda p: p[0])])
list_expls_str.append('Explanation: IF {0} THEN class={1}'.format(' AND '.join([self.fvmap[p] for p in sorted(expl, key=lambda p: p[0])]), term))
list_expls_str.append('Explanation: IF {0} THEN class={1}'.format(
' AND '.join([self.fvmap[p] for p in sorted(expl, key=lambda p: p[0])]), term))
expls.append(expl)
if i == enum:
......@@ -375,9 +382,10 @@ class DecisionTree():
explanation["List of path explanation(s)"] = list_expls
explanation["List of abductive explanation(s)"] = list_expls_str
explanation["Number of abductive explanation(s) : "] = str(i)
explanation["Minimal abductive explanation : "] = str( min([len(e) for e in expls]))
explanation["Maximal abductive explanation : "] = str( max([len(e) for e in expls]))
explanation["Average abductive explanation : "] = '{0:.2f}'.format(sum([len(e) for e in expls]) / len(expls))
explanation["Minimal abductive explanation : "] = str(min([len(e) for e in expls]))
explanation["Maximal abductive explanation : "] = str(max([len(e) for e in expls]))
explanation["Average abductive explanation : "] = '{0:.2f}'.format(
sum([len(e) for e in expls]) / len(expls))
return explanation
......@@ -385,6 +393,7 @@ class DecisionTree():
"""
Enumerate contrastive explanations.
"""
def process_set(done, target):
for s in done:
if s <= target:
......@@ -401,14 +410,15 @@ class DecisionTree():
list_expls_str = []
explanation = {}
for expl in expls:
list_contrastive_expls.append([self.fvmap[(p[0],1-p[1])] for p in sorted(expl, key=lambda p: p[0])])
list_expls_str.append('Contrastive: IF {0} THEN class!={1}'.format(' OR '.join(['!{0}'.format(self.fvmap[p]) for p in sorted(expl, key=lambda p: p[0])]), term))
list_contrastive_expls.append([self.fvmap[(p[0], 1 - p[1])] for p in sorted(expl, key=lambda p: p[0])])
list_expls_str.append('Contrastive: IF {0} THEN class!={1}'.format(
' OR '.join(['!{0}'.format(self.fvmap[p]) for p in sorted(expl, key=lambda p: p[0])]), term))
explanation["List of path contrastive explanation(s)"] = list_contrastive_expls
explanation["List of contrastive explanation(s)"] = list_expls_str
explanation["Number of contrastive explanation(s) : "]=str(len(expls))
explanation["Minimal contrastive explanation : "]= str( min([len(e) for e in expls]))
explanation["Maximal contrastive explanation : "]= str( max([len(e) for e in expls]))
explanation["Average contrastive explanation : "]='{0:.2f}'.format(sum([len(e) for e in expls]) / len(expls))
explanation["Number of contrastive explanation(s) : "] = str(len(expls))
explanation["Minimal contrastive explanation : "] = str(min([len(e) for e in expls]))
explanation["Maximal contrastive explanation : "] = str(max([len(e) for e in expls]))
explanation["Average contrastive explanation : "] = '{0:.2f}'.format(sum([len(e) for e in expls]) / len(expls))
return explanation
from os import path
import base64
import dash_bootstrap_components as dbc
import numpy as np
from dash import dcc, html
import subprocess
import shlex
class NaiveBayesComponent():

    def __init__(self, model, type_model='SKL', info=None, type_info=''):
        # Convert the model through the Perl helper script
        p = subprocess.Popen(['perl', 'pages/application/NaiveBayes/utils/cnbc2xlc.pl', model],
                             stdout=subprocess.PIPE)
        print(p.stdout.read())

        self.naive_bayes = model
        self.map_file = ""
        self.network = html.Div([])
        self.explanation = []

    def update_with_explicability(self, instance, enum, xtype, solver):
        # Call the Perl explainer
        p = subprocess.Popen(['perl', 'pages/application/NaiveBayes/utils/xpxlc.pl',
                              self.naive_bayes, instance, self.map_file],
                             stdout=subprocess.PIPE)
        print(p.stdout.read())

        self.explanation = []
        list_explanations_path = []
        list_contrastive_explanations_path = []
        explanation = {}  # placeholder: not yet populated from the explainer output
        self.network = html.Div([])

        # Build a clean text component: the instance first, then each explanation entry
        self.explanation.append(html.H4("Instance : \n"))
        self.explanation.append(html.P(str([str(instance[i]) for i in range(len(instance))])))
        for k in explanation.keys():
            if k != "List of path explanation(s)" and k != "List of path contrastive explanation(s)":
                if k in ["List of abductive explanation(s)", "List of contrastive explanation(s)"]:
                    self.explanation.append(html.H4(k))
                    for expl in explanation[k]:
                        self.explanation.append(html.Hr())
                        self.explanation.append(html.P(expl))
                        self.explanation.append(html.Hr())
                else:
                    self.explanation.append(html.P(k + explanation[k]))
            else:
                list_explanations_path = explanation["List of path explanation(s)"]
                list_contrastive_explanations_path = explanation["List of path contrastive explanation(s)"]

        return list_explanations_path, list_contrastive_explanations_path
package Parsers;
use strict;
use warnings;
use Data::Dumper;
use POSIX qw( !assert );
use Exporter;
require Utils; # Must use require, to get INC updated
import Utils qw( &get_progname &get_progpath );
BEGIN {
@Parsers::ISA = ('Exporter');
@Parsers::EXPORT_OK =
qw( &parse_xlc &parse_cnbc &parse_xmap
&parse_instance &parse_explanations
&parse_blc &parse_acc );
}
use constant F_ERR_MSG =>
"Please check file name, existence, permissions, etc.\n";
use constant HLPMAP => 1;
use constant CCAT_CH => '_';
use constant CCHK => 0;
if (CCHK) {
## Uncomment to use assertions && debug messages
#use Carp::Assert; # Assertions are on.
}
# Parse XLC format
sub parse_xlc()
{
my ($opts, $xlc, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($nc, $nr, $rmode) = (0, 0, 0);
while(<$fh>) {
chomp;
next if m/^\s*c\s+$/;
if ($rmode == 0) { # Read number of features
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($xlc->{NV}, $rmode) = ($1, 1);
}
elsif ($rmode == 1) { # Read w0
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
($xlc->{W0}, $rmode) = ($1, 2);
}
elsif ($rmode == 2) { # Read number of real-valued features
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($xlc->{NReal}, $rmode) = ($1, 3);
if ($xlc->{NReal} == 0) { $rmode = 4; }
}
elsif ($rmode == 3) { # Read real-valued coefficients
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
push @{$xlc->{RVs}}, $1;
if (++$nr == $xlc->{NReal}) { ($nr, $rmode) = (0, 4); }
}
elsif ($rmode == 4) { # Read number of categorical features
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($xlc->{NCat}, $rmode) = ($1, 5);
}
elsif ($rmode == 5) { # Read domains and weights of cat. features
my $cvi = "CVs$nc";
@{$xlc->{$cvi}} = split(/ +/);
push @{$xlc->{CDs}}, shift @{$xlc->{$cvi}};
if (++$nc == $xlc->{NCat}) { $rmode = 6; }
}
else { die "Invalid state with input: $_\n"; }
}
close($fh);
}
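# For reference, a tiny hypothetical XLC file that parse_xlc would accept
# (numbers invented; read modes as above):
#   3                number of features (NV)
#   0.25             intercept (W0)
#   1                number of real-valued features (NReal)
#   -1.5             coefficient of the real-valued feature
#   2                number of categorical features (NCat)
#   2 0.7 -0.7       domain size, then one weight per categorical value
#   3 0.1 0.4 -0.5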
# Parse map file
sub parse_xmap()
{
my ($opts, $xmap, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($cc, $nv, $nc, $nr, $rmode) = (0, 0, 0, 0, 0);
while(<$fh>) {
chomp;
next if m/^\s*c\s+$/;
if ($rmode == 0) { # Read number of classes
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($xmap->{NC}, $rmode, $cc) = ($1, 1, 0);
if ($xmap->{NC} == 0) { $rmode = 2; }
}
elsif ($rmode == 1) { # Read class name maps
my @toks = split(/ +/);
my $cid = shift @toks;
${$xmap->{ClMap}}[$cid] = join(CCAT_CH, @toks);
if (++$cc == $xmap->{NC}) { $rmode = 2; }
}
elsif ($rmode == 2) { # Read number of features
m/^\s*(\d+)\s*$/ || die "Unable to match \@ $rmode: $_\n";
($xmap->{NV}, $rmode) = ($1, 3);
}
elsif ($rmode == 3) { # Read number of real-valued features
m/^\s*(\d+)\s*$/ || die "Unable to match \@ $rmode: $_\n";
($xmap->{NReal}, $rmode, $nr) = ($1, 4, 0);
if ($xmap->{NReal} == 0) { $rmode = 5; }
}
elsif ($rmode == 4) { # Read map of real-value features
my @toks = split(/ +/);
my $rid = shift @toks;
${$xmap->{VMap}}[$rid] = join(CCAT_CH, @toks);
if (++$nr == $xmap->{NReal}) { $rmode = 5; }
}
elsif ($rmode == 5) { # Read number of categorical features
m/^\s*(\d+)\s*$/ || die "Unable to match \@ $rmode: $_\n";
($xmap->{NCat}, $rmode, $nc) = ($1, 6, $nr);
}
elsif ($rmode == 6) { # Read categorical feature
my @toks = split(/ +/);
my $cid = shift @toks;
if (!HLPMAP) {
${$xmap->{VMap}}[$cid] = join(CCAT_CH, @toks); }
else {
my ($sch, $ech, $jch) = ('', '', '');
if ($#toks > 0) { ($sch, $ech, $jch) = ('\'', '\'', ' '); }
${$xmap->{VMap}}[$cid] = $sch . join($jch, @toks) . $ech;
}
$rmode = 7;
if (CCHK) { assert($cid == $nc, "Invalid categorical ID"); }
}
elsif ($rmode == 7) { # Read domain size of current feature
m/^\s*(\d+)\s*$/ || die "Unable to match \@ $rmode: $_\n";
($xmap->{CDs}->{$nc}, $rmode, $nv) = ($1, 8, 0);
}
elsif ($rmode == 8) { # Read values of categorical feature
my @toks = split(/ +/);
my $vid = shift @toks;
if (!HLPMAP) {
${$xmap->{CMap}->{$nc}}[$vid] = join(CCAT_CH, @toks); }
else {
my ($repl, $sch, $ech, $jch) = (0, '', '', '');
for (my $i=0; $i<=$#toks; ++$i) {
if ($toks[$i] =~ m/${$xmap->{VMap}}[$nc]/) {
$toks[$i] =~ s/${$xmap->{VMap}}[$nc]/\?\?/g;
$repl = 1;
}
}
if ($#toks > 0 && !$repl) { ($sch,$ech,$jch)=('\'','\'',' '); }
${$xmap->{CMap}->{$nc}}[$vid] = $sch . join($jch, @toks) . $ech;
}
if (++$nv == $xmap->{CDs}->{$nc}) {
if (++$nc == $xmap->{NReal}+$xmap->{NCat}) { $rmode = 9; }
else { $rmode = 6; }
}
}
else { die "Invalid state with input \@ $rmode: $_\n"; }
}
close($fh);
}
# Parse CNBC format -- currently hard-coded for 2 classes
sub parse_cnbc()
{
my ($opts, $cnbc, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($cc, $cv, $pol, $rmode) = (0, 0, 0, 0);
while(<$fh>) {
chomp;
if ($rmode == 0) { # Read number of classes
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($cnbc->{NC}, $rmode, $cc) = ($1, 1, 0);
}
elsif ($rmode == 1) { # Read priors
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
push @{$cnbc->{Prior}}, $1;
if (++$cc == $cnbc->{NC}) { $rmode = 2; }
}
elsif ($rmode == 2) { # Read number of features
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($cnbc->{NV}, $cv, $rmode) = ($1, 0, 3);
}
elsif ($rmode == 3) { # Read domain size of feature
my $cpt = "CPT$cv";
if ($cv == $cnbc->{NV}) { die "Too many features specified?\n"; }
m/^\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($cnbc->{$cpt}->{D}, $cc, $rmode) = ($1, 0, 4);
}
elsif ($rmode == 4) { # Read CPT for feature
my $cpt = "CPT$cv";
my $ccl = "C$cc";
my @probs = split(/ +/);
if ($#probs+1 != $cnbc->{$cpt}->{D}) { die "Invalid CPT def\n"; }
for (my $i=0; $i<=$#probs; ++$i) {
$probs[$i] =~ m/(\-?\d+\.?\d*)/ || die "Unable to match: $_\n";
push @{$cnbc->{$cpt}->{$ccl}}, $probs[$i];
}
if (++$cc == $cnbc->{NC}) {
($cv, $cc, $rmode) = ($cv+1, 0, 3); # Move to next feature
}
} else { die "Unexpected read mode in CNBC file\n"; }
}
close($fh);
}
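# For reference, a tiny hypothetical CNBC file that parse_cnbc would accept
# (probabilities invented; 2 classes, 2 features):
#   2                number of classes (NC)
#   0.6              prior of class 0
#   0.4              prior of class 1
#   2                number of features (NV)
#   2                domain size of feature 0
#   0.7 0.3          P(feature 0 = value | class 0)
#   0.2 0.8          P(feature 0 = value | class 1)
#   3                domain size of feature 1
#   0.5 0.25 0.25    P(feature 1 = value | class 0)
#   0.1 0.6 0.3      P(feature 1 = value | class 1)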
# Parse BLC format
sub parse_blc()
{
my ($opts, $blc, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($rmode, $cnt) = (0, 0);
while(<$fh>) {
next if m/^\s*$/ || m/^c\s+/;
chomp;
if ($rmode == 0) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($blc->{NV}, $rmode) = ($1, 1);
}
elsif ($rmode == 1) {
if ($cnt == $blc->{NV}+1) {
die "Too many lines in BLC description??\n"; }
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
${$blc->{Ws}}[$cnt++] = $1;
}
}
close($fh);
}
# Parse ACC format
sub parse_acc()
{
my ($opts, $acc, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($cc, $cv, $pol, $rmode) = (0, 0, 0, 0);
while(<$fh>) {
next if m/^\s*$/ || m/^c\s+/;
chomp;
if ($rmode == 0) {
m/\s*(\d)\s*$/ || die "Unable to match: $_\n";
($acc->{NC}, $rmode) = ($1, 1);
}
elsif ($rmode == 1) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($acc->{NV}, $rmode) = ($1, 2);
}
elsif ($rmode == 2) {
my $class = "C$cc";
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
$acc->{VV}->{$class}->{W0} = $1;
$rmode = 3;
}
elsif ($rmode == 3) {
my $class = "C$cc";
my $polarity = "P$pol";
m/^\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
${$acc->{VV}->{$class}->{$polarity}}[$cv] = $1;
$pol = 1 - $pol;
if ($pol == 0) { $cv++; }
if ($cv == $acc->{NV}) {
($cc, $cv, $pol) = ($cc+1, 0, 0);
if ($cc == $acc->{NC}) { last; }
$rmode = 2;
}
}
}
close($fh);
}
# Parse instance format
sub parse_instance()
{
my ($opts, $inst, $fname) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
my ($cnt, $rmode) = (0, 0);
while(<$fh>) {
next if m/^\s*$/ || m/^c\s+/;
chomp;
if ($rmode == 0) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($inst->{NV}, $rmode) = ($1, 1);
}
elsif ($rmode == 1) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
${$inst->{E}}[$cnt++] = $1;
if ($cnt == $inst->{NV}) { $rmode = 2; }
}
elsif ($rmode == 2) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
$inst->{C} = $1;
}
}
close($fh);
}
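# For reference, a hypothetical instance file for parse_instance
# (values invented; NV feature values followed by the class):
#   2      number of features (NV)
#   1      value of feature 0
#   0      value of feature 1
#   1      class label (C)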
# Parse explanations
sub parse_explanations()
{
my ($fname, $xpl) = @_;
open(my $fh, "<$fname") || die "Unable to open file $fname. " . F_ERR_MSG;
while(<$fh>) {
next if m/^\s*$/ || m/^c\s+/;
chomp;
my @lits = split(/ +/);
shift @lits; # Drop 'Expl: '
push @{$xpl->{Expl}}, \@lits;
}
close($fh);
}
END {
}
1; # to ensure that the 'require' or 'use' succeeds
package Utils;
use strict;
use warnings;
use Data::Dumper;
use POSIX;
use Exporter();
use Sys::Hostname;
BEGIN {
@Utils::ISA = ('Exporter');
@Utils::EXPORT_OK = qw( &get_progname &get_progpath &round &SIG_handler );
}
#------------------------------------------------------------------------------#
# Execution path handling
#------------------------------------------------------------------------------#
sub get_progname() {
my @progname_toks = split(/\//, $0);
my $progname = $progname_toks[$#progname_toks];
#print "$progname\n";
return $progname;
}
sub get_progpath() {
my @progname_toks = split(/\//, $0);
pop @progname_toks;
my $progpath = join('/', @progname_toks);
if ($progpath eq '') { $progpath = '\.\/'; }
#print "Prog Path: $progpath\n"; #exit;
return $progpath;
}
sub get_hostname() {
my $full_host_name = &Sys::Hostname::hostname();
$full_host_name =~ m/(\w+)\.?/;
my $rhname = $1;
#print "|$hostname|\n"; exit;
return $rhname;
}
sub resolve_inc() { # Kept here as a template; need a copy in each script...
my ($cref, $pmname) = @_;
my @progname_toks = split(/\//, $0);
pop @progname_toks;
my $progpath = join('/', @progname_toks);
my $fullname = $progpath . '/' . $pmname;
my $fh;
open($fh, "<$fullname") || die "non-existing file: $pmname\n";
return $fh;
}
#------------------------------------------------------------------------------#
# Signal handling utilities
#------------------------------------------------------------------------------#
sub register_handlers()
{
$SIG{'INT'} = 'Utils::INT_handler';
$SIG{'TERM'} = 'Utils::INT_handler';
$SIG{'ABRT'} = 'Utils::SIG_handler';
$SIG{'SEGV'} = 'Utils::SIG_handler';
$SIG{'BUS'} = 'Utils::SIG_handler';
$SIG{'QUIT'} = 'Utils::SIG_handler';
$SIG{'XCPU'} = 'Utils::SIG_handler';
}
my @args = ();
my @callback = ();
sub push_arg()
{
push @args, shift;
}
sub push_callback()
{
push @callback, shift;
}
sub SIG_handler()
{
&Utils::INT_handler();
}
sub INT_handler()
{
# call any declared callbacks, e.g. to print stats, summaries, etc.
print "\nReceived system signal. Cleaning up & terminating...\n";
foreach my $cback (@callback) {
&{$cback}(\@args);
}
exit 20; # 20 denotes resources exceeded condition (see below)
}
#------------------------------------------------------------------------------#
# Useful utils
#------------------------------------------------------------------------------#
sub round() {
my ($rval) = @_;
return int($rval + 0.5);
}
END {
}
1; # to ensure that the 'require' or 'use' succeeds
package Writers;
use strict;
use warnings;
use Data::Dumper;
use POSIX;
use Exporter;
require Utils; # Must use require, to get INC updated
import Utils qw( &get_progname &get_progpath );
BEGIN {
@Writers::ISA = ('Exporter');
@Writers::EXPORT_OK = qw( &write_xlc );
}
# Export XLC format
sub write_xlc()
{
my ($opts, $xlc) = @_;
print("$xlc->{NV}\n");
print("$xlc->{W0}\n");
print("$xlc->{NReal}\n");
for (my $i=0; $i<$xlc->{NReal}; ++$i) {
print("${$xlc->{RVs}}[$i]\n");
}
print("$xlc->{NCat}\n");
for (my $i=0; $i<$xlc->{NCat}; ++$i) {
my $cvi = "CVs$i";
print("${$xlc->{CDs}}[$i] ");
print("@{$xlc->{$cvi}}\n");
}
}
END {
}
1; # to ensure that the 'require' or 'use' succeeds
#!/usr/bin/env perl
## Tool for translating the probabilities of a CNBC into a
## sequence of non-negative weights which are then represented
## in the XLC format.
## Script specifically assumes *2* classes
push @INC, \&resolve_inc;
use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;
require Parsers;
import Parsers qw( parse_cnbc );
require Writers;
import Writers qw( write_xlc );
use constant DBG => 0; ## Also, comment out unused 'uses'
use constant CHK => 0;
my $f_err_msg = "Please check file name, existence, permissions, etc.\n";
# 0. Read command line arguments
my %opts = ();
&read_opts(\%opts);
if ((CHK || DBG) && (defined($opts{k}) || defined($opts{d}))) {
## Uncomment to use assertions && debug messages
#use Carp::Assert; # Assertions are on.
#if (DBG && $opts{d}) {
# use Data::Dumper;
#}
}
if (defined($opts{o})) {
open ($opts{FH}, '>', $opts{o});
select($opts{FH});
}
# 1. Data structures
my %cnbc = ();
my %xlc = ();
my $mval = 0;
my $tval = 0;
# 2. Read ML model (definition of (C)NBC in CNBC format)
&parse_cnbc(\%opts, \%cnbc, $opts{f});
if ($opts{d}) { warn Data::Dumper->Dump([ \%cnbc ], [ qw(cnbc) ]); }
# 3. Translate CNBC weights (i.e. probs) into CNBC weights (i.e. additive & >=0)
&process_weights(\%opts, \%cnbc);
if ($opts{d}) { warn Data::Dumper->Dump([ \%cnbc ], [ qw(cnbc) ]); }
# 4. Reduce CNBC (w/ weights) into XLC
&reduce_cnbc_xlc(\%opts, \%cnbc, \%xlc);
if ($opts{d}) { warn Data::Dumper->Dump([ \%xlc ], [ qw(xlc) ]); }
# 5. Print ML model in XLC format
&write_xlc(\%opts, \%xlc);
1;
# Core functions
# Goal is to apply a translation to the prob values
sub process_weights()
{
my ($opts, $cnbc) = @_;
if (CHK && $opts->{k}) {
assert($cnbc->{NC}==2, "Cannot handle $cnbc->{NC} classes\n");
}
# 1. First traversal: compute & sum logarithms and flag 0 probs
my ($hasp0, $sumlogs, $minv, $logv) = (0, 0, 0, 0);
for(my $i=0; $i<=$#{$cnbc->{Prior}}; ++$i) {
if (${$cnbc->{Prior}}[$i] == 0) { $hasp0 = 1; }
else {
$logv = log(${$cnbc->{Prior}}[$i]);
$sumlogs += $logv;
${$cnbc->{Prior}}[$i] = $logv;
if ($logv < $minv) { $minv = $logv; }
}
}
for(my $j=0; $j<$cnbc->{NV}; ++$j) {
my $cpt = "CPT$j";
for(my $i=0; $i<=$#{$cnbc->{Prior}}; ++$i) {
my $ccl = "C$i";
for(my $k=0; $k<$cnbc->{$cpt}->{D}; ++$k) {
if (${$cnbc->{$cpt}->{$ccl}}[$k] == 0) { $hasp0 = 1; }
else {
$logv = log(${$cnbc->{$cpt}->{$ccl}}[$k]);
$sumlogs += $logv;
${$cnbc->{$cpt}->{$ccl}}[$k] = $logv;
if ($logv < $minv) { $minv = $logv; }
}
}
}
}
$mval = $sumlogs - 1;
$tval = ($hasp0) ? -$mval : -$minv;
# 2. Second traversal: update 0 probs, offset weights by T
for(my $i=0; $i<=$#{$cnbc->{Prior}}; ++$i) {
if (${$cnbc->{Prior}}[$i] == 0) {
${$cnbc->{Prior}}[$i] = $mval;
}
${$cnbc->{Prior}}[$i] += $tval;
}
for(my $j=0; $j<$cnbc->{NV}; ++$j) {
my $cpt = "CPT$j";
for(my $i=0; $i<=$#{$cnbc->{Prior}}; ++$i) {
my $ccl = "C$i";
for(my $k=0; $k<$cnbc->{$cpt}->{D}; ++$k) {
if (${$cnbc->{$cpt}->{$ccl}}[$k] == 0) {
${$cnbc->{$cpt}->{$ccl}}[$k] = $mval;
}
${$cnbc->{$cpt}->{$ccl}}[$k] += $tval;
}
}
}
if ($opts->{d}) { warn Data::Dumper->Dump([ $cnbc ], [ qw(cnbc_pw) ]); }
}
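# Worked example (numbers invented, not from the commit): with class priors
# (0.6, 0.4) and no zero probability anywhere in the model, the first pass
# stores logs, e.g. log(0.6) ~ -0.511 and log(0.4) ~ -0.916, accumulating
# $sumlogs over all entries and tracking $minv, the smallest log. Since
# $hasp0 == 0, T = -$minv, and the second pass adds T to every entry, so all
# weights become non-negative while each class-0/class-1 difference (the only
# quantity reduce_cnbc_xlc below uses) is unchanged.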
sub reduce_cnbc_xlc()
{
my ($opts, $cnbc, $xlc) = @_;
$xlc->{NV} = $cnbc->{NV};
$xlc->{W0} = ${$cnbc->{Prior}}[0] - ${$cnbc->{Prior}}[1];
$xlc->{NReal} = 0;
$xlc->{NCat} = $cnbc->{NV};
for(my $j=0; $j<$cnbc->{NV}; ++$j) {
my $cpt = "CPT$j";
my $cvj = "CVs$j";
my ($ccl0, $ccl1) = ('C0', 'C1');
push @{$xlc->{CDs}}, $cnbc->{$cpt}->{D};
for(my $k=0; $k<$cnbc->{$cpt}->{D}; ++$k) {
my $vdiff =
${$cnbc->{$cpt}->{$ccl0}}[$k] - ${$cnbc->{$cpt}->{$ccl1}}[$k];
push @{$xlc->{$cvj}}, $vdiff;
}
}
}
# Format parsing functions
sub read_acc_spec()
{
my ($fname, $acc) = @_;
die "Must use common parser!!!!\n";
open(my $fh, "<$fname") ||
die "Unable to open file $fname. " . $f_err_msg;
my ($cc, $cv, $pol, $rmode) = (0, 0, 0, 0);
while(<$fh>) {
chomp;
if ($rmode == 0) {
m/\s*(\d)\s*$/ || die "Unable to match: $_\n";
($acc->{NC}, $rmode) = ($1, 1);
}
elsif ($rmode == 1) {
m/\s*(\d+)\s*$/ || die "Unable to match: $_\n";
($acc->{NV}, $rmode) = ($1, 2);
}
elsif ($rmode == 2) {
my $class = "C$cc";
m/\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
$acc->{VV}->{$class}->{W0} = $1;
$rmode = 3;
}
elsif ($rmode == 3) {
my $class = "C$cc";
my $polarity = "P$pol";
m/\s*(\-?\d+\.?\d*)\s*$/ || die "Unable to match: $_\n";
${$acc->{VV}->{$class}->{$polarity}}[$cv] = $1;
$pol = 1 - $pol;
if ($pol == 0) { $cv++; }
if ($cv == $acc->{NV}) {
($cc, $cv, $pol) = ($cc+1, 0, 0);
if ($cc == $acc->{NC}) { last; }
$rmode = 2;
}
} else { die "Unexpected line in file: $_\n"; }
}
close($fh);
}
# Utilities
sub read_opts()
{
my ($opts) = @_;
getopts("hdvkf:o:", $opts);
if ($opts->{h}) {
&prt_help();
}
elsif (!defined($opts->{f})) {
die "Usage: $0 [-h] [-d] [-v] [-k] [-o <out-file>] -f <cnbc-file>\n" ;
}
}
sub prt_help()
{
my $tname = &toolname($0);
print <<"EOF";
$tname: Translate CNBC format into XLC format
Usage: $tname [-h] [-d] [-v] [-k] [-o <out-file>] -f <cnbc-file>
-f <cnbc-file> specification of CNBC file
-o <out-file> output file for exporting XLC format
-k perform consistency checks & exit if error
-v verbose mode
-d debug mode
-h prints this help
Author: joao.marques-silva\@univ-toulouse.fr
EOF
exit();
}
sub toolname()
{
my ($tname) = @_;
$tname =~ m/([\.\_\-a-zA-Z0-9]+)$/;
return $1;
}
#------------------------------------------------------------------------------#
# Auxiliary functions
#------------------------------------------------------------------------------#
sub resolve_inc() { # Copy from template kept in UTILS package
my ($cref, $pmname) = @_;
my @progname_toks = split(/\//, $0);
pop @progname_toks;
my $progpath = join('/', @progname_toks);
my $fullname = $progpath . '/' . $pmname;
open(my $fh, "<$fullname") || die "non-existing file: $pmname\n";
return $fh;
}
# jpms
import base64
import dash_bootstrap_components as dbc
import numpy as np
from dash import dcc, html
from pages.application.RandomForest.utils.data import Data
from pages.application.RandomForest.utils.anchor_wrap import anchor_call
from pages.application.RandomForest.utils.lime_wrap import lime_call
from pages.application.RandomForest.utils.shap_wrap import shap_call
from pages.application.RandomForest.utils.xgbooster import XGBooster, preprocess_dataset
from pages.application.RandomForest.utils.xgbrf import XGBRandomForest
class RandomForestComponent:

    def __init__(self, model, type_model='SKL', info=None, type_info=''):
        if info is not None and '.csv' in type_info:
            self.data = Data(info)

        # Conversion of the model
        if type_model == "RF":
            self.random_forest = XGBRandomForest(info, from_model=model)
        else:
            self.random_forest = XGBooster(info, from_model=model)

        # self.random_forest.encode(test_on=info)

        self.map_file = ""
        self.network = html.Div([])
        self.explanation = []

    def update_with_explicability(self, instance, enum_feats=None, validation=None, xtype=None, solver=None):
        # Call explanation
        if not enum_feats and self.data is not None:
            enum_feats = len(self.data.names) - 1

        expl = self.random_forest.explain(instance,
                                          use_lime=lime_call if solver == "lime" else None,
                                          use_anchor=anchor_call if solver == "anchor" else None,
                                          use_shap=shap_call if solver == "shap" else None,
                                          nof_feats=enum_feats)
        if validation:
            coex = self.random_forest.validate(instance, expl)
            if coex:
                # repairing the local explanation
                gexpl = self.random_forest.explain(instance, expl_ext=expl, prefer_ext=True)
            else:
                # an attempt to refine the local explanation further
                gexpl = self.random_forest.explain(instance, expl_ext=expl)

        print(expl)

        self.explanation = []
        list_explanations_path = []
        list_contrastive_explanations_path = []
        explanation = {}  # placeholder: not yet populated from expl
        self.network = html.Div([])

        # Build a clean text component: the instance first, then each explanation entry
        self.explanation.append(html.H4("Instance : \n"))
        self.explanation.append(html.P(str([str(instance[i]) for i in range(len(instance))])))
        for k in explanation.keys():
            if k != "List of path explanation(s)" and k != "List of path contrastive explanation(s)":
                if k in ["List of abductive explanation(s)", "List of contrastive explanation(s)"]:
                    self.explanation.append(html.H4(k))
                    for expl in explanation[k]:
                        self.explanation.append(html.Hr())
                        self.explanation.append(html.P(expl))
                        self.explanation.append(html.Hr())
                else:
                    self.explanation.append(html.P(k + explanation[k]))
            else:
                list_explanations_path = explanation["List of path explanation(s)"]
                list_contrastive_explanations_path = explanation["List of path contrastive explanation(s)"]

        return list_explanations_path, list_contrastive_explanations_path
from .anchor_wrap import *
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##
## anchor_wrap.py (reuses parts of the code of SHAP)
##
## Created on: Jan 6, 2019
## Author: Nina Narodytska, Alexey Ignatiev
## E-mail: narodytska@vmware.com, aignatiev@ciencias.ulisboa.pt
##
#
#==============================================================================
from __future__ import print_function
import json
import numpy as np
import xgboost as xgb
import math
import resource
from anchor import utils
from anchor import anchor_tabular
import sklearn
import sklearn.ensemble
#
#==============================================================================
def anchor_call(xgb, sample=None, nb_samples=5, feats='all',
nb_features_in_exp=5, threshold=0.95):
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime
# we need a way to say that features are categorical,
# but we do not have this information.
explainer = anchor_tabular.AnchorTabularExplainer(
class_names=xgb.target_name,
feature_names=xgb.feature_names,
train_data=xgb.X,
categorical_names=xgb.categorical_names if xgb.use_categorical else {})
# if (len(xgb.X_test) != 0):
# explainer.fit(xgb.X_train, xgb.Y_train, xgb.X_test, xgb.Y_test)
# else:
# explainer.fit(xgb.X_train, xgb.Y_train, xgb.X_train, xgb.Y_train)
predict_fn_xgb = lambda x: xgb.model.predict(xgb.transform(x)).astype(int)
f2imap = {}
for i, f in enumerate(xgb.feature_names):
f2imap[f.strip()] = i
if (sample is not None):
try:
feat_sample = np.asarray(sample, dtype=np.float32)
except Exception as inst:
print("Cannot parse input sample:", sample, inst)
exit()
print("\n\n\nStarting Anchor explainer... \nConsidering a sample with features:", feat_sample)
if not (len(feat_sample) == len(xgb.X_train[0])):
print("Unmatched features are not supported: The number of features in a sample {} is not equal to the number of features in this benchmark {}".format(len(feat_sample), len(xgb.X_train[0])))
exit()
# compute boost predictions
feat_sample_exp = np.expand_dims(feat_sample, axis=0)
feat_sample_exp = xgb.transform(feat_sample_exp)
y_pred = xgb.model.predict(feat_sample_exp)[0]
y_pred_prob = xgb.model.predict_proba(feat_sample_exp)[0]
# hack: testing that we use the same one-hot encoding
# test_feat_sample_exp = explainer.encoder.transform(feat_sample_exp)
test_y_pred = xgb.model.predict(feat_sample_exp)[0]
test_y_pred_prob = xgb.model.predict_proba(feat_sample_exp)[0]
assert(np.allclose(y_pred_prob, test_y_pred_prob))
print('Prediction: ', explainer.class_names[predict_fn_xgb(feat_sample.reshape(1, -1))[0]])
# exp = explainer.explain_instance(feat_sample, xgb.model.predict, threshold=threshold)
print('sample ====== ', feat_sample)
exp = explainer.explain_instance(feat_sample, predict_fn_xgb, threshold=threshold)
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print('Coverage: %.2f' % exp.coverage())
# explanation
expl = []
if (xgb.use_categorical):
for k, v in enumerate(exp.features()):
expl.append(v)
print("Clause ", k, end=": ")
print("feature (", v, ",", explainer.feature_names[v], end="); ")
print("value (", feat_sample[v], ",", explainer.categorical_names[v][int(feat_sample[v])] , ")")
else:
print("We only support datasets with categorical features for Anchor. Please pre-process your data.")
exit()
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
print(' time: {0:.2f}'.format(timer))
return sorted(expl)
###################################### TESTING
max_sample = nb_samples
y_pred_prob = xgb.model.predict_proba(xgb.X_test)
y_pred = xgb.model.predict(xgb.X_test)
nb_tests = min(max_sample,len(xgb.Y_test))
top_labels = 1
for sample in range(nb_tests):
np.set_printoptions(precision=2)
feat_sample = xgb.X_test[sample]
print("Considering a sample with features:", feat_sample)
if (False):
feat_sample[4] = 3000
y_pred_prob_sample = xgb.model.predict_proba([feat_sample])
print(y_pred_prob_sample)
print("\t Predictions:", y_pred_prob[sample])
exp = explainer.explain_instance(feat_sample,
predict_fn_xgb,
num_features= xgb.num_class,
top_labels = 1,
labels = list(range(xgb.num_class)))
for i in range(xgb.num_class):
if (i != y_pred[sample]):
continue
print("\t \t Explanations for the winner class", i, " (xgboost confidence = ", y_pred_prob[sample][i], ")")
print("\t \t Features in explanations: ", exp.as_list(label=i))
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
print(' time: {0:.2f}'.format(timer))
return
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##
## data.py
##
## Created on: Sep 20, 2017
## Author: Alexey Ignatiev, Nina Narodytska
## E-mail: aignatiev@ciencias.ulisboa.pt, narodytska@vmware.com
##
#
#==============================================================================
from __future__ import print_function
import collections
import itertools
import pickle
import six
from six.moves import range
import numpy as np
#
#==============================================================================
class Data(object):
"""
Class for representing data (transactions).
"""
def __init__(self, filename=None, fpointer=None, mapfile=None,
separator=' ', use_categorical = False):
"""
Constructor and parser.
"""
self.names = None
self.nm2id = None
self.samps = None
self.wghts = None
self.feats = None
self.fvmap = None
self.ovmap = {}
self.fvars = None
self.fname = filename
self.mname = mapfile
self.deleted = set([])
if filename:
with open(filename, 'r') as fp:
self.parse(fp, separator)
elif fpointer:
self.parse(fpointer, separator)
if self.mname:
self.read_orig_values()
# check if we have extra info about categorical_features
if (use_categorical):
extra_file = filename+".pkl"
try:
f = open(extra_file, "rb")
print("Attempt: loading extra data from ", extra_file)
extra_info = pickle.load(f)
print("loaded")
f.close()
self.categorical_features = extra_info["categorical_features"]
self.categorical_names = extra_info["categorical_names"]
self.class_names = extra_info["class_names"]
self.categorical_onehot_names = extra_info["categorical_names"].copy()
for i, name in enumerate(self.class_names):
self.class_names[i] = str(name).replace("b'","'")
for c in self.categorical_names.items():
clean_feature_names = []
for i, name in enumerate(c[1]):
name = str(name).replace("b'","'")
clean_feature_names.append(name)
self.categorical_names[c[0]] = clean_feature_names
except Exception as e:
f.close()
print("Please provide info about categorical features or omit option -c", e)
exit()
def parse(self, fp, separator):
"""
Parse input file.
"""
# reading data set from file
lines = fp.readlines()
# reading preamble
self.names = lines[0].strip().split(separator)
self.feats = [set([]) for n in self.names]
del(lines[0])
# filling name to id mapping
self.nm2id = {name: i for i, name in enumerate(self.names)}
self.nonbin2bin = {}
for name in self.nm2id:
spl = name.rsplit(':',1)
if (spl[0] not in self.nonbin2bin):
self.nonbin2bin[spl[0]] = [name]
else:
self.nonbin2bin[spl[0]].append(name)
# reading training samples
self.samps, self.wghts = [], []
for line, w in six.iteritems(collections.Counter(lines)):
sample = line.strip().split(separator)
for i, f in enumerate(sample):
if f:
self.feats[i].add(f)
self.samps.append(sample)
self.wghts.append(w)
# direct and opposite mappings for items
idpool = itertools.count(start=0)
FVMap = collections.namedtuple('FVMap', ['dir', 'opp'])
self.fvmap = FVMap(dir={}, opp={})
# mapping features to ids
for i in range(len(self.names) - 1):
feats = sorted(list(self.feats[i]), reverse=True)
if len(feats) > 2:
for l in feats:
self.fvmap.dir[(self.names[i], l)] = l
else:
self.fvmap.dir[(self.names[i], feats[0])] = 1
if len(feats) == 2:
self.fvmap.dir[(self.names[i], feats[1])] = 0
# opposite mapping
for key, val in six.iteritems(self.fvmap.dir):
self.fvmap.opp[val] = key
# determining feature variables (excluding class variables)
for v, pair in six.iteritems(self.fvmap.opp):
if pair[0] == self.names[-1]:
self.fvars = v - 1
break
def read_orig_values(self):
"""
Read original values for all the features.
(from a separate CSV file)
"""
self.ovmap = {}
for line in open(self.mname, 'r'):
featval, bits = line.strip().split(',')
feat, val = featval.split(':')
for i, b in enumerate(bits):
f = '{0}:b{1}'.format(feat, i + 1)
v = self.fvmap.dir[(f, '1')]
if v not in self.ovmap:
self.ovmap[v] = [feat]
if -v not in self.ovmap:
self.ovmap[-v] = [feat]
self.ovmap[v if b == '1' else -v].append(val)
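# For reference, a hypothetical mapfile line for read_orig_values
# (names invented): the line
#
#   color:red,100
#
# splits into feat='color', val='red', bits='100', i.e. the original value
# 'red' is encoded by the one-hot columns color:b1=1, color:b2=0, color:b3=0,
# and ovmap maps the corresponding literals back to ['color', 'red'].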
from .lime_wrap import *
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##
## lime_wrap.py (reuses parts of the code of SHAP)
##
## Created on: Dec 12, 2018
## Author: Nina Narodytska, Alexey Ignatiev
## E-mail: narodytska@vmware.com, aignatiev@ciencias.ulisboa.pt
##
#
#==============================================================================
import json
import numpy as np
import xgboost as xgb
import math
import lime
import lime.lime_tabular
import resource
#
#==============================================================================
def lime_call(xgb, sample = None, nb_samples = 5, feats='all',
nb_features_in_exp=5):
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime
# we need a way to say that features are categorical,
# but we do not have this information.
predict_fn_xgb = lambda x: xgb.model.predict_proba(xgb.transform(x)).astype(float)
explainer = lime.lime_tabular.LimeTabularExplainer(
xgb.X_train,
feature_names=xgb.feature_names,
categorical_features=xgb.categorical_features if xgb.use_categorical else None,
class_names=xgb.target_name,
discretize_continuous=True,
)
f2imap = {}
for i, f in enumerate(xgb.feature_names):
f2imap[f.strip()] = i
if (sample is not None):
try:
feat_sample = np.asarray(sample, dtype=np.float32)
except:
print("Cannot parse input sample:", sample)
exit()
print("\n\n\nStarting LIME explainer... \nConsidering a sample with features:", feat_sample)
if not (len(feat_sample) == len(xgb.X_train[0])):
print("Unmatched features are not supported: The number of features in a sample {} is not equal to the number of features in this benchmark {}".format(len(feat_sample), len(xgb.X_train[0])))
exit()
# compute boost predictions
feat_sample_exp = np.expand_dims(feat_sample, axis=0)
feat_sample_exp = xgb.transform(feat_sample_exp)
y_pred = xgb.model.predict(feat_sample_exp)[0]
y_pred_prob = xgb.model.predict_proba(feat_sample_exp)[0]
exp = explainer.explain_instance(feat_sample,
predict_fn_xgb,
num_features = nb_features_in_exp,
top_labels = 1)#,
#labels = list(range(xgb.num_class)))
expl = []
# choose which features in the explanation to focus on
if feats in ('p', 'pos', '+'):
feats = 1
elif feats in ('n', 'neg', '-'):
feats = -1
else:
feats = 0
for i in range(xgb.num_class):
if (i != y_pred):
continue
print("\t \t Explanations for the winner class", i, " (xgboost confidence = ", y_pred_prob[i], ")")
print("\t \t Features in explanations: ", exp.as_list(label=i))
s_human_readable = ""
for k, v in enumerate(exp.as_list(label=i)):
if (feats == 1 and v[1] < 0) or (feats == -1 and v[1] >= 0):
continue
if not (('<' in v[0]) or ('>' in v[0])):
a = v[0].split('=')
f = a[0].strip()
l = a[1].strip()
u = l
if (xgb.use_categorical):
fid = f2imap[f]
fvid = int(a[1])
#s_human_readable = s_human_readable + f + " = [" + str(xgb.categorical_names[fid][fvid]) +"," + str(v[1])+ "] "
s_human_readable = s_human_readable + "\t \t id = {}, name = {}, score = {}\n".format(fid, f, str(v[1]))
else:
a = v[0].split('<')
if len(a) == 1:
a = v[0].split('>')
if len(a) == 2:
f = a[0].strip()
if '>' in v[0]:
l, u = float(a[1].strip(' =')), None
else:
l, u = None, float(a[1].strip(' ='))
else:
l = float(a[0].strip())
f = a[1].strip(' =')
u = float(a[2].strip(' ='))
# expl.append(tuple([f2imap[f], l, u, v[1] >= 0]))
expl.append(f2imap[f])
if (xgb.use_categorical):
if (len(s_human_readable) > 0):
print("\t \t Features in explanations (with provided categorical labels): \n", s_human_readable)
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
print(' time: {0:.2f}'.format(timer))
return sorted(expl)
###################################### TESTING
max_sample = nb_samples
y_pred_prob = xgb.model.predict_proba(xgb.X_test)
y_pred = xgb.model.predict(xgb.X_test)
nb_tests = min(max_sample,len(xgb.Y_test))
top_labels = 1
for sample in range(nb_tests):
np.set_printoptions(precision=2)
feat_sample = xgb.X_test[sample]
print("Considering a sample with features:", feat_sample)
if (False):
feat_sample[4] = 3000
y_pred_prob_sample = xgb.model.predict_proba([feat_sample])
print(y_pred_prob_sample)
print("\t Predictions:", y_pred_prob[sample])
exp = explainer.explain_instance(feat_sample,
predict_fn_xgb,
num_features= xgb.num_class,
top_labels = 1,
labels = list(range(xgb.num_class)))
for i in range(xgb.num_class):
if (i != y_pred[sample]):
continue
print("\t \t Explanations for the winner class", i, " (xgboost confidence = ", y_pred_prob[sample][i], ")")
print("\t \t Features in explanations: ", exp.as_list(label=i))
timer = resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime + \
resource.getrusage(resource.RUSAGE_SELF).ru_utime - timer
print(' time: {0:.2f}'.format(timer))
return