Skip to content
Snippets Groups Projects
Commit 022452bc authored by Caroline DE POURTALES's avatar Caroline DE POURTALES
Browse files

added dataset upload and now works with pkl file, however wrong form on inftance

parent 27fa3bfa
No related branches found
No related tags found
1 merge request!3Decision tree done
......@@ -4,7 +4,7 @@ from dash import Input, Output, State
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from utils import parse_contents_graph, parse_contents_instance
from utils import parse_contents_graph, parse_contents_instance, parse_contents_data
def register_callbacks(page_home, page_course, page_application, app):
......@@ -37,6 +37,8 @@ def register_callbacks(page_home, page_course, page_application, app):
Input('ml_model_choice', 'value'),
Input('ml_pretrained_model_choice', 'contents'),
State('ml_pretrained_model_choice', 'filename'),
Input('model_dataset_choice', 'contents'),
State('model_dataset_choice', 'filename'),
Input('ml_instance_choice', 'contents'),
State('ml_instance_choice', 'filename'),
Input('number_explanations', 'value'),
......@@ -45,7 +47,7 @@ def register_callbacks(page_home, page_course, page_application, app):
Input('expl_choice', 'value'),
prevent_initial_call=True
)
def update_ml_type(value_ml_model, pretrained_model_contents, pretrained_model_filename, instance_contents, instance_filename, enum, xtype, solver, expl_choice):
def update_ml_type(value_ml_model, pretrained_model_contents, pretrained_model_filename, model_dataset, model_dataset_filename, instance_contents, instance_filename, enum, xtype, solver, expl_choice):
ctx = dash.callback_context
if ctx.triggered:
ihm_id = ctx.triggered[0]['prop_id'].split('.')[0]
......@@ -57,8 +59,15 @@ def register_callbacks(page_home, page_course, page_application, app):
elif ihm_id == 'ml_pretrained_model_choice':
if model_application.ml_model is None :
raise PreventUpdate
tree = parse_contents_graph(pretrained_model_contents, pretrained_model_filename)
model_application.update_pretrained_model(tree, pretrained_model_filename)
graph = parse_contents_graph(pretrained_model_contents, pretrained_model_filename)
model_application.update_pretrained_model(graph)
return pretrained_model_filename, None, None, None
elif ihm_id == 'model_dataset_choice':
if model_application.ml_model is None :
raise PreventUpdate
model_dataset = parse_contents_data(model_dataset, model_dataset_filename)
model_application.update_pretrained_model_dataset(model_dataset)
return pretrained_model_filename, None, model_application.component.network, None
elif ihm_id == 'ml_instance_choice' :
......
......@@ -6,6 +6,7 @@ import dash_interactive_graphviz
import numpy as np
from dash import dcc, html
from pages.application.DecisionTree.utils.upload_tree import UploadedDecisionTree
from pages.application.DecisionTree.utils.data import Data
from pages.application.DecisionTree.utils.dtree import DecisionTree
from pages.application.DecisionTree.utils.dtviz import (visualize,
......@@ -14,18 +15,20 @@ from pages.application.DecisionTree.utils.dtviz import (visualize,
class DecisionTreeComponent():
def __init__(self, tree, filename_tree):
def __init__(self, tree, dataset):
data = Data(dataset)
fvmap = data.mapping_features()
try:
feature_names = tree.feature_names_in_
except:
print("You did not dump the model with the features names")
feature_names = [f'f{i}' for i in range(tree.n_features_in_)]
self.uploaded_dt = UploadedDecisionTree(tree, 'SKL', filename_tree, maxdepth=tree.get_depth(), feature_names=feature_names, nb_classes=tree.n_classes_)
#need a function that takes as input UploadedDecisionTree and gives DecisionTree
self.dt_format, self.map, features_names_mapping = self.uploaded_dt.convert_dt(feat_names=feature_names)
self.dt = DecisionTree(from_dt=self.dt_format, mapfile = self.map, mapping_features=features_names_mapping)
self.uploaded_dt = UploadedDecisionTree(tree, 'SKL', maxdepth=tree.get_depth(), feature_names=feature_names, nb_classes=tree.n_classes_)
self.dt_format, self.map, features_names_mapping = self.uploaded_dt.dump(fvmap, feat_names=feature_names)
self.dt = DecisionTree(from_dt=self.dt_format, mapfile = self.map)
dot_source = visualize(self.dt)
self.network = [dbc.Row(dash_interactive_graphviz.DashInteractiveGraphviz(dot_source=dot_source, style = {"width": "60%",
......@@ -34,6 +37,11 @@ class DecisionTreeComponent():
self.explanation = []
def update_with_explicability(self, instance, enum, xtype, solver) :
self.explanation = []
list_explanations_path=[]
explanation = self.dt.explain(instance, enum=enum, xtype = xtype, solver=solver)
dot_source = visualize_instance(self.dt, instance)
self.network = [dbc.Row(dash_interactive_graphviz.DashInteractiveGraphviz(
dot_source=dot_source, style = {"width": "50%",
......@@ -41,9 +49,6 @@ class DecisionTreeComponent():
"background-color": "transparent"}
))]
self.explanation = []
list_explanations_path=[]
explanation = self.dt.explain(instance, enum=enum, xtype = xtype, solver=solver)
#Creating a clean and nice text component
for k in explanation.keys() :
......@@ -62,6 +67,7 @@ class DecisionTreeComponent():
return list_explanations_path
def draw_explanation(self, instance, expl) :
print(expl)
dot_source = visualize_expl(self.dt, instance, expl)
self.network = [dbc.Row(dash_interactive_graphviz.DashInteractiveGraphviz(
dot_source=dot_source,
......
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##
## data.py
##
## Created on: Sep 20, 2017
## Author: Alexey Ignatiev, Nina Narodytska
## E-mail: aignatiev@ciencias.ulisboa.pt, narodytska@vmware.com
##
#
#==============================================================================
from __future__ import print_function
import collections
import itertools
import os, pickle
import six
import gzip
from six.moves import range
import numpy as np
import pandas as pd
#from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
#
#==============================================================================
class Data(object):
"""
Class for representing data (transactions).
"""
def __init__(self, data, separator=','):
"""
Constructor and parser.
"""
self.names = None
self.nm2id = None
self.feats = None
self.targets = None
self.samples = None
self.parse(data, separator)
def parse(self, data, separator):
"""
Parse input file.
"""
# reading data set from file
lines = data.split('\n')
# reading preamble
self.names = [name.replace('"','').strip() for name in lines[0].strip().split(separator)]
self.feats = [set([]) for n in self.names[:-1]]
self.targets = set([])
lines = lines[1:]
# filling name to id mapping
self.nm2id = {name: i for i, name in enumerate(self.names)}
self.nonbin2bin = {}
for name in self.nm2id:
spl = name.rsplit(':',1)
if (spl[0] not in self.nonbin2bin):
self.nonbin2bin[spl[0]] = [name]
else:
self.nonbin2bin[spl[0]].append(name)
# reading training samples
self.samples = []
for line, w in six.iteritems(collections.Counter(lines)):
inst = [v.strip() for v in line.strip().split(separator)]
self.samples.append(inst)
for i, v in enumerate(inst[:-1]):
if v:
self.feats[i].add(str(v))
assert(inst[-1])
self.targets.add(str(inst[-1]))
self.nof_feats = len(self.names[:-1])
def mapping_features(self):
"""
feature-value mapping
"""
fvmap = {}
for i in range(self.nof_feats):
fvmap[f'f{i}'] = dict()
for j, v in enumerate(sorted(self.feats[i])):
fvmap[f'f{i}'][j] = (self.names[i], True, v)
if len(self.feats[i]) > 2:
m = len(self.feats[i])
for j, v in enumerate(sorted(self.feats[i])):
fvmap[f'f{i}'][j+m] = (self.names[i], False, v)
return fvmap
\ No newline at end of file
......@@ -45,7 +45,7 @@ class DecisionTree():
Simple decision tree class.
"""
def __init__(self, from_dt=None, mapfile=None, mapping_features=None, verbose=0):
def __init__(self, from_dt=None, mapfile=None, verbose=0):
"""
Constructor.
"""
......@@ -63,8 +63,6 @@ class DecisionTree():
self.fdoms = {}
self.fvmap = {}
self.features_names = mapping_features
# OHE mapping
OHEMap = collections.namedtuple('OHEMap', ['dir', 'opp'])
self.ohmap = OHEMap(dir={}, opp={})
......@@ -79,7 +77,6 @@ class DecisionTree():
for v in self.fdoms[f]:
self.fvmap[tuple([f, v])] = '{0}={1}'.format(f, v)
def from_dt(self, data):
"""
Get the tree from a file pointer.
......@@ -134,7 +131,7 @@ class DecisionTree():
# simplifying the features and their domains
self.feats = sorted(self.feats)
self.feids = {f: i for i, f in enumerate(self.feats)}
#self.feids = {f: i for i, f in enumerate(self.feats)}
self.fdoms = {f: sorted(self.fdoms[f]) for f in self.fdoms}
# here we assume all features are present in the tree
......@@ -171,9 +168,13 @@ class DecisionTree():
# skipping the first comment line if necessary
lines = lines[1:]
# number of features
self.nof_feats = int(lines[0].strip())
self.feids = {}
for line in lines[1:]:
feat, val, real = line.split()
self.fvmap[tuple([feat, int(val)])] = '{0}{1}'.format(self.features_names[feat], real)
self.fvmap[tuple([feat, int(val)])] = '{0}{1}'.format(feat, real)
#if feat not in self.feids:
# self.feids[feat] = len(self.feids)
......@@ -331,19 +332,18 @@ class DecisionTree():
"""
Compute a given number of explanations.
"""
self.feids = {f[0]: i for i, f in enumerate(inst)}
inst_dic = {}
for i in range(len(inst)):
inst_dic[inst[i][0]] = np.float32(inst[i][1])
path, term, depth = self.execute(inst, pathlits)
#contaiins all the elements for explanation
explanation_dic = {}
#instance plotting
explanation_dic["Instance : "] = str(inst_dic)
explanation_dic["Instance : "] = str([self.fvmap[inst[i]] for i in range (len(inst))])
#decision path
decision_path_str = 'IF {0} THEN class={1}'.format(' AND '.join([str(inst[self.feids[self.nodes[n].feat]]) for n in path]), term)
decision_path_str = 'IF {0} THEN class={1}'.format(' AND '.join([self.fvmap[inst[self.feids[self.nodes[n].feat]]] for n in path]), term)
explanation_dic["Decision path of instance : "] = decision_path_str
explanation_dic["Decision path length : "] = 'Path length is :'+ str(depth)
......@@ -375,8 +375,8 @@ class DecisionTree():
with Hitman(bootstrap_with=to_hit, solver='m22', htype=htype) as hitman:
expls = []
for i, expl in enumerate(hitman.enumerate(), 1):
list_expls.append([ str(p[0]) + "=" + str(p[1]) for p in expl])
list_expls_str.append('Explanation: IF {0} THEN class={1}'.format(' AND '.join([str(p) for p in sorted(expl, key=lambda p: p[0])]), term))
list_expls.append([self.fvmap[p] for p in sorted(expl, key=lambda p: p[0])])
list_expls_str.append('Explanation: IF {0} THEN class={1}'.format(' AND '.join([self.fvmap[p] for p in sorted(expl, key=lambda p: p[0])]), term))
expls.append(expl)
if i == enum:
......@@ -409,7 +409,7 @@ class DecisionTree():
list_expls_str = []
explanation = {}
for expl in expls:
list_expls_str.append('Contrastive: IF {0} THEN class!={1}'.format(' OR '.join(['!{0}'.format(str(p)) for p in sorted(expl, key=lambda p: p[0])]), term))
list_expls_str.append('Contrastive: IF {0} THEN class!={1}'.format(' OR '.join(['!{0}'.format(self.fvmap[p]) for p in sorted(expl, key=lambda p: p[0])]), term))
explanation["List of contrastive explanation(s)"] = list_expls_str
explanation["Number of contrastive explanation(s) : "]=str(len(expls))
......
......@@ -50,7 +50,7 @@ def visualize(dt):
# non-terminal nodes
for n in dt.nodes:
g.add_node(n, label=dt.features_names[dt.nodes[n].feat])
g.add_node(n, label=dt.nodes[n].feat)
node = g.get_node(n)
node.attr['shape'] = 'circle'
node.attr['fontsize'] = 13
......@@ -98,7 +98,7 @@ def visualize_instance(dt, instance):
# non-terminal nodes
for n in dt.nodes:
g.add_node(n, label=dt.features_names[dt.nodes[n].feat])
g.add_node(n, label=dt.nodes[n].feat)
node = g.get_node(n)
node.attr['shape'] = 'circle'
node.attr['fontsize'] = 13
......@@ -156,7 +156,7 @@ def visualize_expl(dt, instance, expl):
# non-terminal nodes
for n in dt.nodes:
g.add_node(n, label=dt.features_names[dt.nodes[n].feat])
g.add_node(n, label=dt.nodes[n].feat)
node = g.get_node(n)
node.attr['shape'] = 'circle'
node.attr['fontsize'] = 13
......@@ -183,8 +183,9 @@ def visualize_expl(dt, instance, expl):
#instance path in dashed
if ((n1,n2) in edges_instance) or (n2_type=='square' and (n1, "term:"+ dt.terms[n2]) in edges_instance):
edge.attr['style'] = 'dashed'
if edge.attr['label'] in expl:
edge.attr['color'] = 'blue'
for label in edge.attr['label'].split('\n'):
if label in expl:
edge.attr['color'] = 'blue'
edge.attr['fontsize'] = 10
edge.attr['arrowsize'] = 0.8
......
......@@ -82,7 +82,7 @@ def scores_tree(node, sample):
#
#==============================================================================
def get_json_tree(model, tool, maxdepth=None, fname=None):
def get_json_tree(model, tool, maxdepth=None):
"""
returns the dtree in JSON format
"""
......@@ -116,19 +116,136 @@ class UploadedDecisionTree:
""" A decision tree.
This object provides a common interface to many different types of models.
"""
def __init__(self, model, tool, fname, maxdepth, feature_names=None, nb_classes = 0):
def __init__(self, model, tool, maxdepth, feature_names=None, nb_classes = 0):
self.tool = tool
self.model = model
self.tree = None
self.depth = None
self.n_nodes = None
json_tree = get_json_tree(self.model, self.tool, maxdepth, fname)
json_tree = get_json_tree(self.model, self.tool, maxdepth)
self.tree, self.n_nodes, self.depth = self.build_tree(json_tree, feature_names)
def print_tree(self):
print("DT model:")
walk_tree(self.tree)
def dump(self, fvmap, filename=None, maxdepth=None, feat_names=None):
"""
save the dtree and data map in .dt/.map file
"""
def walk_tree(node, domains, internal, terminal):
"""
extract internal (non-term) & terminal nodes
"""
if (len(node.children) == 0): # leaf node
terminal.append((node.id, node.values))
else:
assert (node.children[0].id == node.left_node_id)
assert (node.children[1].id == node.right_node_id)
f = f"f{node.feature}"
if self.tool == "DL85":
l,r = (1,0)
internal.append((node.id, f, l, node.children[0].id))
internal.append((node.id, f, r, node.children[1].id))
elif self.tool == "ITI":
#l,r = (0,1)
if len(fvmap[f]) > 2:
n = 0
for v in fvmap[f]:
if (fvmap[f][v][2] == node.threshold) and \
(fvmap[f][v][1] == True):
l = v
n = n + 1
if (fvmap[f][v][2] == node.threshold) and \
(fvmap[f][v][1] == False):
r = v
n = n + 1
assert (n == 2)
elif (fvmap[f][0][2] == node.threshold):
l,r = (0,1)
else:
assert (fvmap[f][1][2] == node.threshold)
l,r = (1,0)
internal.append((node.id, f, l, node.children[0].id))
internal.append((node.id, f, r, node.children[1].id))
elif self.tool == "IAI":
left, right = [], []
for p in fvmap[f]:
if fvmap[f][p][1] == True:
assert (fvmap[f][p][2] in node.split)
if node.split[fvmap[f][p][2]]:
left.append(p)
else:
right.append(p)
internal.extend([(node.id, f, l, node.children[0].id) for l in left])
internal.extend([(node.id, f, r, node.children[1].id) for r in right])
elif self.tool == 'SKL':
left, right = [], []
for j in domains[f]:
if np.float32(fvmap[f][j][2]) <= np.float32(node.threshold):
left.append(j)
else:
right.append(j)
internal.extend([(node.id, f, l, node.children[0].id) for l in left])
internal.extend([(node.id, f, r, node.children[1].id) for r in right])
dom0, dom1 = dict(), dict()
dom0.update(domains)
dom1.update(domains)
dom0[f] = left
dom1[f] = right
else:
assert False, 'Unhandled model type: {0}'.format(self.tool)
internal, terminal = walk_tree(node.children[0], dom0, internal, terminal)
internal, terminal = walk_tree(node.children[1], dom1, internal, terminal)
return internal, terminal
domains = {f:[j for j in fvmap[f] if((fvmap[f][j][1]))] for f in fvmap}
internal, terminal = walk_tree(self.tree, domains, [], [])
dt = f"{self.n_nodes}\n{self.tree.id}\n"
dt += f"I {' '.join(dict.fromkeys([str(i) for i,_,_,_ in internal]))}\n"
dt +=f"T {' '.join([str(i) for i,_ in terminal ])}\n"
for i,c in terminal:
dt +=f"{i} T {c}\n"
for i,f, j, n in internal:
dt +=f"{i} {f} {j} {n}\n"
map = "Categorical\n"
map += f"{len(fvmap)}"
for f in fvmap:
for v in fvmap[f]:
if (fvmap[f][v][1] == True):
map += f"\n{f} {v} ={fvmap[f][v][2]}"
if (fvmap[f][v][1] == False) and self.tool == "ITI":
map += f"\n{f} {v} !={fvmap[f][v][2]}"
if feat_names is not None:
features_names_mapping = ''
for i,fid in enumerate(feat_names):
f=f'f{i}'
features_names_mapping += f'T:C,{fid}:{f},'+",".join([f'{fvmap[f][v][2]}:{v}' for v in fvmap[f] if(fvmap[f][v][1])])+'\n'
return dt, map, features_names_mapping
def convert_dt(self, feat_names):
"""
save dtree in .dt format & generate dtree map from the tree
......@@ -182,18 +299,14 @@ class UploadedDecisionTree:
map += f"\n{f} {j} <={t}"
map += f"\n{f} {j+1} >{t}"
features_names_mapping = {}
for i,fid in enumerate(feat_names):
f=f'f{i}'
if f in self.intvs:
features_names_mapping[f] = fid
#features_names_mapping += ",".join([f'{t}:{j}' for j,t in enumerate(self.intvs[f])])+'\n'
#thresholds = self.intvs[f][:-1]+[self.intvs[f][-2]]
#fp.write(",".join([f'{t}:{j}' for j,t in enumerate(thresholds)])+'\n')
if feat_names is not None:
features_names_mapping = ''
for i,fid in enumerate(feat_names):
f=f'f{i}'
if f in self.intvs:
features_names_mapping += f'\n Categorical,{fid}:{f},'
features_names_mapping += ",".join([f'{t}:{j}' for j,t in enumerate(self.intvs[f])])
print(dt)
print(map)
print(features_names_mapping)
return dt, map, features_names_mapping
......
......@@ -19,6 +19,7 @@ class Model():
self.ml_model = ''
self.pretrained_model = ''
self.model_dataset = ''
self.instance = ''
......@@ -33,9 +34,12 @@ class Model():
self.component_class = self.dict_components[self.ml_model]
self.component_class = globals()[self.component_class]
def update_pretrained_model(self, pretrained_model_update, filename_model):
def update_pretrained_model(self, pretrained_model_update):
self.pretrained_model = pretrained_model_update
self.component = self.component_class(self.pretrained_model, filename_model)
def update_pretrained_model_dataset(self, model_dataset):
self.model_dataset = model_dataset
self.component = self.component_class(self.pretrained_model, self.model_dataset)
def update_instance(self, instance, enum, xtype, solver="g3"):
self.instance = instance
......@@ -65,6 +69,16 @@ class View():
),
html.Div(id='pretrained_model_filename')])
self.model_dataset = html.Div([
dcc.Upload(
id='model_dataset_choice',
children=html.Div([
'Drag and Drop or ',
html.A('Select File')
]),
className="upload"
)])
self.instance_upload = html.Div([
dcc.Upload(
id='ml_instance_choice',
......@@ -87,6 +101,9 @@ class View():
html.Br(),
self.pretrained_model_upload,
html.Hr(),
html.Label("Choose the pretrained model dataset : "),
self.model_dataset,
html.Hr(),
html.Label("Choose the instance to explain : "),
html.Br(),
self.instance_upload,
......
......@@ -21,6 +21,19 @@ def parse_contents_graph(contents, filename):
return data
def parse_contents_data(contents, filename):
content_type, content_string = contents.split(',')
decoded = base64.b64decode(content_string)
try:
if '.csv' in filename:
data = decoded.decode('utf-8')
except Exception as e:
print(e)
return html.Div([
'There was an error processing this file.'
])
return data
def parse_contents_instance(contents, filename):
content_type, content_string = contents.split(',')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment