erasmopurif committed on
Commit
d2a8669
1 Parent(s): 255e550

First commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Homepage.py +59 -0
  2. README.md +8 -8
  3. ali.graphml +220 -0
  4. imgs/fairup_architecture.jpeg +0 -0
  5. imgs/fairup_architecture.png +0 -0
  6. imgs/logo_ovgu_dtdh.png +0 -0
  7. imgs/logo_ovgu_fin_en.jpg +0 -0
  8. nba.graphml +0 -0
  9. pages/1_Framework.py +796 -0
  10. pages/ovgu_logo.png +0 -0
  11. pages/setup.sh +7 -0
  12. presets/Presets.py +259 -0
  13. presets/__pycache__/FairGNN_preset.cpython-310.pyc +0 -0
  14. presets/__pycache__/Presets.cpython-310.pyc +0 -0
  15. requirements.txt +3 -0
  16. src/__pycache__/fainress_component.cpython-37.pyc +0 -0
  17. src/__pycache__/fainress_component.cpython-39.pyc +0 -0
  18. src/__pycache__/utils.cpython-37.pyc +0 -0
  19. src/__pycache__/utils.cpython-39.pyc +0 -0
  20. src/aif360/README.md +0 -0
  21. src/aif360/__init__.py +4 -0
  22. src/aif360/__pycache__/__init__.cpython-37.pyc +0 -0
  23. src/aif360/__pycache__/__init__.cpython-39.pyc +0 -0
  24. src/aif360/__pycache__/decorating_metaclass.cpython-37.pyc +0 -0
  25. src/aif360/__pycache__/decorating_metaclass.cpython-39.pyc +0 -0
  26. src/aif360/aif360-r/.Rbuildignore +12 -0
  27. src/aif360/aif360-r/.gitignore +7 -0
  28. src/aif360/aif360-r/CODEOFCONDUCT.md +44 -0
  29. src/aif360/aif360-r/CONTRIBUTING.md +30 -0
  30. src/aif360/aif360-r/DESCRIPTION +24 -0
  31. src/aif360/aif360-r/LICENSE.md +194 -0
  32. src/aif360/aif360-r/NAMESPACE +24 -0
  33. src/aif360/aif360-r/R/binary_label_dataset_metric.R +43 -0
  34. src/aif360/aif360-r/R/classification_metric.R +114 -0
  35. src/aif360/aif360-r/R/dataset.R +71 -0
  36. src/aif360/aif360-r/R/dataset_metric.R +42 -0
  37. src/aif360/aif360-r/R/import.R +23 -0
  38. src/aif360/aif360-r/R/inprocessing_adversarial_debiasing.R +73 -0
  39. src/aif360/aif360-r/R/inprocessing_prejudice_remover.R +26 -0
  40. src/aif360/aif360-r/R/postprocessing_reject_option_classification.R +85 -0
  41. src/aif360/aif360-r/R/preprocessing_disparate_impact_remover.R +27 -0
  42. src/aif360/aif360-r/R/preprocessing_reweighing.R +25 -0
  43. src/aif360/aif360-r/R/standard_datasets.R +31 -0
  44. src/aif360/aif360-r/R/utils.R +89 -0
  45. src/aif360/aif360-r/R/zzz.R +4 -0
  46. src/aif360/aif360-r/README.Rmd +150 -0
  47. src/aif360/aif360-r/README.md +155 -0
  48. src/aif360/aif360-r/cran-comments.md +10 -0
  49. src/aif360/aif360-r/inst/examples/test.R +20 -0
  50. src/aif360/aif360-r/inst/extdata/actual_data.csv +21 -0
Homepage.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit landing page for the FairUP framework.

Shows the institution logo, the project title and authors, a short
description of the framework's capabilities, and the architecture diagram.
"""
import streamlit as st
from PIL import Image
import os

# On Streamlit Sharing, (re)create the conda environment shipped with the app.
if 'STREAMLIT_PRODUCTION' in os.environ:
    with open('test.yml', 'r') as env_file:
        env_spec = env_file.read()
    # Strip the machine-specific `prefix:` line before recreating the env.
    with open('test_tmp.yml', 'w') as tmp_file:
        tmp_file.write(env_spec.replace('prefix: /', ''))
    os.system('conda env create -f test_tmp.yml')
    # NOTE(review): each os.system call runs in its own subshell, so this
    # `source activate` cannot change the environment of the running
    # Streamlit process — confirm whether it is still needed.
    os.system('source activate ./envs/$(head -1 test_tmp.yml | cut -d " " -f2)')

st.set_page_config(
    page_title="Homepage",
    layout="wide"
)

# Institution logo at the top of the page.
ovgu_logo = Image.open('imgs/logo_ovgu_fin_en.jpg')
st.image(ovgu_logo)

st.title("FairUP: a Framework for Fairness Analysis of Graph Neural Network-Based User Profiling Models 🚀")
st.markdown("##### *Mohamed Abdelrazek, Erasmo Purificato, Ludovico Boratto, and Ernesto William De Luca*")

st.markdown("## Description")
st.markdown("""
**FairUP** is a standardised framework that empowers researchers and practitioners to simultaneously analyse state-of-the-art Graph Neural Network-based models for user profiling task, in terms of classification performance and fairness metrics scores.

The framework, whose architecture is shown below, presents several components, which allow end-users to:
* compute the fairness of the input dataset by means of a pre-processing fairness metric, i.e. *disparate impact*;
* mitigate the unfairness of the dataset, if needed, by applying different debiasing methods, i.e. *sampling*, *reweighting* and *disparate impact remover*;
* standardise the input (a graph in Neo4J or NetworkX format) for each of the included GNNs;
* train one or more GNN models, specifying the parameters for each of them;
* evaluate post-hoc fairness by exploiting four metrics, i.e. *statistical parity*, *equal opportunity*, *overall accuracy equality*, *treatment equality*.
""")

# Vertical space before the architecture figure.
st.text("")

arch_img = Image.open('imgs/fairup_architecture.png')
st.image(arch_img, caption="Logical architecture of FairUP framework")
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: FairUP
3
- emoji: 👀
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.19.0
8
- app_file: app.py
9
- pinned: false
10
  license: cc-by-4.0
11
  ---
12
 
 
1
  ---
2
+ title: FairUP
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.19.0
8
+ app_file: Homepage.py
9
+ pinned: false
10
  license: cc-by-4.0
11
  ---
12
 
ali.graphml ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding='utf-8'?>
2
+ <graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd"><key id="d17" for="node" attr.name="price" attr.type="double"/>
3
+ <key id="d16" for="node" attr.name="brand" attr.type="double"/>
4
+ <key id="d15" for="node" attr.name="customer" attr.type="long"/>
5
+ <key id="d14" for="node" attr.name="campaign_id" attr.type="long"/>
6
+ <key id="d13" for="node" attr.name="cate_id" attr.type="long"/>
7
+ <key id="d12" for="node" attr.name="clk" attr.type="long"/>
8
+ <key id="d11" for="node" attr.name="nonclk" attr.type="long"/>
9
+ <key id="d10" for="node" attr.name="pid" attr.type="string"/>
10
+ <key id="d9" for="node" attr.name="adgroup_id" attr.type="long"/>
11
+ <key id="d8" for="node" attr.name="time_stamp" attr.type="long"/>
12
+ <key id="d7" for="node" attr.name="new_user_class_level" attr.type="double"/>
13
+ <key id="d6" for="node" attr.name="occupation" attr.type="double"/>
14
+ <key id="d5" for="node" attr.name="shopping_level" attr.type="double"/>
15
+ <key id="d4" for="node" attr.name="pvalue_level" attr.type="double"/>
16
+ <key id="d3" for="node" attr.name="age_level" attr.type="double"/>
17
+ <key id="d2" for="node" attr.name="final_gender_code" attr.type="double"/>
18
+ <key id="d1" for="node" attr.name="cms_group_id" attr.type="double"/>
19
+ <key id="d0" for="node" attr.name="cms_segid" attr.type="double"/>
20
+ <graph edgedefault="directed"><node id="523">
21
+ <data key="d0">5.0</data>
22
+ <data key="d1">2.0</data>
23
+ <data key="d2">2.0</data>
24
+ <data key="d3">2.0</data>
25
+ <data key="d4">1.0</data>
26
+ <data key="d5">3.0</data>
27
+ <data key="d6">1.0</data>
28
+ <data key="d7">2.0</data>
29
+ <data key="d8">1494506876</data>
30
+ <data key="d9">95657</data>
31
+ <data key="d10">430548_1007</data>
32
+ <data key="d11">1</data>
33
+ <data key="d12">0</data>
34
+ <data key="d13">6412</data>
35
+ <data key="d14">160512</data>
36
+ <data key="d15">82513</data>
37
+ <data key="d16">26994.0</data>
38
+ <data key="d17">619.0</data>
39
+ </node>
40
+ <node id="830671">
41
+ <data key="d0">34.0</data>
42
+ <data key="d1">4.0</data>
43
+ <data key="d2">2.0</data>
44
+ <data key="d3">4.0</data>
45
+ <data key="d4">3.0</data>
46
+ <data key="d5">3.0</data>
47
+ <data key="d6">0.0</data>
48
+ <data key="d7">3.0</data>
49
+ <data key="d8">1494668843</data>
50
+ <data key="d9">95657</data>
51
+ <data key="d10">430539_1007</data>
52
+ <data key="d11">1</data>
53
+ <data key="d12">0</data>
54
+ <data key="d13">6412</data>
55
+ <data key="d14">160512</data>
56
+ <data key="d15">82513</data>
57
+ <data key="d16">26994.0</data>
58
+ <data key="d17">619.0</data>
59
+ </node>
60
+ <node id="567632">
61
+ <data key="d0">66.0</data>
62
+ <data key="d1">9.0</data>
63
+ <data key="d2">1.0</data>
64
+ <data key="d3">3.0</data>
65
+ <data key="d4">2.0</data>
66
+ <data key="d5">3.0</data>
67
+ <data key="d6">0.0</data>
68
+ <data key="d7">2.0</data>
69
+ <data key="d8">1494603840</data>
70
+ <data key="d9">95657</data>
71
+ <data key="d10">430539_1007</data>
72
+ <data key="d11">1</data>
73
+ <data key="d12">0</data>
74
+ <data key="d13">6412</data>
75
+ <data key="d14">160512</data>
76
+ <data key="d15">82513</data>
77
+ <data key="d16">26994.0</data>
78
+ <data key="d17">619.0</data>
79
+ </node>
80
+ <node id="16333">
81
+ <data key="d0">91.0</data>
82
+ <data key="d1">11.0</data>
83
+ <data key="d2">1.0</data>
84
+ <data key="d3">5.0</data>
85
+ <data key="d4">2.0</data>
86
+ <data key="d5">3.0</data>
87
+ <data key="d6">0.0</data>
88
+ <data key="d7">2.0</data>
89
+ <data key="d8">1494561928</data>
90
+ <data key="d9">95657</data>
91
+ <data key="d10">430539_1007</data>
92
+ <data key="d11">1</data>
93
+ <data key="d12">0</data>
94
+ <data key="d13">6412</data>
95
+ <data key="d14">160512</data>
96
+ <data key="d15">82513</data>
97
+ <data key="d16">26994.0</data>
98
+ <data key="d17">619.0</data>
99
+ </node>
100
+ <node id="521847">
101
+ <data key="d0">20.0</data>
102
+ <data key="d1">3.0</data>
103
+ <data key="d2">2.0</data>
104
+ <data key="d3">3.0</data>
105
+ <data key="d4">2.0</data>
106
+ <data key="d5">3.0</data>
107
+ <data key="d6">0.0</data>
108
+ <data key="d7">3.0</data>
109
+ <data key="d8">1494579049</data>
110
+ <data key="d9">95657</data>
111
+ <data key="d10">430548_1007</data>
112
+ <data key="d11">1</data>
113
+ <data key="d12">0</data>
114
+ <data key="d13">6412</data>
115
+ <data key="d14">160512</data>
116
+ <data key="d15">82513</data>
117
+ <data key="d16">26994.0</data>
118
+ <data key="d17">619.0</data>
119
+ </node>
120
+ <node id="227111">
121
+ <data key="d0">8.0</data>
122
+ <data key="d1">2.0</data>
123
+ <data key="d2">2.0</data>
124
+ <data key="d3">2.0</data>
125
+ <data key="d4">2.0</data>
126
+ <data key="d5">3.0</data>
127
+ <data key="d6">0.0</data>
128
+ <data key="d7">3.0</data>
129
+ <data key="d8">1494559984</data>
130
+ <data key="d9">95657</data>
131
+ <data key="d10">430539_1007</data>
132
+ <data key="d11">1</data>
133
+ <data key="d12">0</data>
134
+ <data key="d13">6412</data>
135
+ <data key="d14">160512</data>
136
+ <data key="d15">82513</data>
137
+ <data key="d16">26994.0</data>
138
+ <data key="d17">619.0</data>
139
+ </node>
140
+ <node id="632984">
141
+ <data key="d0">89.0</data>
142
+ <data key="d1">11.0</data>
143
+ <data key="d2">1.0</data>
144
+ <data key="d3">5.0</data>
145
+ <data key="d4">1.0</data>
146
+ <data key="d5">3.0</data>
147
+ <data key="d6">0.0</data>
148
+ <data key="d7">4.0</data>
149
+ <data key="d8">1494566502</data>
150
+ <data key="d9">95657</data>
151
+ <data key="d10">430548_1007</data>
152
+ <data key="d11">1</data>
153
+ <data key="d12">0</data>
154
+ <data key="d13">6412</data>
155
+ <data key="d14">160512</data>
156
+ <data key="d15">82513</data>
157
+ <data key="d16">26994.0</data>
158
+ <data key="d17">619.0</data>
159
+ </node>
160
+ <node id="912028">
161
+ <data key="d0">20.0</data>
162
+ <data key="d1">3.0</data>
163
+ <data key="d2">2.0</data>
164
+ <data key="d3">3.0</data>
165
+ <data key="d4">2.0</data>
166
+ <data key="d5">3.0</data>
167
+ <data key="d6">0.0</data>
168
+ <data key="d7">3.0</data>
169
+ <data key="d8">1494276088</data>
170
+ <data key="d9">95657</data>
171
+ <data key="d10">430539_1007</data>
172
+ <data key="d11">1</data>
173
+ <data key="d12">0</data>
174
+ <data key="d13">6412</data>
175
+ <data key="d14">160512</data>
176
+ <data key="d15">82513</data>
177
+ <data key="d16">26994.0</data>
178
+ <data key="d17">619.0</data>
179
+ </node>
180
+ <node id="120208">
181
+ <data key="d0">77.0</data>
182
+ <data key="d1">10.0</data>
183
+ <data key="d2">1.0</data>
184
+ <data key="d3">4.0</data>
185
+ <data key="d4">1.0</data>
186
+ <data key="d5">3.0</data>
187
+ <data key="d6">0.0</data>
188
+ <data key="d7">2.0</data>
189
+ <data key="d8">1494563600</data>
190
+ <data key="d9">95657</data>
191
+ <data key="d10">430548_1007</data>
192
+ <data key="d11">1</data>
193
+ <data key="d12">0</data>
194
+ <data key="d13">6412</data>
195
+ <data key="d14">160512</data>
196
+ <data key="d15">82513</data>
197
+ <data key="d16">26994.0</data>
198
+ <data key="d17">619.0</data>
199
+ </node>
200
+ <node id="390080">
201
+ <data key="d0">6.0</data>
202
+ <data key="d1">2.0</data>
203
+ <data key="d2">2.0</data>
204
+ <data key="d3">2.0</data>
205
+ <data key="d4">2.0</data>
206
+ <data key="d5">2.0</data>
207
+ <data key="d6">0.0</data>
208
+ <data key="d7">4.0</data>
209
+ <data key="d8">1494296612</data>
210
+ <data key="d9">95657</data>
211
+ <data key="d10">430539_1007</data>
212
+ <data key="d11">1</data>
213
+ <data key="d12">0</data>
214
+ <data key="d13">6412</data>
215
+ <data key="d14">160512</data>
216
+ <data key="d15">82513</data>
217
+ <data key="d16">26994.0</data>
218
+ <data key="d17">619.0</data>
219
+ </node>
220
+ </graph></graphml>
imgs/fairup_architecture.jpeg ADDED
imgs/fairup_architecture.png ADDED
imgs/logo_ovgu_dtdh.png ADDED
imgs/logo_ovgu_fin_en.jpg ADDED
nba.graphml ADDED
The diff for this file is too large to render. See raw diff
 
pages/1_Framework.py ADDED
@@ -0,0 +1,796 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import time
4
+ import pandas as pd
5
+ import os
6
+ import paramiko
7
+ import threading
8
+ import queue
9
+ import warnings
10
+ import re
11
+ import subprocess
12
+ from presets import Presets
13
+ import random
14
+ #from src import main
15
+
16
+
17
+
18
+ st.set_page_config(layout="wide")
19
+ st.warning('Note: We are running out with GPU problems. The GNN models are currently running on CPU and some of the Framework capabilities may not be available. We apologise for the inconvenience and we will fix that soon.', icon="⚠️")
20
+
21
+ st.header('')
22
+ ovgu_img = Image.open('imgs/logo_ovgu_fin_en.jpg')
23
+ st.image(ovgu_img)
24
+ st.title("FairUP: a Framework for Fairness Analysis of Graph Neural Network-Based User Profiling Models. 🚀")
25
+
26
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
27
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
28
+ warnings.filterwarnings("ignore")
29
+
30
+ nba_columns = ['user_id', 'SALARY', 'AGE', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA',
31
+ '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB',
32
+ 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF_x', 'POINTS', 'GP', 'MPG',
33
+ 'ORPM', 'DRPM', 'RPM', 'WINS_RPM', 'PIE', 'PACE', 'W', 'player_height',
34
+ 'player_weight', 'country', 'C', 'PF_y', 'PF-C', 'PG', 'SF', 'SG',
35
+ 'ATL', 'ATL/CLE', 'ATL/LAL', 'BKN', 'BKN/WSH', 'BOS', 'CHA', 'CHI',
36
+ 'CHI/OKC', 'CLE', 'CLE/DAL', 'CLE/MIA', 'DAL', 'DAL/BKN', 'DAL/PHI',
37
+ 'DEN', 'DEN/CHA', 'DEN/POR', 'DET', 'GS', 'GS/CHA', 'GS/SAC', 'HOU',
38
+ 'HOU/LAL', 'HOU/MEM', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL',
39
+ 'MIL/CHA', 'MIN', 'NO', 'NO/DAL', 'NO/MEM', 'NO/MIL', 'NO/MIN/SAC',
40
+ 'NO/ORL', 'NO/SAC', 'NY', 'NY/PHI', 'OKC', 'ORL', 'ORL/TOR', 'PHI',
41
+ 'PHI/OKC', 'PHX', 'POR', 'SA', 'SAC', 'TOR', 'UTAH', 'WSH']
42
+
43
+ pokec_columns = ['user_id',
44
+ 'public',
45
+ 'completion_percentage',
46
+ 'gender',
47
+ 'region',
48
+ 'AGE',
49
+ 'I_am_working_in_field',
50
+ 'spoken_languages_indicator',
51
+ 'anglicky',
52
+ 'nemecky',
53
+ 'rusky',
54
+ 'francuzsky',
55
+ 'spanielsky',
56
+ 'taliansky',
57
+ 'slovensky',
58
+ 'japonsky',
59
+ 'hobbies_indicator',
60
+ 'priatelia',
61
+ 'sportovanie',
62
+ 'pocuvanie hudby',
63
+ 'pozeranie filmov',
64
+ 'spanie',
65
+ 'kupalisko',
66
+ 'party',
67
+ 'cestovanie',
68
+ 'kino',
69
+ 'diskoteky',
70
+ 'nakupovanie',
71
+ 'tancovanie',
72
+ 'turistika',
73
+ 'surfovanie po webe',
74
+ 'praca s pc',
75
+ 'sex',
76
+ 'pc hry',
77
+ 'stanovanie',
78
+ 'varenie',
79
+ 'jedlo',
80
+ 'fotografovanie',
81
+ 'citanie',
82
+ 'malovanie',
83
+ 'chovatelstvo',
84
+ 'domace prace',
85
+ 'divadlo',
86
+ 'prace okolo domu',
87
+ 'prace v zahrade',
88
+ 'chodenie do muzei',
89
+ 'zberatelstvo',
90
+ 'hackovanie',
91
+ 'I_most_enjoy_good_food_indicator',
92
+ 'pri telke',
93
+ 'v dobrej restauracii',
94
+ 'pri svieckach s partnerom',
95
+ 'v posteli',
96
+ 'v prirode',
97
+ 'z partnerovho bruska',
98
+ 'v kuchyni pri stole',
99
+ 'pets_indicator',
100
+ 'pes',
101
+ 'mam psa',
102
+ 'nemam ziadne',
103
+ 'macka',
104
+ 'rybky',
105
+ 'mam macku',
106
+ 'mam rybky',
107
+ 'vtacik',
108
+ 'body_type_indicator',
109
+ 'priemerna',
110
+ 'vysportovana',
111
+ 'chuda',
112
+ 'velka a pekna',
113
+ 'tak trosku pri sebe',
114
+ 'eye_color_indicator',
115
+ 'hnede',
116
+ 'modre',
117
+ 'zelene',
118
+ 'hair_color_indicator',
119
+ 'cierne',
120
+ 'blond',
121
+ 'plave',
122
+ 'hair_type_indicator',
123
+ 'kratke',
124
+ 'dlhe',
125
+ 'rovne',
126
+ 'po plecia',
127
+ 'kucerave',
128
+ 'na jezka',
129
+ 'completed_level_of_education_indicator',
130
+ 'stredoskolske',
131
+ 'zakladne',
132
+ 'vysokoskolske',
133
+ 'ucnovske',
134
+ 'favourite_color_indicator',
135
+ 'modra',
136
+ 'cierna',
137
+ 'cervena',
138
+ 'biela',
139
+ 'zelena',
140
+ 'fialova',
141
+ 'zlta',
142
+ 'ruzova',
143
+ 'oranzova',
144
+ 'hneda',
145
+ 'relation_to_smoking_indicator',
146
+ 'nefajcim',
147
+ 'fajcim pravidelne',
148
+ 'fajcim prilezitostne',
149
+ 'uz nefajcim',
150
+ 'relation_to_alcohol_indicator',
151
+ 'pijem prilezitostne',
152
+ 'abstinent',
153
+ 'nepijem',
154
+ 'on_pokec_i_am_looking_for_indicator',
155
+ 'dobreho priatela',
156
+ 'priatelku',
157
+ 'niekoho na chatovanie',
158
+ 'udrzujem vztahy s priatelmi',
159
+ 'vaznu znamost',
160
+ 'sexualneho partnera',
161
+ 'dlhodoby seriozny vztah',
162
+ 'love_is_for_me_indicator',
163
+ 'nie je nic lepsie',
164
+ 'ako byt zamilovany(a)',
165
+ 'v laske vidim zmysel zivota',
166
+ 'v laske som sa sklamal(a)',
167
+ 'preto som velmi opatrny(a)',
168
+ 'laska je zakladom vyrovnaneho sexualneho zivota',
169
+ 'romanticka laska nie je pre mna',
170
+ 'davam prednost realite',
171
+ 'relation_to_casual_sex_indicator',
172
+ 'nedokazem mat s niekym sex bez lasky',
173
+ 'to skutocne zalezi len na okolnostiach',
174
+ 'sex mozem mat iba s niekym',
175
+ 'koho dobre poznam',
176
+ 'dokazem mat sex s kymkolvek',
177
+ 'kto dobre vyzera',
178
+ 'my_partner_should_be_indicator',
179
+ 'mojou chybajucou polovickou',
180
+ 'laskou mojho zivota',
181
+ 'moj najlepsi priatel',
182
+ 'absolutne zodpovedny a spolahlivy',
183
+ 'hlavne spolocensky typ',
184
+ 'clovek',
185
+ 'ktoreho uplne respektujem',
186
+ 'hlavne dobry milenec',
187
+ 'niekto',
188
+ 'marital_status_indicator',
189
+ 'slobodny(a)',
190
+ 'mam vazny vztah',
191
+ 'zenaty (vydata)',
192
+ 'rozvedeny(a)',
193
+ 'slobodny',
194
+ 'relation_to_children_indicator',
195
+ 'v buducnosti chcem mat deti',
196
+ 'I_like_movies_indicator',
197
+ 'komedie',
198
+ 'akcne',
199
+ 'horory',
200
+ 'serialy',
201
+ 'romanticke',
202
+ 'rodinne',
203
+ 'sci-fi',
204
+ 'historicke',
205
+ 'vojnove',
206
+ 'zahadne',
207
+ 'mysteriozne',
208
+ 'dokumentarne',
209
+ 'eroticke',
210
+ 'dramy',
211
+ 'fantasy',
212
+ 'muzikaly',
213
+ 'kasove trhaky',
214
+ 'umelecke',
215
+ 'alternativne',
216
+ 'I_like_watching_movie_indicator',
217
+ 'doma z gauca',
218
+ 'v kine',
219
+ 'u priatela',
220
+ 'priatelky',
221
+ 'I_like_music_indicator',
222
+ 'disko',
223
+ 'pop',
224
+ 'rock',
225
+ 'rap',
226
+ 'techno',
227
+ 'house',
228
+ 'hitparadovky',
229
+ 'sladaky',
230
+ 'hip-hop',
231
+ 'metal',
232
+ 'soundtracky',
233
+ 'punk',
234
+ 'oldies',
235
+ 'folklor a ludovky',
236
+ 'folk a country',
237
+ 'jazz',
238
+ 'klasicka hudba',
239
+ 'opery',
240
+ 'alternativa',
241
+ 'trance',
242
+ 'I_mostly_like_listening_to_music_indicator',
243
+ 'kedykolvek a kdekolvek',
244
+ 'na posteli',
245
+ 'pri chodzi',
246
+ 'na dobru noc',
247
+ 'na diskoteke',
248
+ 's partnerom',
249
+ 'vo vani',
250
+ 'v aute',
251
+ 'na koncerte',
252
+ 'pri sexe',
253
+ 'v praci',
254
+ 'the_idea_of_good_evening_indicator',
255
+ 'pozerat dobry film v tv',
256
+ 'pocuvat dobru hudbu',
257
+ 's kamaratmi do baru',
258
+ 'ist do kina alebo divadla',
259
+ 'surfovat na sieti a chatovat',
260
+ 'ist na koncert',
261
+ 'citat dobru knihu',
262
+ 'nieco dobre uvarit',
263
+ 'zhasnut svetla a meditovat',
264
+ 'ist do posilnovne',
265
+ 'I_like_specialties_from_kitchen_indicator',
266
+ 'slovenskej',
267
+ 'talianskej',
268
+ 'cinskej',
269
+ 'mexickej',
270
+ 'francuzskej',
271
+ 'greckej',
272
+ 'morske zivocichy',
273
+ 'vegetarianskej',
274
+ 'japonskej',
275
+ 'indickej',
276
+ 'I_am_going_to_concerts_indicator',
277
+ 'ja na koncerty nechodim',
278
+ 'zriedkavo',
279
+ 'my_active_sports_indicator',
280
+ 'plavanie',
281
+ 'futbal',
282
+ 'kolieskove korcule',
283
+ 'lyzovanie',
284
+ 'korculovanie',
285
+ 'behanie',
286
+ 'posilnovanie',
287
+ 'tenis',
288
+ 'hokej',
289
+ 'basketbal',
290
+ 'snowboarding',
291
+ 'pingpong',
292
+ 'auto-moto sporty',
293
+ 'bedminton',
294
+ 'volejbal',
295
+ 'aerobik',
296
+ 'bojove sporty',
297
+ 'hadzana',
298
+ 'skateboarding',
299
+ 'my_passive_sports_indicator',
300
+ 'baseball',
301
+ 'golf',
302
+ 'horolezectvo',
303
+ 'bezkovanie',
304
+ 'surfing',
305
+ 'I_like_books_indicator',
306
+ 'necitam knihy',
307
+ 'o zabave',
308
+ 'humor',
309
+ 'hry',
310
+ 'historicke romany',
311
+ 'rozpravky',
312
+ 'odbornu literaturu',
313
+ 'psychologicku literaturu',
314
+ 'literaturu pre rozvoj osobnosti',
315
+ 'cestopisy',
316
+ 'literaturu faktu',
317
+ 'poeziu',
318
+ 'zivotopisne a pamate',
319
+ 'pocitacovu literaturu',
320
+ 'filozoficku literaturu',
321
+ 'literaturu o umeni a architekture']
322
+
323
+ alibaba_columns = ['userid', 'final_gender_code', 'age_level', 'pvalue_level', 'occupation', 'new_user_class_level ', 'adgroup_id', 'clk', 'cate_id']
324
+ jd_columns = ['user_id',
325
+ 'gender',
326
+ 'age_range',
327
+ 'item_id',
328
+ 'cid1',
329
+ 'cid2',
330
+ 'cid3',
331
+ 'cid1_name',
332
+ 'cid2_name',
333
+ 'cid3_name',
334
+ 'brand_code',
335
+ 'price',
336
+ 'item_name',
337
+ 'seg_name']
338
+
339
+ ##############################
340
+ # Preset
341
+ preset_question = st.radio("Do you want to apply a preset?", ("No", "Yes"))
342
+ with st.expander("More information"):
343
+ st.write("A preset is a pre-defined parameter and model settings that can be choosen by the user to test the Framework easily.")
344
+ st.write("Each preset option is defined by the model name and (in brackets) the dataset which it will be trained on.")
345
+ if preset_question == 'Yes':
346
+ preset_list = ['FairGNN (NBA)', 'RHGN (Alibaba)', 'CatGCN (Alibaba)']
347
+ preset = st.selectbox('Select Preset', preset_list)
348
+ # implment presets as functions?
349
+ if preset == 'FairGNN (NBA)':
350
+ model_type, predict_attr, sens_attr = Presets.FairGNN_NBA()
351
+ elif preset == 'RHGN (Alibaba)':
352
+ model_type, predict_attr, sens_attr = Presets.RHGN_Alibaba()
353
+ elif preset == 'CatGCN (Alibaba)':
354
+ model_type, predict_attr, sens_attr = Presets.CatGCN_Alibaba()
355
+
356
+ Presets.experiment_begin(model_type, predict_attr, sens_attr)
357
+
358
+ elif preset_question == 'No':
359
+ dataset = st.selectbox("Which dataset do you want to evaluate?", ("NBA", "Pokec-z", "Alibaba", "JD"))
360
+ if dataset == "NBA":
361
+ dataset = 'nba'
362
+ predict_attr = st.selectbox("Select prediction label", nba_columns)
363
+ sens_attr = st.selectbox("Select sensitive attribute", nba_columns)
364
+ elif dataset == "Pokec-z":
365
+ dataset = 'pokec_z'
366
+ predict_attr = st.selectbox("Select prediction label", pokec_columns)
367
+ sens_attr = st.selectbox("Select sensitive attribute", pokec_columns)
368
+ elif dataset == "Alibaba":
369
+ dataset = 'alibaba'
370
+ predict_attr = st.selectbox("Select prediction label", alibaba_columns)
371
+ sens_attr = st.selectbox("Select sensitive attribute", alibaba_columns)
372
+ elif dataset == 'JD':
373
+ dataset = 'tecent'
374
+ predict_attr = st.selectbox("Select prediction label", jd_columns)
375
+ sens_attr = st.selectbox("Select sensitive attribute", jd_columns)
376
+
377
+
378
+ # todo get all columns of the selected dataset and change this to a selectbox
379
+ #predict_attr = st.text_input("Enter the prediction label")
380
+ #sens_attr = st.text_input("Enter the senstive attribute")
381
def read_output(stdout, queue):
    """Forward each line of *stdout* to *queue*, stripped of surrounding whitespace.

    Intended to run in a background thread so the UI can poll the queue
    without blocking on the stream.
    """
    enqueue = queue.put
    for raw_line in stdout:
        enqueue(raw_line.strip())
384
+
385
def execute_command_fairness(dataset, sens_attr, predict_attr):
    """Compute dataset fairness for *dataset* and stream the command output to the UI.

    NOTE(review): the paramiko SSH setup that used to define ``ssh``,
    ``stdout_new``, ``stderr_new`` and ``output_queue`` is commented out,
    so the live code below references undefined names and raises
    ``NameError`` when executed. This looks like disabled work-in-progress;
    confirm intent before re-enabling.
    NOTE(review): indentation was reconstructed from a diff view that lost
    leading whitespace — verify block structure against the original file.
    """
    with st.spinner("Loading..."):
        time.sleep(1)
    # (disabled) ssh = paramiko.SSHClient(); ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    # (disabled) ssh.connect(<remote host>, <credentials>) and, per dataset
    # ('nba' / 'alibaba' / 'tecent' / 'pokec_z'), ssh.exec_command(
    #   'cd .../framework-for-fairness-analysis-and-mitigation-main && python3 main.py'
    #   ' --calc_fairness True --dataset_name {} --dataset_path <csv> --special_case True'
    #   ' --sens_attr {} --predict_attr {} --type 1', get_pty=True)
    # which produced stdin_new/stdout_new/stderr_new.
    # (disabled) output_queue = queue.Queue()
    # start a thread to continuously read the output from the stdout object
    test = 'pwd'
    # NOTE(review): os.system returns the command's exit status, not its
    # output — st.text displays an integer here.
    st.text(os.system(test))
    # NameError: stderr_new and output_queue are never defined (see above).
    output_thread = threading.Thread(target=read_output, args=(stderr_new, output_queue))
    output_thread.start()

    # display the output in the Streamlit UI
    while True:
        try:
            line = output_queue.get_nowait()
            st.text(line)
        except queue.Empty:
            # busy-wait until the reader thread finishes draining the stream
            if output_thread.is_alive():
                continue
            else:
                break

    # wait for the thread to finish
    output_thread.join()
    # print the output to the console; surface "Dataset" lines in the UI
    for line in stdout_new:
        print(line.strip())
        if "Dataset" in line:
            st.text(line.strip())
    ssh.close()
429
+
430
+ fairness_evaluation = st.radio("Do you want to evaluate the dataset fairness?", ("No", "Yes"))
431
+ with st.expander("More information"):
432
+ st.write("Evaluate how fair the dataset, namely how much bias is affecting the dataset as a whole using the disparate impact metric.")
433
+ if fairness_evaluation == "Yes":
434
+ if st.button('Calculate Fairness'):
435
+ # todo send command to server to compute fairness
436
+ # then show fairness
437
+ # add info box
438
+ #dataset_fairness = st.write('Dataset Fairness: 1.57 (Fair)')
439
+ #execute_command_fairness(dataset, sens_attr, predict_attr)
440
+ #with open('test_new.yml', 'r') as file:
441
+ # environment = file.read()
442
+ #with open('test_tmp.yml', 'w') as file:
443
+ # file.write(environment.replace('prefix: /', ''))
444
+ #os.system('conda env create --file test_new.yml --name streamlit_env_new')
445
+ #os.system('conda activate streamlit_env_new')
446
+ commands = os.popen('cd src && python main.py --calc_fairness True --dataset_name nba --dataset_path ./datasets/NBA/nba.csv --special_case True --sens_attr country --predict_attr SALARY --type 1').read()
447
+
448
+ #output = os.popen('cd')
449
+ #output = os.popen('python main.py --calc_fairness True --dataset_name nba --dataset_path ./datasets/NBA/nba.csv --special_case True --sens_attr country --predict_attr SALARY --type 1').read()
450
+ #st.text(output)
451
+
452
+ print(commands)
453
+
454
+
455
+ #####################
456
+ debias = st.radio("Do you want to apply debias approaches?", ("No", "Yes"))
457
+ if "Yes" in debias:
458
+ debias_approach = st.selectbox("Select which debias approach you want to apply", ["Sample", "Reweighting", "Disparate remover impact"])
459
+ with st.expander("More information"):
460
+ st.write("You can mitigate the bias using three pre-processing debaising approaches:")
461
+ st.write("Sampling: Generates more data to overcome the bias between the different sensitive attributes and classes.")
462
+ st.write("Reweighting Minimizing the bias in the dataset by assiging different weights to dataset tuples, for example giving the unfavorable sensntive attributes higher weights than favorable sensitive attributes")
463
+ st.write("Disparate impact remover: Transforms the sensitive attribute features in a way that the correlation between the sensitive attribute features and the prediction class is reduced")
464
+
465
+
466
+
467
+
468
+ #if dataset != None:
469
+ #st.markdown("#### Select dataset")
470
+ #uploaded_file = st.file_uploader("Select dataset")
471
+ #dataset_path = st.text_input("", value="")
472
+
473
+ model_type = st.multiselect("Select the models you want to train", ["FairGNN", "RHGN", "CatGCN"])
474
+
475
+ if "RHGN" in model_type and "FairGNN" in model_type:
476
+ st.markdown("### Enter the general parameters")
477
+ seed = st.number_input("Enter the prefered seed number", value=0)
478
+
479
+ #predict_attr = st.text_input("Enter the prediction label")
480
+ #sens_attr = st.text_input("Enter the senstive attribute")
481
+
482
+
483
+ st.markdown("### Enter the RHGN parameters")
484
+ num_hidden = st.text_input("Enter the number of hidden layers", value=0)
485
+ with st.expander("More information"):
486
+ st.write("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
487
+ lr_rhgn = st.number_input("Enter the learning rate for RHGN")
488
+ with st.expander("More information"):
489
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
490
+
491
+ epochs_rhgn = st.number_input("Enter the number of epochs for RHGN", value=0)
492
+ with st.expander("More information"):
493
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
494
+
495
+ clip = st.number_input("Enter the clip value", value=0)
496
+ with st.expander("More information"):
497
+ st.write("The clip number is a hyperparameter that determines the maximum value that the gradient can take. If the gradient exceeds this value, it is clipped (i.e., truncated to the maximum value).")
498
+
499
+
500
+
501
+ st.markdown("### Enter the FairGNN parameters")
502
+ lr_fairgnn = st.number_input("Enter the learning rate for FairGNN")
503
+ with st.expander("More information"):
504
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
505
+ epochs_fairgnn = st.number_input("Enter the number of epochs for FairGNN", value=0)
506
+ with st.expander("More information"):
507
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
508
+ sens_number = st.number_input("Enter the sens number", value=0)
509
+
510
+ label_number = st.number_input("Enter the label number", value=0)
511
+
512
+ num_hidden = st.number_input("Enter the hidden layer number" , value=0)
513
+ with st.expander("More information"):
514
+ st.write("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
515
+ alpha = st.number_input("Enter alpha value", value=0)
516
+ with st.expander("More information"):
517
+ st.write("Refers to the regularization parameter that controls the amount of L2 regularization applied to the model's weights during the training process.")
518
+
519
+ beta = st.number_input("Enter beta value", value=0)
520
+ with st.expander("More information"):
521
+ st.write("Refers to the momentum parameter that controls how much the optimizer should take into account the previous update when computing the current update to the model's weights during the training process.")
522
+
523
+
524
+ if "RHGN" in model_type and "CatGCN" in model_type:
525
+ st.markdown("### Enter the general parameters")
526
+ seed = st.number_input("Enter the prefered seed number", value=0)
527
+ #predict_attr = st.text_input("Enter the prediction label")
528
+ #sens_attr = st.text_input("Enter the senstive attribute")
529
+
530
+ st.markdown("### Enter the RHGN parameters")
531
+ num_hidden = st.text_input("Enter the number of hidden layers")
532
+ with st.expander("More information"):
533
+ st.write("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
534
+ lr_rhgn = st.number_input("Enter the learning rate")
535
+ with st.expander("More information"):
536
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
537
+ epochs_rhgn = st.number_input("Enter the number of epochs", value=0)
538
+ with st.expander("More information"):
539
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
540
+ clip = st.number_input("Enter the clip value", value=0)
541
+ with st.expander("More information"):
542
+ st.write("The clip number is a hyperparameter that determines the maximum value that the gradient can take. If the gradient exceeds this value, it is clipped (i.e., truncated to the maximum value).")
543
+
544
+ st.markdown("### Enter the CatGCN parameters")
545
+ weight_decay = st.number_input("Enter the weight decay value" )
546
+ with st.expander("More information"):
547
+ st.write("The parameters that controls the amount the weights will exponentially decay to zero.")
548
+ lr_catgcn = st.number_input("Enter the learning rate")
549
+ with st.expander("More information"):
550
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
551
+ epochs_catgcn = st.number_input("Enter the number of epochs", value=0)
552
+ with st.expander("More information"):
553
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
554
+ diag_probe = st.number_input("Enter the diag probe value" , value=0)
555
+ graph_refining = st.selectbox("Choose the graph refining approach", ("agc", "fignn", "none"))
556
+ grn_units = st.number_input("Enter the grn units value" , value=0)
557
+ bi_interaction = st.selectbox("Choose the bi-interaction approach", ("nfm", "none"))
558
+
559
+
560
+ elif "RHGN" in model_type and len(model_type) == 1:
561
+ st.markdown("### Enter the general paramaters")
562
+ seed = st.number_input("Enter the prefered seed number", value=0)
563
+ #lr = st.number_input("Enter the learning rate", value=0)
564
+ #epochs = st.number_input("Enter the number of epochs", value=0)
565
+ #predict_attr = st.text_input("Enter the prediction label")
566
+ #sens_attr = st.text_input("Enter the senstive attribute")
567
+
568
+
569
+ st.markdown("### Enter the RHGN parametrs")
570
+ num_hidden = st.number_input("Enter the number of hidden layers", value=0)
571
+ with st.expander("More information"):
572
+ st.write("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
573
+ lr_rhgn = st.number_input("Enter the learning rate")
574
+ with st.expander("More information"):
575
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
576
+
577
+ epochs_rhgn = st.number_input("Enter the number of epochs for RHGN", value=0)
578
+ with st.expander("More information"):
579
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
580
+
581
+ clip = st.number_input("Enter the clip value", value=0)
582
+ with st.expander("More information"):
583
+ st.write("The clip number is a hyperparameter that determines the maximum value that the gradient can take. If the gradient exceeds this value, it is clipped (i.e., truncated to the maximum value).")
584
+
585
+ elif "FairGNN" in model_type and len(model_type) == 1:
586
+ st.markdown("### Enter the general parameters")
587
+ seed = st.number_input("Enter the prefered seed number" , value=0)
588
+ #lr = st.number_input("Enter the learning rate" , value=0)
589
+ #epochs = st.number_input("Enter the number of epochs" , value=0)
590
+ #predict_attr = st.text_input("Enter the prediction label")
591
+ #sens_attr = st.text_input("Enter the senstive attribute")
592
+
593
+
594
+ st.markdown("### Enter the FairGNN parameters")
595
+ lr_fairgnn = st.number_input("Enter the learning rate")
596
+ epochs_fairgnn = st.number_input("Enter the number of epochs" , value=0)
597
+ with st.expander("More information"):
598
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
599
+ sens_number = st.number_input("Enter the sens number" , value=0)
600
+ label_number = st.number_input("Enter the label number", value=0)
601
+ num_hidden = st.number_input("Enter the hidden layer number" , value=0)
602
+ with st.expander("More information"):
603
+ st.write("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
604
+ alpha = st.number_input("Enter alpha value" , value=0)
605
+ with st.expander("More information"):
606
+ st.write("Refers to the regularization parameter that controls the amount of L2 regularization applied to the model's weights during the training process.")
607
+ beta = st.number_input("Enter beta value", value=0)
608
+ with st.expander("More information"):
609
+ st.write("Refers to the momentum parameter that controls how much the optimizer should take into account the previous update when computing the current update to the model's weights during the training process.")
610
+
611
+
612
+ elif "CatGCN" in model_type and len(model_type) == 1:
613
+ st.markdown("### Enter the general paramaters")
614
+ seed = st.number_input("Enter the prefered seed number", value=0)
615
+ #lr = st.number_input("Enter the learning rate" , value=0)
616
+ #epochs = st.number_input("Enter the number of epochs" , value=0)
617
+ #predict_attr = st.text_input("Enter the prediction label")
618
+ #sens_attr = st.text_input("Enter the senstive attribute")
619
+
620
+ st.markdown("### Enter the CatGCN parameters")
621
+ weight_decay = st.number_input("Enter the weight decay value")
622
+ with st.expander("More information"):
623
+ st.write("The parameters that controls the amount the weights will exponentially decay to zero.")
624
+ lr_catgcn = st.number_input("Enter the learning rate")
625
+ with st.expander("More information"):
626
+ st.write("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
627
+ epochs_catgcn = st.number_input("Enter the number of epochs" , value=0)
628
+ with st.expander("More information"):
629
+ st.write("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
630
+ diag_probe = st.number_input("Enter the diag probe value" , value=0)
631
+ graph_refining = st.multiselect("Choose the graph refining approach", ["agc", "fignn", "none"])
632
+ grn_units = st.number_input("Enter the grn units value" , value=0)
633
+ bi_interaction = st.multiselect("Choose the bi-interaction approach", ["nfm", "none"])
634
+
635
+
636
+
637
+ if len(model_type) != 0:
638
+ if st.button("Begin experiment"):
639
+ with st.spinner("Loading..."):
640
+ time.sleep(2)
641
+ if predict_attr == 'final_gender_code':
642
+ predict_attr == 'bin_gender'
643
+ if sens_attr == 'age_level':
644
+ sens_attr == 'bin_age'
645
+
646
+ ###################################################################################################################
647
+ ssh = paramiko.SSHClient()
648
+ port = 443
649
+ # Automatically add the server's host key (for the first connection only)
650
+ ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
651
+
652
+ # Connect to the remote server
653
+ ssh.connect('https://dtdh206.cs.uni-magdeburg.de:443')
654
+ #ssh.connect('141.44.31.206', port=443, banner_timeout=200)
655
+ stdin, stdout, stderr = ssh.exec_command('ls')
656
+ print(stdout)
657
+
658
+ if len(model_type) == 1 and 'FairGNN' in model_type:
659
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epoch {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN --type 1 --sens_attr {} --predict_attr {} --label_number 100 --no-cuda True --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr))
660
+ if len(model_type) == 1 and 'RHGN' in model_type:
661
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --gpu 0 --dataset_path ../ --max_lr {} --num_hidden {} --clip {} --epochs {} --label {} --sens_attr {} --type 1 --model_type RHGN --dataset_name {} --dataset_user_id_name userid --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, lr_rhgn, num_hidden, clip, epochs_rhgn, predict_attr, sens_attr, dataset))
662
+ # CatGCN
663
+ if len(model_type) == 1 and 'CatGCN' in model_type:
664
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --gpu 0 --lr {} --weight_decay {} --dropout 0.1 --diag-probe {} --graph-refining {} --aggr-pooling mean --grn_units {} --bi-interaction {} --nfm-units none --graph-layer pna --gnn-hops 1 --gnn-units none --aggr-style sum --balance-ratio 0.7 --sens_attr {} --label {} --dataset_name {} --dataset_path ../ --type 1 --model_type CatGCN --dataset_user_id_name userid --alpha 0.5 --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, lr_catgcn, weight_decay, diag_probe, graph_refining, grn_units, bi_interaction, sens_attr, predict_attr, dataset))
665
+
666
+ # FairGNN and RHGN
667
+ if len(model_type) == 2 and 'FairGNN' in model_type and 'RHGN' in model_type:
668
+ if predict_attr == 'final_gender_code':
669
+ label = 'bin_gender'
670
+ if sens_attr == 'age_level':
671
+ sens_attr_rhgn = 'bin_age'
672
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epochs {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN RHGN --type 1 --sens_attr {} --label {} --predict_attr {} --label_number 100 --no-cuda True --max_lr {} --clip {} --epochs_rhgn {} --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr, predict_attr, lr_rhgn, clip, epochs_rhgn))
673
+ print('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epochs {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN RHGN --type 1 --sens_attr {} --label {} --predict_attr {} --label_number 100 --no-cuda True --max_lr {} --clip {} --epochs_rhgn {} --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr, predict_attr, lr_rhgn, clip, epochs_rhgn))
674
+ #stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && ls')
675
+
676
+ output_queue = queue.Queue()
677
+ output_thred = threading.Thread(target=read_output, args=(stderr, output_queue))
678
+ output_thred.start()
679
+
680
+ while True:
681
+ try:
682
+ line = output_queue.get_nowait()
683
+ #st.text(line)
684
+ except queue.Empty:
685
+ if output_thred.is_alive():
686
+ continue
687
+ else:
688
+ break
689
+
690
+ output_thred.join()
691
+ all_output = []
692
+ for line in stdout:
693
+ print(line.strip())
694
+ #st.text(line.strip())
695
+ if "Test_final:" in line and 'FairGNN' in model_type:
696
+ result = line.strip()
697
+ #st.text(result)
698
+ if 'accuracy' in line and 'RHGN' in model_type:
699
+ #st.text(line.strip())
700
+ line = line.strip() + 'end'
701
+ acc = re.search('accuracy (.+?)end', line)
702
+ acc = acc.group(1)
703
+ acc_rhgn = acc.split()[0]
704
+ if 'F1 score:' in line:
705
+ f1 = '.'.join(line.split('.')[0:2])
706
+ f1_rhgn = '{:.3f}'.format(float(f1.split()[-1]))
707
+ if 'Statistical Parity Difference (SPD):' in line:
708
+ spd_rhgn = '{:.3f}'.format(float(line.split()[-1]))
709
+
710
+ if 'Equal Opportunity Difference (EOD):' in line:
711
+ eod_rhgn = '{:.3f}'.format(float(line.split()[-1]))
712
+
713
+ if 'Overall Accuracy Equality Difference (OAED):' in line:
714
+ oaed_rhgn = '{:.3f}'.format(float(line.split()[-1]))
715
+
716
+ if 'Treatment Equality Difference (TED):' in line:
717
+ ted_rhgn = '{:.3f}'.format(float(line.split()[-1]))
718
+ #all_output.append(line.strip())
719
+ # Close the connection
720
+ ssh.close()
721
+
722
+ st.success("Done!")
723
+
724
+
725
+ st.markdown("## Training Results:")
726
+ print(len(model_type))
727
+ print(model_type)
728
+ if len(model_type) == 1 and 'FairGNN' in model_type:
729
+ st.text(result)
730
+ acc = re.search('accuracy:(.+?)roc', result)
731
+ f1 = re.search('F1:(.+?)acc_sens', result)
732
+
733
+ spd = re.search('parity:(.+?)equality', result)
734
+ eod = re.search('equality:(.+?)oaed', result)
735
+ oaed = re.search('oaed:(.+?)treatment equality', result)
736
+ ted = re.search('treatment equality(.+?)end', result)
737
+ data = {'Model': [model_type],
738
+ 'Accuracy': [acc.group(1)],
739
+ 'F1': [f1.group(1)],
740
+ 'SPD': [spd.group(1)],
741
+ 'EOD': [eod.group(1)],
742
+ 'OAED': [oaed.group(1)],
743
+ 'TED': [ted.group(1)]
744
+ }
745
+
746
+ elif len(model_type) == 1 and 'RHGN' in model_type:
747
+ #print('all_output:', all_output)
748
+ data = {'Model': [model_type],
749
+ 'Accuracy': [acc_rhgn],
750
+ 'F1': [f1_rhgn],
751
+ 'SPD': [spd_rhgn],
752
+ 'EOD': [eod_rhgn],
753
+ 'OAED': [oaed_rhgn],
754
+ 'TED': [ted_rhgn]
755
+ }
756
+
757
+ elif len(model_type) == 2 and 'RHGN' in model_type and 'FairGNN' in model_type:
758
+
759
+ acc = re.search('a:(.+?)roc', result)
760
+ f1 = re.search('F1:(.+?)acc_sens', result)
761
+
762
+ spd = re.search('parity:(.+?)equality', result)
763
+ eod = re.search('equality:(.+?)oaed', result)
764
+ oaed = re.search('oaed:(.+?)treatment equality', result)
765
+ ted = re.search('treatment equality(.+?)end', result)
766
+
767
+ ind_fairgnn = model_type.index('FairGNN')
768
+ ind_rhgn = model_type.index('RHGN')
769
+ data = {'Model': [model_type[ind_fairgnn], model_type[ind_rhgn]],
770
+ 'Prediction label': [predict_attr, predict_attr],
771
+ 'Sensitive attribute': [sens_attr, sens_attr],
772
+ 'Accuracy': [acc.group(1), acc_rhgn],
773
+ 'F1': [f1.group(1), f1_rhgn],
774
+ 'SPD': [spd.group(1), spd_rhgn],
775
+ 'EOD': [eod.group(1), eod_rhgn],
776
+ 'OAED': [oaed.group(1), oaed_rhgn],
777
+ 'TED': [ted.group(1), ted_rhgn]
778
+ }
779
+
780
+ df = pd.DataFrame(data)
781
+
782
+ #st.dataframe(df, width=5000)
783
+ # set the display options for the DataFrame
784
+ pd.set_option("display.max_columns", None)
785
+ pd.set_option("display.width", 100)
786
+
787
+
788
+
789
+ # display the DataFrame in Streamlit
790
+ st.write(df)
791
+
792
+ #st.write("The logs of the experiment can be found at: mohamed9/Experiments-RHGN-CatGCN-Alibaba")
793
+ #st.markdown("The logs of the experiment can be found at: **mohamed9/Experiments-RHGN-FairGNN-Alibaba**")
794
+
795
+
796
+
pages/ovgu_logo.png ADDED
pages/setup.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/bin/bash
# One-shot environment bootstrap for the Streamlit app:
# builds a conda env from the exported spec and switches into it.

# Create a new Conda environment
# (fails if an env named "env_test" already exists; rerun after
#  `conda env remove --name env_test` in that case)
conda env create -f test_new.yml --name env_test

# Activate the Conda environment
# NOTE(review): `source activate` is the legacy activation form; modern
# conda prefers `conda activate` after sourcing the shell hook — confirm
# the target image still ships the old activate script.
source activate env_test
presets/Presets.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import paramiko
5
+ import warnings
6
+ import re
7
+ import time
8
+
9
+
10
def FairGNN_NBA():
    """Render the read-only preset form for training FairGNN on the NBA dataset.

    Every widget is disabled: the form only displays the fixed preset
    hyperparameters (seed, learning rate, epochs, sens/label numbers,
    hidden size, alpha, beta) so the user can inspect them before launching.

    Returns:
        tuple: ``(model_type, predict_attr, sens_attr)`` — the values the
        experiment launcher needs to build the training command.
    """

    def _more_info(text):
        # Collapsible help box rendered directly under the preceding widget.
        with st.expander("More information"):
            st.write(text)

    # Fixed dataset/task definition for this preset. The widget shows the
    # display name; the lowercase key is what the backend command expects.
    dataset = st.text_input('Dataset', 'NBA', disabled=True)
    dataset = 'nba'
    predict_attr = st.text_input("Prediction label", 'SALARY', disabled=True)
    sens_attr = st.text_input("Sensitive attribute", 'country', disabled=True)

    model_type = st.text_input("Models to train", 'FairGNN', disabled=True)

    st.markdown("### General parameters")
    seed = st.number_input("Prefered seed number", value=42, disabled=True)

    st.markdown("### FairGNN parameters")
    lr_fairgnn = st.number_input("Learning rate", value=0.01, disabled=True)
    epochs_fairgnn = st.number_input("Number of epochs", value=2000, disabled=True)
    _more_info("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
    sens_number = st.number_input("Sens number", value=50, disabled=True)
    label_number = st.number_input("Label number", value=1000, disabled=True)
    num_hidden = st.number_input("Hidden layer number", value=128, disabled=True)
    _more_info("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
    alpha = st.number_input("Alpha value", value=10, disabled=True)
    _more_info("Refers to the regularization parameter that controls the amount of L2 regularization applied to the model's weights during the training process.")
    beta = st.number_input("Beta value", value=1, disabled=True)
    _more_info("Refers to the momentum parameter that controls how much the optimizer should take into account the previous update when computing the current update to the model's weights during the training process.")

    return model_type, predict_attr, sens_attr
41
+
42
def RHGN_Alibaba():
    """Render the read-only preset form for training RHGN on the Alibaba dataset.

    All widgets are disabled — the form only exposes the preset's fixed
    hyperparameters (seed, hidden size, learning rate, epochs, gradient clip)
    for inspection.

    Returns:
        tuple: ``(model_type, predict_attr, sens_attr)`` for the experiment
        launcher.
    """

    def _more_info(text):
        # Collapsible help box rendered directly under the preceding widget.
        with st.expander("More information"):
            st.write(text)

    # Fixed dataset/task definition; the lowercase key is the backend name.
    dataset = st.text_input('Dataset', 'Alibaba', disabled=True)
    dataset = 'alibaba'
    predict_attr = st.text_input('Prediction label', 'final_gender_code', disabled=True)
    sens_attr = st.text_input('Sensitive attribute', 'age_level', disabled=True)

    model_type = st.text_input("Models to train", 'RHGN', disabled=True)

    st.markdown("### General parameters")
    seed = st.number_input("Prefered seed number", value=3, disabled=True)

    st.markdown("### RHGN parametrs")
    num_hidden = st.number_input("Hidden layer number", value=32, disabled=True)
    _more_info("The number of hidden layers refers to the number of layers between the input layer and the output layer of a model.")
    lr_rhgn = st.number_input("Learning rate", value=0.1, disabled=True)
    _more_info("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")

    epochs_rhgn = st.number_input("Epochs", value=100, disabled=True)
    _more_info("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")

    clip = st.number_input("Clip value", value=2, disabled=True)
    _more_info("The clip number is a hyperparameter that determines the maximum value that the gradient can take. If the gradient exceeds this value, it is clipped (i.e., truncated to the maximum value).")

    return model_type, predict_attr, sens_attr
70
+
71
def CatGCN_Alibaba():
    """Render the read-only preset form for training CatGCN on the Alibaba dataset.

    All widgets are disabled — the form only displays the preset's fixed
    hyperparameters (seed, weight decay, learning rate, epochs, diag probe,
    graph refining, grn units, bi-interaction) for inspection.

    Returns:
        tuple: ``(model_type, predict_attr, sens_attr)`` for the experiment
        launcher.
    """

    def _more_info(text):
        # Collapsible help box rendered directly under the preceding widget.
        with st.expander("More information"):
            st.write(text)

    # Fixed dataset/task definition; the lowercase key is the backend name.
    dataset = st.text_input('Dataset', 'Alibaba', disabled=True)
    dataset = 'alibaba'
    predict_attr = st.text_input('Prediction label', 'final_gender_code', disabled=True)
    sens_attr = st.text_input('Sensitive attribute', 'age_level', disabled=True)

    model_type = st.text_input('Models to train', 'CatGCN', disabled=True)

    st.markdown("### General parameters")
    seed = st.number_input("Prefered seed number", value=11, disabled=True)

    st.markdown("### CatGCN parameters")
    weight_decay = st.number_input("Weight decay value", value=0.01, disabled=True)
    _more_info("The parameters that controls the amount the weights will exponentially decay to zero.")
    lr_catgcn = st.number_input("Learning rate", value=0.1, disabled=True)
    _more_info("Is a hyperparameter that controls the step size of the updates made to the weights during training. In other words, it determines how quickly the model learns from the data.")
    epochs_catgcn = st.number_input("Number of epochs", value=100, disabled=True)
    _more_info("Refers to a single pass through the entire training dataset during the training of a model. In other words, an epoch is a measure of the number of times the model has seen the entire training data.")
    diag_probe = st.number_input("Diag probe value", value=39, disabled=True)
    graph_refining = st.text_input("Graph refining approach", "agc", disabled=True)
    grn_units = st.number_input("Enter the grn units value", value=64, disabled=True)
    bi_interaction = st.text_input("Bi-interaction approach", "nfm", disabled=True)

    return model_type, predict_attr, sens_attr
98
+
99
+ def experiment_begin(model_type, predict_attr, sens_attr):
100
+ if len(model_type) != 0:
101
+ if st.button("Begin experiment"):
102
+ with st.spinner("Loading..."):
103
+
104
+ time.sleep(2)
105
+ ssh = paramiko.SSHClient()
106
+ port = 443
107
+ # Automatically add the server's host key (for the first connection only)
108
+ ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
109
+
110
+ # Connect to the remote server
111
+ ssh.connect('https://dtdh206.cs.uni-magdeburg.de:443')
112
+ #ssh.connect('141.44.31.206', port=443, banner_timeout=200)
113
+ stdin, stdout, stderr = ssh.exec_command('ls')
114
+ print(stdout)
115
+
116
+ if len(model_type) == 1 and 'FairGNN' in model_type:
117
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epoch {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN --type 1 --sens_attr {} --predict_attr {} --label_number 100 --no-cuda True --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr))
118
+ if len(model_type) == 1 and 'RHGN' in model_type:
119
+ if predict_attr == 'final_gender_code':
120
+ predict_attr = 'bin_gender'
121
+ if sens_attr == 'age_level':
122
+ sens_attr = 'bin_age'
123
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --gpu 0 --dataset_path ../ --max_lr {} --num_hidden {} --clip {} --epochs {} --label {} --sens_attr {} --type 1 --model_type RHGN --dataset_name {} --dataset_user_id_name userid --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, lr_rhgn, num_hidden, clip, epochs_rhgn, predict_attr, sens_attr, dataset))
124
+ # CatGCN
125
+ if len(model_type) == 1 and 'CatGCN' in model_type:
126
+ if predict_attr == 'final_gender_code':
127
+ predict_attr = 'bin_gender'
128
+ if sens_attr == 'age_level':
129
+ sens_attr = 'bin_age'
130
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --gpu 0 --lr {} --weight_decay {} --dropout 0.1 --diag-probe {} --graph-refining {} --aggr-pooling mean --grn_units {} --bi-interaction {} --nfm-units none --graph-layer pna --gnn-hops 1 --gnn-units none --aggr-style sum --balance-ratio 0.7 --sens_attr {} --label {} --dataset_name {} --dataset_path ../ --type 1 --model_type CatGCN --dataset_user_id_name userid --alpha 0.5 --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, lr_catgcn, weight_decay, diag_probe, graph_refining, grn_units, bi_interaction, sens_attr, predict_attr, dataset))
131
+
132
+ # FairGNN and RHGN
133
+ if len(model_type) == 2 and 'FairGNN' in model_type and 'RHGN' in model_type:
134
+ if predict_attr == 'final_gender_code':
135
+ label = 'bin_gender'
136
+ if sens_attr == 'age_level':
137
+ sens_attr_rhgn = 'bin_age'
138
+ stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epochs {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN RHGN --type 1 --sens_attr {} --label {} --predict_attr {} --label_number 100 --no-cuda True --max_lr {} --clip {} --epochs_rhgn {} --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr, predict_attr, lr_rhgn, clip, epochs_rhgn))
139
+ print('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && /home/abdelrazek/anaconda3/envs/test/bin/python3 -W ignore main.py --seed {} --epochs {} --model GCN --sens_number {} --num_hidden {} --acc 0.20 --roc 0.20 --alpha {} --beta {} --dataset_name {} --dataset_path ../nba.csv --dataset_user_id_name user_id --model_type FairGNN RHGN --type 1 --sens_attr {} --label {} --predict_attr {} --label_number 100 --no-cuda True --max_lr {} --clip {} --epochs_rhgn {} --special_case True --neptune_project mohamed9/FairGNN-Alibaba --neptune_token eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0Nzc0MTIzMy0xMjRhLTQ0OGQtODE5Mi1mZjE3MDE0MGFhOGMifQ=='.format(seed, epochs_fairgnn, sens_number, num_hidden, alpha, beta, dataset, sens_attr, predict_attr, predict_attr, lr_rhgn, clip, epochs_rhgn))
140
+ #stdin, stdout, stderr = ssh.exec_command('cd /home/abdelrazek/framework-for-fairness-analysis-and-mitigation-main && ls')
141
+
142
+ output_queue = queue.Queue()
143
+ output_thred = threading.Thread(target=read_output, args=(stderr, output_queue))
144
+ output_thred.start()
145
+
146
+ while True:
147
+ try:
148
+ line = output_queue.get_nowait()
149
+ #st.text(line)
150
+ except queue.Empty:
151
+ if output_thred.is_alive():
152
+ continue
153
+ else:
154
+ break
155
+
156
+ output_thred.join()
157
+ all_output = []
158
+ for line in stdout:
159
+ print(line.strip())
160
+ #st.text(line.strip())
161
+ if "Test_final:" in line and 'FairGNN' in model_type:
162
+ result = line.strip()
163
+ #st.text(result)
164
+ if 'accuracy' in line and 'RHGN' in model_type:
165
+ #st.text(line.strip())
166
+ line = line.strip() + 'end'
167
+ acc = re.search('accuracy (.+?)end', line)
168
+ acc = acc.group(1)
169
+ acc_rhgn = acc.split()[0]
170
+ if 'F1 score:' in line:
171
+ f1 = '.'.join(line.split('.')[0:2])
172
+ f1_rhgn = '{:.3f}'.format(float(f1.split()[-1]))
173
+ if 'Statistical Parity Difference (SPD):' in line:
174
+ spd_rhgn = '{:.3f}'.format(float(line.split()[-1]))
175
+
176
+ if 'Equal Opportunity Difference (EOD):' in line:
177
+ eod_rhgn = '{:.3f}'.format(float(line.split()[-1]))
178
+
179
+ if 'Overall Accuracy Equality Difference (OAED):' in line:
180
+ oaed_rhgn = '{:.3f}'.format(float(line.split()[-1]))
181
+
182
+ if 'Treatment Equality Difference (TED):' in line:
183
+ ted_rhgn = '{:.3f}'.format(float(line.split()[-1]))
184
+ #all_output.append(line.strip())
185
+ # Close the connection
186
+ ssh.close()
187
+
188
+ st.success("Done!")
189
+
190
+
191
+ st.markdown("## Training Results:")
192
+ print(len(model_type))
193
+ print(model_type)
194
+ if len(model_type) == 1 and 'FairGNN' in model_type:
195
+ st.text(result)
196
+ acc = re.search('accuracy:(.+?)roc', result)
197
+ f1 = re.search('F1:(.+?)acc_sens', result)
198
+
199
+ spd = re.search('parity:(.+?)equality', result)
200
+ eod = re.search('equality:(.+?)oaed', result)
201
+ oaed = re.search('oaed:(.+?)treatment equality', result)
202
+ ted = re.search('treatment equality(.+?)end', result)
203
+ data = {'Model': [model_type],
204
+ 'Accuracy': [acc.group(1)],
205
+ 'F1': [f1.group(1)],
206
+ 'SPD': [spd.group(1)],
207
+ 'EOD': [eod.group(1)],
208
+ 'OAED': [oaed.group(1)],
209
+ 'TED': [ted.group(1)]
210
+ }
211
+
212
+ elif len(model_type) == 1 and 'RHGN' in model_type:
213
+ #print('all_output:', all_output)
214
+ data = {'Model': [model_type],
215
+ 'Accuracy': [acc_rhgn],
216
+ 'F1': [f1_rhgn],
217
+ 'SPD': [spd_rhgn],
218
+ 'EOD': [eod_rhgn],
219
+ 'OAED': [oaed_rhgn],
220
+ 'TED': [ted_rhgn]
221
+ }
222
+
223
+ elif len(model_type) == 2 and 'RHGN' in model_type and 'FairGNN' in model_type:
224
+
225
+ acc = re.search('a:(.+?)roc', result)
226
+ f1 = re.search('F1:(.+?)acc_sens', result)
227
+
228
+ spd = re.search('parity:(.+?)equality', result)
229
+ eod = re.search('equality:(.+?)oaed', result)
230
+ oaed = re.search('oaed:(.+?)treatment equality', result)
231
+ ted = re.search('treatment equality(.+?)end', result)
232
+
233
+ ind_fairgnn = model_type.index('FairGNN')
234
+ ind_rhgn = model_type.index('RHGN')
235
+ data = {'Model': [model_type[ind_fairgnn], model_type[ind_rhgn]],
236
+ 'Prediction label': [predict_attr, predict_attr],
237
+ 'Sensitive attribute': [sens_attr, sens_attr],
238
+ 'Accuracy': [acc.group(1), acc_rhgn],
239
+ 'F1': [f1.group(1), f1_rhgn],
240
+ 'SPD': [spd.group(1), spd_rhgn],
241
+ 'EOD': [eod.group(1), eod_rhgn],
242
+ 'OAED': [oaed.group(1), oaed_rhgn],
243
+ 'TED': [ted.group(1), ted_rhgn]
244
+ }
245
+
246
+ df = pd.DataFrame(data)
247
+
248
+ #st.dataframe(df, width=5000)
249
+ # set the display options for the DataFrame
250
+ pd.set_option("display.max_columns", None)
251
+ pd.set_option("display.width", 100)
252
+
253
+
254
+
255
+ # display the DataFrame in Streamlit
256
+ st.write(df)
257
+
258
+ #st.write("The logs of the experiment can be found at: mohamed9/Experiments-RHGN-CatGCN-Alibaba")
259
+ #st.markdown("The logs of the experiment can be found at: **mohamed9/Experiments-RHGN-FairGNN-Alibaba**")
presets/__pycache__/FairGNN_preset.cpython-310.pyc ADDED
Binary file (9.28 kB). View file
 
presets/__pycache__/Presets.cpython-310.pyc ADDED
Binary file (10.2 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ paramiko==2.8.1
2
+
3
+
src/__pycache__/fainress_component.cpython-37.pyc ADDED
Binary file (6.71 kB). View file
 
src/__pycache__/fainress_component.cpython-39.pyc ADDED
Binary file (6.47 kB). View file
 
src/__pycache__/utils.cpython-37.pyc ADDED
Binary file (11.3 kB). View file
 
src/__pycache__/utils.cpython-39.pyc ADDED
Binary file (11.2 kB). View file
 
src/aif360/README.md ADDED
File without changes
src/aif360/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ try:
2
+ from .version import version as __version__
3
+ except ImportError:
4
+ pass
src/aif360/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (280 Bytes). View file
 
src/aif360/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (282 Bytes). View file
 
src/aif360/__pycache__/decorating_metaclass.cpython-37.pyc ADDED
Binary file (1.63 kB). View file
 
src/aif360/__pycache__/decorating_metaclass.cpython-39.pyc ADDED
Binary file (1.64 kB). View file
 
src/aif360/aif360-r/.Rbuildignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ^.*\.Rproj$ # Automatically added by RStudio,
2
+ ^\.Rproj\.user$ # used for temporary files.
3
+ ^README\.Rmd$ # An Rmarkdown file used to generate README.md
4
+ ^NEWS\.md$ # A news file written in Markdown
5
+ ^\.travis\.yml$ # Used for continuous integration testing with travis
6
+ ^LICENSE\.md$
7
+ ^raif360\.Rproj$
8
+ ^README\.Rmd$
9
+ ^CONDUCT\.md$
10
+ ^CONTRIBUTING\.md$
11
+ ^CODEOFCONDUCT\.md$
12
+ ^cran-comments\.md$
src/aif360/aif360-r/.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .Rproj.user
2
+ .Rhistory
3
+ .RData
4
+ .Ruserdata
5
+ .httr-oauth
6
+ .*.Rnb.cached
7
+ .DS_Store
src/aif360/aif360-r/CODEOFCONDUCT.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Contributor Code of Conduct
3
+
4
+ ## Our Pledge
5
+
6
+ We as members, contributors, and leaders pledge to make participation in our
7
+ community a harassment-free experience for everyone, regardless of age, body
8
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
9
+ identity and expression, level of experience, education, socio-economic status,
10
+ nationality, personal appearance, race, religion, or sexual identity
11
+ and orientation.
12
+
13
+ We pledge to act and interact in ways that contribute to an open, welcoming,
14
+ diverse, inclusive, and healthy community.
15
+
16
+ ## Our Standards
17
+
18
+ Examples of behavior that contributes to a positive environment for our
19
+ community include:
20
+
21
+ * Demonstrating empathy and kindness toward other people
22
+ * Being respectful of differing opinions, viewpoints, and experiences
23
+ * Giving and gracefully accepting constructive feedback
24
+ * Accepting responsibility and apologizing to those affected by our mistakes,
25
+ and learning from the experience
26
+ * Focusing on what is best not just for us as individuals, but for the
27
+ overall community
28
+
29
+ Examples of unacceptable behavior include:
30
+
31
+ * The use of sexualized language or imagery, and sexual attention or
32
+ advances of any kind
33
+ * Trolling, insulting or derogatory comments, and personal or political attacks
34
+ * Public or private harassment
35
+ * Publishing others' private information, such as a physical or email
36
+ address, without their explicit permission
37
+ * Other conduct which could reasonably be considered inappropriate in a
38
+ professional setting
39
+
40
+ ## Attribution
41
+
42
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/),
43
+ version 2.0, available at
44
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
src/aif360/aif360-r/CONTRIBUTING.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to the AIF360 R package
2
+
3
+ This guide is divided into three main parts:
4
+
5
+ 1. Filing a bug report or feature request in an issue.
6
+ 2. Suggesting a change via a pull request.
7
+ 3. New features or enhancements to AIF360 functionality.
8
+
9
+ If you're not familiar with git or GitHub, please start by reading <http://r-pkgs.had.co.nz/git.html>
10
+
11
+ Please note that the AIF360 R package is released with a [Contributor Code of Conduct](CODEOFCONDUCT.md). By contributing to this project,
12
+ you agree to abide by its terms.
13
+
14
+ ## Issues
15
+
16
+ If you find a bug, please search GitHub under [Issues](https://github.com/Trusted-AI/AIF360/issues) to ensure the bug was not already reported.
17
+ If you’re unable to find an open issue addressing the problem, [open a new one](https://github.com/Trusted-AI/AIF360/issues/new). Please include a title and clear description, as much relevant information as possible (such as required packages, data, etc.), and a code sample to replicate the issue.
18
+
19
+ ## Pull requests
20
+
21
+ To contribute a change to the AIF360 R package, you follow these steps:
22
+
23
+ * Create a branch in git and make your changes.
24
+ * Push branch to GitHub and open a new pull request (PR).
25
+ * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
26
+
27
+ ## New Features
28
+
29
+ The AIF360 R package is part of [AI Fairness 360](https://github.com/Trusted-AI/AIF360), developed with extensibility in mind. If you wish to suggest new metrics, explainers, algorithms, or datasets, please get in touch on [Slack]( https://aif360.slack.com) (invitation [here](https://aif360.slack.com/join/shared_invite/zt-5hfvuafo-X0~g6tgJQ~7tIAT~S294TQ))!
30
+
src/aif360/aif360-r/DESCRIPTION ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Package: aif360
2
+ Type: Package
3
+ Title: Help Detect and Mitigate Bias in Machine Learning Models
4
+ Version: 0.1.0
5
+ Authors@R: c(
6
+ person("Gabriela", "de Queiroz", email = "[email protected]", role = "aut"),
7
+ person("Stacey", "Ronaghan", email = "[email protected]", role = "aut"),
8
+ person("Saishruthi", "Swaminathan", email = "[email protected]",
9
+ role = c("aut", "cre")))
10
+ Description: The AI Fairness 360 toolkit is an open-source library to help detect
11
+ and mitigate bias in machine learning models. The AI Fairness 360 R package includes
12
+ a comprehensive set of metrics for datasets and models to test for biases,
13
+ explanations for these metrics, and algorithms to mitigate bias in datasets and models.
14
+ License: Apache License (>= 2.0)
15
+ Encoding: UTF-8
16
+ LazyData: true
17
+ URL: https://github.com/Trusted-AI/AIF360
18
+ BugReports: https://github.com/Trusted-AI/AIF360/issues
19
+ Imports:
20
+ reticulate,
21
+ rstudioapi
22
+ RoxygenNote: 7.2.0
23
+ Suggests:
24
+ testthat
src/aif360/aif360-r/LICENSE.md ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ ==============
3
+
4
+ _Version 2.0, January 2004_
5
+ _&lt;<http://www.apache.org/licenses/>&gt;_
6
+
7
+ ### Terms and Conditions for use, reproduction, and distribution
8
+
9
+ #### 1. Definitions
10
+
11
+ “License” shall mean the terms and conditions for use, reproduction, and
12
+ distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ “Licensor” shall mean the copyright owner or entity authorized by the copyright
15
+ owner that is granting the License.
16
+
17
+ “Legal Entity” shall mean the union of the acting entity and all other entities
18
+ that control, are controlled by, or are under common control with that entity.
19
+ For the purposes of this definition, “control” means **(i)** the power, direct or
20
+ indirect, to cause the direction or management of such entity, whether by
21
+ contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or **(iii)** beneficial ownership of such entity.
23
+
24
+ “You” (or “Your”) shall mean an individual or Legal Entity exercising
25
+ permissions granted by this License.
26
+
27
+ “Source” form shall mean the preferred form for making modifications, including
28
+ but not limited to software source code, documentation source, and configuration
29
+ files.
30
+
31
+ “Object” form shall mean any form resulting from mechanical transformation or
32
+ translation of a Source form, including but not limited to compiled object code,
33
+ generated documentation, and conversions to other media types.
34
+
35
+ “Work” shall mean the work of authorship, whether in Source or Object form, made
36
+ available under the License, as indicated by a copyright notice that is included
37
+ in or attached to the work (an example is provided in the Appendix below).
38
+
39
+ “Derivative Works” shall mean any work, whether in Source or Object form, that
40
+ is based on (or derived from) the Work and for which the editorial revisions,
41
+ annotations, elaborations, or other modifications represent, as a whole, an
42
+ original work of authorship. For the purposes of this License, Derivative Works
43
+ shall not include works that remain separable from, or merely link (or bind by
44
+ name) to the interfaces of, the Work and Derivative Works thereof.
45
+
46
+ “Contribution” shall mean any work of authorship, including the original version
47
+ of the Work and any modifications or additions to that Work or Derivative Works
48
+ thereof, that is intentionally submitted to Licensor for inclusion in the Work
49
+ by the copyright owner or by an individual or Legal Entity authorized to submit
50
+ on behalf of the copyright owner. For the purposes of this definition,
51
+ “submitted” means any form of electronic, verbal, or written communication sent
52
+ to the Licensor or its representatives, including but not limited to
53
+ communication on electronic mailing lists, source code control systems, and
54
+ issue tracking systems that are managed by, or on behalf of, the Licensor for
55
+ the purpose of discussing and improving the Work, but excluding communication
56
+ that is conspicuously marked or otherwise designated in writing by the copyright
57
+ owner as “Not a Contribution.”
58
+
59
+ “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
60
+ of whom a Contribution has been received by Licensor and subsequently
61
+ incorporated within the Work.
62
+
63
+ #### 2. Grant of Copyright License
64
+
65
+ Subject to the terms and conditions of this License, each Contributor hereby
66
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
67
+ irrevocable copyright license to reproduce, prepare Derivative Works of,
68
+ publicly display, publicly perform, sublicense, and distribute the Work and such
69
+ Derivative Works in Source or Object form.
70
+
71
+ #### 3. Grant of Patent License
72
+
73
+ Subject to the terms and conditions of this License, each Contributor hereby
74
+ grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
75
+ irrevocable (except as stated in this section) patent license to make, have
76
+ made, use, offer to sell, sell, import, and otherwise transfer the Work, where
77
+ such license applies only to those patent claims licensable by such Contributor
78
+ that are necessarily infringed by their Contribution(s) alone or by combination
79
+ of their Contribution(s) with the Work to which such Contribution(s) was
80
+ submitted. If You institute patent litigation against any entity (including a
81
+ cross-claim or counterclaim in a lawsuit) alleging that the Work or a
82
+ Contribution incorporated within the Work constitutes direct or contributory
83
+ patent infringement, then any patent licenses granted to You under this License
84
+ for that Work shall terminate as of the date such litigation is filed.
85
+
86
+ #### 4. Redistribution
87
+
88
+ You may reproduce and distribute copies of the Work or Derivative Works thereof
89
+ in any medium, with or without modifications, and in Source or Object form,
90
+ provided that You meet the following conditions:
91
+
92
+ * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
93
+ this License; and
94
+ * **(b)** You must cause any modified files to carry prominent notices stating that You
95
+ changed the files; and
96
+ * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
97
+ all copyright, patent, trademark, and attribution notices from the Source form
98
+ of the Work, excluding those notices that do not pertain to any part of the
99
+ Derivative Works; and
100
+ * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
101
+ Derivative Works that You distribute must include a readable copy of the
102
+ attribution notices contained within such NOTICE file, excluding those notices
103
+ that do not pertain to any part of the Derivative Works, in at least one of the
104
+ following places: within a NOTICE text file distributed as part of the
105
+ Derivative Works; within the Source form or documentation, if provided along
106
+ with the Derivative Works; or, within a display generated by the Derivative
107
+ Works, if and wherever such third-party notices normally appear. The contents of
108
+ the NOTICE file are for informational purposes only and do not modify the
109
+ License. You may add Your own attribution notices within Derivative Works that
110
+ You distribute, alongside or as an addendum to the NOTICE text from the Work,
111
+ provided that such additional attribution notices cannot be construed as
112
+ modifying the License.
113
+
114
+ You may add Your own copyright statement to Your modifications and may provide
115
+ additional or different license terms and conditions for use, reproduction, or
116
+ distribution of Your modifications, or for any such Derivative Works as a whole,
117
+ provided Your use, reproduction, and distribution of the Work otherwise complies
118
+ with the conditions stated in this License.
119
+
120
+ #### 5. Submission of Contributions
121
+
122
+ Unless You explicitly state otherwise, any Contribution intentionally submitted
123
+ for inclusion in the Work by You to the Licensor shall be under the terms and
124
+ conditions of this License, without any additional terms or conditions.
125
+ Notwithstanding the above, nothing herein shall supersede or modify the terms of
126
+ any separate license agreement you may have executed with Licensor regarding
127
+ such Contributions.
128
+
129
+ #### 6. Trademarks
130
+
131
+ This License does not grant permission to use the trade names, trademarks,
132
+ service marks, or product names of the Licensor, except as required for
133
+ reasonable and customary use in describing the origin of the Work and
134
+ reproducing the content of the NOTICE file.
135
+
136
+ #### 7. Disclaimer of Warranty
137
+
138
+ Unless required by applicable law or agreed to in writing, Licensor provides the
139
+ Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
140
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
141
+ including, without limitation, any warranties or conditions of TITLE,
142
+ NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
143
+ solely responsible for determining the appropriateness of using or
144
+ redistributing the Work and assume any risks associated with Your exercise of
145
+ permissions under this License.
146
+
147
+ #### 8. Limitation of Liability
148
+
149
+ In no event and under no legal theory, whether in tort (including negligence),
150
+ contract, or otherwise, unless required by applicable law (such as deliberate
151
+ and grossly negligent acts) or agreed to in writing, shall any Contributor be
152
+ liable to You for damages, including any direct, indirect, special, incidental,
153
+ or consequential damages of any character arising as a result of this License or
154
+ out of the use or inability to use the Work (including but not limited to
155
+ damages for loss of goodwill, work stoppage, computer failure or malfunction, or
156
+ any and all other commercial damages or losses), even if such Contributor has
157
+ been advised of the possibility of such damages.
158
+
159
+ #### 9. Accepting Warranty or Additional Liability
160
+
161
+ While redistributing the Work or Derivative Works thereof, You may choose to
162
+ offer, and charge a fee for, acceptance of support, warranty, indemnity, or
163
+ other liability obligations and/or rights consistent with this License. However,
164
+ in accepting such obligations, You may act only on Your own behalf and on Your
165
+ sole responsibility, not on behalf of any other Contributor, and only if You
166
+ agree to indemnify, defend, and hold each Contributor harmless for any liability
167
+ incurred by, or claims asserted against, such Contributor by reason of your
168
+ accepting any such warranty or additional liability.
169
+
170
+ _END OF TERMS AND CONDITIONS_
171
+
172
+ ### APPENDIX: How to apply the Apache License to your work
173
+
174
+ To apply the Apache License to your work, attach the following boilerplate
175
+ notice, with the fields enclosed by brackets `[]` replaced with your own
176
+ identifying information. (Don't include the brackets!) The text should be
177
+ enclosed in the appropriate comment syntax for the file format. We also
178
+ recommend that a file or class name and description of purpose be included on
179
+ the same “printed page” as the copyright notice for easier identification within
180
+ third-party archives.
181
+
182
+ Copyright 2020-2021 The AI Fairness 360 (AIF360) Authors
183
+
184
+ Licensed under the Apache License, Version 2.0 (the "License");
185
+ you may not use this file except in compliance with the License.
186
+ You may obtain a copy of the License at
187
+
188
+ http://www.apache.org/licenses/LICENSE-2.0
189
+
190
+ Unless required by applicable law or agreed to in writing, software
191
+ distributed under the License is distributed on an "AS IS" BASIS,
192
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
193
+ See the License for the specific language governing permissions and
194
+ limitations under the License.
src/aif360/aif360-r/NAMESPACE ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by roxygen2: do not edit by hand
2
+
3
+ export(adult_dataset)
4
+ export(adversarial_debiasing)
5
+ export(bank_dataset)
6
+ export(binary_label_dataset)
7
+ export(binary_label_dataset_metric)
8
+ export(classification_metric)
9
+ export(compas_dataset)
10
+ export(disparate_impact_remover)
11
+ export(german_dataset)
12
+ export(install_aif360)
13
+ export(law_school_gpa_dataset)
14
+ export(load_aif360_lib)
15
+ export(prejudice_remover)
16
+ export(reject_option_classification)
17
+ export(reweighing)
18
+ importFrom(reticulate,import)
19
+ importFrom(reticulate,py_dict)
20
+ importFrom(reticulate,py_suppress_warnings)
21
+ importFrom(reticulate,py_to_r)
22
+ importFrom(reticulate,r_to_py)
23
+ importFrom(utils,file_test)
24
+ importFrom(utils,read.csv)
src/aif360/aif360-r/R/binary_label_dataset_metric.R ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Binary Label Dataset Metric
2
+ #' @description Class for computing metrics on an aif360 compatible dataset with binary labels.
3
+ #' @param dataset A aif360 compatible dataset.
4
+ #' @param privileged_groups Privileged groups. List containing privileged protected attribute name and value of the privileged protected attribute.
5
+ #' @param unprivileged_groups Unprivileged groups. List containing unprivileged protected attribute name and value of the unprivileged protected attribute.
6
+ #' @usage
7
+ #' binary_label_dataset_metric(dataset, privileged_groups, unprivileged_groups)
8
+ #' @examples
9
+ #' \dontrun{
10
+ #' load_aif360_lib()
11
+ #' # Load the adult dataset
12
+ #' adult_dataset <- adult_dataset()
13
+ #'
14
+ #' # Define the groups
15
+ #' privileged_groups <- list("race", 1)
16
+ #' unprivileged_groups <- list("race", 0)
17
+ #'
18
+ #' # Metric for Binary Label Dataset
19
+ #' bm <- binary_label_dataset_metric(dataset = adult_dataset,
20
+ #' privileged_groups = privileged_groups,
21
+ #' unprivileged_groups = unprivileged_groups)
22
+ #'
23
+ #' # Difference in mean outcomes between unprivileged and privileged groups
24
+ #' bm$mean_difference()
25
+ #' }
26
+ #' @seealso
27
+ #' \href{https://aif360.readthedocs.io/en/latest/modules/metrics.html#aif360.metrics.BinaryLabelDatasetMetric}{Explore available binary label dataset metrics here}
28
+ #'
29
+ #' Available metrics are: base_rate, consistency, disparate_impact, mean_difference, num_negatives, num_positives and statistical_parity_difference.
30
+ #' @export
31
+ #' @importFrom reticulate py_suppress_warnings py_to_r
32
+ #'
33
+ binary_label_dataset_metric <- function(dataset,
34
+ privileged_groups,
35
+ unprivileged_groups){
36
+
37
+ p_dict <- dict_fn(privileged_groups)
38
+ u_dict <- dict_fn(unprivileged_groups)
39
+
40
+ return(metrics$BinaryLabelDatasetMetric(dataset,
41
+ privileged_groups = p_dict,
42
+ unprivileged_groups = u_dict))
43
+ }
src/aif360/aif360-r/R/classification_metric.R ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Classification Metric
2
+ #' @description
3
+ #' Class for computing metrics based on two BinaryLabelDatasets. The first dataset is the original one and the second is the output of the classification transformer (or similar)
4
+ #' @param dataset (BinaryLabelDataset) Dataset containing ground-truth labels
5
+ #' @param classified_dataset (BinaryLabelDataset) Dataset containing predictions
6
+ #' @param privileged_groups Privileged groups. List containing privileged protected attribute name and value of the privileged protected attribute.
7
+ #' @param unprivileged_groups Unprivileged groups. List containing unprivileged protected attribute name and value of the unprivileged protected attribute.
8
+ #' @usage
9
+ #' classification_metric(dataset, classified_dataset, unprivileged_groups, privileged_groups)
10
+ #' @examples
11
+ #' \dontrun{
12
+ #' load_aif360_lib()
13
+ #' # Input dataset
14
+ #' data <- data.frame("feat" = c(0,0,1,1,1,1,0,1,1,0), "label" = c(1,0,0,1,0,0,1,0,1,1))
15
+ #' # Create aif compatible input dataset
16
+ #' act <- aif360::binary_label_dataset(data_path = data, favor_label=0, unfavor_label=1,
17
+ #' unprivileged_protected_attribute=0,
18
+ #' privileged_protected_attribute=1,
19
+ #' target_column="label", protected_attribute="feat")
20
+ #' # Classified dataset
21
+ #' pred_data <- data.frame("feat" = c(0,0,1,1,1,1,0,1,1,0), "label" = c(1,0,1,1,1,0,1,0,0,1))
22
+ #' # Create aif compatible classified dataset
23
+ #' pred <- aif360::binary_label_dataset(data_path = pred_data, favor_label=0, unfavor_label=1,
24
+ #' unprivileged_protected_attribute=0,
25
+ #' privileged_protected_attribute=1,
26
+ #' target_column="label", protected_attribute="feat")
27
+ #' # Create an instance of classification metric
28
+ #' cm <- classification_metric(act, pred, list('feat', 1), list('feat', 0))
29
+ #' # Access metric functions
30
+ #' cm$accuracy()
31
+ #' }
32
+ #' @seealso
33
+ #' \href{https://aif360.readthedocs.io/en/latest/modules/metrics.html#classification-metric}{Explore available classification metrics explanations here}
34
+ #'
35
+ #' Available metrics:
36
+ #' \itemize{
37
+ #' \item accuracy
38
+ #' \item average_abs_odds_difference
39
+ #' \item average_odds_difference
40
+ #' \item between_all_groups_coefficient_of_variation
41
+ #' \item between_all_groups_generalized_entropy_index
42
+ #' \item between_all_groups_theil_index
43
+ #' \item between_group_coefficient_of_variation
44
+ #' \item between_group_generalized_entropy_index
45
+ #' \item between_group_theil_index
46
+ #' \item binary_confusion_matrix
47
+ #' \item coefficient_of_variation
48
+ #' \item disparate_impact
49
+ #' \item equal_opportunity_difference
50
+ #' \item error_rate
51
+ #' \item error_rate_difference
52
+ #' \item error_rate_ratio
53
+ #' \item false_discovery_rate
54
+ #' \item false_discovery_rate_difference
55
+ #' \item false_discovery_rate_ratio
56
+ #' \item false_negative_rate
57
+ #' \item false_negative_rate_difference
58
+ #' \item false_negative_rate_ratio
59
+ #' \item false_omission_rate
60
+ #' \item false_omission_rate_difference
61
+ #' \item false_omission_rate_ratio
62
+ #' \item false_positive_rate
63
+ #' \item false_positive_rate_difference
64
+ #' \item false_positive_rate_ratio
65
+ #' \item generalized_binary_confusion_matrix
66
+ #' \item generalized_entropy_index
67
+ #' \item generalized_false_negative_rate
68
+ #' \item generalized_false_positive_rate
69
+ #' \item generalized_true_negative_rate
70
+ #' \item generalized_true_positive_rate
71
+ #' \item negative_predictive_value
72
+ #' \item num_false_negatives
73
+ #' \item num_false_positives
74
+ #' \item num_generalized_false_negatives
75
+ #' \item num_generalized_false_positives
76
+ #' \item num_generalized_true_negatives
77
+ #' \item num_generalized_true_positives
78
+ #' \item num_pred_negatives
79
+ #' \item num_pred_positives
80
+ #' \item num_true_negatives
81
+ #' \item num_true_positives
82
+ #' \item performance_measures
83
+ #' \item positive_predictive_value
84
+ #' \item power
85
+ #' \item precision
86
+ #' \item recall
87
+ #' \item selection_rate
88
+ #' \item sensitivity
89
+ #' \item specificity
90
+ #' \item statistical_parity_difference
91
+ #' \item theil_index
92
+ #' \item true_negative_rate
93
+ #' \item true_positive_rate
94
+ #' \item true_positive_rate_difference
95
+ #'
96
+ #' }
97
+ #' @export
98
+ #' @importFrom reticulate py_suppress_warnings
99
+ #'
100
+ classification_metric <- function(dataset,
101
+ classified_dataset,
102
+ unprivileged_groups,
103
+ privileged_groups){
104
+
105
+ u_dict <- dict_fn(unprivileged_groups)
106
+
107
+ p_dict <- dict_fn(privileged_groups)
108
+
109
+
110
+ return(metrics$ClassificationMetric(dataset,
111
+ classified_dataset,
112
+ unprivileged_groups = u_dict,
113
+ privileged_groups = p_dict))
114
+ }
src/aif360/aif360-r/R/dataset.R ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' AIF360 dataset
#' @description
#' Function to create an AIF360-compatible binary label dataset.
#' @param data_path Path to the input CSV file or an R dataframe.
#' @param favor_label Label value which is considered favorable (i.e. "positive").
#' @param unfavor_label Label value which is considered unfavorable (i.e. "negative").
#' @param unprivileged_protected_attribute A protected attribute value which is considered unprivileged from a fairness perspective.
#' @param privileged_protected_attribute A protected attribute value which is considered privileged from a fairness perspective.
#' @param target_column Name describing the label.
#' @param protected_attribute A feature for which fairness is desired.
#' @usage
#' binary_label_dataset(data_path, favor_label, unfavor_label,
#'                      unprivileged_protected_attribute,
#'                      privileged_protected_attribute,
#'                      target_column, protected_attribute)
#' @examples
#' \dontrun{
#' load_aif360_lib()
#' # Input dataset
#' data <- data.frame("feat" = c(0,0,1,1,1,1,0,1,1,0), "label" = c(1,0,0,1,0,0,1,0,1,1))
#' # Create aif compatible input dataset
#' act <- aif360::binary_label_dataset(data_path = data, favor_label=0, unfavor_label=1,
#'                                     unprivileged_protected_attribute=0,
#'                                     privileged_protected_attribute=1,
#'                                     target_column="label", protected_attribute="feat")
#' }
#' @seealso
#' \href{https://aif360.readthedocs.io/en/latest/modules/datasets.html#binary-label-dataset}{More about AIF binary dataset.}
#' @export
#' @importFrom reticulate py_suppress_warnings py_dict r_to_py
#' @importFrom utils file_test
#'
binary_label_dataset <- function(data_path, favor_label,
                                 unfavor_label, unprivileged_protected_attribute,
                                 privileged_protected_attribute,
                                 target_column, protected_attribute) {

  # Accept either an in-memory data.frame or a path to a CSV on disk.
  if (is.data.frame(data_path)) {
    dataframe <- r_to_py(data_path)
  } else if (file_test("-f", data_path) == TRUE) {
    dataframe <- input_data(data_path)
  } else {
    # Previously an invalid input fell through and later produced an
    # obscure "object 'dataframe' not found" error; fail fast instead.
    stop("`data_path` must be a data.frame or the path to an existing CSV file")
  }

  # aif360 expects list-of-list / list wrappers around scalar arguments.
  unprivileged_protected_list <- list_of_list(unprivileged_protected_attribute)
  privileged_protected_list <- list_of_list(privileged_protected_attribute)
  target_column_list <- list_fn(target_column)
  protected_attribute_list <- list_fn(protected_attribute)

  return(datasets$BinaryLabelDataset(df = dataframe,
                                     favorable_label = favor_label,
                                     unfavorable_label = unfavor_label,
                                     unprivileged_protected_attributes = unprivileged_protected_list,
                                     privileged_protected_attributes = privileged_protected_list,
                                     label_names = target_column_list,
                                     protected_attribute_names = protected_attribute_list))
}
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
src/aif360/aif360-r/R/dataset_metric.R ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Dataset Metric
2
+ #' @description
3
+ #' Class to provide access to functions for computing metrics on an aif360 compatible dataset
4
+ #'
5
+ #' @param data A aif360 compatible dataset
6
+ #' @param privileged_groups Privileged groups. List containing privileged protected attribute name and value of the privileged protected attribute.
7
+ #' @param unprivileged_groups Unprivileged groups. List containing unprivileged protected attribute name and value of the unprivileged protected attribute.
8
+ #' @usage
9
+ #' dataset_metric(data, privileged_groups, unprivileged_groups)
10
+ #' @examples
11
+ #' \dontrun{
12
+ #' load_aif360_lib()
13
+ #' data <- data.frame("feat" = c(0,0,1,1,1,1,0,1,1,0), "label" = c(1,0,0,1,0,0,1,0,1,1))
14
+ #' # Create aif compatible dataset
15
+ #' dd <- aif360::binary_label_dataset(data_path = data,
16
+ #' favor_label=0, unfavor_label=1,
17
+ #' unprivileged_protected_attribute=0,
18
+ #' privileged_protected_attribute=1,
19
+ #' target_column="label", protected_attribute="feat")
20
+ #' # Create an instance of dataset metric
21
+ #' dm <- dataset_metric(dd, list('feat', 1), list('feat',2))
22
+ #' # Access metric functions
23
+ #' dm$num_instances()
24
+ #' }
25
+ #' @seealso
26
+ #' \href{https://aif360.readthedocs.io/en/latest/modules/metrics.html#dataset-metric}{Explore available dataset metrics here}
27
+ #'
28
+ #' Available metric: num_instances
29
+ #' @noRd
30
+ #' @importFrom reticulate py_suppress_warnings import
31
+ #'
32
+ dataset_metric <- function(data,
33
+ privileged_groups,
34
+ unprivileged_groups){
35
+
36
+ p_dict <- dict_fn(privileged_groups)
37
+ u_dict <- dict_fn(unprivileged_groups)
38
+
39
+ return(metrics$DatasetMetric(data,
40
+ privileged_groups = p_dict,
41
+ unprivileged_groups = u_dict))
42
+ }
src/aif360/aif360-r/R/import.R ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' load functions
2
+ #' @export
3
+ #'
4
+ load_aif360_lib <- function() {
5
+ e <- globalenv()
6
+ bindings <- c("datasets", "metrics", "pre_algo", "in_algo", "post_algo", "tf")
7
+ if (!all(bindings %in% ls(e))){
8
+ e$datasets <- import("aif360.datasets")
9
+ e$metrics <- import("aif360.metrics")
10
+ e$pre_algo <- import("aif360.algorithms.preprocessing")
11
+ e$in_algo <- import("aif360.algorithms.inprocessing")
12
+ e$post_algo <- import("aif360.algorithms.postprocessing")
13
+ e$tf <- import("tensorflow")
14
+ lockBinding("datasets", e)
15
+ lockBinding("metrics", e)
16
+ lockBinding("pre_algo", e)
17
+ lockBinding("in_algo", e)
18
+ lockBinding("post_algo", e)
19
+ lockBinding("tf", e)
20
+ } else {
21
+ message("The aif360 functions have already been loaded. You can begin using the package.")
22
+ }
23
+ }
src/aif360/aif360-r/R/inprocessing_adversarial_debiasing.R ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Adversarial Debiasing
2
+ #' @description Adversarial debiasing is an in-processing technique that learns a classifier to maximize prediction accuracy
3
+ #' and simultaneously reduce an adversary's ability to determine the protected attribute from the predictions
4
+ #' @param unprivileged_groups A list with two values: the column of the protected class and the value indicating representation for unprivileged group.
5
+ #' @param privileged_groups A list with two values: the column of the protected class and the value indicating representation for privileged group.
6
+ #' @param scope_name Scope name for the tensorflow variables.
7
+ #' @param sess tensorflow session
8
+ #' @param seed Seed to make \code{predict} repeatable. If not, \code{NULL}, must be an integer.
9
+ #' @param adversary_loss_weight Hyperparameter that chooses the strength of the adversarial loss.
10
+ #' @param num_epochs Number of training epochs. Must be an integer.
11
+ #' @param batch_size Batch size. Must be an integer.
12
+ #' @param classifier_num_hidden_units Number of hidden units in the classifier model. Must be an integer.
13
+ #' @param debias Learn a classifier with or without debiasing.
14
+ #' @examples
15
+ #' \dontrun{
16
+ #' load_aif360_lib()
17
+ #' ad <- adult_dataset()
18
+ #' p <- list("race", 1)
19
+ #' u <- list("race", 0)
20
+ #'
21
+ #' sess <- tf$compat$v1$Session()
22
+ #'
23
+ #' plain_model <- adversarial_debiasing(privileged_groups = p,
24
+ #' unprivileged_groups = u,
25
+ #' scope_name = "debiased_classifier",
26
+ #' debias = TRUE,
27
+ #' sess = sess)
28
+ #'
29
+ #' plain_model$fit(ad)
30
+ #' ad_nodebiasing <- plain_model$predict(ad)
31
+ #' }
32
+ #' @export
33
+ #'
34
+ adversarial_debiasing <- function(unprivileged_groups,
35
+ privileged_groups,
36
+ scope_name = "current",
37
+ sess = tf$compat$v1$Session(),
38
+ seed = NULL,
39
+ adversary_loss_weight = 0.1,
40
+ num_epochs = 50L,
41
+ batch_size = 128L,
42
+ classifier_num_hidden_units = 200L,
43
+ debias = TRUE) {
44
+
45
+
46
+
47
+ unprivileged_dict <- dict_fn(unprivileged_groups)
48
+ privileged_dict <- dict_fn(privileged_groups)
49
+
50
+ # run check for variables that must be integers
51
+ int_vars <- list(num_epochs = num_epochs, batch_size = batch_size, classifier_num_hidden_units = classifier_num_hidden_units)
52
+
53
+ if (!is.null(seed)) int_vars <- append(int_vars, c(seed = seed))
54
+
55
+ is_int <- sapply(int_vars, is.integer)
56
+ int_varnames <- names(int_vars)
57
+
58
+ if (any(!is_int)) stop(paste(int_varnames[!is_int], collapse = ", "), " must be integer(s)")
59
+
60
+
61
+
62
+ ad <- in_algo$AdversarialDebiasing(unprivileged_dict,
63
+ privileged_dict,
64
+ scope_name = scope_name,
65
+ sess = sess,
66
+ seed = seed,
67
+ adversary_loss_weight = adversary_loss_weight,
68
+ num_epochs = num_epochs,
69
+ batch_size = batch_size,
70
+ classifier_num_hidden_units = classifier_num_hidden_units,
71
+ debias = debias)
72
+ return(ad)
73
+ }
src/aif360/aif360-r/R/inprocessing_prejudice_remover.R ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Prejudice Remover
2
+ #' @description Prejudice remover is an in-processing technique that adds a discrimination-aware regularization term to the learning objective
3
+ #' @param eta fairness penalty parameter
4
+ #' @param sensitive_attr name of protected attribute
5
+ #' @param class_attr label name
6
+ #' @usage prejudice_remover(eta=1.0, sensitive_attr='',class_attr='')
7
+ #' @examples
8
+ #' \dontrun{
9
+ #' # An example using the Adult Dataset
10
+ #' load_aif360_lib()
11
+ #' ad <- adult_dataset()
12
+ #' model <- prejudice_remover(class_attr = "income-per-year", sensitive_attr = "race")
13
+ #' model$fit(ad)
14
+ #' ad_pred <- model$predict(ad)
15
+ #'}
16
+ #' @export
17
+ #'
18
+ prejudice_remover <- function(eta=1.0,
19
+ sensitive_attr='',
20
+ class_attr=''){
21
+
22
+ pr <- in_algo$PrejudiceRemover(eta,
23
+ sensitive_attr,
24
+ class_attr)
25
+ return(pr)
26
+ }
src/aif360/aif360-r/R/postprocessing_reject_option_classification.R ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Reject option classification
2
+ #'
3
+ #' @description Reject option classification is a postprocessing technique that gives
4
+ #' favorable outcomes to unpriviliged groups and unfavorable outcomes to
5
+ #' priviliged groups in a confidence band around the decision boundary with
6
+ #' the highest uncertainty.
7
+ #' @param unprivileged_groups A list epresentation for unprivileged group.
8
+ #' @param privileged_groups A list representation for privileged group.
9
+ #' @param low_class_thresh Smallest classification threshold to use in the optimization. Should be between 0. and 1.
10
+ #' @param high_class_thresh Highest classification threshold to use in the optimization. Should be between 0. and 1.
11
+ #' @param num_class_thresh Number of classification thresholds between low_class_thresh and high_class_thresh for the optimization search. Should be > 0.
12
+ #' @param num_ROC_margin Number of relevant ROC margins to be used in the optimization search. Should be > 0.
13
+ #' @param metric_name Name of the metric to use for the optimization. Allowed options are "Statistical parity difference", "Average odds difference", "Equal opportunity difference".
14
+ #' @param metric_ub Upper bound of constraint on the metric value
15
+ #' @param metric_lb Lower bound of constraint on the metric value
16
+ #' @examples
17
+ #' \dontrun{
18
+ #' # Example with Adult Dataset
19
+ #' load_aif360_lib()
20
+ #' ad <- adult_dataset()
21
+ #' p <- list("race",1)
22
+ #' u <- list("race", 0)
23
+ #'
24
+ #' col_names <- c(ad$feature_names, "label")
25
+ #' ad_df <- data.frame(ad$features, ad$labels)
26
+ #' colnames(ad_df) <- col_names
27
+ #'
28
+ #' lr <- glm(label ~ ., data=ad_df, family=binomial)
29
+ #'
30
+ #' ad_prob <- predict(lr, ad_df)
31
+ #' ad_pred <- factor(ifelse(ad_prob> 0.5,1,0))
32
+ #'
33
+ #' ad_df_pred <- data.frame(ad_df)
34
+ #' ad_df_pred$label <- as.character(ad_pred)
35
+ #' colnames(ad_df_pred) <- c(ad$feature_names, 'label')
36
+ #'
37
+ #' ad_ds <- binary_label_dataset(ad_df, target_column='label', favor_label = 1,
38
+ #' unfavor_label = 0, unprivileged_protected_attribute = 0,
39
+ #' privileged_protected_attribute = 1, protected_attribute='race')
40
+ #'
41
+ #' ad_ds_pred <- binary_label_dataset(ad_df_pred, target_column='label', favor_label = 1,
42
+ #' unfavor_label = 0, unprivileged_protected_attribute = 0,
43
+ #' privileged_protected_attribute = 1, protected_attribute='race')
44
+ #'
45
+ #' roc <- reject_option_classification(unprivileged_groups = u,
46
+ #' privileged_groups = p,
47
+ #' low_class_thresh = 0.01,
48
+ #' high_class_thresh = 0.99,
49
+ #' num_class_thresh = as.integer(100),
50
+ #' num_ROC_margin = as.integer(50),
51
+ #' metric_name = "Statistical parity difference",
52
+ #' metric_ub = 0.05,
53
+ #' metric_lb = -0.05)
54
+ #'
55
+ #' roc <- roc$fit(ad_ds, ad_ds_pred)
56
+ #'
57
+ #' ds_transformed_pred <- roc$predict(ad_ds_pred)
58
+ #' }
59
+ #' @export
60
+ #'
61
+ reject_option_classification <- function(unprivileged_groups,
62
+ privileged_groups,
63
+ low_class_thresh=0.01,
64
+ high_class_thresh=0.99,
65
+ num_class_thresh=as.integer(100),
66
+ num_ROC_margin=as.integer(50),
67
+ metric_name='Statistical parity difference',
68
+ metric_ub=0.05,
69
+ metric_lb=-0.05){
70
+
71
+ u_dict <- dict_fn(unprivileged_groups)
72
+ p_dict <- dict_fn(privileged_groups)
73
+
74
+ return(post_algo$RejectOptionClassification(u_dict,
75
+ p_dict,
76
+ low_class_thresh,
77
+ high_class_thresh,
78
+ num_class_thresh,
79
+ num_ROC_margin,
80
+ metric_name,
81
+ metric_ub,
82
+ metric_lb))
83
+ }
84
+
85
+
src/aif360/aif360-r/R/preprocessing_disparate_impact_remover.R ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Disparate Impact Remover
2
+ #' @description Disparate impact remover is a preprocessing technique that edits feature values increase group fairness while preserving rank-ordering within groups
3
+ #' @param repair_level Repair amount. 0.0 is no repair while 1.0 is full repair.
4
+ #' @param sensitive_attribute Single protected attribute with which to do repair.
5
+ #' @usage disparate_impact_remover(repair_level = 1.0, sensitive_attribute = '')
6
+ #' @examples
7
+ #' \dontrun{
8
+ #' # An example using the Adult Dataset
9
+ #' load_aif360_lib()
10
+ #' ad <- adult_dataset()
11
+ #' p <- list("race", 1)
12
+ #' u <- list("race", 0)
13
+ #'
14
+ #' di <- disparate_impact_remover(repair_level = 1.0, sensitive_attribute = "race")
15
+ #' rp <- di$fit_transform(ad)
16
+ #'
17
+ #' di_2 <- disparate_impact_remover(repair_level = 0.8, sensitive_attribute = "race")
18
+ #' rp_2 <- di_2$fit_transform(ad)
19
+ #' }
20
+ #' @export
21
+ #'
22
+ disparate_impact_remover <- function(repair_level=1.0, sensitive_attribute='') {
23
+ dr <- pre_algo$DisparateImpactRemover(repair_level, sensitive_attribute)
24
+ return (dr)
25
+ }
26
+
27
+
src/aif360/aif360-r/R/preprocessing_reweighing.R ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Reweighing
2
+ #' @description Reweighing is a preprocessing technique that weights the examples in each (group, label) combination differently to ensure fairness before classification
3
+ #' @param unprivileged_groups a list with two values: the column of the protected class and the value indicating representation for unprivileged group
4
+ #' @param privileged_groups a list with two values: the column of the protected class and the value indicating representation for privileged group
5
+ #' @usage reweighing(unprivileged_groups, privileged_groups)
6
+ #' @examples
7
+ #' \dontrun{
8
+ #' # An example using the Adult Dataset
9
+ #' load_aif360_lib()
10
+ #' ad <- adult_dataset()
11
+ #' p <- list("race", 1)
12
+ #' u <- list("race", 0)
13
+ #' rw <- reweighing(u,p)
14
+ #' rw$fit(ad)
15
+ #' ad_transformed <- rw$transform(ad)
16
+ #' ad_fit_transformed <- rw$fit_transform(ad)
17
+ #' }
18
+ #' @export
19
+ #'
20
+ reweighing <- function(unprivileged_groups, privileged_groups) {
21
+ unprivileged_dict <- dict_fn(unprivileged_groups)
22
+ privileged_dict <- dict_fn(privileged_groups)
23
+ rw <- pre_algo$Reweighing(unprivileged_dict, privileged_dict)
24
+ return (rw)
25
+ }
src/aif360/aif360-r/R/standard_datasets.R ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Adult Census Income Dataset
2
+ #' @export
3
+ adult_dataset <- function(){
4
+ return (datasets$AdultDataset())
5
+ }
6
+
7
+ #' Bank Dataset
8
+ #' @export
9
+ bank_dataset <- function(){
10
+ return (datasets$BankDataset())
11
+ }
12
+
13
+ #' Compas Dataset
14
+ #' @export
15
+ compas_dataset <- function(){
16
+ return (datasets$CompasDataset())
17
+ }
18
+
19
+ #' German Dataset
20
+ #' @export
21
+ german_dataset <- function(){
22
+ return (datasets$GermanDataset())
23
+ }
24
+
25
+ #' Law School GPA Dataset
26
+ #'@seealso
27
+ #' \href{https://aif360.readthedocs.io/en/latest/modules/generated/aif360.datasets.LawSchoolGPADataset.html#aif360.datasets.LawSchoolGPADataset}{More about the Law School GPA dataset.}
28
+ #' @export
29
+ law_school_gpa_dataset <- function(){
30
+ return (datasets$LawSchoolGPADataset())
31
+ }
src/aif360/aif360-r/R/utils.R ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #' Install aif360 and its dependencies
2
+ #'
3
+ #' @inheritParams reticulate::conda_list
4
+ #'
5
+ #' @param method Installation method. By default, "auto" automatically finds a
6
+ #' method that will work in the local environment. Change the default to force
7
+ #' a specific installation method. Note that the "virtualenv" method is not
8
+ #' available on Windows. Note also
9
+ #' that since this command runs without privilege the "system" method is
10
+ #' available only on Windows.
11
+ #'
12
+ #' @param version AIF360 version to install. Specify "default" to install
13
+ #' the latest release.
14
+ #'
15
+ #' @param envname Name of Python environment to install within
16
+ #'
17
+ #' @param extra_packages Additional Python packages to install.
18
+ #'
19
+ #' @param restart_session Restart R session after installing (note this will
20
+ #' only occur within RStudio).
21
+ #'
22
+ #' @param conda_python_version the python version installed in the created conda
23
+ #' environment. Python 3.6 is installed by default.
24
+ #'
25
+ #' @param ... other arguments passed to [reticulate::conda_install()] or
26
+ #' [reticulate::virtualenv_install()].
27
+ #'
28
+ #'
29
+ #' @export
30
+ install_aif360 <- function(method = c("auto", "virtualenv", "conda"),
31
+ conda = "auto",
32
+ version = "default",
33
+ envname = NULL,
34
+ extra_packages = NULL,
35
+ restart_session = TRUE,
36
+ conda_python_version = "3.7",
37
+ ...) {
38
+
39
+ method <- match.arg(method)
40
+
41
+ reticulate::py_install(
42
+ packages = c("aif360", "numba", "BlackBoxAuditing", "tensorflow>=1.13.1,<2", "pandas",
43
+ "fairlearn==0.4.6", "protobuf==3.20.1"),
44
+ envname = envname,
45
+ method = method,
46
+ conda = conda,
47
+ python_version = conda_python_version,
48
+ pip = TRUE,
49
+ ...
50
+ )
51
+
52
+ cat("\nInstallation complete.\n\n")
53
+
54
+ if (restart_session && rstudioapi::hasFun("restartSession"))
55
+ rstudioapi::restartSession()
56
+
57
+ invisible(NULL)
58
+ }
59
+
60
#' Read CSV file
#' @param inp data file
#' @noRd
#' @importFrom utils read.csv
#'
input_data <- function(inp) {
  read.csv(inp)
}

#' create a list
#' @param i input for function
#' @noRd
#'
list_fn <- function(i) {
  list(i)
}

#' create a list of list
#' @param i input for function
#' @noRd
#'
list_of_list <- function(i) {
  list(list(i))
}

#' Create dictionary
#' @param values input
#' @noRd
#' @importFrom reticulate py_dict
#'
dict_fn <- function(values) {
  # Build a non-converting Python dict from the (name, value) pair and
  # wrap it in a one-element vector, as the aif360 constructors expect.
  keys <- c(values[[1]])
  vals <- c(values[[2]])
  c(py_dict(keys, vals, convert = FALSE))
}
src/aif360/aif360-r/R/zzz.R ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
## Declare the bindings created at runtime by load_aif360_lib() so that
## R CMD check does not flag them as undefined globals.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(c("datasets", "metrics", "tf",
                           "pre_algo", "in_algo", "post_algo"))
}
Globals <- list()
src/aif360/aif360-r/README.Rmd ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ output: github_document
3
+ ---
4
+
5
+ <!-- README.md is generated from README.Rmd. Please edit that file -->
6
+
7
+ ```{r, include = FALSE}
8
+ knitr::opts_chunk$set(
9
+ collapse = TRUE,
10
+ comment = "#>",
11
+ fig.path = "man/figures/README-",
12
+ out.width = "100%"
13
+ )
14
+ library(aif360)
15
+ ```
16
+
17
+ # AI Fairness 360 (AIF360) R Package
18
+
19
+ <!-- badges: start -->
20
+ [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/aif360)](https://cran.r-project.org/package=aif360)
21
+ <!-- badges: end -->
22
+
23
+ ## Overview
24
+
25
+ The AI Fairness 360 toolkit is an open-source library to help detect and mitigate bias in machine learning models.
26
+ The AI Fairness 360 R package includes a comprehensive set of metrics for datasets and models to test for biases, explanations for these metrics, and algorithms to mitigate bias in datasets and models.
27
+
28
+ ## Installation
29
+
30
+ Install the CRAN version:
31
+
32
+ ```r
33
+ install.packages("aif360")
34
+ ```
35
+
36
+ Or install the development version from GitHub:
37
+
38
+ ``` r
39
+ # install.packages("devtools")
40
+ devtools::install_github("Trusted-AI/AIF360/aif360/aif360-r")
41
+ ```
42
+
43
+ Then, use the install_aif360() function to install AIF360:
44
+
45
+ ``` r
46
+ library(aif360)
47
+ install_aif360()
48
+ ```
49
+
50
+ ## Installation methods
51
+
52
+ AIF360 is distributed as a Python package and so needs to be installed within a Python environment on your system. By default, the install_aif360() function attempts to install AIF360 within an isolated Python environment (“r-reticulate”).
53
+
54
+ You can check using `reticulate::conda_python()` and `reticulate::py_config()`
55
+
56
+ ### Suggested steps
57
+
58
+ 1) Install reticulate and check if you have miniconda installed. If you do, go to step 2.
59
+
60
+ ```r
61
+ install.packages("reticulate")
62
+ reticulate::conda_list()
63
+ ```
64
+ If you get an error: `Error: Unable to find conda binary. Is Anaconda installed?`, please install
65
+ miniconda
66
+
67
+ ```r
68
+ reticulate::install_miniconda()
69
+ ```
70
+ If everything worked, you should get the message:
71
+
72
+ `* Miniconda has been successfully installed at '/home/rstudio/.local/share/r-miniconda'.`
73
+
74
+ You can double check:
75
+
76
+ ```
77
+ reticulate::conda_list()
78
+ ```
79
+ You will get something like this:
80
+
81
+ ```
82
+ name python
83
+ 1 r-miniconda /home/rstudio/.local/share/r-miniconda/bin/python
84
+ 2 r-reticulate /home/rstudio/.local/share/r-miniconda/envs/r-reticulate/bin/python
85
+ ```
86
+
87
+
88
+ 2) You can create a new conda env and then configure which version of Python to use:
89
+
90
+ ```r
91
+ reticulate::conda_create(envname = "r-test")
92
+ reticulate::use_miniconda(condaenv = "r-test", required = TRUE)
93
+ ```
94
+ Check that everything is working `reticulate::py_config()`.
95
+
96
+ 3) If you haven't yet, please install the aif360 package `install.packages("aif360")` and then
97
+ install aif360 dependencies
98
+
99
+ ```r
100
+ aif360::install_aif360(envname = "r-test")
101
+ ```
102
+ Note that this step should take a few minutes and the R session will restart.
103
+
104
+ 4) You can now activate your Python environment
105
+ ```r
106
+ reticulate::use_miniconda(condaenv = "r-test", required = TRUE)
107
+ ```
108
+
109
+ ## Getting Started
110
+
111
+ ```r
112
+ library(aif360)
113
+ load_aif360_lib()
114
+ ```
115
+
116
+ ``` r
117
+ # load a toy dataset
118
+ data <- data.frame("feature1" = c(0,0,1,1,1,1,0,1,1,0),
119
+ "feature2" = c(0,1,0,1,1,0,0,0,0,1),
120
+ "label" = c(1,0,0,1,0,0,1,0,1,1))
121
+
122
+ # format the dataset
123
+ formatted_dataset <- aif360::binary_label_dataset(data_path = data,
124
+ favor_label = 0,
125
+ unfavor_label = 1,
126
+ unprivileged_protected_attribute = 0,
127
+ privileged_protected_attribute = 1,
128
+ target_column = "label",
129
+ protected_attribute = "feature1")
130
+
131
+ ```
132
+
133
+ ## Troubleshooting
134
+
135
+ If you encounter any errors during the installation process, look for your issue here and try the solutions.
136
+
137
+ ### Locked binding
138
+ If you get an error: `cannot change value of locked binding`, please restart the R session. Then try reactivating your Python environment and running the following commands exactly once:
139
+ ```r
140
+ library(aif360)
141
+ load_aif360_lib()
142
+ ```
143
+
144
+ ## Contributing
145
+
146
+ If you'd like to contribute to the development of aif360, please read [these guidelines](CONTRIBUTING.md).
147
+
148
+ Please note that the aif360 project is released with a [Contributor Code of Conduct](CODEOFCONDUCT.md). By contributing to this project, you agree to abide by its terms.
149
+
150
+
src/aif360/aif360-r/README.md ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- README.md is generated from README.Rmd. Please edit that file -->
3
+
4
+ # AI Fairness 360 (AIF360) R Package
5
+
6
+ <!-- badges: start -->
7
+
8
+ [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/aif360)](https://cran.r-project.org/package=aif360)
9
+ <!-- badges: end -->
10
+
11
+ ## Overview
12
+
13
+ The AI Fairness 360 toolkit is an open-source library to help detect and
14
+ mitigate bias in machine learning models. The AI Fairness 360 R package
15
+ includes a comprehensive set of metrics for datasets and models to test
16
+ for biases, explanations for these metrics, and algorithms to mitigate
17
+ bias in datasets and models.
18
+
19
+ ## Installation
20
+
21
+ Install the CRAN version:
22
+
23
+ ``` r
24
+ install.packages("aif360")
25
+ ```
26
+
27
+ Or install the development version from GitHub:
28
+
29
+ ``` r
30
+ # install.packages("devtools")
31
+ devtools::install_github("Trusted-AI/AIF360/aif360/aif360-r")
32
+ ```
33
+
34
+ Then, use the install_aif360() function to install AIF360:
35
+
36
+ ``` r
37
+ library(aif360)
38
+ install_aif360()
39
+ ```
40
+
41
+ ## Installation methods
42
+
43
+ AIF360 is distributed as a Python package and so needs to be installed
44
+ within a Python environment on your system. By default, the
45
+ install_aif360() function attempts to install AIF360 within an isolated
46
+ Python environment (“r-reticulate”).
47
+
48
+ You can check using `reticulate::conda_python()` and
49
+ `reticulate::py_config()`
50
+
51
+ ### Suggested steps
52
+
53
+ 1) Install reticulate and check if you have miniconda installed. If you
54
+ do, go to step 2.
55
+
56
+ ``` r
57
+ install.packages("reticulate")
58
+ reticulate::conda_list()
59
+ ```
60
+
61
+ If you get an error:
62
+ `Error: Unable to find conda binary. Is Anaconda installed?`, please
63
+ install miniconda
64
+
65
+ ``` r
66
+ reticulate::install_miniconda()
67
+ ```
68
+
69
+ If everything worked, you should get the message:
70
+
71
+ `* Miniconda has been successfully installed at '/home/rstudio/.local/share/r-miniconda'.`
72
+
73
+ You can double check:
74
+
75
+ reticulate::conda_list()
76
+
77
+ You will get something like this:
78
+
79
+ name python
80
+ 1 r-miniconda /home/rstudio/.local/share/r-miniconda/bin/python
81
+ 2 r-reticulate /home/rstudio/.local/share/r-miniconda/envs/r-reticulate/bin/python
82
+
83
+ 2) You can create a new conda env and then configure which version of
84
+ Python to use:
85
+
86
+ ``` r
87
+ reticulate::conda_create(envname = "r-test")
88
+ reticulate::use_miniconda(condaenv = "r-test", required = TRUE)
89
+ ```
90
+
91
+ Check that everything is working `reticulate::py_config()`.
92
+
93
+ 3) If you haven’t yet, please install the aif360 package
94
+ `install.packages("aif360")` and then install aif360 dependencies
95
+
96
+ ``` r
97
+ aif360::install_aif360(envname = "r-test")
98
+ ```
99
+
100
+ Note that this step should take a few minutes and the R session will
101
+ restart.
102
+
103
+ 4) You can now activate your Python environment
104
+
105
+ ``` r
106
+ reticulate::use_miniconda(condaenv = "r-test", required = TRUE)
107
+ ```
108
+
109
+ ## Getting Started
110
+
111
+ ``` r
112
+ library(aif360)
113
+ load_aif360_lib()
114
+ ```
115
+
116
+ ``` r
117
+ # load a toy dataset
118
+ data <- data.frame("feature1" = c(0,0,1,1,1,1,0,1,1,0),
119
+ "feature2" = c(0,1,0,1,1,0,0,0,0,1),
120
+ "label" = c(1,0,0,1,0,0,1,0,1,1))
121
+
122
+ # format the dataset
123
+ formatted_dataset <- aif360::binary_label_dataset(data_path = data,
124
+ favor_label = 0,
125
+ unfavor_label = 1,
126
+ unprivileged_protected_attribute = 0,
127
+ privileged_protected_attribute = 1,
128
+ target_column = "label",
129
+ protected_attribute = "feature1")
130
+ ```
131
+
132
+ ## Troubleshooting
133
+
134
+ If you encounter any errors during the installation process, look for
135
+ your issue here and try the solutions.
136
+
137
+ ### Locked binding
138
+
139
+ If you get an error: `cannot change value of locked binding`, please
140
+ restart the R session. Then try reactivating your Python environment and
141
+ running the following commands exactly once:
142
+
143
+ ``` r
144
+ library(aif360)
145
+ load_aif360_lib()
146
+ ```
147
+
148
+ ## Contributing
149
+
150
+ If you’d like to contribute to the development of aif360, please read
151
+ [these guidelines](CONTRIBUTING.md).
152
+
153
+ Please note that the aif360 project is released with a [Contributor Code
154
+ of Conduct](CODEOFCONDUCT.md). By contributing to this project, you
155
+ agree to abide by its terms.
src/aif360/aif360-r/cran-comments.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Test environments
2
+ * local R installation, R 4.0.0
3
+ * ubuntu 16.04 (on travis-ci), R 4.0.0
4
+ * win-builder (devel)
5
+
6
+ ## R CMD check results
7
+
8
+ 0 errors | 0 warnings | 1 note
9
+
10
+ * This is a new release.
src/aif360/aif360-r/inst/examples/test.R ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Smoke test: install the Python backend, load the bindings, and build a
# binary label dataset from the bundled example CSV.
library(aif360)
install_aif360()

load_aif360_lib()

dd <- aif360::binary_label_dataset(
  data_path = system.file("extdata", "data.csv", package = "aif360"),
  favor_label = 0,
  unfavor_label = 1,
  unprivileged_protected_attribute = 0,
  privileged_protected_attribute = 1,
  target_column = "income",
  protected_attribute = "sex")

# Inspect the resulting Python object.
dd$favorable_label
dd$labels
dd$unfavorable_label
src/aif360/aif360-r/inst/extdata/actual_data.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
2
+ 41,0,202822,11,9,5,0,1,2,0,0,0,32,39,0
3
+ 72,0,129912,11,9,2,0,0,4,1,0,0,25,39,0
4
+ 45,2,119199,7,12,0,10,4,4,0,0,0,48,39,0
5
+ 31,4,199655,12,14,0,8,1,3,0,0,0,30,39,0
6
+ 39,2,111499,7,12,2,1,5,4,0,0,0,20,39,1
7
+ 37,4,198216,7,12,0,13,1,4,0,0,0,40,39,0
8
+ 43,4,260761,11,9,2,7,0,4,1,0,0,40,26,0
9
+ 65,6,99359,14,15,4,10,1,4,1,1086,0,60,39,0
10
+ 43,7,255835,15,10,0,1,2,4,0,0,0,40,39,0
11
+ 43,6,27242,15,10,2,3,0,4,1,0,0,50,39,0
12
+ 32,4,34066,0,6,2,6,0,0,1,0,0,40,39,0
13
+ 43,4,84661,8,11,2,12,0,4,1,0,0,45,39,0
14
+ 32,4,116138,12,14,4,13,1,1,1,0,0,11,36,0
15
+ 53,4,321865,12,14,2,4,0,4,1,0,0,40,39,1
16
+ 22,4,310152,15,10,4,11,1,4,1,0,0,40,39,0
17
+ 27,4,257302,7,12,2,13,5,4,0,0,0,38,39,0
18
+ 40,4,154374,11,9,2,7,0,4,1,0,0,40,39,1
19
+ 58,4,151910,11,9,6,1,4,4,0,0,0,40,39,0
20
+ 22,4,201490,11,9,4,1,3,4,1,0,0,20,39,0
21
+ 52,5,287927,11,9,2,4,5,4,0,15024,0,40,39,1