updated app format
Browse files
- app.py +6 -6
- chemdata.py +2 -55
app.py
CHANGED
|
@@ -25,7 +25,7 @@ from rdkit.Chem.Draw import rdMolDraw2D
|
|
| 25 |
import pandas as pd
|
| 26 |
from st_keyup import st_keyup
|
| 27 |
|
| 28 |
-
st.set_page_config(layout="
|
| 29 |
|
| 30 |
|
| 31 |
def local_css(file_name):
|
|
@@ -60,7 +60,7 @@ formatted_text = (
|
|
| 60 |
"<h1 style='text-align: center;'>"
|
| 61 |
"<span style='color: red;'>Pharmacokinetics</span>"
|
| 62 |
"<span style='color: black;'> of </span>"
|
| 63 |
-
"<span style='color: blue;'>🤫confidential
|
| 64 |
"<span style='color: black;'> molecules</span>"
|
| 65 |
"</h1>"
|
| 66 |
)
|
|
@@ -82,9 +82,9 @@ The server on which the prediction is computed will never see the molecule in cl
|
|
| 82 |
Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
|
| 83 |
Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
|
| 84 |
**Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
|
| 85 |
-
This special encryption scheme allows computations to be performed on encrypted data.
|
| 86 |
|
| 87 |
-
We use the open-source library <a href="https://
|
| 88 |
|
| 89 |
The code used for the FHE prediction is available in the open-source library
|
| 90 |
\n
|
|
@@ -103,7 +103,7 @@ st.divider()
|
|
| 103 |
|
| 104 |
st.markdown(
|
| 105 |
"<p style='text-align: center; color: grey;'>"
|
| 106 |
-
+ img_to_html("scheme2.png", width="
|
| 107 |
+ "</p>",
|
| 108 |
unsafe_allow_html=True,
|
| 109 |
)
|
|
@@ -652,7 +652,7 @@ if __name__ == "__main__":
|
|
| 652 |
st.markdown(
|
| 653 |
"""
|
| 654 |
<div style="width: 100%; text-align: center; padding: 10px;">
|
| 655 |
-
The app was built with <a href="https://
|
| 656 |
an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
|
| 657 |
</div>
|
| 658 |
""",
|
|
|
|
| 25 |
import pandas as pd
|
| 26 |
from st_keyup import st_keyup
|
| 27 |
|
| 28 |
+
st.set_page_config(layout="wide", page_title="VaultChem")
|
| 29 |
|
| 30 |
|
| 31 |
def local_css(file_name):
|
|
|
|
| 60 |
"<h1 style='text-align: center;'>"
|
| 61 |
"<span style='color: red;'>Pharmacokinetics</span>"
|
| 62 |
"<span style='color: black;'> of </span>"
|
| 63 |
+
"<span style='color: blue;'>🤫confidential</span>"
|
| 64 |
"<span style='color: black;'> molecules</span>"
|
| 65 |
"</h1>"
|
| 66 |
)
|
|
|
|
| 82 |
Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
|
| 83 |
Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
|
| 84 |
**Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
|
| 85 |
+
This special encryption scheme allows computations to be performed on encrypted data. To learn more about FHE, click [here](https://fhe.org/resources/).
|
| 86 |
|
| 87 |
+
We use the open-source library <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a> to develop safe and robust encryption technology.
|
| 88 |
|
| 89 |
The code used for the FHE prediction is available in the open-source library
|
| 90 |
\n
|
|
|
|
| 103 |
|
| 104 |
st.markdown(
|
| 105 |
"<p style='text-align: center; color: grey;'>"
|
| 106 |
+
+ img_to_html("scheme2.png", width="65%")
|
| 107 |
+ "</p>",
|
| 108 |
unsafe_allow_html=True,
|
| 109 |
)
|
|
|
|
| 652 |
st.markdown(
|
| 653 |
"""
|
| 654 |
<div style="width: 100%; text-align: center; padding: 10px;">
|
| 655 |
+
The app was built with <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a>,
|
| 656 |
an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
|
| 657 |
</div>
|
| 658 |
""",
|
chemdata.py
CHANGED
|
@@ -153,60 +153,6 @@ def compute_descriptors_from_smiles_list(SMILES):
|
|
| 153 |
return np.array(X)
|
| 154 |
|
| 155 |
|
| 156 |
-
class ProcessToxChemData:
|
| 157 |
-
def __init__(self, bits=256):
|
| 158 |
-
self.bits = int(bits)
|
| 159 |
-
if not os.path.exists("data"):
|
| 160 |
-
os.makedirs("data")
|
| 161 |
-
self.save_file = "data/" + "save_file_Tox" + str(self.bits) + ".pkl"
|
| 162 |
-
|
| 163 |
-
if os.path.exists(self.save_file):
|
| 164 |
-
with open(self.save_file, "rb") as file:
|
| 165 |
-
self.adjusted_valid_entries_per_task = pickle.load(file)
|
| 166 |
-
else:
|
| 167 |
-
url = "https://github.com/deepchem/deepchem/blob/master/datasets/tox21.csv.gz?raw=true"
|
| 168 |
-
response = requests.get(url)
|
| 169 |
-
content = gzip.decompress(response.content)
|
| 170 |
-
self.df = pd.read_csv(BytesIO(content))
|
| 171 |
-
self.process()
|
| 172 |
-
self.save_adjusted_data()
|
| 173 |
-
|
| 174 |
-
def process(self):
|
| 175 |
-
self.adjusted_valid_entries_per_task = {}
|
| 176 |
-
|
| 177 |
-
# Iterating through each task column and extracting valid entries
|
| 178 |
-
for task in self.df.columns[
|
| 179 |
-
:-2
|
| 180 |
-
]: # Excluding mol_id and smiles from the iteration
|
| 181 |
-
valid_entries = self.df.dropna(subset=[task])[["mol_id", "smiles", task]]
|
| 182 |
-
|
| 183 |
-
valid_entries["fps"] = valid_entries["smiles"].apply(
|
| 184 |
-
lambda x: generate_fingerprint(x, radius=2, bits=self.bits)
|
| 185 |
-
)
|
| 186 |
-
valid_entries = valid_entries.dropna(subset=["fps"])
|
| 187 |
-
valid_entries["descriptors"] = valid_entries["smiles"].apply(
|
| 188 |
-
lambda x: compute_descriptors_from_smiles_list([x])[0]
|
| 189 |
-
)
|
| 190 |
-
valid_entries = valid_entries.dropna(subset=["descriptors"])
|
| 191 |
-
# Shuffle the rows
|
| 192 |
-
valid_entries = valid_entries.sample(frac=1, random_state=42).reset_index(
|
| 193 |
-
drop=True
|
| 194 |
-
)
|
| 195 |
-
self.adjusted_valid_entries_per_task[task] = valid_entries
|
| 196 |
-
self.adjusted_valid_entries_per_task[
|
| 197 |
-
task
|
| 198 |
-
] = self.adjusted_valid_entries_per_task[task].rename(columns={task: "y"})
|
| 199 |
-
|
| 200 |
-
def save_adjusted_data(self):
|
| 201 |
-
with open(self.save_file, "wb") as file:
|
| 202 |
-
pickle.dump(self.adjusted_valid_entries_per_task, file)
|
| 203 |
-
|
| 204 |
-
def get_X_y(self, task):
|
| 205 |
-
X = np.float_(np.stack(self.adjusted_valid_entries_per_task[task].fps.values))
|
| 206 |
-
y = self.adjusted_valid_entries_per_task[task].y.values.astype(int)
|
| 207 |
-
return X, y
|
| 208 |
-
|
| 209 |
-
|
| 210 |
class ProcessADMEChemData:
|
| 211 |
def __init__(self, bits=512, radius=2):
|
| 212 |
self.bits = int(bits)
|
|
@@ -291,7 +237,8 @@ def load_ADME_data(task, bits=256, radius=2):
|
|
| 291 |
"""
|
| 292 |
data = ProcessADMEChemData(bits=bits, radius=radius)
|
| 293 |
X, y = data.get_X_y(task)
|
| 294 |
-
|
|
|
|
| 295 |
|
| 296 |
|
| 297 |
class ProcessGenericChemData:
|
|
|
|
| 153 |
return np.array(X)
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
class ProcessADMEChemData:
|
| 157 |
def __init__(self, bits=512, radius=2):
|
| 158 |
self.bits = int(bits)
|
|
|
|
| 237 |
"""
|
| 238 |
data = ProcessADMEChemData(bits=bits, radius=radius)
|
| 239 |
X, y = data.get_X_y(task)
|
| 240 |
+
SMILES = data.adjusted_valid_entries_per_task[task]["smiles"].values
|
| 241 |
+
return train_test_split(SMILES,X, y, test_size=0.2, random_state=42)
|
| 242 |
|
| 243 |
|
| 244 |
class ProcessGenericChemData:
|