Spaces:
Sleeping
Sleeping
Eachan Johnson
committed on
Commit
·
3d0bd0d
1
Parent(s):
9a9440b
Tidy up
Browse files
- schemist/__init__.py +3 -0
- schemist/cli.py +1 -2
- schemist/tables.py +9 -8
schemist/__init__.py
CHANGED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from importlib.metadata import version
|
| 2 |
+
|
| 3 |
+
__version__ = version("schemist")
|
schemist/cli.py
CHANGED
|
@@ -14,6 +14,7 @@ from carabiner.cliutils import clicommand, CLIOption, CLICommand, CLIApp
|
|
| 14 |
from carabiner.itertools import tenumerate
|
| 15 |
from carabiner.pd import get_formats, write_stream
|
| 16 |
|
|
|
|
| 17 |
from .collating import collate_inventory, deduplicate_file
|
| 18 |
from .converting import _TO_FUNCTIONS, _FROM_FUNCTIONS
|
| 19 |
from .generating import AA, REACTIONS
|
|
@@ -22,8 +23,6 @@ from .tables import (converter, cleaner, featurizer, assign_groups,
|
|
| 22 |
_assign_splits, splitter, _peptide_table, reactor)
|
| 23 |
from .splitting import _SPLITTERS, _GROUPED_SPLITTERS
|
| 24 |
|
| 25 |
-
__version__ = '0.0.1'
|
| 26 |
-
|
| 27 |
def _option_parser(x: Optional[List[str]]) -> Dict[str, Any]:
|
| 28 |
|
| 29 |
options = {}
|
|
|
|
| 14 |
from carabiner.itertools import tenumerate
|
| 15 |
from carabiner.pd import get_formats, write_stream
|
| 16 |
|
| 17 |
+
from . import __version__
|
| 18 |
from .collating import collate_inventory, deduplicate_file
|
| 19 |
from .converting import _TO_FUNCTIONS, _FROM_FUNCTIONS
|
| 20 |
from .generating import AA, REACTIONS
|
|
|
|
| 23 |
_assign_splits, splitter, _peptide_table, reactor)
|
| 24 |
from .splitting import _SPLITTERS, _GROUPED_SPLITTERS
|
| 25 |
|
|
|
|
|
|
|
| 26 |
def _option_parser(x: Optional[List[str]]) -> Dict[str, Any]:
|
| 27 |
|
| 28 |
options = {}
|
schemist/tables.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""Tools for processing tabular data."""
|
| 2 |
|
| 3 |
-
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
|
| 4 |
from functools import partial
|
| 5 |
|
| 6 |
try:
|
|
@@ -45,9 +45,9 @@ def _get_error_tally(df: DataFrame,
|
|
| 45 |
def converter(df: DataFrame,
|
| 46 |
column: str = 'smiles',
|
| 47 |
input_representation: str = 'smiles',
|
| 48 |
-
output_representation: Union[str,
|
| 49 |
prefix: Optional[str] = None,
|
| 50 |
-
options: Optional[
|
| 51 |
|
| 52 |
"""
|
| 53 |
|
|
@@ -59,13 +59,14 @@ def converter(df: DataFrame,
|
|
| 59 |
column_values = _get_column_values(df, column)
|
| 60 |
|
| 61 |
output_representation = cast(output_representation, to=list)
|
| 62 |
-
converters = convert_string_representation(
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
converted = {f"{prefix}{conversion_name}": cast(conversion, to=list)
|
| 67 |
for conversion_name, conversion in converters.items()}
|
| 68 |
-
|
| 69 |
df = df.assign(**converted)
|
| 70 |
|
| 71 |
return _get_error_tally(df, list(converted)), df
|
|
|
|
| 1 |
"""Tools for processing tabular data."""
|
| 2 |
|
| 3 |
+
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union
|
| 4 |
from functools import partial
|
| 5 |
|
| 6 |
try:
|
|
|
|
| 45 |
def converter(df: DataFrame,
|
| 46 |
column: str = 'smiles',
|
| 47 |
input_representation: str = 'smiles',
|
| 48 |
+
output_representation: Union[str, Iterable[str]] = 'smiles',
|
| 49 |
prefix: Optional[str] = None,
|
| 50 |
+
options: Optional[Mapping[str, Any]] = None) -> Tuple[Dict[str, int], DataFrame]:
|
| 51 |
|
| 52 |
"""
|
| 53 |
|
|
|
|
| 59 |
column_values = _get_column_values(df, column)
|
| 60 |
|
| 61 |
output_representation = cast(output_representation, to=list)
|
| 62 |
+
converters = convert_string_representation(
|
| 63 |
+
column_values,
|
| 64 |
+
output_representation=output_representation,
|
| 65 |
+
input_representation=input_representation,
|
| 66 |
+
**options,
|
| 67 |
+
)
|
| 68 |
converted = {f"{prefix}{conversion_name}": cast(conversion, to=list)
|
| 69 |
for conversion_name, conversion in converters.items()}
|
|
|
|
| 70 |
df = df.assign(**converted)
|
| 71 |
|
| 72 |
return _get_error_tally(df, list(converted)), df
|