diff --git a/config/example_params.json b/config/example_params.json
index 4bea76bc..47fce1d4 100644
--- a/config/example_params.json
+++ b/config/example_params.json
@@ -13,6 +13,7 @@
         "flaml-individual-descriptors",
         "autogluon-manifolds",
         "kerastuner-reference-embedding",
+        "kerastuner-eosce-embedding",
         "molmap"
     ]
 }
\ No newline at end of file
diff --git a/install_linux.sh b/install_linux.sh
index 5f083277..abfd1fce 100644
--- a/install_linux.sh
+++ b/install_linux.sh
@@ -31,7 +31,7 @@ python3 -m pip install autogluon.tabular[all]==0.5.2
 python3 -m pip install "xgboost==1.3.3"
 python3 -m pip install "SQLAlchemy<1.4.0"
 
-# install zairachem
+# install extra dependencies
 python3 -m pip install git+https://github.com/chembl/FPSim2.git@0.3.0
 python3 -m pip install -q -U keras-tuner==1.1.3
 
@@ -39,6 +39,9 @@ python3 -m pip install -q -U keras-tuner==1.1.3
 python3 -m pip install git+https://github.com/ersilia-os/ersilia.git
 ersilia --help
 
+# install ersilia compound embedding
+python3 -m pip install git+https://github.com/ersilia-os/compound-embedding-lite.git
+
 # install isaura
 python3 -m pip install git+https://github.com/ersilia-os/isaura.git@ce293244ad0bdd6d7d4f796d2a84b17208a87b56
 
@@ -51,5 +54,11 @@ python3 -m pip install git+https://github.com/ersilia-os/lazy-qsar.git
 # install melloddy-tuner
 python3 -m pip install git+https://github.com/melloddy/MELLODDY-TUNER.git@2.1.3
 
+# install tabpfn
+python3 -m pip install tabpfn==0.1.8
+
+# install imblearn
+python3 -m pip install imbalanced-learn==0.10.1
+
 # install zairachem
 python3 -m pip install -e .
diff --git a/zairachem/automl/binarytabpfn.py b/zairachem/automl/binarytabpfn.py
new file mode 100644
index 00000000..aad81454
--- /dev/null
+++ b/zairachem/automl/binarytabpfn.py
@@ -0,0 +1,142 @@
+import numpy as np
+from lol import LOL
+import random
+import collections
+from tabpfn import TabPFNClassifier
+from imblearn.combine import SMOTETomek
+from imblearn.over_sampling import KMeansSMOTE
+from imblearn.under_sampling import EditedNearestNeighbours
+import joblib
+
+
+class TabPFNBinaryClassifier(object):
+    def __init__(self, device="cpu", N_ensemble_configurations=4):
+        self.device = device
+        self.N_ensemble_configurations = N_ensemble_configurations
+        self.max_samples = 1000
+
+    def _get_balanced_datasets(self, X, y):
+        try:
+            smp = SMOTETomek(sampling_strategy="auto")
+            X_0, y_0 = smp.fit_resample(X, y)
+        except:
+            X_0, y_0 = X, y
+        try:
+            smp = KMeansSMOTE(sampling_strategy="auto")
+            X_1, y_1 = smp.fit_resample(X, y)
+        except:
+            X_1, y_1 = X, y
+        try:
+            smp = EditedNearestNeighbours(sampling_strategy="auto")
+            X_2, y_2 = smp.fit_resample(X, y)
+        except:
+            X_2, y_2 = X, y
+        results = [(X_0, y_0), (X_1, y_1), (X_2, y_2)]
+        return results
+
+    def _cap_samples(self, X, y):
+        if X.shape[0] <= self.max_samples:
+            return [(X, y)]
+        idxs = [i for i in range(X.shape[0])]
+        R = []
+        for _ in range(3):
+            smp_idxs = random.sample(idxs, self.max_samples)
+            X_, y_ = X[smp_idxs], y[smp_idxs]
+            if np.sum(y_) == 0:
+                continue
+            R += [(X_, y_)]
+        return R
+
+    def _get_ensemble(self, X, y):
+        R = []
+        for X_0, y_0 in self._get_balanced_datasets(X, y):
+            for X_1, y_1 in self._cap_samples(X_0, y_0):
+                R += [(X_1, y_1)]
+        return R
+
+    def fit(self, X, y):
+        self.reducer = LOL(n_components=100)
+        self.reducer.fit(X, y)
+        X = self.reducer.transform(X)
+        self.ensemble = self._get_ensemble(X, y)
+
+    def predict_proba(self, X):
+        model = TabPFNClassifier(
+            device=self.device, N_ensemble_configurations=self.N_ensemble_configurations
+        )
+        X = self.reducer.transform(X)
+        R = []
+        for X_tr, y_tr in self.ensemble:
+            # print(X_tr.shape, np.sum(y_tr))
+            model.fit(X_tr, y_tr)
+            R += [model.predict_proba(X)[:, 1]]
+            model.remove_models_from_memory()
+        R = np.array(R).T
+        y_h1 = np.mean(R, axis=1)
+        y_h0 = 1 - y_h1
+        y_h = np.array([y_h0, y_h1]).T
+        return y_h
+
+    def save(self, file_name):
+        data = {
+            "device": self.device,
+            "N_ensemble_configurations": self.N_ensemble_configurations,
+            "reducer": self.reducer,
+            "ensemble": self.ensemble,
+        }
+        joblib.dump(data, file_name)
+
+    def load(self, file_name):
+        data = joblib.load(file_name)
+        model = TabPFNBinaryClassifier(
+            device=data["device"],
+            N_ensemble_configurations=data["N_ensemble_configurations"],
+        )
+        model.ensemble = data["ensemble"]
+        model.reducer = data["reducer"]
+        return TabPFNClassifierArtifact(model, 0.5)
+
+
+class Binarizer(object):
+    def __init__(self, threshold):
+        self.threshold = threshold
+
+    def binarize(self, y_hat):
+        y_bin = []
+        for y in y_hat:
+            if y > self.threshold:
+                y_bin += [1]
+            else:
+                y_bin += [0]
+        return np.array(y_bin, dtype=np.uint8)
+
+
+class TabPFNClassifierArtifact(object):
+    def __init__(self, model, threshold):
+        self.model = model
+        self.threshold = threshold
+        if threshold is not None:
+            self.binarizer = Binarizer(self.threshold)
+        else:
+            self.binarizer = None
+
+    def predict_proba(self, X):
+        return self.model.predict_proba(X)[:, 1]
+
+    def predict(self, X):
+        if self.binarizer is not None:
+            y_hat = self.predict_proba(X)
+            y_bin = self.binarizer.binarize(y_hat)
+        else:
+            y_bin = self.model.predict(X)
+        return y_bin
+
+    def run(self, X, y=None):
+        results = collections.OrderedDict()
+        results["main"] = {
+            "idxs": None,
+            "y": y,
+            "y_hat": self.predict_proba(X),
+            "b_hat": self.predict(X),
+        }
+        return results
diff --git a/zairachem/descriptors/describe.py b/zairachem/descriptors/describe.py
index 2fff69f5..c2075463 100644
--- a/zairachem/descriptors/describe.py
+++ b/zairachem/descriptors/describe.py
@@ -3,6 +3,7 @@
 from .raw import RawDescriptors
 from .treated import TreatedDescriptors
 from .reference import ReferenceDescriptors, SimpleDescriptors
+from .eosce import EosceDescriptors
 from .manifolds import Manifolds
 
 from .. import ZairaBase
@@ -47,6 +48,12 @@ def _reference_descriptors(self):
                 ReferenceDescriptors().run()
             step.update()
 
+    def _eosce_descriptors(self):
+        step = PipelineStep("eosce_descriptors", self.output_dir)
+        if not step.is_done():
+            EosceDescriptors().run()
+            step.update()
+
     def _manifolds(self):
         step = PipelineStep("manifolds", self.output_dir)
         if not step.is_done():
@@ -58,5 +65,6 @@ def run(self):
         self._raw_descriptions()
         self._treated_descriptions()
         self._reference_descriptors()
+        self._eosce_descriptors()
         self._manifolds()
         self.update_elapsed_time()
diff --git a/zairachem/descriptors/eosce.py b/zairachem/descriptors/eosce.py
new file mode 100644
index 00000000..21194f37
--- /dev/null
+++ b/zairachem/descriptors/eosce.py
@@ -0,0 +1,63 @@
+import os
+import pandas as pd
+import h5py
+
+from eosce.models import ErsiliaCompoundEmbeddings
+from ..utils.matrices import Hdf5
+from .. import ZairaBase
+
+from ..setup import SMILES_COLUMN
+from ..vars import DATA_SUBFOLDER, DATA_FILENAME, DESCRIPTORS_SUBFOLDER
+
+EOSCE_FILE_NAME = "eosce.h5"
+
+
+class EosceEmbedder(ZairaBase):
+    def __init__(self):
+        ZairaBase.__init__(self)
+        self.model = ErsiliaCompoundEmbeddings()
+
+    def calculate(self, smiles_list, output_h5):
+        X = self.model.transform(smiles_list)
+        if output_h5 is None:
+            return X
+        keys = ["key-{0}".format(i) for i in range(len(smiles_list))]
+        features = ["feat-{0}".format(i) for i in range(X.shape[1])]
+        inputs = smiles_list
+        with h5py.File(output_h5, "w") as f:
+            f.create_dataset("Keys", data=keys)
+            f.create_dataset("Features", data=features)
+            f.create_dataset("Inputs", data=inputs)
+            f.create_dataset("Values", data=X)
+
+
+class EosceLoader(ZairaBase):
+    def __init__(self):
+        ZairaBase.__init__(self)
+        self.path = self.get_output_dir()
+
+    def open(self, eos_id):
+        path = os.path.join(self.path, DESCRIPTORS_SUBFOLDER, eos_id, EOSCE_FILE_NAME)
+        return Hdf5(path)
+
+
+class EosceDescriptors(ZairaBase):
+    def __init__(self):
+        ZairaBase.__init__(self)
+        self.path = self.get_output_dir()
+        self.input_csv = os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME)
+        self.smiles_list = self._get_smiles_list()
+
+    def _get_smiles_list(self):
+        df = pd.read_csv(self.input_csv)
+        return list(df[SMILES_COLUMN])
+
+    def output_h5_filename(self):
+        path = os.path.join(self.path, DESCRIPTORS_SUBFOLDER)
+        os.makedirs(path, exist_ok=True)
+        return os.path.join(path, EOSCE_FILE_NAME)
+
+    def run(self):
+        output_h5 = self.output_h5_filename()
+        ref = EosceEmbedder()
+        ref.calculate(self.smiles_list, output_h5)
diff --git a/zairachem/estimators/from_ersilia_embedding/__init__.py b/zairachem/estimators/from_ersilia_embedding/__init__.py
new file mode 100644
index 00000000..1d7b9c64
--- /dev/null
+++ b/zairachem/estimators/from_ersilia_embedding/__init__.py
@@ -0,0 +1 @@
+ESTIMATORS_FAMILY_SUBFOLDER = "ersilia_embedding"
diff --git a/zairachem/estimators/from_ersilia_embedding/estimate.py b/zairachem/estimators/from_ersilia_embedding/estimate.py
new file mode 100644
index 00000000..be6c099e
--- /dev/null
+++ b/zairachem/estimators/from_ersilia_embedding/estimate.py
@@ -0,0 +1,183 @@
+import os
+import numpy as np
+import pandas as pd
+import h5py
+
+from ... import ZairaBase
+from ..base import BaseEstimator, BaseOutcomeAssembler
+from ...automl.kerastuner import KerasTunerEstimator
+from ...vars import (
+    DATA_FILENAME,
+    DATA_SUBFOLDER,
+    DESCRIPTORS_SUBFOLDER,
+    ESTIMATORS_SUBFOLDER,
+)
+from . import ESTIMATORS_FAMILY_SUBFOLDER
+from .. import RESULTS_MAPPED_FILENAME, RESULTS_UNMAPPED_FILENAME
+
+
+class XGetter(ZairaBase):
+    def __init__(self, path):
+        ZairaBase.__init__(self)
+        self.path = path
+        self.X = []
+        self.columns = []
+
+    def _get_eosce_descriptor(self):
+        with h5py.File(
+            os.path.join(self.path, DESCRIPTORS_SUBFOLDER, "eosce.h5"), "r"
+        ) as f:
+            X_ = f["Values"][:]
+            self.X += [X_]
+            self.columns += [
+                "feat-{0}".format(x.decode("utf-8")) for x in f["Features"][:]
+            ]
+
+    def get(self):
+        self._get_eosce_descriptor()
+        X = np.hstack(self.X)
+        df = pd.DataFrame(X, columns=self.columns)
+        df.to_csv(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                DATA_FILENAME,
+            ),
+            index=False,
+        )
+        return df
+
+
+class Fitter(BaseEstimator):
+    def __init__(self, path):
+        BaseEstimator.__init__(self, path=path)
+        self.trained_path = os.path.join(
+            self.get_output_dir(), ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER
+        )
+        self.x_getter = XGetter
+
+    def _get_X(self):
+        df = self.x_getter(path=self.path).get()
+        return df
+
+    def _get_y(self, task):
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        return np.array(df[task])
+
+    def _get_Y(self):
+        Y = []
+        columns = []
+        for t in self._get_reg_tasks():
+            y = self._get_y(t)
+            Y += [y]
+            columns += [t]
+        for t in self._get_clf_tasks():
+            y = self._get_y(t)
+            Y += [y]
+            columns += [t]
+        Y = np.array(Y).T
+        df = pd.DataFrame(Y, columns=columns)
+        return df
+
+    def run(self, time_budget_sec=None):
+        self.reset_time()
+        if time_budget_sec is None:
+            time_budget_sec = self._estimate_time_budget()
+        else:
+            time_budget_sec = time_budget_sec
+        train_idxs = self.get_train_indices(path=self.path)
+        df_X = self._get_X()
+        df_Y = self._get_Y()
+        df = pd.concat([df_X, df_Y], axis=1)
+        labels = list(df_Y.columns)
+        self.logger.debug("Starting KerasTuner estimation")
+        estimator = KerasTunerEstimator(save_path=self.trained_path)
+        self.logger.debug("Fitting")
+        estimator.fit(data=df.iloc[train_idxs, :], labels=labels)
+        estimator.save()
+        estimator = estimator.load()
+        results = estimator.run(df)
+        self.update_elapsed_time()
+        return results
+
+
+class Predictor(BaseEstimator):
+    def __init__(self, path):
+        BaseEstimator.__init__(self, path=path)
+        self.trained_path = os.path.join(
+            self.get_trained_dir(), ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER
+        )
+        self.x_getter = XGetter
+
+    def run(self):
+        self.reset_time()
+        df = self.x_getter(path=self.path).get()
+        model = KerasTunerEstimator(save_path=self.trained_path).load()
+        results = model.run(df)
+        self.update_elapsed_time()
+        return results
+
+
+class Assembler(BaseOutcomeAssembler):
+    def __init__(self, path=None):
+        BaseOutcomeAssembler.__init__(self, path=path)
+
+    def run(self, df):
+        df_c = self._get_compounds()
+        df_y = df
+        df = pd.concat([df_c, df_y], axis=1)
+        df.to_csv(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                RESULTS_UNMAPPED_FILENAME,
+            ),
+            index=False,
+        )
+        mappings = self._get_mappings()
+        df = self._remap(df, mappings)
+        df.to_csv(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                RESULTS_MAPPED_FILENAME,
+            ),
+            index=False,
+        )
+
+
+class Estimator(ZairaBase):
+    def __init__(self, path=None):
+        ZairaBase.__init__(self)
+        if path is None:
+            self.path = self.get_output_dir()
+        else:
+            self.path = path
+        path_ = os.path.join(
+            self.path, ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER
+        )
+        if not os.path.exists(path_):
+            os.makedirs(path_, exist_ok=True)
+        if not self.is_predict():
+            self.logger.debug("Starting kerastuner fitter")
+            self.estimator = Fitter(path=self.path)
+        else:
+            self.logger.debug("Starting kerastuner predictor")
+            self.estimator = Predictor(path=self.path)
+        self.assembler = Assembler(path=self.path)
+
+    def run(self, time_budget_sec=None):
+        if time_budget_sec is not None:
+            self.time_budget_sec = int(time_budget_sec)
+        else:
+            self.time_budget_sec = None
+        if not self.is_predict():
+            self.logger.debug("Mode: fit")
+            results = self.estimator.run()
+        else:
+            self.logger.debug("Mode: predict")
+            results = self.estimator.run()
+        self.assembler.run(results)
diff --git a/zairachem/estimators/from_ersilia_embedding/pipe.py b/zairachem/estimators/from_ersilia_embedding/pipe.py
new file mode 100644
index 00000000..89adfda8
--- /dev/null
+++ b/zairachem/estimators/from_ersilia_embedding/pipe.py
@@ -0,0 +1,9 @@
+from .estimate import Estimator
+
+
+class EosceEmbeddingPipeline(object):
+    def __init__(self, path):
+        self.e = Estimator(path=path)
+
+    def run(self, time_budget_sec=None):
+        self.e.run(time_budget_sec=time_budget_sec)
diff --git a/zairachem/estimators/from_individual_full_descriptors/assemble.py b/zairachem/estimators/from_individual_full_descriptors/assemble.py
index 5b2b7cbc..ba4acda1 100644
--- a/zairachem/estimators/from_individual_full_descriptors/assemble.py
+++ b/zairachem/estimators/from_individual_full_descriptors/assemble.py
@@ -80,7 +80,7 @@ def _get_model_ids(self):
             os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
         ) as f:
             model_ids = list(json.load(f))
-        model_ids_successfull = []
+        model_ids_successful = []
         for model_id in model_ids:
             if os.path.isfile(
                 os.path.join(
@@ -91,8 +91,8 @@ def _get_model_ids(self):
                     "y_hat.joblib",
                 )
             ):
-                model_ids_successfull += [model_id]
-        return model_ids_successfull
+                model_ids_successful += [model_id]
+        return model_ids_successful
 
     def run(self):
         model_ids = self._get_model_ids()
diff --git a/zairachem/estimators/from_individual_full_descriptors/estimate.py b/zairachem/estimators/from_individual_full_descriptors/estimate.py
index a047383e..e96906a6 100644
--- a/zairachem/estimators/from_individual_full_descriptors/estimate.py
+++ b/zairachem/estimators/from_individual_full_descriptors/estimate.py
@@ -244,13 +244,13 @@ def _get_model_ids(self):
             os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
         ) as f:
             model_ids = list(json.load(f))
-        model_ids_successfull = []
+        model_ids_successful = []
         for model_id in model_ids:
             if os.path.isfile(
                 os.path.join(path, DESCRIPTORS_SUBFOLDER, model_id, "treated.h5")
             ):
-                model_ids_successfull += [model_id]
-        return model_ids_successfull
+                model_ids_successful += [model_id]
+        return model_ids_successful
 
     def run(self, time_budget_sec=None):
         model_ids = self._get_model_ids()
diff --git a/zairachem/estimators/from_individual_full_descriptors/performance.py b/zairachem/estimators/from_individual_full_descriptors/performance.py
index 4bcd91b5..d067456f 100644
--- a/zairachem/estimators/from_individual_full_descriptors/performance.py
+++ b/zairachem/estimators/from_individual_full_descriptors/performance.py
@@ -212,7 +212,7 @@ def _get_model_ids(self):
             os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
         ) as f:
             model_ids = list(json.load(f))
-        model_ids_successfull = []
+        model_ids_successful = []
         for model_id in model_ids:
             if os.path.isfile(
                 os.path.join(
@@ -223,8 +223,8 @@ def _get_model_ids(self):
                     "y_hat.joblib",
                 )
             ):
-                model_ids_successfull += [model_id]
-        return model_ids_successfull
+                model_ids_successful += [model_id]
+        return model_ids_successful
 
     def run(self):
         model_ids = self._get_model_ids()
diff --git a/zairachem/estimators/from_individual_full_descriptors_tabpfn/__init__.py b/zairachem/estimators/from_individual_full_descriptors_tabpfn/__init__.py
new file mode 100644
index 00000000..856581cd
--- /dev/null
+++ b/zairachem/estimators/from_individual_full_descriptors_tabpfn/__init__.py
@@ -0,0 +1 @@
+ESTIMATORS_FAMILY_SUBFOLDER = "individual_full_descriptors_tabpfn"
diff --git a/zairachem/estimators/from_individual_full_descriptors_tabpfn/assemble.py b/zairachem/estimators/from_individual_full_descriptors_tabpfn/assemble.py
new file mode 100644
index 00000000..ba4acda1
--- /dev/null
+++ b/zairachem/estimators/from_individual_full_descriptors_tabpfn/assemble.py
@@ -0,0 +1,101 @@
+import pandas as pd
+import json
+import os
+import joblib
+import collections
+
+from . import ESTIMATORS_FAMILY_SUBFOLDER
+from ... import ZairaBase
+from ...vars import DESCRIPTORS_SUBFOLDER, ESTIMATORS_SUBFOLDER
+from .. import Y_HAT_FILE, RESULTS_UNMAPPED_FILENAME, RESULTS_MAPPED_FILENAME
+from ..base import BaseOutcomeAssembler
+
+
+class IndividualOutcomeAssembler(BaseOutcomeAssembler):
+    def __init__(self, path=None, model_id=None):
+        BaseOutcomeAssembler.__init__(self, path=path)
+        self.model_id = model_id
+
+    def _get_y_hat(self):
+        results = joblib.load(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                self.model_id,
+                Y_HAT_FILE,
+            )
+        )
+        data = collections.OrderedDict()
+        for c, r in results.items():
+            r = r["main"]
+            data[c] = r["y_hat"]
+            if "b_hat" in r:
+                data[c + "_bin"] = r["b_hat"]
+        return pd.DataFrame(data)
+
+    def run(self):
+        df_c = self._get_compounds()
+        df_y = self._get_y_hat()
+        df = pd.concat([df_c, df_y], axis=1)
+        df.to_csv(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                self.model_id,
+                RESULTS_UNMAPPED_FILENAME,
+            ),
+            index=False,
+        )
+        mappings = self._get_mappings()
+        df = self._remap(df, mappings)
+        df.to_csv(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                self.model_id,
+                RESULTS_MAPPED_FILENAME,
+            ),
+            index=False,
+        )
+
+
+class OutcomeAssembler(ZairaBase):
+    def __init__(self, path=None):
+        ZairaBase.__init__(self)
+        self.path = path
+
+    def _get_model_ids(self):
+        if self.path is None:
+            path = self.get_output_dir()
+        else:
+            path = self.path
+        if self.is_predict():
+            path_trained = self.get_trained_dir()
+        else:
+            path_trained = path
+        with open(
+            os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
+        ) as f:
+            model_ids = list(json.load(f))
+        model_ids_successful = []
+        for model_id in model_ids:
+            if os.path.isfile(
+                os.path.join(
+                    path,
+                    ESTIMATORS_SUBFOLDER,
+                    ESTIMATORS_FAMILY_SUBFOLDER,
+                    model_id,
+                    "y_hat.joblib",
+                )
+            ):
+                model_ids_successful += [model_id]
+        return model_ids_successful
+
+    def run(self):
+        model_ids = self._get_model_ids()
+        for model_id in model_ids:
+            o = IndividualOutcomeAssembler(path=self.path, model_id=model_id)
+            o.run()
diff --git a/zairachem/estimators/from_individual_full_descriptors_tabpfn/estimate.py b/zairachem/estimators/from_individual_full_descriptors_tabpfn/estimate.py
new file mode 100644
index 00000000..6f9c9f4f
--- /dev/null
+++ b/zairachem/estimators/from_individual_full_descriptors_tabpfn/estimate.py
@@ -0,0 +1,185 @@
+import os
+import json
+import h5py
+import pandas as pd
+import numpy as np
+import collections
+import joblib
+
+from ...descriptors.treated import TREATED_FILE_NAME
+
+from ... import ZairaBase
+from ...automl.binarytabpfn import TabPFNBinaryClassifier
+
+from ...vars import (
+    DESCRIPTORS_SUBFOLDER,
+    DATA_SUBFOLDER,
+    DATA_FILENAME,
+    ESTIMATORS_SUBFOLDER,
+)
+from ..base import BaseEstimator
+
+from .. import Y_HAT_FILE
+from . import ESTIMATORS_FAMILY_SUBFOLDER
+
+
+class BaseEstimatorIndividual(BaseEstimator):
+    def __init__(self, path, model_id):
+        BaseEstimator.__init__(self, path=path)
+        path_ = os.path.join(
+            self.path, ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER, model_id
+        )
+        if not os.path.exists(path_):
+            os.makedirs(path_)
+        self.model_id = model_id
+
+    def _get_X(self):
+        f = os.path.join(
+            self.path, DESCRIPTORS_SUBFOLDER, self.model_id, TREATED_FILE_NAME
+        )
+        with h5py.File(f, "r") as f:
+            X = f["Values"][:]
+        return X
+
+
+class Fitter(BaseEstimatorIndividual):
+    def __init__(self, path, model_id):
+        BaseEstimatorIndividual.__init__(self, path=path, model_id=model_id)
+        self.trained_path = os.path.join(
+            self.get_output_dir(), ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER
+        )
+
+    def _get_flds(self):
+        # for now only auxiliary folds are used
+        col = [f for f in self.schema["folds"] if "_aux" in f][0]
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        return np.array(df[col])
+
+    def _get_y(self, task):
+        # for now iterate task by task
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        return np.array(df[task])
+
+    def run(self, time_budget_sec=60):
+        self.reset_time()
+        if time_budget_sec is None:
+            time_budget_sec = self._estimate_time_budget()
+        else:
+            time_budget_sec = time_budget_sec
+        tasks = collections.OrderedDict()
+        X = self._get_X()
+        train_idxs = self.get_train_indices(path=self.path)
+        valid_idxs = self.get_validation_indices(path=self.path)
+        for t in self._get_clf_tasks():
+            y = self._get_y(t)
+            model = TabPFNBinaryClassifier()
+            model.fit(X[train_idxs], y[train_idxs])
+            file_name = os.path.join(self.trained_path, self.model_id, t + ".joblib")
+            model.save(file_name)
+            model = model.load(file_name)
+            tasks[t] = model.run(X, y)
+            _valid_task = model.run(X[valid_idxs], y[valid_idxs])
+            tasks[t]["valid"] = _valid_task["main"]
+        self.update_elapsed_time()
+        return tasks
+
+
+class Predictor(BaseEstimatorIndividual):
+    def __init__(self, path, model_id):
+        BaseEstimatorIndividual.__init__(self, path=path, model_id=model_id)
+        self.trained_path = os.path.join(
+            self.get_trained_dir(), ESTIMATORS_SUBFOLDER, ESTIMATORS_FAMILY_SUBFOLDER
+        )
+
+    def _get_y(self, task):
+        # for now iterate task by task
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        columns = set(df.columns)
+        if task in columns:
+            return np.array(df[task])
+        else:
+            return None
+
+    def run(self):
+        self.reset_time()
+        tasks = collections.OrderedDict()
+        X = self._get_X()
+        for t in self._get_clf_tasks():
+            y = self._get_y(t)
+            model = TabPFNBinaryClassifier()
+            file_name = os.path.join(self.trained_path, self.model_id, t + ".joblib")
+            model = model.load(file_name)
+            tasks[t] = model.run(X, y)
+        self.update_elapsed_time()
+        return tasks
+
+
+class IndividualEstimator(ZairaBase):
+    def __init__(self, path=None, model_id=None):
+        ZairaBase.__init__(self)
+        self.model_id = model_id
+        if path is None:
+            self.path = self.get_output_dir()
+        else:
+            self.path = path
+        if not self.is_predict():
+            self.estimator = Fitter(path=self.path, model_id=self.model_id)
+        else:
+            self.estimator = Predictor(path=self.path, model_id=self.model_id)
+
+    def run(self, time_budget_sec=None):
+        if time_budget_sec is not None:
+            self.time_budget_sec = int(time_budget_sec)
+        else:
+            self.time_budget_sec = None
+        if not self.is_predict():
+            results = self.estimator.run(time_budget_sec=self.time_budget_sec)
+        else:
+            results = self.estimator.run()
+        joblib.dump(
+            results,
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                self.model_id,
+                Y_HAT_FILE,
+            ),
+        )
+
+
+class Estimator(ZairaBase):
+    def __init__(self, path=None):
+        ZairaBase.__init__(self)
+        self.path = path
+
+    def _get_model_ids(self):
+        if self.path is None:
+            path = self.get_output_dir()
+        else:
+            path = self.path
+        if self.is_predict():
+            path_trained = self.get_trained_dir()
+        else:
+            path_trained = path
+        with open(
+            os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
+        ) as f:
+            model_ids = list(json.load(f))
+        model_ids_successful = []
+        for model_id in model_ids:
+            if os.path.isfile(
+                os.path.join(path, DESCRIPTORS_SUBFOLDER, model_id, "treated.h5")
+            ):
+                model_ids_successful += [model_id]
+        return model_ids_successful
+
+    def run(self, time_budget_sec=None):
+        model_ids = self._get_model_ids()
+        if time_budget_sec is not None:
+            tbs = max(int(time_budget_sec / len(model_ids)), 1)
+        else:
+            tbs = None
+        for model_id in model_ids:
+            estimator = IndividualEstimator(path=self.path, model_id=model_id)
+            estimator.run(time_budget_sec=tbs)
diff --git a/zairachem/estimators/from_individual_full_descriptors_tabpfn/performance.py b/zairachem/estimators/from_individual_full_descriptors_tabpfn/performance.py
new file mode 100644
index 00000000..d067456f
--- /dev/null
+++ b/zairachem/estimators/from_individual_full_descriptors_tabpfn/performance.py
@@ -0,0 +1,233 @@
+import os
+import json
+import numpy as np
+import pandas as pd
+import joblib
+import collections
+
+from sklearn import metrics
+
+from .. import Y_HAT_FILE
+from ... import ZairaBase
+
+from . import ESTIMATORS_FAMILY_SUBFOLDER
+from ...vars import (
+    DATA_SUBFOLDER,
+    DESCRIPTORS_SUBFOLDER,
+    ESTIMATORS_SUBFOLDER,
+    DATA_FILENAME,
+)
+
+from .. import CLF_REPORT_FILENAME, REG_REPORT_FILENAME
+
+
+class BasePerformance(ZairaBase):
+    def __init__(self, path=None, model_id=None):
+        ZairaBase.__init__(self)
+        if path is None:
+            self.path = self.get_output_dir()
+        else:
+            self.path = path
+        self.model_id = model_id
+
+    def _get_y_hat_dict(self):
+        return joblib.load(
+            os.path.join(
+                self.path,
+                ESTIMATORS_SUBFOLDER,
+                ESTIMATORS_FAMILY_SUBFOLDER,
+                self.model_id,
+                Y_HAT_FILE,
+            )
+        )
+
+
+class ClassificationPerformance(BasePerformance):
+    def __init__(self, path, model_id):
+        BasePerformance.__init__(self, path=path, model_id=model_id)
+        self.results = self._get_y_hat_dict()
+        self._prefix = self._get_prefix()
+        self.results = self.results[self._prefix]
+
+    def _get_prefix(self):
+        for c in list(self.results.keys()):
+            if "clf_" in c:
+                return c
+
+    def _try_metric(self, fun, t, p):
+        try:
+            return float(fun(t, p))
+        except:
+            return None
+
+    def _calculate(self, key):
+        r = self.results[key]
+        y_true = np.array(r["y"])
+        y_pred = np.array(r["y_hat"])
+        b_pred = np.array(r["b_hat"])
+        try:
+            confu = metrics.confusion_matrix(y_true, b_pred, labels=[0, 1])
+        except:
+            confu = np.array([[-1, -1], [-1, -1]])
+        report = {
+            "roc_auc_score": self._try_metric(metrics.roc_auc_score, y_true, y_pred),
+            "precision_score": self._try_metric(
+                metrics.precision_score, y_true, b_pred
+            ),
+            "recall_score": self._try_metric(metrics.recall_score, y_true, b_pred),
+            "tp": int(confu[1, 1]),
+            "tn": int(confu[0, 0]),
+            "fp": int(confu[0, 1]),
+            "fn": int(confu[1, 0]),
+            "y_true": [int(y) for y in y_true],
+            "y_pred": [float(y) for y in y_pred],
+            "b_pred": [int(y) for y in b_pred],
+        }
+        return report
+
+    def calculate(self):
+        report = collections.OrderedDict()
+        for k in self.results.keys():
+            report[k] = self._calculate(k)
+        return report
+
+
+class RegressionPerformance(BasePerformance):
+    def __init__(self, path, model_id):
+        BasePerformance.__init__(self, path=path, model_id=model_id)
+        self.results = self._get_y_hat_dict()
+        self._prefix = self._get_prefix()
+        self.results = self.results[self._prefix]
+
+    def _get_prefix(self):
+        for c in list(self.results.keys()):
+            if "reg_" in c:
+                return c
+
+    def _calculate(self, key):
+        r = self.results[key]
+        y_true = np.array(r["y"])
+        y_pred = np.array(r["y_hat"])
+        report = {
+            "r2_score": float(metrics.r2_score(y_true, y_pred)),
+            "mean_absolute_error": float(metrics.mean_absolute_error(y_true, y_pred)),
+            "mean_squared_error": float(metrics.mean_squared_error(y_true, y_pred)),
+            "y_true": [float(y) for y in y_true],
+            "y_pred": [float(y) for y in y_pred],
+        }
+        return report
+
+    def calculate(self):
+        report = collections.OrderedDict()
+        for k in self.results.keys():
+            report[k] = self._calculate(k)
+        return report
+
+
+class IndividualPerformanceReporter(ZairaBase):
+    def __init__(self, path=None, model_id=None):
+        ZairaBase.__init__(self)
+        if path is None:
+            self.path = self.get_output_dir()
+        else:
+            self.path = path
+        self.has_tasks = self._has_tasks()
+        if self._has_clf_tasks():
+            self.clf = ClassificationPerformance(path=path, model_id=model_id)
+        else:
+            self.clf = None
+        if self._has_reg_tasks():
+            self.reg = RegressionPerformance(path=path, model_id=model_id)
+        else:
+            self.reg = None
+        self.model_id = model_id
+
+    def _has_tasks(self):
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        for c in list(df.columns):
+            if "clf_" in c or "reg_" in c:
+                return True
+        return False
+
+    def _has_reg_tasks(self):
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        for c in list(df.columns):
+            if "reg_" in c and "_skip" not in c and "_aux" not in c:
+                return True
+        return False
+
+    def _has_clf_tasks(self):
+        df = pd.read_csv(os.path.join(self.path, DATA_SUBFOLDER, DATA_FILENAME))
+        for c in list(df.columns):
+            if "clf_" in c and "_skip" not in c and "_aux" not in c:
+                return True
+        return False
+
+    def run(self):
+        if not self.has_tasks:
+            return
+        if self.clf is not None:
+            clf_rep = self.clf.calculate()
+            with open(
+                os.path.join(
+                    self.path,
+                    ESTIMATORS_SUBFOLDER,
+                    ESTIMATORS_FAMILY_SUBFOLDER,
+                    self.model_id,
+                    CLF_REPORT_FILENAME,
+                ),
+                "w",
+            ) as f:
+                json.dump(clf_rep, f, indent=4)
+        if self.reg is not None:
+            reg_rep = self.reg.calculate()
+            with open(
+                os.path.join(
+                    self.path,
+                    ESTIMATORS_SUBFOLDER,
+                    ESTIMATORS_FAMILY_SUBFOLDER,
+                    self.model_id,
+                    REG_REPORT_FILENAME,
+                ),
+                "w",
+            ) as f:
+                json.dump(reg_rep, f, indent=4)
+
+
+class PerformanceReporter(ZairaBase):
+    def __init__(self, path=None):
+        ZairaBase.__init__(self)
+        self.path = path
+
+    def _get_model_ids(self):
+        if self.path is None:
+            path = self.get_output_dir()
+        else:
+            path = self.path
+        if self.is_predict():
+            path_trained = self.get_trained_dir()
+        else:
+            path_trained = path
+        with open(
+            os.path.join(path_trained, DESCRIPTORS_SUBFOLDER, "done_eos.json"), "r"
+        ) as f:
+            model_ids = list(json.load(f))
+        model_ids_successful = []
+        for model_id in model_ids:
+            if os.path.isfile(
+                os.path.join(
+                    path,
+                    ESTIMATORS_SUBFOLDER,
+                    ESTIMATORS_FAMILY_SUBFOLDER,
+                    model_id,
+                    "y_hat.joblib",
+                )
+            ):
+                model_ids_successful += [model_id]
+        return model_ids_successful
+
+    def run(self):
+        model_ids = self._get_model_ids()
+        for model_id in model_ids:
+            p = IndividualPerformanceReporter(path=self.path, model_id=model_id)
+            p.run()
diff --git a/zairachem/estimators/from_individual_full_descriptors_tabpfn/pipe.py b/zairachem/estimators/from_individual_full_descriptors_tabpfn/pipe.py
new file mode 100644
index 00000000..8e397fbf
--- /dev/null
+++ b/zairachem/estimators/from_individual_full_descriptors_tabpfn/pipe.py
@@ -0,0 +1,15 @@
+from .estimate import Estimator
+from .assemble import OutcomeAssembler
+from .performance import PerformanceReporter
+
+
+class IndividualFullDescriptorTabPFNPipeline(object):
+    def __init__(self, path):
+        self.e = Estimator(path=path)
+        self.a = OutcomeAssembler(path=path)
+        self.p = PerformanceReporter(path=path)
+
+    def run(self, time_budget_sec=None):
+        self.e.run(time_budget_sec=time_budget_sec)
+        self.a.run()
+        self.p.run()
diff --git a/zairachem/estimators/pipe.py b/zairachem/estimators/pipe.py
index dddd61f6..fe658a71 100644
--- a/zairachem/estimators/pipe.py
+++ b/zairachem/estimators/pipe.py
@@ -7,8 +7,12 @@
 from .from_classic.pipe import ClassicPipeline
 from .from_fingerprint.pipe import FingerprintPipeline
 from .from_individual_full_descriptors.pipe import IndividualFullDescriptorPipeline
+from .from_individual_full_descriptors_tabpfn.pipe import (
+    IndividualFullDescriptorTabPFNPipeline,
+)
 from .from_manifolds.pipe import ManifoldPipeline
 from .from_reference_embedding.pipe import ReferenceEmbeddingPipeline
+from .from_ersilia_embedding.pipe import EosceEmbeddingPipeline
 from .from_molmap.pipe import MolMapPipeline
 from .evaluate import SimpleEvaluator
 
@@ -81,6 +85,19 @@ def _individual_estimator_pipeline(self, time_budget_sec):
             p.run(time_budget_sec=time_budget_sec)
             step.update()
 
+    def _individual_estimator_tabpfn_pipeline(self, time_budget_sec):
+        if self.is_lazy():
+            self.logger.info("Lazy mode skips individual descriptors with tabpfn")
+            return
+        if "tabpfn-individual-descriptors" not in self._estimators_to_use:
+            return
+        step = PipelineStep("individual_estimator_pipeline_tabpfn", self.output_dir)
+        if not step.is_done():
+            self.logger.debug("Running individual estimator pipeline")
+            p = IndividualFullDescriptorTabPFNPipeline(path=self.path)
+            p.run(time_budget_sec=time_budget_sec)
+            step.update()
+
     def _manifolds_pipeline(self, time_budget_sec):
         if self.is_lazy():
             self.logger.info("Lazy mode skips manifolds")
@@ -107,6 +124,16 @@ def _reference_pipeline(self, time_budget_sec):
             p.run(time_budget_sec=time_budget_sec)
             step.update()
 
+    def _eosce_pipeline(self, time_budget_sec):
+        if "kerastuner-eosce-embedding" not in self._estimators_to_use:
+            return
+        step = PipelineStep("eosce_pipeline", self.output_dir)
+        if not step.is_done():
+            self.logger.debug("Ersilia compound embedding pipeline")
+            p = EosceEmbeddingPipeline(path=self.path)
+            p.run(time_budget_sec=time_budget_sec)
+            step.update()
+
     def _molmap_pipeline(self, time_budget_sec):
         if self.is_lazy():
             self.logger.info("Lazy mode skips molmap")
@@ -133,8 +160,10 @@ def _simple_evaluation(self):
     def run(self, time_budget_sec=None):
         self._classic_estimator_pipeline(time_budget_sec)
         self._fingerprint_estimator_pipeline(time_budget_sec)
+        self._individual_estimator_tabpfn_pipeline(time_budget_sec)
         self._individual_estimator_pipeline(time_budget_sec)
         self._manifolds_pipeline(time_budget_sec)
         self._reference_pipeline(time_budget_sec)
+        self._eosce_pipeline(time_budget_sec)
         self._molmap_pipeline(time_budget_sec)
         self._simple_evaluation()
diff --git a/zairachem/pool/bagger.py b/zairachem/pool/bagger.py
index e1a3e7c9..535c33d0 100644
--- a/zairachem/pool/bagger.py
+++ b/zairachem/pool/bagger.py
@@ -5,9 +5,10 @@
 import joblib
 import h5py
 import collections
+from scipy.special import expit
 from sklearn.linear_model import LogisticRegressionCV, LinearRegression
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.preprocessing import RobustScaler
+from sklearn.preprocessing import RobustScaler, PowerTransformer
 from sklearn.metrics import roc_curve, auc, r2_score
 
 from .. import ZairaBase
@@ -155,8 +156,8 @@ def save(self, filename):
 
 
 class PoolClassifier(object):
-    def __init__(self, path, mode="scaling"):
-        assert mode in ["scaling", "median", "model"]
+    def __init__(self, path, mode="weighting"):
+        assert mode in ["weighting", "median", "model"]
         self.path = path
         if not os.path.exists(self.path):
             os.makedirs(self.path, exist_ok=True)
@@ -168,7 +169,8 @@ def _get_model_filename(self, n):
     def _fit_just_median(self, df_X, df_y):
         return np.median(np.array(df_X), axis=1)
 
-    def _fit_scaling(self, df_X, df_y):
+    def _fit_weighting(self, df_X, df_y):
+        y = np.array(df_y).ravel()
         cols = list(df_X.columns)
         X = np.array(df_X)
         p25 = np.percentile(X.ravel(), 25)
@@ -177,10 +179,13 @@ def _fit_scaling(self, df_X, df_y):
         scale = (p25, p50, p75)
         for c in cols:
             X = np.array(df_X[c]).reshape(-1, 1)
-            mdl = RobustScaler()
-            mdl.fit(X)
+            mdl0 = PowerTransformer()
+            mdl0.fit(X)
+            X = mdl0.transform(X)
+            mdl1 = LogisticRegressionCV()
+            mdl1.fit(X, y)
             filename = self._get_model_filename(c)
-            joblib.dump(mdl, filename)
+            joblib.dump((mdl0, mdl1), filename)
         filename = self._get_model_filename("overall")
         joblib.dump(scale, filename)
         filename = self._get_model_filename("weighting")
@@ -188,7 +193,7 @@ def _fit_scaling(self, df_X, df_y):
         ws.distance_to_leads()
         ws.importance()
         ws.save(filename)
-        return self._predict_scaling(df_X)
+        return self._predict_weighting(df_X)
 
     def _fit_model(self, df_X, df_y):
         y = np.array(df_y).ravel()
@@ -204,22 +209,19 @@ def _fit_model(self, df_X, df_y):
     def _predict_just_median(self, df_X):
         return np.median(np.array(df_X), axis=1)
 
-    def _predict_scaling(self, df_X):
+    def _predict_weighting(self, df_X):
         cols = list(df_X.columns)
         Y_hat = []
         for c in cols:
             filename = self._get_model_filename(c)
             if os.path.exists(filename):
-                mdl = joblib.load(filename)
+                mdl0, mdl1 = joblib.load(filename)
                 X = np.array(df_X[c]).reshape(-1, 1)
-                y_hat = mdl.transform(X).ravel()
+                X = mdl0.transform(X)
+                y_hat = mdl1.predict_proba(X)[:,1]
                 Y_hat += [y_hat]
         Y_hat = np.array(Y_hat).T
         filename = self._get_model_filename("overall")
-        scale = joblib.load(filename)
-        iqr = scale[-1] - scale[0]
-        med = scale[1]
-        Y_hat = Y_hat * iqr + med
         filename = self._get_model_filename("weighting")
         weights = joblib.load(filename)
         wvals = weights["weights"]
@@ -230,7 +232,7 @@ def _predict_scaling(self, df_X):
                 w = w + 1
             y_hats += [np.average(Y_hat, axis=1, weights=w)]
         y_hats += [np.mean(Y_hat, axis=1)]
-        y_hats = np.clip(np.array(y_hats), 0, 1)
+        y_hats = np.array(y_hats)
         y_hat = np.mean(y_hats, axis=0)
         return y_hat
 
@@ -248,16 +250,16 @@ def _predict_model(self, df_X):
         return np.median(Y_hat, axis=1)
 
     def fit(self, df_X, df_y):
-        if self.mode == "scaling":
-            return self._fit_scaling(df_X, df_y)
+        if self.mode == "weighting":
+            return self._fit_weighting(df_X, df_y)
         if self.mode == "median":
             return self._fit_just_median(df_X, df_y)
         if self.mdoe == "model":
             return self._fit_model(df_X, df_y)
 
     def predict(self, df_X):
-        if self.mode == "scaling":
-            return self._predict_scaling(df_X)
+        if self.mode == "weighting":
+            return self._predict_weighting(df_X)
         if self.mode == "median":
             return self._predict_just_median(df_X)
         if self.mode == "model":
diff --git a/zairachem/vars.py b/zairachem/vars.py
index b1039c66..fc229d10 100644
--- a/zairachem/vars.py
+++ b/zairachem/vars.py
@@ -41,18 +41,22 @@
 # Ersilia Model Hub
 
 ERSILIA_HUB_DEFAULT_MODELS = [
-    "morgan-counts",
+    # "morgan-counts",
     "cc-signaturizer",
+    "molfeat-chemgpt",
+    "rdkit-fingerprint",
     "grover-embedding",
     "mordred",
-]  # molbert was removed
+]
 
 DEFAULT_ESTIMATORS = [
     "baseline-classic",
     "baseline-fingerprint",
     "flaml-individual-descriptors",
+    "tabpfn-individual-descriptors",
     "autogluon-manifolds",
     "kerastuner-reference-embedding",
+    "kerastuner-eosce-embedding",
     "molmap",
 ]