We use the askathon cleaned response dataset to evaluate the nasa-v6 models against a vanilla distilbert baseline. We're using the ONNX version of the nasa-v6 model.
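For reference, a minimal sketch of how the ONNX artifact under `tmp/onnx/nasa-v6/` could be produced with Hugging Face Optimum. This is an assumption about the export step (the source checkpoint path `tmp/checkpoints/nasa-v6` is hypothetical), not necessarily the exact command that was used:

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer

# Hypothetical local path to the fine-tuned nasa-v6 checkpoint.
ckpt = "tmp/checkpoints/nasa-v6"

# export=True converts the PyTorch weights to ONNX on the fly.
model = ORTModelForQuestionAnswering.from_pretrained(ckpt, export=True)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

# Save model and tokenizer together so the wrapper can load one directory.
model.save_pretrained("tmp/onnx/nasa-v6/")
tokenizer.save_pretrained("tmp/onnx/nasa-v6/")
```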
The dataset loader is tentatively:
```python
import pandas as pd


def load_askathon_clean(path: str) -> pd.DataFrame:
    data = pd.read_csv(path)
    data = data.drop(columns=["Email Address"]).reset_index(drop=True)
    # Positional renames: the CSV columns are context, id, source, topics,
    # then five question/answer pairs.
    data.rename(columns={
        data.columns[0]: "context",
        data.columns[1]: "id",
        data.columns[2]: "source",
        data.columns[3]: "topics",
        data.columns[4]: "q1",
        data.columns[5]: "a1",
        data.columns[6]: "q2",
        data.columns[7]: "a2",
        data.columns[8]: "q3",
        data.columns[9]: "a3",
        data.columns[10]: "q4",
        data.columns[11]: "a4",
        data.columns[12]: "q5",
        data.columns[13]: "a5",
    }, inplace=True)
    data.drop(columns=["source", "topics"], inplace=True)
    return data


def create_qa_dataset(data: pd.DataFrame) -> pd.DataFrame:
    res = []
    q_keys = [f"q{i}" for i in range(1, 6)]
    a_keys = [f"a{i}" for i in range(1, 6)]

    def _index_fn(context: str, answer: str) -> int:
        # Locate the answer span in the context; -1 if not found.
        try:
            return context.lower().index(answer.rstrip(" ,.!?").lower())
        except ValueError:
            return -1

    for _df in data.itertuples():
        tmp = []
        for qk, ak in zip(q_keys, a_keys):
            q, a = getattr(_df, qk), getattr(_df, ak)
            if not isinstance(a, str):
                # Skip empty/NaN answers.
                continue
            idx = _index_fn(_df.context, a)
            if idx > -1:
                tmp.append(dict(
                    id=str(_df.id),
                    context=_df.context,
                    question=q,
                    answer_text=a,
                    answer_start=idx,
                ))
        res.extend(tmp)
    return pd.DataFrame(res)


data = create_qa_dataset(load_askathon_clean("data/askathon.csv"))
```
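A quick sanity check on the constructed dataset may be worth running before evaluation. This is just an illustrative snippet, assuming `data` comes from the loader above:

```python
# Number of (context, question, answer) triples that survived span matching.
print(f"{len(data)} QA pairs")

# answer_start was located in the lower-cased context with trailing
# punctuation stripped from the answer, so verify against that form.
row = data.iloc[0]
expected = row["answer_text"].rstrip(" ,.!?").lower()
assert row["context"].lower()[row["answer_start"]:].startswith(expected)
```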
Evaluation is done through evalem with the following tentative pipeline code:
```python
from evalem.nlp.evaluators import QAEvaluator
from evalem.nlp.models import QuestionAnsweringHFPipelineWrapper
from evalem.nlp.metrics import (
    BartScore,
    BertScore,
    BleuMetric,
    MeteorMetric,
    ExactMatchMetric,
    RougeMetric,
)
from evalem import NamedSimpleEvaluationPipeline
from evalem.misc.utils import build_comparison_table

# define models
# No model argument: the wrapper falls back to the default HF QA pipeline
# (the vanilla distilbert baseline).
wrapped_model = QuestionAnsweringHFPipelineWrapper(device="mps")
wrapped_model_2 = QuestionAnsweringHFPipelineWrapper.from_onnx(
    model="tmp/onnx/nasa-v6/",
    tokenizer="tmp/onnx/nasa-v6/",
    device="mps",
)

# define evaluators/metrics
evaluators_common = [
    QAEvaluator(),
    BertScore(device="mps"),
    BartScore(device="mps"),
    RougeMetric(),
    MeteorMetric(),
    BleuMetric(),
]

# build pipelines
eval_pipe = NamedSimpleEvaluationPipeline(
    model=wrapped_model,
    evaluators=evaluators_common,
    name="distilbert",
)
eval_pipe_2 = NamedSimpleEvaluationPipeline(
    model=wrapped_model_2,
    evaluators=evaluators_common,
    name="nasa-v6-onnx",
)

# evaluate and get comparison table
results = build_comparison_table(
    eval_pipe, eval_pipe_2,
    inputs=list(data[["context", "question"]].T.to_dict().values()),
    references=data["answer_text"].to_list(),
)
```
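To keep the numbers around for side-by-side comparison later, the table can be dumped to disk. A rough sketch, assuming `results` is a records-like object that pandas can tabulate (the output path is hypothetical):

```python
import pandas as pd

# Tabulate and persist the metric comparison (illustrative only).
table = pd.DataFrame(results)
print(table)
table.to_csv("results/nasa-v6-onnx_vs_distilbert.csv", index=False)
```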
cc: @muthukumaranR @xhagrg