def recall(retrieved, expected):
    """Return the fraction of distinct expected ids present in ``retrieved``.

    Args:
        retrieved: Iterable of retrieved document ids (must be hashable).
        expected: Iterable of relevant document ids, or a single id passed
            as a bare string.

    Returns:
        A float between 0.0 and 1.0. Returns 0.0 when ``expected`` is empty,
        since there is nothing that could have been retrieved.
    """
    # A bare string is one id, not an iterable of one-character ids.
    if isinstance(expected, str):
        expected = (expected,)
    # Deduplicate so repeated expected ids do not inflate the denominator.
    relevant = set(expected)
    if not relevant:
        # Avoid ZeroDivisionError on an empty ground-truth set.
        return 0.0
    return float(len(relevant.intersection(retrieved))) / len(relevant)
def mrr(retrieved, expected):
    """Return the reciprocal rank of the first relevant id in ``retrieved``.

    Args:
        retrieved: Ordered iterable of retrieved document ids, best match
            first (ids must be hashable).
        expected: Iterable of relevant document ids (strings), or a single
            id passed as a bare string.

    Returns:
        ``1.0 / rank`` of the first retrieved id that is relevant, with ranks
        starting at 1, or 0.0 when no retrieved id is relevant.
    """
    # A bare string is one id; without this guard ``in`` would do substring
    # matching (e.g. "doc1" would match an expected id of "doc10").
    if isinstance(expected, str):
        expected = (expected,)
    # Build the lookup set once instead of scanning a list for every rank.
    relevant = set(expected)
    for rank, doc_id in enumerate(retrieved, 1):
        if doc_id in relevant:
            return 1.0 / rank
    return 0.0
def _record_search_result(evaluation, metrics, method, idx, query, eid, results, k):
    """Score one search method's results, log them, and store the metrics."""
    method_recall = recall(results, eid)
    method_mrr = mrr(results, eid)
    # Log this method's results to LangWatch
    evaluation.log(
        f"{method}_search",
        index=idx,
        score=method_recall,  # Using recall as the primary score
        data={
            "query": query,
            "expected_id": eid,
            "retrieved_ids": results,
            "recall": method_recall,
            "mrr": method_mrr,
            "k": k,
        },
    )
    metrics[method].append({
        "recall": method_recall,
        "mrr": method_mrr,
    })


def evaluate_search(tbl, queries, expected_ids, embeddings, k=5):
    """Compare semantic, lexical and hybrid search over a set of queries.

    For every query, each of the three search functions is run in turn
    (semantic, then lexical, then hybrid). Each result is scored with
    ``recall`` and ``mrr`` and logged to the LangWatch evaluation experiment
    "search-methods-comparison" under "<method>_search".

    Args:
        tbl: Table handle forwarded unchanged to the ``search_*`` functions.
        queries: Iterable of queries; a query's position is used as its index.
        expected_ids: Indexable by query position; the expected id(s) for
            each query, passed to ``recall`` and ``mrr``.
        embeddings: Indexable by query position; the query embedding passed
            to the semantic and hybrid searches.
        k: Forwarded to every search function (presumably the number of
            results to retrieve; confirm against the ``search_*`` helpers).

    Returns:
        A dict with keys "semantic", "lexical" and "hybrid", each mapping to
        a list with one ``{"recall": ..., "mrr": ...}`` dict per query.
    """
    # Initialize a new LangWatch evaluation experiment
    evaluation = langwatch.evaluation.init("search-methods-comparison")
    metrics = dict(semantic=[], lexical=[], hybrid=[])

    # Use evaluation.loop() to track the iteration
    for idx, query in evaluation.loop(enumerate(tqdm(queries, desc="Evaluating..."))):
        eid = expected_ids[idx]
        emb = embeddings[idx]

        # Each search runs immediately before its own scoring and logging, so
        # the order of search calls and log entries is the same for every query.
        _record_search_result(
            evaluation, metrics, "semantic", idx, query, eid,
            search_semantic(tbl, query, emb, k), k,
        )
        # Lexical search does not take the query embedding.
        _record_search_result(
            evaluation, metrics, "lexical", idx, query, eid,
            search_lexical(tbl, query, k), k,
        )
        _record_search_result(
            evaluation, metrics, "hybrid", idx, query, eid,
            search_hybrid(tbl, query, emb, k), k,
        )

    return metrics