# ReZero-Search-LLM-Agent-Fork/scripts/evaluation/eval_config.yaml

# ------------------------------------------------Environment Settings------------------------------------------------#
# Directories for the input data and for the outputs
data_dir: '/mnt/nas/thinhlpg/code/DeepSearch/data/flashrag_datasets/'
save_dir: '/mnt/nas/thinhlpg/code/DeepSearch/logs'

# Random seed, fixed for reproducibility
seed: 2024

# Whether to save intermediate data
save_intermediate_data: true
# Note attached to the saved run (presumably used to label the output — confirm against the consumer)
save_note: 'experiment'

# -------------------------------------------------Retrieval Settings------------------------------------------------#
# When a remote URL is set, a remote retriever is used and the settings below are ignored
use_remote_retriever: true
remote_retriever_url: 'localhost:8001'

instruction: null                 # instruction for the retrieval model
retrieval_topk: 5                 # number of documents to retrieve
retrieval_batch_size: 256         # batch size used during retrieval
retrieval_use_fp16: true          # whether the retrieval model runs in fp16
retrieval_query_max_length: 128   # maximum length of the query
save_retrieval_cache: false       # whether to save the retrieval cache
use_retrieval_cache: false        # whether to use the retrieval cache
retrieval_cache_path: null        # path to the retrieval cache
retrieval_pooling_method: null    # set automatically if not provided

# -------------------------------------------------Generator Settings------------------------------------------------#
# Inference framework of the LLM.
# NOTE(review): the previous comment listed only 'hf', 'vllm', 'fschat' as supported;
# 'sgl_remote' is presumably an additional backend paired with sgl_remote_url — confirm.
framework: sgl_remote
sgl_remote_url: 'localhost:8002'
# Name or path of the generator model, used for loading the tokenizer
generator_model: 'janhq/250404-llama-3.2-3b-instruct-grpo-03-s250'
# Maximum length of the input
generator_max_input_len: 2048
generation_params:
  do_sample: false
  max_tokens: 8192

# -------------------------------------------------Evaluation Settings------------------------------------------------#
# Metrics used to evaluate the results
metrics: ['em', 'f1', 'acc', 'precision', 'recall']
# Per-metric settings, consulted by the metrics that need them
metric_setting:
  retrieval_recall_topk: 5
save_metric_score: true  # whether to save the metric scores to a txt file