You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

47 lines
2.1 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

# ------------------------------------------------Environment Settings------------------------------------------------#
# Directories: where the data lives and where outputs are saved.
data_dir: '/mnt/nas/thinhlpg/code/DeepSearch/data/flashrag_datasets/'
save_dir: '/mnt/nas/thinhlpg/code/DeepSearch/logs'
# Fixed seed so that runs are reproducible.
seed: 2024
# Whether to save intermediate data.
save_intermediate_data: true
# NOTE(review): presumably a free-form label attached to the saved run -- confirm against the consumer.
save_note: 'experiment'
# -------------------------------------------------Retrieval Settings------------------------------------------------#
# When a remote URL is set, the retriever is a remote retriever and the
# remaining settings in this section are ignored.
use_remote_retriever: true
remote_retriever_url: 'localhost:8001'
# Instruction for the retrieval model.
instruction: null
# Number of retrieved documents.
retrieval_topk: 5
# Batch size for retrieval.
retrieval_batch_size: 256
# Whether to use fp16 for the retrieval model.
retrieval_use_fp16: true
# Max length of the query.
retrieval_query_max_length: 128
# Whether to save the retrieval cache.
save_retrieval_cache: false
# Whether to use the retrieval cache.
use_retrieval_cache: false
# Path to the retrieval cache.
retrieval_cache_path: null
# Set automatically if not provided.
retrieval_pooling_method: null
# -------------------------------------------------Generator Settings------------------------------------------------#
# Inference framework of the LLM. Documented options: 'hf', 'vllm', 'fschat'.
# NOTE(review): 'sgl_remote' (used here) is not in that list -- confirm the
# consumer supports it and update the list of options accordingly.
framework: sgl_remote
sgl_remote_url: "localhost:8002"
# Name or path of the generator model, for loading the tokenizer.
generator_model: "janhq/250404-llama-3.2-3b-instruct-grpo-03-s250"
# Max length of the input.
generator_max_input_len: 2048
# Generation parameters. The child keys must be indented under
# `generation_params`; at column 0 they would parse as unrelated top-level keys
# and `generation_params` itself would be null.
generation_params:
  do_sample: false
  max_tokens: 8192
# -------------------------------------------------Evaluation Settings------------------------------------------------#
# Metrics used to evaluate the result.
metrics: ['em', 'f1', 'acc', 'precision', 'recall']
# Per-metric settings, consulted by certain metrics. The child key must be
# indented under `metric_setting`; at column 0 it would parse as a top-level key
# and `metric_setting` itself would be null.
metric_setting:
  retrieval_recall_topk: 5
# Whether to save the metric score into a txt file.
save_metric_score: true