# Launcher: set the CUDA device list, then run train_grpo.py using the
# `python` found on PATH, relative to the current working directory.

# Assign first, then mark the variable for export so that child processes
# inherit it. Any value already present in the environment is overwritten.
# Per CUDA's documented handling of this variable, "0" exposes only device 0.
CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES

# No command-line arguments are forwarded to the Python script.
# NOTE(review): presumably a GRPO training entry point, judging by the file
# name only; its contents are not visible from here.
python train_grpo.py