@ -71,6 +71,7 @@ def test_reward_correctness_wrong_answer(reward_correctness_fn):
def test_reward_format_correct ( ) :
def test_reward_format_correct ( ) :
""" Test reward format with correct format """
""" Test reward format with correct format """
prompts = [ " Test prompt " ]
completions = [
completions = [
{
{
" messages " : [
" messages " : [
@ -78,21 +79,23 @@ def test_reward_format_correct():
]
]
}
}
]
]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 1.0
assert rewards [ 0 ] == 1.0
def test_reward_format_with_search ( ) :
def test_reward_format_with_search ( ) :
""" Test reward format with search tags only (no answer tags) """
""" Test reward format with search tags only (no answer tags) """
prompts = [ " Test prompt " ]
completions = [
completions = [
{ " messages " : [ { " role " : " assistant " , " content " : " <think> \n Some reasoning \n </think> \n <search>query</search> " } ] }
{ " messages " : [ { " role " : " assistant " , " content " : " <think> \n Some reasoning \n </think> \n <search>query</search> " } ] }
]
]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 1.0
assert rewards [ 0 ] == 1.0
def test_reward_format_markdown_tags ( ) :
def test_reward_format_markdown_tags ( ) :
""" Test reward format with markdown-styled tags """
""" Test reward format with markdown-styled tags """
prompts = [ " Test prompt " ]
markdown_formats = [
markdown_formats = [
{
{
" messages " : [
" messages " : [
@ -121,12 +124,13 @@ def test_reward_format_markdown_tags():
]
]
for completion in markdown_formats :
for completion in markdown_formats :
rewards = reward_format ( [ ] , [ completion ] )
rewards = reward_format ( [ " Test prompt " ] , [ completion ] )
assert rewards [ 0 ] == 0.0 , f " Failed with: { completion [ ' messages ' ] [ 0 ] [ ' content ' ] } "
assert rewards [ 0 ] == 0.0 , f " Failed with: { completion [ ' messages ' ] [ 0 ] [ ' content ' ] } "
def test_reward_format_information_tags ( ) :
def test_reward_format_information_tags ( ) :
""" Test reward format with information tags """
""" Test reward format with information tags """
prompts = [ " Test prompt " ]
# Test different information tag variants
# Test different information tag variants
info_variants = [
info_variants = [
" <information>Some info</information> " ,
" <information>Some info</information> " ,
@ -139,12 +143,13 @@ def test_reward_format_information_tags():
for info_tag in info_variants :
for info_tag in info_variants :
content = f " <think> \n Some reasoning \n </think> \n { info_tag } \n <answer> \n The answer \n </answer> "
content = f " <think> \n Some reasoning \n </think> \n { info_tag } \n <answer> \n The answer \n </answer> "
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 0.0 , f " Failed to detect information tag: { info_tag } "
assert rewards [ 0 ] == 0.0 , f " Failed to detect information tag: { info_tag } "
def test_reward_format_real_example ( ) :
def test_reward_format_real_example ( ) :
""" Test reward format with a real-world example - should fail now since it has both search and answer tags """
""" Test reward format with a real-world example - should fail now since it has both search and answer tags """
prompts = [ " What cars did Paul Walker drive in Fast and Furious? " ]
content = """ <think>I need to search for Paul Walker ' s cars in Fast and Furious movies.</think>
content = """ <think>I need to search for Paul Walker ' s cars in Fast and Furious movies.</think>
< search > Paul Walker ' s cars in Fast and Furious </search>
< search > Paul Walker ' s cars in Fast and Furious </search>
@ -157,27 +162,29 @@ Based on the updated information, it seems the focus was on his career, financia
< answer > Charger < / answer > """
< answer > Charger < / answer > """
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 0.0 , " Should reject responses with both search and answer tags "
assert rewards [ 0 ] == 0.0 , " Should reject responses with both search and answer tags "
def test_reward_format_real_example_search_only ( ) :
def test_reward_format_real_example_search_only ( ) :
""" Test reward format with search-only format in a real-world example """
""" Test reward format with search-only format in a real-world example """
prompts = [ " What cars did Paul Walker drive in Fast and Furious? " ]
content = """ <think>I need to search for Paul Walker ' s cars in Fast and Furious movies.</think>
content = """ <think>I need to search for Paul Walker ' s cars in Fast and Furious movies.</think>
< search > Paul Walker ' s cars in Fast and Furious </search> " " "
< search > Paul Walker ' s cars in Fast and Furious </search> " " "
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 1.0 , " Should accept responses with only search tags "
assert rewards [ 0 ] == 1.0 , " Should accept responses with only search tags "
def test_reward_format_real_example_answer_only ( ) :
def test_reward_format_real_example_answer_only ( ) :
""" Test reward format with answer-only format in a real-world example """
""" Test reward format with answer-only format in a real-world example """
prompts = [ " What cars did Paul Walker drive in Fast and Furious? " ]
content = """ <think>Based on the information provided, it seems Paul Walker drove a Charger in the Fast and Furious series.</think>
content = """ <think>Based on the information provided, it seems Paul Walker drove a Charger in the Fast and Furious series.</think>
< answer > Charger < / answer > """
< answer > Charger < / answer > """
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
completions = [ { " messages " : [ { " role " : " assistant " , " content " : content } ] } ]
rewards = reward_format ( [ ] , completions )
rewards = reward_format ( prompts , completions )
assert rewards [ 0 ] == 1.0 , " Should accept responses with only answer tags "
assert rewards [ 0 ] == 1.0 , " Should accept responses with only answer tags "
@ -252,134 +259,33 @@ def test_reward_format_incomplete_tags():
def test_reward_retry ( ) :
def test_reward_retry ( ) :
""" Test reward retry functionality with progressive rewards up to 5 searches """
""" Test reward retry function """
# Test case with no searches
prompts = [ " What is the capital of France? " ]
completions = [ { " messages " : [ { " role " : " assistant " , " content " : " No searches here " } ] } ]
rewards = reward_retry ( [ ] , completions )
assert rewards [ 0 ] == 0.0 , " Should get 0 reward for no searches "
# Test case with one search
completions = [
{
" messages " : [
{
" role " : " assistant " ,
" content " : " <think>I need more information</think> \n <search>First query</search> " ,
}
]
}
]
rewards = reward_retry ( [ ] , completions )
assert rewards [ 0 ] == 0.35 , " Should get 0.35 reward for one search "
# Test case with three searches in different messages
completions = [
completions = [
{
{
" messages " : [
" messages " : [
{
{ " role " : " assistant " , " content " : " <think>Let me search</think> \n <search>capital of France</search> " } ,
" role " : " assistant " ,
{ " role " : " assistant " , " content " : " <think>Need more info</think> \n <search>Paris history</search> " } ,
" content " : " <think>First search</think> \n <search>Query 1</search> " ,
{ " role " : " assistant " , " content " : " <think>Found it</think> \n <answer>Paris</answer> " } ,
} ,
{ " role " : " assistant " , " content " : " <think>Second search</think> \n <search>Query 2</search> " } ,
{ " role " : " assistant " , " content " : " <think>Third search</think> \n <search>Query 3</search> " } ,
]
}
]
rewards = reward_retry ( [ ] , completions )
assert rewards [ 0 ] == 0.65 , " Should get 0.65 reward for three searches "
# Test case with five searches in different messages
completions = [
{
" messages " : [
{ " role " : " assistant " , " content " : " <think>Search 1</think> \n <search>Query 1</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 2</think> \n <search>Query 2</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 3</think> \n <search>Query 3</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 4</think> \n <search>Query 4</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 5</think> \n <search>Query 5</search> " } ,
]
]
}
}
]
]
rewards = reward_retry ( [ ] , completions )
rewards = reward_retry ( prompts , completions )
assert rewards [ 0 ] == 0.95 , " Should get 0.95 reward for five searches "
assert len ( rewards ) == 1
assert rewards [ 0 ] > 0 , " Should give positive reward for multiple search attempts "
# Test case with more than five searches
completions = [
{
" messages " : [
{ " role " : " assistant " , " content " : " <think>Search 1</think> \n <search>Query 1</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 2</think> \n <search>Query 2</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 3</think> \n <search>Query 3</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 4</think> \n <search>Query 4</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 5</think> \n <search>Query 5</search> " } ,
{ " role " : " assistant " , " content " : " <think>Search 6</think> \n <search>Query 6</search> " } ,
]
}
]
rewards = reward_retry ( [ ] , completions )
assert rewards [ 0 ] == 0.95 , " Should cap at 0.95 reward for more than five searches "
# Test case with violation (multiple searches in one message)
completions = [
{
" messages " : [
{
" role " : " assistant " ,
" content " : " <think>Multiple searches</think> \n <search>First query</search> \n <search>Second query</search> " ,
}
]
}
]
rewards = reward_retry ( [ ] , completions )
assert rewards [ 0 ] == 0.25 , " Should get penalized reward (0.5 * 0.5) for violation "
def test_reward_em_chunk ( ) :
def test_reward_em_chunk ( ) :
""" Test reward EM chunk functionality with information tags """
""" Test exact match chunk reward function """
# Test case with matching content in ipython role
prompts = [ " What is Python? " ]
completions = [
completions = [
{ " messages " : [ { " role " : " ipython " , " content " : " <information> This is the correct chunk content </information>" } ] }
{ " messages " : [ { " role " : " user " , " content " : " <information>Python is a programming language</information> " } ] }
]
]
reward_kwargs = { " chunk_content " : [ " This is the correct chunk content " ] }
correct_contents = [ " Python is a programming language " ]
rewards = reward_em_chunk ( [ ] , completions , * * reward_kwarg s)
rewards = reward_em_chunk ( prompts , completions , chunk_content = correct_contents )
assert len ( rewards ) == 1
assert len ( rewards ) == 1
assert rewards [ 0 ] == 1.0 , " Should get reward 1.0 for exact match in ipython role "
assert rewards [ 0 ] == 1.0 , " Should give full reward for exact chunk match "
# Test case with matching content in user role
completions = [
{ " messages " : [ { " role " : " user " , " content " : " <information>This is the correct chunk content</information> " } ] }
]
rewards = reward_em_chunk ( [ ] , completions , * * reward_kwargs )
assert rewards [ 0 ] == 1.0 , " Should get reward 1.0 for exact match in user role "
# Test case with content not starting with <information> tag
completions = [ { " messages " : [ { " role " : " ipython " , " content " : " This is the correct chunk content " } ] } ]
rewards = reward_em_chunk ( [ ] , completions , * * reward_kwargs )
assert rewards [ 0 ] == 0.0 , " Should get reward 0.0 for missing information tag "
# Test case with wrong role
completions = [
{
" messages " : [
{ " role " : " assistant " , " content " : " <information>This is the correct chunk content</information> " }
]
}
]
rewards = reward_em_chunk ( [ ] , completions , * * reward_kwargs )
assert rewards [ 0 ] == 0.0 , " Should get reward 0.0 for wrong role "
# Test case with multiple messages, only one matching
completions = [
{
" messages " : [
{ " role " : " ipython " , " content " : " <information>Wrong content</information> " } ,
{ " role " : " user " , " content " : " <information>This is the correct chunk content</information> " } ,
]
}
]
rewards = reward_em_chunk ( [ ] , completions , * * reward_kwargs )
assert rewards [ 0 ] == 1.0 , " Should get reward 1.0 if any message matches "
def test_reward_em_chunk_no_chunk_content ( ) :
def test_reward_em_chunk_no_chunk_content ( ) :