In [None]:
!pip install matplotlib -q

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_reward_functions():
    """Plot three retry-count reward curves on one figure and display it.

    The curves are evaluated at 100 evenly spaced retry counts in [0, 15]:

    * a basic sigmoid centred at 4 retries;
    * a "modified" sigmoid ``1 / (1 + exp(-x + |x| / 2))`` with
      ``x = retries - 4``, i.e. ``sigmoid(1.5 * x)`` below the centre and
      ``sigmoid(0.5 * x)`` above it;
    * the final reward: the modified sigmoid minus a linear penalty of 0.2
      per retry beyond 6, floored at 0.1.

    Returns nothing; the only effect is building the figure and calling
    ``plt.show()``.
    """
    # Sample the retry axis densely so the curves render smoothly.
    retries = np.linspace(0, 15, 100)

    # 1. Basic sigmoid centred at 4 retries.
    basic_sigmoid = 1 / (1 + np.exp(-(retries - 4)))

    # 2. Modified sigmoid: the |x|/2 term makes the curve asymmetric
    #    around the centre (steeper on the low-retry side).
    x = retries - 4
    modified_sigmoid = 1 / (1 + np.exp(-x + np.abs(x) / 2))

    # 3. Final reward: subtract a linear penalty only where retries > 6 and
    #    never let the penalised value fall below 0.1. Vectorised instead of
    #    looping over the array element by element.
    penalty = 0.2 * (retries - 6)
    penalized_reward = np.where(
        retries > 6,
        np.maximum(0.1, modified_sigmoid - penalty),
        modified_sigmoid,
    )

    # Plotting
    plt.figure(figsize=(12, 6))

    plt.plot(retries, basic_sigmoid, 'b--', label='Basic Sigmoid')
    plt.plot(retries, modified_sigmoid, 'g--', label='Modified Sigmoid')
    plt.plot(retries, penalized_reward, 'r-',
             label='Final Reward (with penalty)', linewidth=2)

    # Vertical guides at the sigmoid centre and at the penalty threshold.
    # NOTE(review): both sigmoids equal 0.5 at 4 retries and keep rising, so
    # the final reward is highest around the penalty threshold (6), not at 4.
    # The "Peak" label and the "Optimal Zone" annotation may need rewording.
    plt.axvline(x=4, color='gray', linestyle=':', alpha=0.5,
                label='Peak (4 retries)')
    plt.axvline(x=6, color='gray', linestyle=':', alpha=0.5,
                label='Penalty Start (6 retries)')

    plt.grid(True, alpha=0.3)
    plt.xlabel('Number of Retries')
    plt.ylabel('Reward')
    plt.title('Reward Function Visualization')
    plt.legend()
    # Leave a small margin around the [0, 1] reward range.
    plt.ylim(-0.1, 1.1)

    # Annotations calling out the two regions of interest.
    plt.annotate('Optimal Zone', xy=(4, 0.8), xytext=(4, 0.9),
                 ha='center', va='bottom',
                 bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3),
                 arrowprops=dict(arrowstyle='->'))

    plt.annotate('Penalty Zone', xy=(8, 0.3), xytext=(8, 0.5),
                 ha='center', va='bottom',
                 bbox=dict(boxstyle='round,pad=0.5', fc='red', alpha=0.3),
                 arrowprops=dict(arrowstyle='->'))

    plt.show()

# Run the visualization: builds the reward-curve figure and displays it
# (plot_reward_functions() ends with plt.show()).
plot_reward_functions()

# Print reward values for specific retry counts
def print_reward_examples(retry_examples=None):
    """Print a table of final reward values for a few retry counts.

    For each retry count the reward is the modified sigmoid
    ``1 / (1 + exp(-x + |x| / 2))`` with ``x = retries - 4``. When the count
    exceeds 6, a penalty of ``0.2 * (retries - 6)`` is subtracted and the
    result is floored at 0.1.

    Args:
        retry_examples: Optional iterable of integer retry counts to
            tabulate (they are formatted with ``d``, so they must be ints).
            Defaults to ``[1, 2, 3, 4, 5, 6, 7, 8, 10, 12]``.

    Returns:
        None. The table is written to standard output.
    """
    if retry_examples is None:
        retry_examples = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12]

    print("\nReward values for different retry counts:")
    print("Retries | Reward")
    print("-" * 20)

    for retries in retry_examples:
        # Offset from the sigmoid centre (4 retries).
        x = retries - 4
        reward = 1 / (1 + np.exp(-x + abs(x) / 2))
        if retries > 6:
            # Linear penalty past the threshold, with a floor of 0.1.
            penalty = 0.2 * (retries - 6)
            reward = max(0.1, reward - penalty)
        # One row per retry count, including the unpenalised ones.
        print(f"{retries:7d} | {reward:.3f}")

# Print the table of sample reward values to standard output.
print_reward_examples()