You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
2.4 KiB
95 lines
2.4 KiB
1 year ago
|
###### VERSION2
|
||
|
import inspect
|
||
|
import os
|
||
|
import threading
|
||
1 year ago
|
|
||
|
from dotenv import load_dotenv
|
||
|
|
||
1 year ago
|
from scripts.auto_tests_docs.docs import DOCUMENTATION_WRITER_SOP
|
||
1 year ago
|
from swarms import OpenAIChat
|
||
1 year ago
|
|
||
1 year ago
|
##########
|
||
1 year ago
|
from swarms.tokenizers.r_tokenizers import (
|
||
|
SentencePieceTokenizer,
|
||
|
HuggingFaceTokenizer,
|
||
|
Tokenizer,
|
||
|
)
|
||
|
from swarms.tokenizers.base_tokenizer import BaseTokenizer
|
||
|
from swarms.tokenizers.openai_tokenizers import OpenAITokenizer
|
||
|
from swarms.tokenizers.anthropic_tokenizer import (
|
||
|
AnthropicTokenizer,
|
||
|
)
|
||
|
from swarms.tokenizers.cohere_tokenizer import CohereTokenizer
|
||
|
|
||
1 year ago
|
|
||
1 year ago
|
####################
|
||
1 year ago
|
# Load variables from a .env file into the process environment.
load_dotenv()

# OpenAI credential read from the environment; None when the variable is unset.
api_key = os.getenv("OPENAI_API_KEY")

# Shared chat model instance that process_documentation calls to write docs.
model = OpenAIChat(openai_api_key=api_key, max_tokens=4000)
|
||
|
|
||
|
|
||
1 year ago
|
def process_documentation(cls):
    """
    Generate Markdown documentation for a class with the OpenAI model and save it.

    The class name, its docstring and its source code are combined into a
    single prompt, wrapped by ``DOCUMENTATION_WRITER_SOP`` for the
    ``"swarms.tokenizers"`` module and passed to the module-level ``model``.
    The returned text is written, followed by a newline, to
    ``docs/swarms/tokenizers/<lowercased class name>.md``; the directory is
    created when missing and an existing file is overwritten.

    Args:
        cls: The class to document. ``inspect.getsource`` must be able to
            retrieve its source code.

    Raises:
        OSError: If ``inspect.getsource`` cannot retrieve the source, or the
            output directory/file cannot be created or written.
        TypeError: If ``cls`` is a built-in object without Python source.
    """
    # getdoc returns None for an undocumented class; the prompt then
    # contains the literal text "None" in the documentation section.
    doc = inspect.getdoc(cls)
    source = inspect.getsource(cls)
    input_content = (
        "Class Name:"
        f" {cls.__name__}\n\nDocumentation:\n{doc}\n\nSource"
        f" Code:\n{source}"
    )

    # Process with OpenAI model (assuming the model's __call__ method takes
    # this input and returns processed content)
    processed_content = model(
        DOCUMENTATION_WRITER_SOP(input_content, "swarms.tokenizers")
    )

    doc_content = f"{processed_content}\n"

    # Create the directory if it doesn't exist
    dir_path = "docs/swarms/tokenizers"
    os.makedirs(dir_path, exist_ok=True)

    # Write the processed documentation to a Markdown file.
    # The encoding is pinned to UTF-8 because model output routinely contains
    # non-ASCII characters, and the platform default encoding (e.g. cp1252)
    # would otherwise raise UnicodeEncodeError or produce mojibake.
    file_path = os.path.join(dir_path, f"{cls.__name__.lower()}.md")
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(doc_content)

    print(f"Documentation generated for {cls.__name__}.")
|
||
1 year ago
|
|
||
1 year ago
|
|
||
|
def main():
    """
    Generate documentation for every tokenizer class, one thread per class.

    Each class is handed to ``process_documentation`` on its own thread; the
    function returns only after all threads have finished.
    """
    tokenizer_classes = (
        SentencePieceTokenizer,
        HuggingFaceTokenizer,
        Tokenizer,
        BaseTokenizer,
        OpenAITokenizer,
        AnthropicTokenizer,
        CohereTokenizer,
    )

    # One worker per class, in the same order as the tuple above.
    workers = [
        threading.Thread(target=process_documentation, args=(tokenizer_cls,))
        for tokenizer_cls in tokenizer_classes
    ]
    for worker in workers:
        worker.start()

    # Block until every worker has completed.
    for worker in workers:
        worker.join()

    print("Documentation generated in 'swarms.tokenizers' directory.")
|
||
1 year ago
|
|
||
|
|
||
|
# Run the documentation generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|