diff --git a/README.md b/README.md index 14132cc..0b5ecb0 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,26 @@ quizgen [options] quizgen anki --input-dir path/to/csv/files --root-deck-name "Root Deck Name" ``` +## Model Selection + +QuizGen allows you to select specific AI models for different stages of the question generation process. This can be useful for experimenting with different models or tailoring the output to your needs. + +The following model selection arguments are available: + +- `--concept-model`: Specifies the model used for concept extraction. Default is `gpt-4o-mini-2024-07-18`. +- `--questions-model`: Specifies the model used for question generation. Default is `gpt-4o-2024-08-06`. +- `--embedding-model`: Specifies the model used for generating embeddings. Default is `text-embedding-3-small`. + +These arguments can be used with both single-chapter and batch commands. For example: + +```bash +# Generate questions using a specific model: +quizgen generate --chapter-path path/to/chapter.md --title "Course Title" --output path/to/output.json --questions-model gpt-4 + +# Generate embeddings using a specific model: +quizgen embeddings --batch --input-dir path/to/json/files --output-dir path/to/embeddings/output --embedding-model text-embedding-ada-002 +``` + ## Example Anki Decks Check out these example Anki decks generated using QuizGen: diff --git a/src/quizgen/scripts/generate_embeddings_batch.py b/src/quizgen/scripts/generate_embeddings_batch.py index 5ba08b7..6f7a781 100644 --- a/src/quizgen/scripts/generate_embeddings_batch.py +++ b/src/quizgen/scripts/generate_embeddings_batch.py @@ -7,7 +7,7 @@ from tqdm import tqdm -def generate_embeddings(input_dir, output_dir): +def generate_embeddings(input_dir, output_dir, embedding_model): input_dir = pathlib.Path(input_dir).resolve() output_dir = pathlib.Path(output_dir).resolve() json_files = list(input_dir.rglob("*.json")) @@ -34,6 +34,8 @@ def generate_embeddings(input_dir, output_dir): 
str(json_file), "--output-path", str(output_path), + "--embedding-model", + embedding_model, ], check=True, ) @@ -54,9 +56,15 @@ def main(): parser.add_argument( "--output-dir", required=True, help="Output directory for embedding files" ) + parser.add_argument( + "--embedding-model", + type=str, + default="text-embedding-3-small", + help="Model to use for generating embeddings", + ) args = parser.parse_args() - generate_embeddings(args.input_dir, args.output_dir) + generate_embeddings(args.input_dir, args.output_dir, args.embedding_model) if __name__ == "__main__":