diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json new file mode 100644 index 0000000000..f35ebc134a --- /dev/null +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json @@ -0,0 +1,93 @@ + { + "dataset_reader": { + "class_name": "paraphraser_reader", + "data_path": "{DOWNLOADS_PATH}/paraphraser_data", + "do_lower_case": false + }, + "dataset_iterator": { + "class_name": "siamese_iterator", + "seed": 243, + "len_valid": 500 + }, + "chainer": { + "in": ["text_a", "text_b"], + "in_y": ["y"], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 64, + "in": ["text_a", "text_b"], + "out": ["bert_features"] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 2, + "return_probas": false, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.67, + "hidden_keep_prob": 0.0, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 9e-05 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y" + ], + "out": [ + "predictions" + ] + } + ], + "out": ["predictions"] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1", + "accuracy" + ], + "validation_patience": 7, + "val_every_n_batches": 50, + "log_every_n_batches": 50, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L" + }, + "download": 
[ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L.tar.gz", + "subdir": "{MODELS_PATH}" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + } + ] + } +} diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json new file mode 100644 index 0000000000..02060d97ea --- /dev/null +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json @@ -0,0 +1,93 @@ +{ + "dataset_reader": { + "class_name": "paraphraser_reader", + "data_path": "{DOWNLOADS_PATH}/paraphraser_data", + "do_lower_case": false + }, + "dataset_iterator": { + "class_name": "siamese_iterator", + "seed": 243, + "len_valid": 500 + }, + "chainer": { + "in": ["text_a", "text_b"], + "in_y": ["y"], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 64, + "in": ["text_a", "text_b"], + "out": ["bert_features"] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 2, + "return_probas": false, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.89, + "hidden_keep_prob": 0.44, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 5.46e-05 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y" + ], + "out": [ + "predictions" + ] + } + ], + "out": ["predictions"] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1", + "accuracy" + ], + "validation_patience": 7, + "val_every_n_batches": 50, + "log_every_n_batches": 50, + "evaluation_targets": [ + "train", 
+ "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L.tar.gz", + "subdir": "{MODELS_PATH}" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + } + ] + } +} diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json new file mode 100644 index 0000000000..42d0c72fc4 --- /dev/null +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json @@ -0,0 +1,145 @@ +{ + "dataset_reader": { + "class_name": "basic_classification_reader", + "x": "text", + "y": "label", + "data_path": "{DOWNLOADS_PATH}/rusentiment/", + "train": "rusentiment_random_posts.csv", + "test": "rusentiment_test.csv" + }, + "dataset_iterator": { + "class_name": "basic_classification_iterator", + "seed": 42, + "split_seed": 23, + "field_to_split": "train", + "split_fields": [ + "train", + "valid" + ], + "split_proportions": [ + 0.9, + 0.1 + ] + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, + "max_seq_length": 64, + "in": [ + "x" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + 
"load_path": "{MODEL_PATH}/classes.dict", + "in": "y", + "out": "y_ids" + }, + { + "in": "y_ids", + "out": "y_onehot", + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.33, + "hidden_keep_prob": 0.67, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 3.67e-05 + }, + "learning_rate_drop_patience": 5, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": "y_pred_probas", + "out": "y_pred_ids", + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": "y_pred_ids", + "out": "y_pred_labels", + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1_weighted", + "f1_macro", + "accuracy", + { + "name": "roc_auc", + "inputs": [ + "y_onehot", + "y_pred_probas" + ] + } + ], + "validation_patience": 5, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L.tar.gz", + "subdir": "{MODELS_PATH}/classifiers/" + } + ] + } +} diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json 
b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json new file mode 100644 index 0000000000..f81488dbbb --- /dev/null +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json @@ -0,0 +1,145 @@ +{ + "dataset_reader": { + "class_name": "basic_classification_reader", + "x": "text", + "y": "label", + "data_path": "{DOWNLOADS_PATH}/rusentiment/", + "train": "rusentiment_random_posts.csv", + "test": "rusentiment_test.csv" + }, + "dataset_iterator": { + "class_name": "basic_classification_iterator", + "seed": 42, + "split_seed": 23, + "field_to_split": "train", + "split_fields": [ + "train", + "valid" + ], + "split_proportions": [ + 0.9, + 0.1 + ] + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, + "max_seq_length": 64, + "in": [ + "x" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": "y", + "out": "y_ids" + }, + { + "in": "y_ids", + "out": "y_onehot", + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.22, + "hidden_keep_prob": 0.22, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 4.56e-05 + }, + "learning_rate_drop_patience": 5, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": "y_pred_probas", + "out": "y_pred_ids", + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": "y_pred_ids", + "out": "y_pred_labels", 
+ "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1_weighted", + "f1_macro", + "accuracy", + { + "name": "roc_auc", + "inputs": [ + "y_onehot", + "y_pred_probas" + ] + } + ], + "validation_patience": 5, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L.tar.gz", + "subdir": "{MODELS_PATH}/classifiers/" + } + ] + } +} diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json new file mode 100644 index 0000000000..6123c18138 --- /dev/null +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json @@ -0,0 +1,155 @@ + { + "dataset_reader": { + "class_name": "conll2003_reader", + "data_path": "{DOWNLOADS_PATH}/total_rus/", + "dataset_name": "collection_rus", + "provide_pos": false + }, + "dataset_iterator": { + "class_name": "data_learning_iterator" + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 512, + "max_subword_length": 15, + "token_masking_prob": 0.0, + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] + }, + { + "id": "tag_vocab", + "class_name": 
"simple_vocab", + "unk_token": [ + "O" + ], + "pad_with_zeros": true, + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] + }, + { + "class_name": "torch_transformers_sequence_tagger", + "n_tags": "#tag_vocab.len", + "pretrained_bert": "{TRANSFORMER}", + "attention_probs_keep_prob": 0.11, + "hidden_keep_prob": 0.67, + "return_probas": false, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 8.11e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, + "learning_rate_drop_patience": 30, + "learning_rate_drop_div": 1.5, + "load_before_drop": true, + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] + }, + { + "ref": "tag_vocab", + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] + } + ], + "out": [ + "x_tokens", + "y_pred" + ] + }, + "train": { + "epochs": 30, + "batch_size": 10, + "metrics": [ + { + "name": "ner_f1", + "inputs": [ + "y", + "y_pred" + ] + }, + { + "name": "ner_token_f1", + "inputs": [ + "y", + "y_pred" + ] + } + ], + "validation_patience": 100, + "val_every_n_batches": 20, + "log_every_n_batches": 20, + "show_examples": false, + "evaluation_targets": [ + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_2L", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_2L.tar.gz", + "subdir": 
"{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json new file mode 100644 index 0000000000..f719065d58 --- /dev/null +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json @@ -0,0 +1,155 @@ +{ + "dataset_reader": { + "class_name": "conll2003_reader", + "data_path": "{DOWNLOADS_PATH}/total_rus/", + "dataset_name": "collection_rus", + "provide_pos": false + }, + "dataset_iterator": { + "class_name": "data_learning_iterator" + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 512, + "max_subword_length": 15, + "token_masking_prob": 0.0, + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] + }, + { + "id": "tag_vocab", + "class_name": "simple_vocab", + "unk_token": [ + "O" + ], + "pad_with_zeros": true, + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] + }, + { + "class_name": "torch_transformers_sequence_tagger", + "n_tags": "#tag_vocab.len", + "pretrained_bert": "{TRANSFORMER}", + "attention_probs_keep_prob": 0.56, + "hidden_keep_prob": 1.0, + "return_probas": false, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2.78e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, + "learning_rate_drop_patience": 30, + "learning_rate_drop_div": 1.5, + "load_before_drop": true, + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] + 
}, + { + "ref": "tag_vocab", + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] + } + ], + "out": [ + "x_tokens", + "y_pred" + ] + }, + "train": { + "epochs": 30, + "batch_size": 10, + "metrics": [ + { + "name": "ner_f1", + "inputs": [ + "y", + "y_pred" + ] + }, + { + "name": "ner_token_f1", + "inputs": [ + "y", + "y_pred" + ] + } + ], + "validation_patience": 100, + "val_every_n_batches": 20, + "log_every_n_batches": 20, + "show_examples": false, + "evaluation_targets": [ + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_6L", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_6L.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json new file mode 100644 index 0000000000..830ded55f6 --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json @@ -0,0 +1,173 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], + "pipe": [ + { + "class_name": "torch_squad_transformers_preprocessor", + "add_token_type_ids": true, + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 384, + "return_tokens": true, + "in": [ + 
"question_raw", + "context_raw" + ], + "out": [ + "bert_features", + "subtokens" + ] + }, + { + "class_name": "squad_bert_mapping", + "do_lower_case": "{lowercase}", + "in": [ + "context_raw", + "bert_features", + "subtokens" + ], + "out": [ + "subtok2chars", + "char2subtoks" + ] + }, + { + "class_name": "squad_bert_ans_preprocessor", + "do_lower_case": "{lowercase}", + "in": [ + "ans_raw", + "ans_raw_start", + "char2subtoks" + ], + "out": [ + "ans", + "ans_start", + "ans_end" + ] + }, + { + "class_name": "torch_transformers_squad", + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.0, + "hidden_keep_prob": 0.11, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 9e-05 + }, + "learning_rate_drop_patience": 2, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "ans_start", + "ans_end" + ], + "out": [ + "ans_start_predicted", + "ans_end_predicted", + "logits" + ] + }, + { + "class_name": "squad_bert_ans_postprocessor", + "in": [ + "ans_start_predicted", + "ans_end_predicted", + "context_raw", + "bert_features", + "subtok2chars", + "subtokens" + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "ans_end_predicted" + ] + } + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v2_em", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_em", + "inputs": [ + "ans", + "ans_predicted" + ] + } + ], + "tensorboard_log_dir": "{MODEL_PATH}/logs", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { 
+ "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json new file mode 100644 index 0000000000..9202d83ba8 --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json @@ -0,0 +1,76 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": ["context_raw", "question_raw"], + "in_y": ["ans_raw", "ans_raw_start"], + "pipe": [ + { + "class_name": "torch_transformers_squad_infer", + "lang": "ru", + "batch_size": 128, + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_2L.json", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 256, + "in": ["context_raw", "question_raw"], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + } + ], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v2_em", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": 
"squad_v1_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_em", + "inputs": ["ans_raw", "ans_predicted"] + } + ] + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L", + "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json new file mode 100644 index 0000000000..58e815cc77 --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json @@ -0,0 +1,173 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], + "pipe": [ + { + "class_name": "torch_squad_transformers_preprocessor", + "add_token_type_ids": true, + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 384, + "return_tokens": true, + "in": [ + "question_raw", + "context_raw" + ], + "out": [ + "bert_features", + "subtokens" + ] + }, + { + "class_name": "squad_bert_mapping", + "do_lower_case": "{lowercase}", + "in": [ + "context_raw", + "bert_features", + "subtokens" + ], + "out": [ + "subtok2chars", + "char2subtoks" + ] + }, + { + "class_name": "squad_bert_ans_preprocessor", + "do_lower_case": "{lowercase}", + "in": 
[ + "ans_raw", + "ans_raw_start", + "char2subtoks" + ], + "out": [ + "ans", + "ans_start", + "ans_end" + ] + }, + { + "class_name": "torch_transformers_squad", + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.45, + "hidden_keep_prob": 0.56, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2.78e-05 + }, + "learning_rate_drop_patience": 2, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "ans_start", + "ans_end" + ], + "out": [ + "ans_start_predicted", + "ans_end_predicted", + "logits" + ] + }, + { + "class_name": "squad_bert_ans_postprocessor", + "in": [ + "ans_start_predicted", + "ans_end_predicted", + "context_raw", + "bert_features", + "subtok2chars", + "subtokens" + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "ans_end_predicted" + ] + } + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v2_em", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_em", + "inputs": [ + "ans", + "ans_predicted" + ] + } + ], + "tensorboard_log_dir": "{MODEL_PATH}/logs", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L" + }, + "download": [ + { + "url": 
"http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json new file mode 100644 index 0000000000..5c6171311c --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json @@ -0,0 +1,76 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": ["context_raw", "question_raw"], + "in_y": ["ans_raw", "ans_raw_start"], + "pipe": [ + { + "class_name": "torch_transformers_squad_infer", + "lang": "ru", + "batch_size": 128, + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_6L.json", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 256, + "in": ["context_raw", "question_raw"], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + } + ], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v2_em", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_em", + "inputs": ["ans_raw", "ans_predicted"] + } + ] + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": 
"DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L", + "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/docs/features/models/bert.rst b/docs/features/models/bert.rst index 285c781991..9e68437742 100644 --- a/docs/features/models/bert.rst +++ b/docs/features/models/bert.rst @@ -29,6 +29,8 @@ We have trained BERT-base model for other languages and domains: `[deeppavlov_pytorch] `__ - Conversational RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, `[deeppavlov_pytorch] `__ +- Conversational DistilRuBERT, Russian, cased, 6-layer, 768-hidden, 12-heads, 135.4M parameters: `[deeppavlov_pytorch] `__ +- Conversational DistilRuBERT-tiny, Russian, cased, 2-layer, 768-hidden, 12-heads, 107M parameters: `[deeppavlov_pytorch] `__ - Sentence Multilingual BERT, 101 languages, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, `[deeppavlov_pytorch] `__ - Sentence RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, @@ -50,6 +52,13 @@ English cased version of BERT-base as initialization for English Conversational Conversational RuBERT was trained on OpenSubtitles [5]_, Dirty, Pikabu, and Social Media segment of Taiga corpus [8]_. We assembled new vocabulary for Conversational RuBERT model on this data and initialized model with RuBERT. +Conversational DistilRuBERT (6 transformer layers) and DistilRuBERT-tiny (2 transformer layers) were trained on the same data as Conversational RuBERT and highly inspired by DistilBERT [13]_. Namely, Distil* models (students) used pretrained Conversational RuBERT as teacher and linear combination of the following losses: + +1. 
Masked language modeling loss (between student output logits for tokens and their true labels) +2. Kullback-Leibler divergence (between student and teacher output logits) +3. Cosine embedding loss (between averaged hidden states of the teacher and hidden states of the student) +4. Mean squared error loss (between averaged attention maps of the teacher and attention maps of the student) + Sentence Multilingual BERT is a representation-based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine-tuned on english MultiNLI [9]_ and on dev set of multilingual XNLI [10]_. Sentence representations are mean pooled token embeddings in the same manner as in Sentence-BERT [12]_. @@ -196,3 +205,4 @@ the :doc:`config ` file must be changed to match new BERT .. [10] Williams A., Bowman S. (2018) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint arXiv:1809.05053 .. [11] S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. (2015) A large annotated corpus for learning natural language inference. arXiv preprint arXiv:1508.05326 .. [12] N. Reimers, I. Gurevych (2019) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint arXiv:1908.10084 +.. [13] Sanh, V., Debut, L., Chaumond, J., & Wolf, T. (2019). DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108. diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 31e822ff89..376b780cca 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -20,27 +20,31 @@ The second model reproduces architecture from the paper `Application of a Hybrid Bi-LSTM-CRF model to the task of Russian Named Entity Recognition `__ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01360.pdf.
-+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ -| Dataset | Lang | Model | Test F1 | -+=========================================================+=======+=============================================================================+=============+ -| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | -+ + +-----------------------------------------------------------------------------+-------------+ -| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | -+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ -| Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | -+ +-------+-----------------------------------------------------------------------------+-------------+ -| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_ontonotes.json ` | 87.1 | -+---------------------------------------------------------+ +-----------------------------------------------------------------------------+-------------+ -| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003.json ` | 89.9 | -+---------------------------------------------------------+ +-----------------------------------------------------------------------------+-------------+ -| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | 
-+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ +| Dataset | Lang | Model | Test F1 | ++=========================================================+=======+============================================================================================+=============+ +| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_2L.json ` | 94.2 ± 0.2 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_6L.json ` | 96.4 ± 0.2 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ +| Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | ++ +-------+--------------------------------------------------------------------------------------------+-------------+ +| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_ontonotes.json ` | 87.1 | ++---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ +| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 
91.7 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003.json ` | 89.9 | ++---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ +| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ Slot filling models :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,61 +67,65 @@ BiLSTM with self-attention and other models are presented. The model also allows Several pre-trained models are available and presented in Table below. 
-+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | -+==================+====================+======+=================================================================================================+=============+========+========+===========+ -| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | -+------------------+--------------------+ 
+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |`Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Sentiment |`SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | -+ +--------------------+ +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| |`Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | -+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Sentiment |`Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | -+ +--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| |`RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | -+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | ++==================+=====================+======+====================================================================================================+=============+==================+=================+===========+ +| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | ++------------------+---------------------+ 
+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | 
++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | `Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | ++ +---------------------+ +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | `Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | 
++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | ++ +---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| | `RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational 
DistilRuBERT-tiny ` | | 0.72 ± 0.0016 | 0.74 ± 0.01 | 690 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational DistilRuBERT-base ` | | 0.73 ± 0.003 | 0.75 ± 0.013 | 1.0 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ .. [1] Coucke A. et al. Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018. .. [2] Smith L. N., Topin N. Super-convergence: Very fast training of residual networks using large learning rates. – 2018. 
@@ -231,11 +239,11 @@ Available pre-trained models for ranking: +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_mt_word2vec_smn ` | 68.56 | 67.91 | 81.49 | 95.63 | 1609 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_uncased ` | 66.5 | 66.6 | -- | -- | 396 MB | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_uncased ` | 66.5 | 66.6 | -- | -- | 396 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_uncased on PyTorch ` | 65.73 | 65.74 | -- | -- | 1.1 Gb | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_uncased on PyTorch ` | 65.73 | 65.74 | -- | -- | 1.1 Gb | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_sep ` | 66.5 | 66.5 | -- | -- | 396 MB | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_sep ` | 66.5 | 66.5 | -- | -- | 396 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_mt_interact ` | 59.2 | 58.7 | -- | -- | 8906 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ @@ -247,15 
+255,19 @@ Available pre-trained models for paraphrase identification: .. table:: :widths: auto - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - | Dataset |Model config | Val (accuracy)| Test (accuracy)| Val (F1)| Test (F1)| Val (log_loss)| Test (log_loss)|Downloads | - +========================+===============================================================================================+===============+================+=========+==========+===============+================+==========+ - |`paraphraser.ru`_ |:config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 |5938M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - |`paraphraser.ru`_ |:config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- |1330M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - |`paraphraser.ru`_ |:config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- |1325M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val 
(log_loss) | Test (log_loss) | Downloads | + +========================+======================================================================================================+================+=================+==============+==============+================+=================+===========+ + | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 79.4 ± 0.01 | 67.5 ± 0.006 | 84.4 ± 0.04 | 76.2 ± 0.006 | -- | -- | 618M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 87.1 ± 0.01 | 78.0 ± 0.01 | 90.0 ± 0.08 | 82.9 ± 0.003 | -- | -- | 930M | + 
+------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ .. _`paraphraser.ru`: https://paraphraser.ru/ @@ -319,25 +331,29 @@ BERT-based model is described in `BERT: Pre-training of Deep Bidirectional Tran R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching Networks `__. -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | -+===============+========================================================================+=======+================+=================+=================+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov RuBERT ` | ru | 66.30+-0.24 | 84.60+-0.11 | 1325Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov multilingual BERT `| ru | 64.35+-0.39 | 83.39+-0.08 | 1323Mb | 
-+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | ++================+=============================================================================================+=======+================+=================+=================+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | 
++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov multilingual BERT ` | ru | 64.35 ± 0.39 | 83.39 ± 0.08 | 1323Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 48.3 ± 0.41 | 68.9 ± 0.39 | 867Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.77 ± 0.25 | 80.39 ± 0.21 | 1.18Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | 
++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ In the case when the answer is not necessarily present in the given context, we have the :config:`squad_noans ` model. This model outputs an empty string if there is no answer in the context. @@ -361,31 +377,31 @@ For more scores see :doc:`full table `. +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ | Dataset | Model | Word accuracy | Sent. accuracy | Download size (MB) | +======================+==============================================================================================================+===============+================+====================+ - |`UD2.3`_ (Russian) |`UD Pipe 2.3`_ (Straka et al., 2017) | 93.5 | | | + | `UD2.3`_ (Russian) | `UD Pipe 2.3`_ (Straka et al., 2017) | 93.5 | | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ | | `UD Pipe Future`_ (Straka et al., 2018) | 96.90 | | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`BERT-based model ` | 97.83 | 72.02 | 661 | + | | :config:`BERT-based model ` | 97.83 | 72.02 | 661 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |`Pymorphy`_ + `russian_tagsets`_ (first tag) | 60.93 | 0.00 | | + | | `Pymorphy`_ + `russian_tagsets`_ (first tag) | 60.93 | 0.00 | | +
+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (Russian) |`UD Pipe 1.2`_ (Straka et al., 2017) | 93.57 | 43.04 | | + | `UD2.0`_ (Russian) | `UD Pipe 1.2`_ (Straka et al., 2017) | 93.57 | 43.04 | | + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | 95.17 | 50.58 | 48.7 | + | | :config:`Basic model ` | 95.17 | 50.58 | 48.7 | + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Pymorphy-enhanced model ` | **96.23** | 58.00 | 48.7 | + | | :config:`Pymorphy-enhanced model ` | **96.23** | 58.00 | 48.7 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (Czech) |`UD Pipe 1.2`_ (Straka et al., 2017) | 91.86 | 42.28 | | + | `UD2.0`_ (Czech) | `UD Pipe 1.2`_ (Straka et al., 2017) | 91.86 | 42.28 | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **94.35** | 51.56 | 41.8 | + | | :config:`Basic model ` | **94.35** | 51.56 | 41.8 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (English) |`UD Pipe 1.2`_ (Straka et al., 2017) | 92.89 | 55.75 | | + | `UD2.0`_ (English) | `UD Pipe 1.2`_ (Straka et al., 2017) | 92.89 | 55.75 | | | 
+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **93.00** | 55.18 | 16.9 | + | | :config:`Basic model ` | **93.00** | 55.18 | 16.9 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (German) |`UD Pipe 1.2`_ (Straka et al., 2017) | 76.65 | 10.24 | | + | `UD2.0`_ (German) | `UD Pipe 1.2`_ (Straka et al., 2017) | 76.65 | 10.24 | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **83.83** | 15.25 | 18.6 | + | | :config:`Basic model ` | **83.83** | 15.25 | 18.6 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ .. _`Pymorphy`: https://pymorphy2.readthedocs.io/en/latest/ @@ -415,7 +431,7 @@ on ``ru_syntagrus`` Russian corpus (version UD 2.3). | +-------------------------------------------------------------------------------------------+---------+----------+ | | `UDify (multilingual BERT)`_ (Kondratyuk, 2018) | 94.8 | 93.1 | | +-------------------------------------------------------------------------------------------+---------+----------+ - | |:config:`our BERT model ` | 95.2 | 93.7 | + | | :config:`our BERT model ` | 95.2 | 93.7 | +-------------------------+-------------------------------------------------------------------------------------------+---------+----------+ .. _`UD2.3`: http://hdl.handle.net/11234/1-2895 @@ -473,13 +489,13 @@ based on its Wikipedia knowledge. 
+----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ | Dataset | Model config | Wiki dump | F1 | Downloads | +================+====================================================================+=======================+========+===========+ -| `SQuAD-v1.1`_ |:config:`ODQA ` | enwiki (2018-02-11) | 35.89 | 9.7Gb | +| `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2018-02-11) | 35.89 | 9.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SQuAD-v1.1`_ |:config:`ODQA ` | enwiki (2016-12-21) | 37.83 | 9.3Gb | +| `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2016-12-21) | 37.83 | 9.3Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SDSJ Task B`_ |:config:`ODQA ` | ruwiki (2018-04-01) | 28.56 | 7.7Gb | +| `SDSJ Task B`_ | :config:`ODQA ` | ruwiki (2018-04-01) | 28.56 | 7.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SDSJ Task B`_ |:config:`ODQA with RuBERT ` | ruwiki (2018-04-01) | 37.83 | 4.3Gb | +| `SDSJ Task B`_ | :config:`ODQA with RuBERT ` | ruwiki (2018-04-01) | 37.83 | 4.3Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ @@ -554,5 +570,5 @@ goal-oriented bot and a slot-filling model with Telegram UI. .. _`SQuAD-v1.1`: https://arxiv.org/abs/1606.05250 -.. _`SDSJ Task B`: https://sdsj.sberbank.ai/2017/ru/contest.html +.. _`SDSJ Task B`: https://arxiv.org/abs/1912.09723 .. 
_`DRCD`: https://arxiv.org/abs/1806.00920 diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 9c7b642364..74ae65dedc 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -144,6 +144,18 @@ ("classifiers/intents_sample_csv.json", "classifiers", ('TI',)): [ONE_ARGUMENT_INFER_CHECK], ("classifiers/intents_sample_json.json", "classifiers", ('TI',)): [ONE_ARGUMENT_INFER_CHECK] }, + "distil": { + ("classifiers/paraphraser_convers_distilrubert_2L.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + ("classifiers/paraphraser_convers_distilrubert_6L.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + ("classifiers/rusentiment_convers_distilrubert_2L.json", "distil", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], + ("classifiers/rusentiment_convers_distilrubert_6L.json", "distil", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], + ("ner/ner_rus_convers_distilrubert_2L.json", "distil", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], + ("ner/ner_rus_convers_distilrubert_6L.json", "distil", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_2L.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_2L_infer.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_6L.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_6L_infer.json", "distil", ('IP',)): [TWO_ARGUMENTS_INFER_CHECK], + }, "entity_linking": { ("kbqa/entity_linking_rus.json", "entity_linking", ('IP',)): [