From 7b24c997805c8a22223291005b56371823b07f6d Mon Sep 17 00:00:00 2001 From: "C. Regouby" Date: Wed, 4 Sep 2024 19:55:45 +0200 Subject: [PATCH 1/5] add FR message translation --- inst/po/fr/LC_MESSAGES/R-tok.mo | Bin 0 -> 1095 bytes po/R-fr.po | 32 ++++++++++++++++++++++++++++++++ po/R-tok.pot | 31 +++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 inst/po/fr/LC_MESSAGES/R-tok.mo create mode 100644 po/R-fr.po create mode 100644 po/R-tok.pot diff --git a/inst/po/fr/LC_MESSAGES/R-tok.mo b/inst/po/fr/LC_MESSAGES/R-tok.mo new file mode 100644 index 0000000000000000000000000000000000000000..e4ea419f2afe1da1cfd5f20214311d7abc71b75b GIT binary patch literal 1095 zcmb_b%We}f6g5y-1jGWiEbcBKI>WTdq6tC*m4G6(C8|ON8!Bh!CNWIx!S;kc=tr>N z9~##11(Em&7JLHVz;Tn(Ms&dfYmUy0eed!0^_icymOcxNYrq{~1Go-+1j0B55?}(< zfFHmU;N~SE9)mmJI`|X#7x+7P2mI}_5U;^Et_ZOS?t|Zezk-9#?Ij@)`e$67zhPP+ zrz)e^_U!c@J7)v`KZ>7>~k7!gnkL4j-<*9GYXuda+cTa<2 z(Xr+&3Xj@Zv;#dHrWxcZiC5x!+(?q7(s>bW^H_zo=&AJFq}61#9wm(^snbfMS!>)+ zRuQ)$osYU!YL`oIY?Cmf2c;Zv)MYt3_uWK2mCg2M>uk@JIH{}~?O8)(zrZG*_xyt* z$7vo?I+PZ-_vmH!S+sT*7xwD2jZWL#q_v*%l~>VoMhj*|bPQ)`Kn%mS;5|!z! z431qiR!r?wQ7&h`V`5zTEaoDQwR3V+IY2+MYN>J7=_Ggzzecd!`;ud7Fy|oK9dswjp*_*rEY{15E2wfdBvi literal 0 HcmV?d00001 diff --git a/po/R-fr.po b/po/R-fr.po new file mode 100644 index 0000000..a3937d0 --- /dev/null +++ b/po/R-fr.po @@ -0,0 +1,32 @@ +msgid "" +msgstr "" +"Project-Id-Version: tok 0.1.4.9000\n" +"POT-Creation-Date: 2024-09-04 19:29+0200\n" +"PO-Revision-Date: 2024-09-04 19:39+0200\n" +"Last-Translator: \n" +"Language-Team: \n" +"Language: fr\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 3.4.3\n" + +#: encoding.R:29 +msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}." +msgstr "Classe attendue {.cls REncoding}. Ici la classe est {.cls {class(encoding)}}." + +#: tokenizer.R:86 tokenizer.R:97 +msgid "This is a static method. 
Not available for tokenizers instances." +msgstr "Il s'agit d'une méthode statique. Elle n'est pas disponible pour les instances de tokenizers." + +#: tokenizer.R:107 +msgid "{.arg trainer} must inherit from {.cls tok_trainer}." +msgstr "{.arg trainer} doit hériter de {.cls tok_trainer}." + +#: tokenizer.R:230 +msgid "Can't be set this way, use {.fn enable_padding}." +msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_padding}." + +#: tokenizer.R:238 +msgid "Can't be set this way, use {.fn enable_truncation}." +msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_truncation}." diff --git a/po/R-tok.pot b/po/R-tok.pot new file mode 100644 index 0000000..709725e --- /dev/null +++ b/po/R-tok.pot @@ -0,0 +1,31 @@ +msgid "" +msgstr "" +"Project-Id-Version: tok 0.1.4.9000\n" +"POT-Creation-Date: 2024-09-04 19:29+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: encoding.R:29 +msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}." +msgstr "" + +#: tokenizer.R:86 tokenizer.R:97 +msgid "This is a static method. Not available for tokenizers instances." +msgstr "" + +#: tokenizer.R:107 +msgid "{.arg trainer} must inherit from {.cls tok_trainer}." +msgstr "" + +#: tokenizer.R:230 +msgid "Can't be set this way, use {.fn enable_padding}." +msgstr "" + +#: tokenizer.R:238 +msgid "Can't be set this way, use {.fn enable_truncation}." +msgstr "" From 3619f9d120cfadf7aa474f566d67f81fc8ec85d9 Mon Sep 17 00:00:00 2001 From: "C.
Regouby" Date: Thu, 5 Sep 2024 22:48:15 +0200 Subject: [PATCH 2/5] wrap cli_abort for translation --- R/encoding.R | 2 +- R/tokenizer.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/encoding.R b/R/encoding.R index 48e9269..c808af7 100644 --- a/R/encoding.R +++ b/R/encoding.R @@ -26,7 +26,7 @@ encoding <- R6::R6Class( if (inherits(encoding, "REncoding")) { self$.encoding <- encoding } else { - cli::cli_abort("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.") + cli::cli_abort(gettext("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.")) } } ), diff --git a/R/tokenizer.R b/R/tokenizer.R index c1b4675..46d5624 100644 --- a/R/tokenizer.R +++ b/R/tokenizer.R @@ -83,7 +83,7 @@ tokenizer <- R6::R6Class( #' the tokenizer. #' @param path Path to tokenizer.json file from_file = function(path) { - cli::cli_abort("This is a static method. Not available for tokenizers instances.") + cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances.")) }, #' @description @@ -94,7 +94,7 @@ tokenizer <- R6::R6Class( #' @param auth_token An optional auth token used to access private repositories #' on the Hugging Face Hub from_pretrained = function(identifier, revision = "main", auth_token = NULL) { - cli::cli_abort("This is a static method. Not available for tokenizers instances.") + cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances.")) }, #' @description @@ -104,7 +104,7 @@ tokenizer <- R6::R6Class( #' @param files character vector of file paths. 
train = function(files, trainer) { if (!inherits(trainer, "tok_trainer")) - cli::cli_abort("{.arg trainer} must inherit from {.cls tok_trainer}.") + cli::cli_abort(gettext("{.arg trainer} must inherit from {.cls tok_trainer}.")) self$.tokenizer$train_from_files(trainer$.trainer, normalizePath(files)) }, @@ -227,7 +227,7 @@ tokenizer <- R6::R6Class( #' @field padding Gets padding configuration padding = function(x) { if (!missing(x)) { - cli::cli_abort("Can't be set this way, use {.fn enable_padding}.") + cli::cli_abort(gettext("Can't be set this way, use {.fn enable_padding}.")) } self$.tokenizer$get_padding() @@ -235,7 +235,7 @@ tokenizer <- R6::R6Class( #' @field truncation Gets truncation configuration truncation = function(x) { if (!missing(x)) { - cli::cli_abort("Can't be set this way, use {.fn enable_truncation}.") + cli::cli_abort(gettext("Can't be set this way, use {.fn enable_truncation}.")) } self$.tokenizer$get_truncation() From bf390c02eec65514a86720cb5433fc20372f67ea Mon Sep 17 00:00:00 2001 From: "C. Regouby" Date: Thu, 5 Sep 2024 22:51:01 +0200 Subject: [PATCH 3/5] add test --- tests/testthat/test-message-translations.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/testthat/test-message-translations.R diff --git a/tests/testthat/test-message-translations.R b/tests/testthat/test-message-translations.R new file mode 100644 index 0000000..a39ceb4 --- /dev/null +++ b/tests/testthat/test-message-translations.R @@ -0,0 +1,16 @@ +test_that("R-level cli_abort messages are correctly translated in FR", { + withr::with_envvar(c(HUGGINGFACE_HUB_CACHE = tempdir()), { + try({ + tok <- tokenizer$from_pretrained("gpt2") + temp_json <- tempfile(fileext = ".json") + withr::with_language(lang = "fr", + expect_error( + tok$train(temp_json, temp_json), + regexp = "doit hériter de", + fixed = TRUE + ) + ) + }) + }) + +}) \ No newline at end of file From d00b86e51afcdbd250dade96ab49db893c78b9bf Mon Sep 17 00:00:00 2001 From: "C. 
Regouby" Date: Thu, 5 Sep 2024 22:56:26 +0200 Subject: [PATCH 4/5] add NEWS --- DESCRIPTION | 3 ++- NEWS.md | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bc93dbb..82f5795 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,6 +3,7 @@ Title: Fast Text Tokenization Version: 0.1.4.9000 Authors@R: c( person("Daniel", "Falbel", , "daniel@posit.co", c("aut", "cre")), + person("Regouby", "christophe", , "christophe.regouby@free.fr", c("ctb")), person(family = "Posit", role = c("cph")) ) Description: @@ -14,7 +15,7 @@ License: MIT + file LICENSE SystemRequirements: Rust tool chain w/ cargo, libclang/llvm-config Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Depends: R (>= 4.2.0) Imports: diff --git a/NEWS.md b/NEWS.md index 0f5100c..7488676 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # tok (development version) +- add message translation in FR (#19, @cregouby) + # tok 0.1.4 - Updated libR-sys to fix mac oldrel notes. (#18) From b922e9c3e4f8c33c19919c773b4222f1cfd4ddef Mon Sep 17 00:00:00 2001 From: "C. 
Regouby" Date: Thu, 5 Sep 2024 23:17:44 +0200 Subject: [PATCH 5/5] fix typos --- DESCRIPTION | 2 +- tests/testthat/test-message-translations.R | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 82f5795..062821b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Title: Fast Text Tokenization Version: 0.1.4.9000 Authors@R: c( person("Daniel", "Falbel", , "daniel@posit.co", c("aut", "cre")), - person("Regouby", "christophe", , "christophe.regouby@free.fr", c("ctb")), + person("Christophe", "Regouby", , "christophe.regouby@free.fr", c("ctb")), person(family = "Posit", role = c("cph")) ) Description: diff --git a/tests/testthat/test-message-translations.R b/tests/testthat/test-message-translations.R index a39ceb4..8631cb7 100644 --- a/tests/testthat/test-message-translations.R +++ b/tests/testthat/test-message-translations.R @@ -6,10 +6,9 @@ test_that("R-level cli_abort messages are correctly translated in FR", { withr::with_language(lang = "fr", expect_error( tok$train(temp_json, temp_json), - regexp = "doit hériter de", - fixed = TRUE - ) - ) + regexp = "doit hériter de", + fixed = TRUE + )) }) })