Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FR translation for R-messages #19

Merged
merged 5 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Title: Fast Text Tokenization
Version: 0.1.4.9000
Authors@R: c(
person("Daniel", "Falbel", , "[email protected]", c("aut", "cre")),
person("Christophe", "Regouby", , "[email protected]", c("ctb")),
person(family = "Posit", role = c("cph"))
)
Description:
Expand All @@ -14,7 +15,7 @@ License: MIT + file LICENSE
SystemRequirements: Rust tool chain w/ cargo, libclang/llvm-config
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Depends:
R (>= 4.2.0)
Imports:
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# tok (development version)

- Added French (FR) translation of R-level messages. (#19, @cregouby)

# tok 0.1.4

- Updated libR-sys to fix mac oldrel notes. (#18)
Expand Down
2 changes: 1 addition & 1 deletion R/encoding.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ encoding <- R6::R6Class(
if (inherits(encoding, "REncoding")) {
self$.encoding <- encoding
} else {
cli::cli_abort("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.")
cli::cli_abort(gettext("Expected class {.cls REncoding} but got {.cls {class(encoding)}}."))
}
}
),
Expand Down
10 changes: 5 additions & 5 deletions R/tokenizer.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ tokenizer <- R6::R6Class(
#' the tokenizer.
#' @param path Path to tokenizer.json file
from_file = function(path) {
cli::cli_abort("This is a static method. Not available for tokenizers instances.")
cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances."))
},

#' @description
Expand All @@ -94,7 +94,7 @@ tokenizer <- R6::R6Class(
#' @param auth_token An optional auth token used to access private repositories
#' on the Hugging Face Hub
from_pretrained = function(identifier, revision = "main", auth_token = NULL) {
cli::cli_abort("This is a static method. Not available for tokenizers instances.")
cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances."))
},

#' @description
Expand All @@ -104,7 +104,7 @@ tokenizer <- R6::R6Class(
#' @param files character vector of file paths.
train = function(files, trainer) {
if (!inherits(trainer, "tok_trainer"))
cli::cli_abort("{.arg trainer} must inherit from {.cls tok_trainer}.")
cli::cli_abort(gettext("{.arg trainer} must inherit from {.cls tok_trainer}."))

self$.tokenizer$train_from_files(trainer$.trainer, normalizePath(files))
},
Expand Down Expand Up @@ -227,15 +227,15 @@ tokenizer <- R6::R6Class(
#' @field padding Gets padding configuration
padding = function(x) {
if (!missing(x)) {
cli::cli_abort("Can't be set this way, use {.fn enable_padding}.")
cli::cli_abort(gettext("Can't be set this way, use {.fn enable_padding}."))
}

self$.tokenizer$get_padding()
},
#' @field truncation Gets truncation configuration
truncation = function(x) {
if (!missing(x)) {
cli::cli_abort("Can't be set this way, use {.fn enable_truncation}.")
cli::cli_abort(gettext("Can't be set this way, use {.fn enable_truncation}."))
}

self$.tokenizer$get_truncation()
Expand Down
Binary file added inst/po/fr/LC_MESSAGES/R-tok.mo
Binary file not shown.
32 changes: 32 additions & 0 deletions po/R-fr.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
msgid ""
msgstr ""
"Project-Id-Version: tok 0.1.4.9000\n"
"POT-Creation-Date: 2024-09-04 19:29+0200\n"
"PO-Revision-Date: 2024-09-04 19:39+0200\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: fr\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Generator: Poedit 3.4.3\n"

#: encoding.R:29
msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}."
msgstr "Classe attendue {.cls REncoding}. Ici la classe est {.cls {class(encoding)}}."

#: tokenizer.R:86 tokenizer.R:97
msgid "This is a static method. Not available for tokenizers instances."
msgstr "Il s'agit d'une méthode statique. Elle n'est pas disponible pour les instances de tokenizers."

#: tokenizer.R:107
msgid "{.arg trainer} must inherit from {.cls tok_trainer}."
msgstr "{.arg trainer} doit hériter de {.cls tok_trainer}."

#: tokenizer.R:230
msgid "Can't be set this way, use {.fn enable_padding}."
msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_padding}."

#: tokenizer.R:238
msgid "Can't be set this way, use {.fn enable_truncation}."
msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_truncation}."
31 changes: 31 additions & 0 deletions po/R-tok.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
msgid ""
msgstr ""
"Project-Id-Version: tok 0.1.4.9000\n"
"POT-Creation-Date: 2024-09-04 19:29+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

#: encoding.R:29
msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}."
msgstr ""

#: tokenizer.R:86 tokenizer.R:97
msgid "This is a static method. Not available for tokenizers instances."
msgstr ""

#: tokenizer.R:107
msgid "{.arg trainer} must inherit from {.cls tok_trainer}."
msgstr ""

#: tokenizer.R:230
msgid "Can't be set this way, use {.fn enable_padding}."
msgstr ""

#: tokenizer.R:238
msgid "Can't be set this way, use {.fn enable_truncation}."
msgstr ""
15 changes: 15 additions & 0 deletions tests/testthat/test-message-translations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Checks that an R-level `cli_abort()` message is emitted in French when the
# session language is "fr".
test_that("R-level cli_abort messages are correctly translated in FR", {
  # Point the Hugging Face cache at a temporary directory for this test only;
  # the variable is restored when the test exits.
  withr::local_envvar(c(HUGGINGFACE_HUB_CACHE = tempdir()))

  # Only the download is allowed to fail quietly. A blanket `try()` around the
  # whole test would also swallow a failing expectation (testthat signals
  # failures as error conditions), so the test could never fail.
  tok <- tryCatch(
    tokenizer$from_pretrained("gpt2"),
    error = function(e) NULL
  )
  skip_if(is.null(tok), "Could not load the pretrained 'gpt2' tokenizer.")

  temp_json <- tempfile(fileext = ".json")

  # A character path is passed as `trainer`; it does not inherit from
  # `tok_trainer`, so `train()` aborts with the (translated) class-check error.
  withr::with_language(
    lang = "fr",
    expect_error(
      tok$train(temp_json, temp_json),
      regexp = "doit hériter de",
      fixed = TRUE
    )
  )
})
Loading