Commit

Merge branch 'main' into main
guillaume-be authored Aug 18, 2024
2 parents f8b7bea + 62b40d0 commit fa6b0cc
Showing 45 changed files with 422 additions and 191 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration.yml
@@ -174,7 +174,7 @@ jobs:
with:
python-version: '3.10'
- run: |
-pip install -r requirements.txt --progress-bar off
+pip install -r ./utils/requirements.txt --progress-bar off
python ./utils/download-dependencies_distilbert.py
fmt:
4 changes: 3 additions & 1 deletion .gitignore
@@ -17,4 +17,6 @@ Cargo.lock

/target
#**/*.rs.bk
-/resources/
+/models/
+/.venv/
+convert_model.log
23 changes: 15 additions & 8 deletions Cargo.toml
@@ -76,10 +76,10 @@ features = ["doc-only"]

[dependencies]
rust_tokenizers = "8.1.1"
tch = "0.15.0"
tch = { version = "0.16.0", features = ["download-libtorch"] }
serde_json = "1"
serde = { version = "1", features = ["derive"] }
ordered-float = "3"
ordered-float = "4.2.0"
uuid = { version = "1", features = ["v4"] }
thiserror = "1"
half = "2"
@@ -88,19 +88,26 @@ regex = "1.6"
cached-path = { version = "0.6", default-features = false, optional = true }
dirs = { version = "5", optional = true }
lazy_static = { version = "1", optional = true }
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
ndarray = {version="0.15", optional = true}
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}
ort = { version = "1.16.3", optional = true, default-features = false, features = [
"half",
] }
ndarray = { version = "0.15", optional = true }
tokenizers = { version = "0.19.1", optional = true, default-features = false, features = [
"onig",
] }

[dev-dependencies]
anyhow = "1"
csv = "1"
criterion = "0.5"
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
tempfile = "3"
itertools = "0.12"
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
ort = {version="~1.15.5", features = ["load-dynamic"]}
itertools = "0.13.0"
tracing-subscriber = { version = "0.3", default-features = false, features = [
"env-filter",
"fmt",
] }
ort = { version = "1.16.3", features = ["load-dynamic"] }

[[example]]
name = "onnx-masked-lm"
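The `tch` upgrade above enables the `download-libtorch` feature, which lets the build fetch a matching `libtorch` automatically instead of requiring a manual installation. A minimal sketch of a downstream smoke test under that assumption (illustrative only, not part of this commit):

```rust
// Hypothetical check that the automatically downloaded libtorch is usable.
// Assumes the `download-libtorch` feature is enabled, so no LIBTORCH
// environment variable is needed at build time.
use tch::{Device, Tensor};

fn main() {
    // Prefer CUDA when available, otherwise fall back to CPU.
    let device = Device::cuda_if_available();
    // Create a small tensor and move it to the selected device.
    let t = Tensor::from_slice(&[1.0f32, 2.0, 3.0]).to(device);
    println!("device = {device:?}, tensor = {t:?}");
}
```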
442 changes: 302 additions & 140 deletions README.md

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion src/lib.rs
@@ -91,7 +91,7 @@
//! ### Manual installation (recommended)
//!
//! 1. Download `libtorch` from <https://pytorch.org/get-started/locally/>. This package requires `v2.2`: if this version is no longer available on the "get started" page,
-//! the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.0%2Bcu121.zip` for a Linux version with CUDA12.
+//!    the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.0%2Bcu121.zip` for a Linux version with CUDA12.
//! 2. Extract the library to a location of your choice
//! 3. Set the following environment variables
//! ##### Linux:
1 change: 1 addition & 0 deletions src/models/albert/mod.rs
@@ -16,6 +16,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `BertTokenizer` using a `vocab.txt` vocabulary
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
8 changes: 4 additions & 4 deletions src/models/bart/bart_model.rs
@@ -369,7 +369,7 @@ fn _shift_tokens_right(input_ids: &Tensor, pad_token_id: i64) -> Tensor {
/// It is made of the following blocks:
/// - `encoder`: `BartEncoder` (transformer) made of a vector of encoding layers
/// - `decoder`: `BartDecoder` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
-/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
+///   caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
/// - `pad_token_id`: padding token id
pub struct BartModel {
pub(crate) encoder: BartEncoder,
@@ -437,7 +437,7 @@ impl BartModel {
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
-/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
+///   These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
///
@@ -597,7 +597,7 @@ impl BartForConditionalGeneration {
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
-/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
+///   These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
@@ -798,7 +798,7 @@ impl BartForSequenceClassification {
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
-/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
+///   These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
1 change: 1 addition & 0 deletions src/models/bart/decoder.rs
@@ -340,6 +340,7 @@ impl BartDecoder {
}
}

+#[allow(dead_code)]
///Container holding a BART decoder output
pub struct BartDecoderOutput {
/// last decoder layer hidden state
1 change: 1 addition & 0 deletions src/models/bart/mod.rs
@@ -11,6 +11,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `RobertaTokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
15 changes: 15 additions & 0 deletions src/models/bert/bert_model.rs
@@ -42,6 +42,11 @@ impl BertModelResources {
"bert/model",
"https://huggingface.co/bert-base-uncased/resolve/main/rust_model.ot",
);
+/// Shared under Apache 2.0 license by the Google team at <https://github.com/google-research/bert>. Modified with conversion to C-array format.
+pub const BERT_LARGE: (&'static str, &'static str) = (
+"bert-large/model",
+"https://huggingface.co/bert-large-uncased/resolve/main/rust_model.ot",
+);
/// Shared under MIT license by the MDZ Digital Library team at the Bavarian State Library at <https://github.com/dbmdz/berts>. Modified with conversion to C-array format.
pub const BERT_NER: (&'static str, &'static str) = (
"bert-ner/model",
@@ -75,6 +80,11 @@ impl BertConfigResources {
"bert/config",
"https://huggingface.co/bert-base-uncased/resolve/main/config.json",
);
+/// Shared under Apache 2.0 license by the Google team at <https://github.com/google-research/bert>. Modified with conversion to C-array format.
+pub const BERT_LARGE: (&'static str, &'static str) = (
+"bert-large/config",
+"https://huggingface.co/bert-large-uncased/resolve/main/config.json",
+);
/// Shared under MIT license by the MDZ Digital Library team at the Bavarian State Library at <https://github.com/dbmdz/berts>. Modified with conversion to C-array format.
pub const BERT_NER: (&'static str, &'static str) = (
"bert-ner/config",
@@ -108,6 +118,11 @@ impl BertVocabResources {
"bert/vocab",
"https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt",
);
+/// Shared under Apache 2.0 license by the Google team at <https://github.com/google-research/bert>. Modified with conversion to C-array format.
+pub const BERT_LARGE: (&'static str, &'static str) = (
+"bert-large/vocab",
+"https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt",
+);
/// Shared under MIT license by the MDZ Digital Library team at the Bavarian State Library at <https://github.com/dbmdz/berts>. Modified with conversion to C-array format.
pub const BERT_NER: (&'static str, &'static str) = (
"bert-ner/vocab",
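The added `BERT_LARGE` constants follow the same pattern as the existing `BERT` resources, so they can be consumed the same way. A hedged sketch of resolving the new resources to local files (the surrounding setup is assumed rather than shown in this diff, and it relies on the crate's default remote-download support):

```rust
use rust_bert::bert::{BertConfigResources, BertModelResources, BertVocabResources};
use rust_bert::resources::{RemoteResource, ResourceProvider};

fn main() -> anyhow::Result<()> {
    // Remote resources pointing at the newly added bert-large-uncased files.
    let config_resource = RemoteResource::from_pretrained(BertConfigResources::BERT_LARGE);
    let vocab_resource = RemoteResource::from_pretrained(BertVocabResources::BERT_LARGE);
    let weights_resource = RemoteResource::from_pretrained(BertModelResources::BERT_LARGE);

    // Download (or reuse cached copies) and resolve to local paths.
    let config_path = config_resource.get_local_path()?;
    let vocab_path = vocab_resource.get_local_path()?;
    let weights_path = weights_resource.get_local_path()?;

    println!("config:  {config_path:?}");
    println!("vocab:   {vocab_path:?}");
    println!("weights: {weights_path:?}");
    Ok(())
}
```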
1 change: 1 addition & 0 deletions src/models/bert/mod.rs
@@ -16,6 +16,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `BertTokenizer` using a `vocab.txt` vocabulary
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/deberta/mod.rs
@@ -12,6 +12,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `DebertaTokenizer` using a `vocab.json` vocabulary and `merges.txt` merges file
+//!
//! Pretrained models for a number of language pairs are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/deberta_v2/mod.rs
@@ -12,6 +12,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `DebertaV2Tokenizer` using a `spiece.model` SentencePiece model file
+//!
//! Pretrained models for a number of language pairs are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/distilbert/mod.rs
@@ -14,6 +14,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `BertTokenizer` using a `vocab.txt` vocabulary
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/electra/mod.rs
@@ -19,6 +19,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `BertTokenizer` using a `vocab.txt` vocabulary
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/fnet/mod.rs
@@ -14,6 +14,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `FNetTokenizer` using a `spiece.model` SentencePiece (BPE) model file
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
1 change: 1 addition & 0 deletions src/models/gpt2/mod.rs
@@ -11,6 +11,7 @@
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
//! - `Gpt2Tokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
+//!
//! Pretrained models are available and can be downloaded using RemoteResources.
//!
//! ```no_run
6 changes: 3 additions & 3 deletions src/models/longt5/longt5_model.rs
@@ -174,7 +174,7 @@ impl From<&LongT5Config> for T5Config {
/// It is made of the following blocks:
/// - `encoder`: `T5Stack` (transformer) made of a vector of encoding layers
/// - `decoder`: `T5Stack` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
-/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
+///   caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
/// - `embeddings`: `nn::Embedding` Shared embeddings for the encoder and decoder.
pub struct LongT5Model {
pub(crate) encoder: LongT5Stack,
@@ -248,7 +248,7 @@ impl LongT5Model {
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). This or `input_embeds` must be provided.
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
-/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
+///   These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.
@@ -436,7 +436,7 @@ impl LongT5ForConditionalGeneration {
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). This or `input_embeds` must be provided.
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
-/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
+///   These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.