Open
Description
I'm trying to port the Python inference script for a BERT model I fine-tuned to Rust. Here is the directory the model was saved to:
indobert-finetuned
├── config.json
├── model.safetensors
├── special_tokens_map.json
├── tokenizer.json
├── tokenizer_config.json
├── training_args.bin
└── vocab.txt
The Python inference script:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = "indobert-finetuned/"

# Load the tokenizer and the fine-tuned classification model from the saved directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Keep the model on the same device as its inputs; without this the forward
# pass fails with a device mismatch whenever CUDA is available.
model.to(device)

# Tokenize the input and move the resulting tensors to the target device.
inputs = tokenizer("anjing", return_tensors='pt', padding=True, truncation=False).to(device)

# Perform inference with autograd tracking disabled.
with torch.inference_mode():
    outputs = model(**inputs)
    logits = outputs.logits

print(logits, model)
# Softmax over the last dimension turns the logits into per-class probabilities;
# squeeze() removes the size-1 dimensions before converting to a plain list.
predicted_label = torch.softmax(logits, dim=-1).squeeze().cpu()
print(predicted_label.tolist())
Prints the following:
[
0.0022081946954131126,
0.9808889031410217,
0.0035459366627037525,
0.0007706195465289056,
0.00043291927431710064,
0.00036624292260967195,
0.0012361736735329032,
0.0010357820428907871,
0.004077346995472908,
0.004229736048728228,
0.000952212605625391,
0.00025596615159884095
]
Here's how I load the model with rust-bert:
// Reproduction: build a rust-bert zero-shot classification pipeline from the files in
// `indobert-finetuned/`, run it on one input string, and print one score per label.
//
// NOTE(review): the Python script in this report loads the checkpoint with
// AutoModelForSequenceClassification and applies a softmax over the logits, whereas this
// snippet goes through ZeroShotClassificationModel::predict_multilabel. Presumably these two
// pipelines do not compute the same quantity, which may explain the differing scores —
// confirm against the rust-bert documentation for SequenceClassificationModel.

// Resource pointing at the saved model weights file.
let model_res = LocalResource{
local_path: "indobert-finetuned/model.safetensors".into()
};
// Pipeline configuration; the trailing comments name each positional argument.
let sequence_classification_config = ZeroShotClassificationConfig::new(
Bert, // model_type
Torch(Box::new(model_res)), // model_resource
LocalResource {
local_path: "indobert-finetuned/config.json".into()
}, // config_resource
LocalResource {
local_path: "indobert-finetuned/vocab.txt".into()
}, // vocab_resource
None, // merges_resource
true, // lowercase
None, // strip_accent
None // add_prefix_space
);
// loading tokenizer.json & special_tokens_map.json
// unwrap() panics here if the tokenizer files cannot be loaded.
let tokenizer = TokenizerOption::from_hf_tokenizer_file(
"indobert-finetuned/tokenizer.json",
"indobert-finetuned/special_tokens_map.json"
).unwrap();
// zero shot classification pipeline
// The constructor's result is stored as-is and only unwrapped at the predict call below.
let sequence_classification_model = ZeroShotClassificationModel::new_with_tokenizer(
sequence_classification_config,
tokenizer
);
// Single input text, the same string the Python script classifies.
let input = [
"anjing",
];
let labels = [...] // string array with 12 elements
// Run model inference
// unwrap() panics if building the model failed.
// NOTE(review): the `None` argument is presumably the hypothesis template (library default)
// and `256` presumably the maximum sequence length — confirm against the
// predict_multilabel signature.
let output = sequence_classification_model.unwrap().predict_multilabel(
&input,
&labels,
None,
256
);
// Only the success case is handled: nothing is printed when prediction returns an error.
if let Ok(out) = output {
// Collect the scores for the first (and only) input. The closure always returns Some,
// so filter_map drops nothing here.
let scores: Vec<_> = out[0].iter()
.filter_map(|label| Some(label.score))
.collect();
println!("{:?}", scores);
}
This outputs the following scores, which do not match the probabilities from the Python script above:
[
0.6780490875244141,
0.7056891918182373,
0.6685047745704651,
0.563891589641571,
0.6616213917732239,
0.6930138468742371,
0.6496127247810364,
0.6565861701965332,
0.6566416621208191,
0.7492241859436035,
0.68653404712677,
0.7110175490379333
]
Any help would be appreciated :).
Metadata
Assignees
Labels
No labels
Activity