File tree Expand file tree Collapse file tree 1 file changed +26
-0
lines changed
graalpython/lib-graalpython/patches/tokenizers Expand file tree Collapse file tree 1 file changed +26
-0
lines changed Original file line number Diff line number Diff line change @@ -22,3 +22,29 @@ index 6282c31..47e6b12 100644
2222
2323 [features]
2424 default = ["pyo3/extension-module"]
25+ diff --git a/tokenizers-lib/src/models/bpe/trainer.rs b/tokenizers-lib/src/models/bpe/trainer.rs
26+ index 43ab848..55f95f8 100644
27+ --- a/tokenizers-lib/src/models/bpe/trainer.rs
28+ +++ b/tokenizers-lib/src/models/bpe/trainer.rs
29+ @@ -518,15 +518,16 @@ impl BpeTrainer {
30+ let changes = top
31+ .pos
32+ .maybe_par_iter()
33+ - .flat_map(|i| {
34+ - let w = &words[*i] as *const _ as *mut _;
35+ + .flat_map(|&i| {
36+ + let word = &words[i] as *const _ as *mut Word;
37+ // We can merge each of these words in parallel here because each position
38+ // can be there only once (HashSet). So this is safe.
39+ unsafe {
40+ - let word: &mut Word = &mut (*w);
41+ - word.merge(top.pair.0, top.pair.1, new_token_id)
42+ + // let word: &mut Word = &mut (*word);
43+ + (*word)
44+ + .merge(top.pair.0, top.pair.1, new_token_id)
45+ .into_iter()
46+ - .map(|c| (c, *i))
47+ + .map(|c| (c, i))
48+ .collect::<Vec<_>>()
49+ }
50+ })
You can’t perform that action at this time.
0 commit comments