multi-regexp pretokenizer (#12325)

This commit is contained in:
Michael Yang
2025-09-23 13:21:47 -07:00
committed by GitHub
parent 64883e3c4c
commit a40d427bce
12 changed files with 124 additions and 34 deletions

View File

@@ -82,7 +82,6 @@ func modelHelper(t testing.TB) model.BytePairEncoding {
merges := make([]string, 0, 1)
// Only need vocab for Grammar Test
return model.NewBytePairEncoding(
``,
&model.Vocabulary{
Values: tokens,
Types: make([]int32, len(vocab)),