{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "``" }, "content": "\"" }, { "type": "Replace", "pattern": { "String": "''" }, "content": "\"" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 1 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 0 ], "tokens": [ "" ] } } }, "decoder": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true }, "model": { "type": "Unigram", "unk_id": 2, "vocab": [ [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "", 0.0 ], [ "▁", -2.365659253373373 ], [ "e", -2.7726274257071672 ], [ "s", -2.8092594726709414 ], [ "t", -2.937906677759426 ], [ "a", -3.023488039566267 ], [ "i", -3.130101696517439 ], [ "r", -3.3167080850029116 ], [ "o", -3.4890814730909696 ], [ "n", -3.6166233466724087 ], [ "d", -3.662076718281828 ], [ "▁the", -3.8362751907891095 ], [ "\n", -3.8534604095366554 ], [ "l", -3.882923720655637 ], [ "c", -4.143267028826475 ], [ "m", -4.228727291757142 ], [ "u", -4.335524745161631 ], [ "p", -4.359575101224133 ], [ "▁to", -4.38651368841003 ], [ "ing", -4.483100337757891 ], [ "▁and", -4.520970283112547 ], [ "f", -4.525275602775807 ], [ "▁of", -4.5760527119177485 ], [ "y", -4.58382387353196 ], [ "▁in", -4.740397653766369 ], [ "g", -4.791754116540275 ], [ "h", -4.836474590087411 ], [ "b", -4.909411232248003 ], [ "▁that", -5.199912115593683 ], [ "k", -5.263461955826319 ], [ "w", -5.28123698440287 ], [ "▁is", -5.299636845493664 ], [ "▁be", -5.419702910541615 ], [ "in", -5.4359103464709335 ], [ "▁for", -5.512356650709716 ], [ "v", -5.604824905017555 ], [ "▁we", -5.6102446335917335 ], [ "ly", -5.626062101437649 ], [ "▁you", -5.7438848880358755 ], [ "▁on", -5.812146705563066 ], [ "▁he", -5.884272913006647 ], [ "▁are", -5.911223094826356 ], [ "▁as", -5.929833784333763 ], [ "▁was", -5.944656786916921 ], [ "▁with", -5.9466281189802785 ], [ "ve", -6.029713712838138 ], [ "▁have", -6.056062667440742 ], [ "▁but", -6.078495912607513 ], [ "▁do", -6.120033501363503 ], [ "▁this", -6.1351301241312495 ], [ "ur", -6.159085253292082 ], [ "▁co", -6.163495625813539 ], [ "▁not", -6.168869881961024 ], [ "ation", -6.174891736114809 ], [ "▁they", -6.23089689749701 ], [ "us", -6.263880501182481 ], [ "▁one", -6.280948860315329 ], [ "'s", -6.287063313479678 ], [ "▁or", -6.295727284839586 ], [ "▁ma", -6.3328402203360845 ], [ "▁me", -6.342682111290028 ], [ "▁can", -6.347928769198855 ], [ "▁an", -6.386687084496497 ], [ "▁con", -6.411591977758148 ], [ "ll", -6.421978033658174 ], [ "▁pa", -6.485767473048488 ], [ "ck", -6.498920060222398 ], [ "▁from", -6.510430181366688 ], [ "▁said", -6.549799059746496 ], [ "▁all", -6.55976072022162 ], [ "▁ch", -6.569055886948073 ], [ "▁there", -6.583017726253843 ], [ "▁what", -6.586112233691296 ], [ "▁ca", -6.617227466784206 ], [ "ide", -6.630277697578055 ], [ "'", -6.636383868446295 ], [ "▁lo", -6.6583996554982825 ], [ "▁ex", -6.680873400433887 ], [ "▁go", -6.684875077627904 ], [ "▁ba", -6.691013699795855 ], [ "age", -6.692079993714321 ], [ "▁by", -6.697920339970178 ], [ "▁about", -6.705057222090151 ], [ "▁more", -6.712459504370521 ], [ "▁no", -6.724089235660815 ], [ "▁who", -6.728620204290843 ], [ "▁my", -6.738463579994516 ], [ "▁out", -6.743812086790612 ], [ "men", -6.749756175437181 ], [ "▁will", -6.750296473528447 ], [ "▁people", -6.752779647842171 ], [ "▁their", -6.762497277425371 ], [ "▁pro", -6.778461406812822 ], [ "rea", -6.784288329157702 ], [ "j", -6.7882011736527 ], [ "one", -6.794778316215314 ], [ "ive", -6.813780868209756 ], [ "▁up", -6.820641238110763 ], [ "▁th", -6.828317656094255 ], [ "per", -6.830780397681883 ], [ "▁when", -6.845789034417839 ], [ "▁like", -6.846099446872435 ], [ "▁has", -6.858481313045967 ], [ "▁two", -6.875403572167361 ], [ "▁her", -6.87786488262225 ], [ "ure", -6.890885409969632 ], [ "x", -6.898140163448579 ], [ "▁some", -6.901803863061465 ], [ "▁his", -6.903905558474838 ], [ "▁time", -6.9079754944200875 ], [ "les", -6.940601705628119 ], [ "▁she", -6.950713176332071 ], [ "▁sh", -6.952521338654098 ], [ "red", -6.958620516243249 ], [ "▁see", -6.982276000845513 ], [ "▁would", -6.983175910632584 ], [ "▁get", -6.9876750913724255 ], [ "▁ha", -6.988701633352516 ], [ "▁our", -6.994954275263089 ], [ "▁pre", -7.001261302546547 ], [ "▁had", -7.0086949025400305 ], [ "▁were", -7.014572559915722 ], [ "▁just", -7.015066325981126 ], [ "▁thousand", -7.027882847809208 ], [ "end", -7.032569076938101 ], [ "▁cl", -7.033247703700155 ], [ "z", -7.036404857337107 ], [ "able", -7.045084509819366 ], [ "ight", -7.049731749195802 ], [ "▁it's", -7.051805370713197 ], [ "▁how", -7.058726391933213 ], [ "▁hundred", -7.059674179353372 ], [ "▁comp", -7.083542356209991 ], [ "▁dis", -7.093914212996811 ], [ "▁your", -7.117440901089486 ], [ "▁than", -7.1288683714327 ], [ "▁which", -7.12935647113321 ], [ "▁work", -7.129359951395429 ], [ "▁other", -7.135976300026181 ], [ "▁say", -7.177611083373813 ], [ "▁vi", -7.181149894078873 ], [ "ver", -7.19196386046308 ], [ "▁cr", -7.19324258190079 ], [ "▁know", -7.196656171543747 ], [ "▁new", -7.200727503764982 ], [ "ther", -7.2049247144042905 ], [ "▁been", -7.205833241748344 ], [ "ach", -7.20822932829242 ], [ "ance", -7.208401352102109 ], [ "com", -7.2587815267099085 ], [ "ical", -7.278190190681173 ], [ "▁sta", -7.296280193027171 ], [ "▁make", -7.297080155631601 ], [ "man", -7.297357070474021 ], [ "▁pu", -7.301110944798579 ], [ "▁car", -7.307416744513652 ], [ "▁think", -7.315076410486416 ], [ "gra", -7.327973901718581 ], [ "▁even", -7.329667453772981 ], [ "▁now", -7.334324859615645 ], [ "▁want", -7.338644082597577 ], [ "▁bu", -7.340994934917262 ], [ "▁over", -7.357011041337032 ], [ "▁way", -7.358907442350613 ], [ "▁into", -7.361535232495493 ], [ "ction", -7.370624313735973 ], [ "▁res", -7.370897459814927 ], [ "tter", -7.372367561211393 ], [ "▁la", -7.373978735498742 ], [ "ful", -7.374168968662772 ], [ "▁because", -7.374226327340219 ], [ "▁nine", -7.377940224587501 ], [ "ell", -7.381683580843712 ], [ "he", -7.385884132318933 ], [ "▁li", -7.386083738518794 ], [ "▁could", -7.387987936442446 ], [ "ence", -7.401823391179237 ], [ "▁very", -7.408068976933194 ], [ "▁ar", -7.416235924919285 ], [ "▁us", -7.421022762491443 ], [ "▁them", -7.439007960826133 ], [ "ze", -7.442237617833339 ], [ "ally", -7.44567320812796 ], [ "und", -7.449077104870009 ], [ "▁look", -7.465920105389392 ], [ "ving", -7.476701160870153 ], [ "▁use", -7.489550017591283 ], [ "▁need", -7.507395353897271 ], [ "▁most", -7.508463507233218 ], [ "ang", -7.512544632371608 ], [ "▁every", -7.51759581004417 ], [ "qui", -7.5224431600372945 ], [ "▁any", -7.523800340825591 ], [ "▁bi", -7.526209973364397 ], [ "▁cu", -7.53298549315272 ], [ "ill", -7.542831181363784 ], [ "▁only", -7.542967005319634 ], [ "▁its", -7.548158954564297 ], [ "▁take", -7.549454035649397 ], [ "▁day", -7.552893231103514 ], [ "▁part", -7.555055024187217 ], [ "▁back", -7.556383210659435 ], [ "▁three", -7.557901106933517 ], [ "▁going", -7.561348004910165 ], [ "ever", -7.562036821423799 ], [ "▁years", -7.562332353467326 ], [ "▁also", -7.563420504567631 ], [ "▁these", -7.563445901010146 ], [ "▁world", -7.5658527282563615 ], [ "▁jo", -7.566271341802086 ], [ "for", -7.573909200353221 ], [ "ated", -7.574595698553555 ], [ "▁where", -7.582311490247017 ], [ "▁app", -7.583407657098212 ], [ "ble", -7.627885239687924 ], [ "▁five", -7.631767928016325 ], [ "▁many", -7.635697451452412 ], [ "▁rec", -7.637686462277017 ], [ "▁first", -7.644434380130837 ], [ "▁much", -7.644436171280626 ], [ "▁good", -7.657353544125229 ], [ "▁don't", -7.676856549107038 ], [ "▁ga", -7.692179576415008 ], [ "▁six", -7.69269356597826 ], [ "q", -7.696493807735175 ], [ "rac", -7.70076984035417 ], [ "▁him", -7.708060244213304 ], [ "▁may", -7.710511654127949 ], [ "▁pri", -7.722091815046568 ], [ "▁come", -7.723292409553084 ], [ "▁those", -7.724939069549471 ], [ "▁play", -7.728249278606006 ], [ "ster", -7.729194989675044 ], [ "▁life", -7.739615405532483 ], [ "led", -7.741277900326592 ], [ "▁mu", -7.743922966825698 ], [ "ries", -7.744512973191547 ], [ "▁four", -7.755243064488333 ], [ "mer", -7.759376851450931 ], [ "lic", -7.759810124548855 ], [ "▁after", -7.77115435303911 ], [ "ress", -7.772231241897558 ], [ "▁eight", -7.775065267931247 ], [ "▁really", -7.78233366423092 ], [ "▁year", -7.788820100546483 ], [ "rate", -7.790966895572762 ], [ "▁well", -7.79503801679831 ], [ "▁rel", -7.800763301209921 ], [ "ugh", -7.80508067156425 ], [ "▁long", -7.81463592369218 ], [ "▁through", -7.829704400466477 ], [ "▁seven", -7.836407871710429 ], [ "▁down", -7.836418492278396 ], [ "▁right", -7.858358391858477 ], [ "▁gu", -7.86040337421632 ], [ "▁should", -7.8786070173740015 ], [ "▁show", -7.8912844366308175 ], [ "cent", -7.898714301504919 ], [ "▁imp", -7.9007884922169005 ], [ "low", -7.905991712097462 ], [ "port", -7.914066728567356 ], [ "line", -7.9204166022994045 ], [ "▁twenty", -7.9331023799425235 ], [ "▁inter", -7.933146540231563 ], [ "▁point", -7.947713293554528 ], [ "▁though", -7.950963442024104 ], [ "▁help", -7.953395760731329 ], [ "unk", -7.964053831789903 ], [ "land", -7.969005005504924 ], [ "late", -7.974607980796545 ], [ "▁high", -7.979294411944041 ], [ "hol", -7.984142148598162 ], [ "▁something", -7.987930016312793 ], [ "▁start", -7.9915214497126765 ], [ "▁great", -7.995695921767435 ], [ "▁did", -7.995769657368475 ], [ "▁own", -7.998997362615388 ], [ "▁still", -8.004415671326594 ], [ "▁give", -8.007682726112 ], [ "▁change", -8.04354751743401 ], [ "▁live", -8.045269835331224 ], [ "▁mean", -8.05133774848325 ], [ "▁ten", -8.056860573632987 ], [ "ions", -8.056903676226453 ], [ "▁feel", -8.0580666657745 ], [ "dent", -8.066761170886625 ], [ "▁plan", -8.071036532069423 ], [ "▁around", -8.07490021206401 ], [ "▁again", -8.088898555817392 ], [ "ked", -8.090024710382105 ], [ "▁i'm", -8.092715477262875 ], [ "▁win", -8.100661165025913 ], [ "▁before", -8.10448978812413 ], [ "▁place", -8.104619689156987 ], [ "▁find", -8.114301047518076 ], [ "▁rep", -8.122315146334142 ], [ "▁old", -8.123425447706955 ], [ "que", -8.124806078291178 ], [ "▁home", -8.138270977097743 ], [ "▁same", -8.146767026460111 ], [ "▁made", -8.146986846010165 ], [ "ities", -8.15076062197818 ], [ "▁gene", -8.153271792321526 ], [ "▁little", -8.157188696200281 ], [ "▁never", -8.15876601141345 ], [ "▁add", -8.16085152557753 ], [ "▁dec", -8.162053869836686 ], [ "▁such", -8.166487774779087 ], [ "▁real", -8.170648079845972 ], [ ">", -8.174546931075477 ], [ "<", -8.174546931075477 ], [ "▁different", -8.177401003173864 ], [ "▁america", -8.195006004980675 ], [ "▁percent", -8.203669636244902 ], [ "▁happen", -8.217603938472319 ], [ "▁person", -8.22009536018725 ], [ "▁try", -8.221193651201794 ], [ "▁problem", -8.227408597224207 ], [ "▁war", -8.230954901256146 ], [ "▁hand", -8.254610862372143 ], [ "▁few", -8.25542861805221 ], [ "▁under", -8.259510957523512 ], [ "▁might", -8.259548828010718 ], [ "▁why", -8.266548799381441 ], [ "▁far", -8.273004033857472 ], [ "▁another", -8.275188116440123 ], [ "▁while", -8.277332697575044 ], [ "▁children", -8.278018743726047 ], [ "▁turn", -8.295468236043428 ], [ "▁hard", -8.319579651267661 ], [ "▁school", -8.324874007903873 ], [ "▁system", -8.334528863425346 ], [ "▁fact", -8.340618576727202 ], [ "ship", -8.356785263078958 ], [ "▁fun", -8.35745033900121 ], [ "▁found", -8.357664922206958 ], [ "▁talk", -8.360321901371398 ], [ "▁always", -8.36245965305476 ], [ "▁water", -8.366181844042655 ], [ "▁kind", -8.370441708712251 ], [ "▁power", -8.407352983466396 ], [ "serv", -8.417257194651873 ], [ "▁human", -8.422197729087948 ], [ "▁thirty", -8.424889240542294 ], [ "▁move", -8.425313917158071 ], [ "▁develop", -8.432217995201649 ], [ "▁country", -8.437154694153355 ], [ "bility", -8.442062543843859 ], [ "▁trans", -8.445491134571297 ], [ "▁keep", -8.447121538590636 ], [ "▁between", -8.450074712109986 ], [ "▁question", -8.45132704745506 ], [ "▁blo", -8.457137199160444 ], [ "▁small", -8.464488253220336 ], [ "▁both", -8.465391170838778 ], [ "▁money", -8.471480248618416 ], [ "▁important", -8.474535449814978 ], [ "▁women", -8.488463151090519 ], [ "▁next", -8.499226729264004 ], [ "▁fifty", -8.508940876979525 ], [ "ality", -8.518162563343077 ], [ "▁we're", -8.523634714033474 ], [ "▁friend", -8.529359417835346 ], [ "▁family", -8.535293339824516 ], [ "▁without", -8.537235506300181 ], [ "▁away", -8.538471008287003 ], [ "▁build", -8.53887194141614 ], [ "▁lead", -8.541724089953993 ], [ "▁today", -8.556512784276263 ], [ "▁number", -8.558202484196892 ], [ "▁large", -8.564258756492881 ], [ "▁health", -8.565300531106967 ], [ "▁learn", -8.567104799745971 ], [ "▁believe", -8.577380612888348 ], [ "▁face", -8.5781215463003 ], [ "ption", -8.585144346347143 ], [ "▁free", -8.592213001257278 ], [ "▁book", -8.599140662214898 ], [ "▁house", -8.602072174491202 ], [ "▁business", -8.603458120072414 ], [ "▁open", -8.624533589738132 ], [ "▁you're", -8.648211923200755 ], [ "▁didn't", -8.650732869456236 ], [ "▁research", -8.654318581492854 ], [ "▁government", -8.659900246962522 ], [ "▁enough", -8.661264202203283 ], [ "▁market", -8.667470844760473 ], [ "▁experience", -8.668982145927787 ], [ "▁course", -8.66977737797813 ], [ "▁second", -8.700729924214153 ], [ "▁create", -8.70142942845552 ], [ "▁together", -8.705533541005918 ], [ "▁product", -8.707952333543426 ], [ "▁month", -8.712667102719207 ], [ "▁understand", -8.714626952165377 ], [ "▁group", -8.719622335030813 ], [ "▁hope", -8.72761204981612 ], [ "▁word", -8.738163070498095 ], [ "▁actually", -8.739409587339784 ], [ "▁million", -8.74144010215678 ], [ "▁public", -8.742966764345939 ], [ "▁food", -8.752893623113762 ], [ "▁effect", -8.757232196017489 ], [ "▁design", -8.76188226991536 ], [ "▁level", -8.804900237478842 ], [ "▁reason", -8.815829965488463 ], [ "▁result", -8.816553476957232 ], [ "▁everything", -8.818899965733237 ], [ "▁direct", -8.836863579748076 ], [ "▁they're", -8.839261528715383 ], [ "▁story", -8.848157809410475 ], [ "▁watch", -8.856317693526307 ], [ "▁process", -8.864285937562878 ], [ "▁certain", -8.86481025845487 ], [ "▁moment", -8.874608010450409 ], [ "▁student", -8.891495076518078 ], [ "▁future", -8.903920388479646 ], [ "▁space", -8.907814016098657 ], [ "▁whether", -8.913050460769428 ], [ "▁anything", -8.915366793380104 ], [ "▁control", -8.919573217710804 ], [ "▁almost", -8.94655005817442 ], [ "▁support", -8.951967867236126 ], [ "▁walk", -8.955584246502458 ], [ "▁doesn't", -8.963873365007096 ], [ "▁information", -8.968889128612766 ], [ "▁social", -8.971342303562139 ], [ "▁follow", -8.974468157141684 ], [ "▁until", -8.990321616601861 ], [ "▁example", -9.001875521562726 ], [ "▁difficult", -9.016530669785697 ], [ "▁already", -9.017723103965798 ], [ "▁expect", -9.02178472609668 ], [ "▁energy", -9.024561047592886 ], [ "▁success", -9.028600208851303 ], [ "▁minute", -9.030795425316873 ], [ "▁europe", -9.047719522871835 ], [ "▁probably", -9.048211213265372 ], [ "▁project", -9.05081191490136 ], [ "▁sometimes", -9.053271521338392 ], [ "▁photo", -9.059860032471327 ], [ "▁patient", -9.075396006321792 ], [ "▁across", -9.08167587656886 ], [ "▁particular", -9.08822856829106 ], [ "▁possible", -9.095938491890514 ], [ "vision", -9.105540231265378 ], [ "▁technology", -9.151043704411448 ], [ "▁environment", -9.15969788447536 ], [ "▁political", -9.167264556603437 ], [ "▁themselves", -9.17697704069669 ], [ "position", -9.20491759319196 ], [ "▁strong", -9.205733742263186 ], [ "▁remember", -9.206030914810098 ], [ "▁character", -9.20991178052067 ], [ "▁picture", -9.223497104806157 ], [ "▁wonder", -9.231224767871147 ], [ "▁community", -9.241375579372438 ], [ "▁perhaps", -9.253591058587716 ], [ "▁economic", -9.254737082281682 ], [ "▁global", -9.257818271485323 ], [ "▁challenge", -9.258951607073556 ], [ "▁individual", -9.297649240927925 ], [ "▁suggest", -9.299664904893849 ], [ "▁natural", -9.30603455476944 ], [ "▁special", -9.344672135415555 ], [ "▁difference", -9.372803643965124 ], [ "▁especially", -9.410608286507564 ], [ "▁tradition", -9.46199084516556 ], [ "▁although", -9.471896386211808 ], [ "▁economy", -9.487149405320343 ], [ "▁potential", -9.5558471063055 ], [ "▁opportunity", -9.567421441451728 ], [ "▁university", -9.678153863522152 ], [ "▁significant", -9.94182875191974 ], [ "0", -13.077322941590932 ], [ "1", -13.45284742159885 ], [ "2", -13.656091927467775 ], [ "9", -14.17117877050488 ], [ "[", -14.347114963703774 ], [ "]", -14.378726769576591 ], [ "3", -14.45452055080778 ], [ "5", -14.67569730318567 ], [ "8", -14.707636103260414 ], [ "4", -15.036026851076915 ], [ "$", -15.036026851076915 ], [ "6", -15.187832394806907 ], [ "7", -15.187832394806907 ], [ "&", -15.63514202146954 ], [ "+", -17.41148545414651 ], [ "=", -17.61208069224178 ], [ "#", -17.863246193407697 ], [ "%", -18.342214447428265 ], [ "@", -18.95888111409493 ], [ "^", -19.792214447428265 ], [ "\\", -20.792214447428265 ], [ "*", -20.792214447428265 ] ] } }