Shawn Tan
committed on
Commit
·
bf62d95
1
Parent(s):
28d5673
Remove extra config.
Browse files- config.json +40 -40
config.json
CHANGED
@@ -309,7 +309,7 @@
|
|
309 |
"dropout": 0,
|
310 |
"num_key_value_heads": 4,
|
311 |
"sequence_mixer_type": "stickbreaking_attention",
|
312 |
-
|
313 |
},
|
314 |
{
|
315 |
"add_bias": false,
|
@@ -318,7 +318,7 @@
|
|
318 |
"dropout": 0,
|
319 |
"num_key_value_heads": 4,
|
320 |
"sequence_mixer_type": "stickbreaking_attention",
|
321 |
-
|
322 |
},
|
323 |
{
|
324 |
"add_bias": false,
|
@@ -327,7 +327,7 @@
|
|
327 |
"dropout": 0,
|
328 |
"num_key_value_heads": 4,
|
329 |
"sequence_mixer_type": "stickbreaking_attention",
|
330 |
-
|
331 |
},
|
332 |
{
|
333 |
"add_bias": false,
|
@@ -336,7 +336,7 @@
|
|
336 |
"dropout": 0,
|
337 |
"num_key_value_heads": 4,
|
338 |
"sequence_mixer_type": "stickbreaking_attention",
|
339 |
-
|
340 |
},
|
341 |
{
|
342 |
"add_bias": false,
|
@@ -345,7 +345,7 @@
|
|
345 |
"dropout": 0,
|
346 |
"num_key_value_heads": 4,
|
347 |
"sequence_mixer_type": "stickbreaking_attention",
|
348 |
-
|
349 |
},
|
350 |
{
|
351 |
"add_bias": false,
|
@@ -354,7 +354,7 @@
|
|
354 |
"dropout": 0,
|
355 |
"num_key_value_heads": 4,
|
356 |
"sequence_mixer_type": "stickbreaking_attention",
|
357 |
-
|
358 |
},
|
359 |
{
|
360 |
"add_bias": false,
|
@@ -363,7 +363,7 @@
|
|
363 |
"dropout": 0,
|
364 |
"num_key_value_heads": 4,
|
365 |
"sequence_mixer_type": "stickbreaking_attention",
|
366 |
-
|
367 |
},
|
368 |
{
|
369 |
"add_bias": false,
|
@@ -372,7 +372,7 @@
|
|
372 |
"dropout": 0,
|
373 |
"num_key_value_heads": 4,
|
374 |
"sequence_mixer_type": "stickbreaking_attention",
|
375 |
-
|
376 |
},
|
377 |
{
|
378 |
"add_bias": false,
|
@@ -381,7 +381,7 @@
|
|
381 |
"dropout": 0,
|
382 |
"num_key_value_heads": 4,
|
383 |
"sequence_mixer_type": "stickbreaking_attention",
|
384 |
-
|
385 |
},
|
386 |
{
|
387 |
"add_bias": false,
|
@@ -390,7 +390,7 @@
|
|
390 |
"dropout": 0,
|
391 |
"num_key_value_heads": 4,
|
392 |
"sequence_mixer_type": "stickbreaking_attention",
|
393 |
-
|
394 |
},
|
395 |
{
|
396 |
"add_bias": false,
|
@@ -399,7 +399,7 @@
|
|
399 |
"dropout": 0,
|
400 |
"num_key_value_heads": 4,
|
401 |
"sequence_mixer_type": "stickbreaking_attention",
|
402 |
-
|
403 |
},
|
404 |
{
|
405 |
"add_bias": false,
|
@@ -408,7 +408,7 @@
|
|
408 |
"dropout": 0,
|
409 |
"num_key_value_heads": 4,
|
410 |
"sequence_mixer_type": "stickbreaking_attention",
|
411 |
-
|
412 |
},
|
413 |
{
|
414 |
"add_bias": false,
|
@@ -417,7 +417,7 @@
|
|
417 |
"dropout": 0,
|
418 |
"num_key_value_heads": 4,
|
419 |
"sequence_mixer_type": "stickbreaking_attention",
|
420 |
-
|
421 |
},
|
422 |
{
|
423 |
"add_bias": false,
|
@@ -426,7 +426,7 @@
|
|
426 |
"dropout": 0,
|
427 |
"num_key_value_heads": 4,
|
428 |
"sequence_mixer_type": "stickbreaking_attention",
|
429 |
-
|
430 |
},
|
431 |
{
|
432 |
"add_bias": false,
|
@@ -435,7 +435,7 @@
|
|
435 |
"dropout": 0,
|
436 |
"num_key_value_heads": 4,
|
437 |
"sequence_mixer_type": "stickbreaking_attention",
|
438 |
-
|
439 |
},
|
440 |
{
|
441 |
"add_bias": false,
|
@@ -444,7 +444,7 @@
|
|
444 |
"dropout": 0,
|
445 |
"num_key_value_heads": 4,
|
446 |
"sequence_mixer_type": "stickbreaking_attention",
|
447 |
-
|
448 |
},
|
449 |
{
|
450 |
"add_bias": false,
|
@@ -453,7 +453,7 @@
|
|
453 |
"dropout": 0,
|
454 |
"num_key_value_heads": 4,
|
455 |
"sequence_mixer_type": "stickbreaking_attention",
|
456 |
-
|
457 |
},
|
458 |
{
|
459 |
"add_bias": false,
|
@@ -462,7 +462,7 @@
|
|
462 |
"dropout": 0,
|
463 |
"num_key_value_heads": 4,
|
464 |
"sequence_mixer_type": "stickbreaking_attention",
|
465 |
-
|
466 |
},
|
467 |
{
|
468 |
"add_bias": false,
|
@@ -471,7 +471,7 @@
|
|
471 |
"dropout": 0,
|
472 |
"num_key_value_heads": 4,
|
473 |
"sequence_mixer_type": "stickbreaking_attention",
|
474 |
-
|
475 |
},
|
476 |
{
|
477 |
"add_bias": false,
|
@@ -480,7 +480,7 @@
|
|
480 |
"dropout": 0,
|
481 |
"num_key_value_heads": 4,
|
482 |
"sequence_mixer_type": "stickbreaking_attention",
|
483 |
-
|
484 |
},
|
485 |
{
|
486 |
"add_bias": false,
|
@@ -489,7 +489,7 @@
|
|
489 |
"dropout": 0,
|
490 |
"num_key_value_heads": 4,
|
491 |
"sequence_mixer_type": "stickbreaking_attention",
|
492 |
-
|
493 |
},
|
494 |
{
|
495 |
"add_bias": false,
|
@@ -498,7 +498,7 @@
|
|
498 |
"dropout": 0,
|
499 |
"num_key_value_heads": 4,
|
500 |
"sequence_mixer_type": "stickbreaking_attention",
|
501 |
-
|
502 |
},
|
503 |
{
|
504 |
"add_bias": false,
|
@@ -507,7 +507,7 @@
|
|
507 |
"dropout": 0,
|
508 |
"num_key_value_heads": 4,
|
509 |
"sequence_mixer_type": "stickbreaking_attention",
|
510 |
-
|
511 |
},
|
512 |
{
|
513 |
"add_bias": false,
|
@@ -516,7 +516,7 @@
|
|
516 |
"dropout": 0,
|
517 |
"num_key_value_heads": 4,
|
518 |
"sequence_mixer_type": "stickbreaking_attention",
|
519 |
-
|
520 |
},
|
521 |
{
|
522 |
"add_bias": false,
|
@@ -525,7 +525,7 @@
|
|
525 |
"dropout": 0,
|
526 |
"num_key_value_heads": 4,
|
527 |
"sequence_mixer_type": "stickbreaking_attention",
|
528 |
-
|
529 |
},
|
530 |
{
|
531 |
"add_bias": false,
|
@@ -534,7 +534,7 @@
|
|
534 |
"dropout": 0,
|
535 |
"num_key_value_heads": 4,
|
536 |
"sequence_mixer_type": "stickbreaking_attention",
|
537 |
-
|
538 |
},
|
539 |
{
|
540 |
"add_bias": false,
|
@@ -543,7 +543,7 @@
|
|
543 |
"dropout": 0,
|
544 |
"num_key_value_heads": 4,
|
545 |
"sequence_mixer_type": "stickbreaking_attention",
|
546 |
-
|
547 |
},
|
548 |
{
|
549 |
"add_bias": false,
|
@@ -552,7 +552,7 @@
|
|
552 |
"dropout": 0,
|
553 |
"num_key_value_heads": 4,
|
554 |
"sequence_mixer_type": "stickbreaking_attention",
|
555 |
-
|
556 |
},
|
557 |
{
|
558 |
"add_bias": false,
|
@@ -561,7 +561,7 @@
|
|
561 |
"dropout": 0,
|
562 |
"num_key_value_heads": 4,
|
563 |
"sequence_mixer_type": "stickbreaking_attention",
|
564 |
-
|
565 |
},
|
566 |
{
|
567 |
"add_bias": false,
|
@@ -570,7 +570,7 @@
|
|
570 |
"dropout": 0,
|
571 |
"num_key_value_heads": 4,
|
572 |
"sequence_mixer_type": "stickbreaking_attention",
|
573 |
-
|
574 |
},
|
575 |
{
|
576 |
"add_bias": false,
|
@@ -579,7 +579,7 @@
|
|
579 |
"dropout": 0,
|
580 |
"num_key_value_heads": 4,
|
581 |
"sequence_mixer_type": "stickbreaking_attention",
|
582 |
-
|
583 |
},
|
584 |
{
|
585 |
"add_bias": false,
|
@@ -588,7 +588,7 @@
|
|
588 |
"dropout": 0,
|
589 |
"num_key_value_heads": 4,
|
590 |
"sequence_mixer_type": "stickbreaking_attention",
|
591 |
-
|
592 |
},
|
593 |
{
|
594 |
"add_bias": false,
|
@@ -597,7 +597,7 @@
|
|
597 |
"dropout": 0,
|
598 |
"num_key_value_heads": 4,
|
599 |
"sequence_mixer_type": "stickbreaking_attention",
|
600 |
-
|
601 |
},
|
602 |
{
|
603 |
"add_bias": false,
|
@@ -606,7 +606,7 @@
|
|
606 |
"dropout": 0,
|
607 |
"num_key_value_heads": 4,
|
608 |
"sequence_mixer_type": "stickbreaking_attention",
|
609 |
-
|
610 |
},
|
611 |
{
|
612 |
"add_bias": false,
|
@@ -615,7 +615,7 @@
|
|
615 |
"dropout": 0,
|
616 |
"num_key_value_heads": 4,
|
617 |
"sequence_mixer_type": "stickbreaking_attention",
|
618 |
-
|
619 |
},
|
620 |
{
|
621 |
"add_bias": false,
|
@@ -624,7 +624,7 @@
|
|
624 |
"dropout": 0,
|
625 |
"num_key_value_heads": 4,
|
626 |
"sequence_mixer_type": "stickbreaking_attention",
|
627 |
-
|
628 |
},
|
629 |
{
|
630 |
"add_bias": false,
|
@@ -633,7 +633,7 @@
|
|
633 |
"dropout": 0,
|
634 |
"num_key_value_heads": 4,
|
635 |
"sequence_mixer_type": "stickbreaking_attention",
|
636 |
-
|
637 |
},
|
638 |
{
|
639 |
"add_bias": false,
|
@@ -642,7 +642,7 @@
|
|
642 |
"dropout": 0,
|
643 |
"num_key_value_heads": 4,
|
644 |
"sequence_mixer_type": "stickbreaking_attention",
|
645 |
-
|
646 |
},
|
647 |
{
|
648 |
"add_bias": false,
|
@@ -651,7 +651,7 @@
|
|
651 |
"dropout": 0,
|
652 |
"num_key_value_heads": 4,
|
653 |
"sequence_mixer_type": "stickbreaking_attention",
|
654 |
-
|
655 |
},
|
656 |
{
|
657 |
"add_bias": false,
|
@@ -660,7 +660,7 @@
|
|
660 |
"dropout": 0,
|
661 |
"num_key_value_heads": 4,
|
662 |
"sequence_mixer_type": "stickbreaking_attention",
|
663 |
-
|
664 |
}
|
665 |
],
|
666 |
"transformers_version": "4.49.0.dev0",
|
|
|
309 |
"dropout": 0,
|
310 |
"num_key_value_heads": 4,
|
311 |
"sequence_mixer_type": "stickbreaking_attention",
|
312 |
+
|
313 |
},
|
314 |
{
|
315 |
"add_bias": false,
|
|
|
318 |
"dropout": 0,
|
319 |
"num_key_value_heads": 4,
|
320 |
"sequence_mixer_type": "stickbreaking_attention",
|
321 |
+
|
322 |
},
|
323 |
{
|
324 |
"add_bias": false,
|
|
|
327 |
"dropout": 0,
|
328 |
"num_key_value_heads": 4,
|
329 |
"sequence_mixer_type": "stickbreaking_attention",
|
330 |
+
|
331 |
},
|
332 |
{
|
333 |
"add_bias": false,
|
|
|
336 |
"dropout": 0,
|
337 |
"num_key_value_heads": 4,
|
338 |
"sequence_mixer_type": "stickbreaking_attention",
|
339 |
+
|
340 |
},
|
341 |
{
|
342 |
"add_bias": false,
|
|
|
345 |
"dropout": 0,
|
346 |
"num_key_value_heads": 4,
|
347 |
"sequence_mixer_type": "stickbreaking_attention",
|
348 |
+
|
349 |
},
|
350 |
{
|
351 |
"add_bias": false,
|
|
|
354 |
"dropout": 0,
|
355 |
"num_key_value_heads": 4,
|
356 |
"sequence_mixer_type": "stickbreaking_attention",
|
357 |
+
|
358 |
},
|
359 |
{
|
360 |
"add_bias": false,
|
|
|
363 |
"dropout": 0,
|
364 |
"num_key_value_heads": 4,
|
365 |
"sequence_mixer_type": "stickbreaking_attention",
|
366 |
+
|
367 |
},
|
368 |
{
|
369 |
"add_bias": false,
|
|
|
372 |
"dropout": 0,
|
373 |
"num_key_value_heads": 4,
|
374 |
"sequence_mixer_type": "stickbreaking_attention",
|
375 |
+
|
376 |
},
|
377 |
{
|
378 |
"add_bias": false,
|
|
|
381 |
"dropout": 0,
|
382 |
"num_key_value_heads": 4,
|
383 |
"sequence_mixer_type": "stickbreaking_attention",
|
384 |
+
|
385 |
},
|
386 |
{
|
387 |
"add_bias": false,
|
|
|
390 |
"dropout": 0,
|
391 |
"num_key_value_heads": 4,
|
392 |
"sequence_mixer_type": "stickbreaking_attention",
|
393 |
+
|
394 |
},
|
395 |
{
|
396 |
"add_bias": false,
|
|
|
399 |
"dropout": 0,
|
400 |
"num_key_value_heads": 4,
|
401 |
"sequence_mixer_type": "stickbreaking_attention",
|
402 |
+
|
403 |
},
|
404 |
{
|
405 |
"add_bias": false,
|
|
|
408 |
"dropout": 0,
|
409 |
"num_key_value_heads": 4,
|
410 |
"sequence_mixer_type": "stickbreaking_attention",
|
411 |
+
|
412 |
},
|
413 |
{
|
414 |
"add_bias": false,
|
|
|
417 |
"dropout": 0,
|
418 |
"num_key_value_heads": 4,
|
419 |
"sequence_mixer_type": "stickbreaking_attention",
|
420 |
+
|
421 |
},
|
422 |
{
|
423 |
"add_bias": false,
|
|
|
426 |
"dropout": 0,
|
427 |
"num_key_value_heads": 4,
|
428 |
"sequence_mixer_type": "stickbreaking_attention",
|
429 |
+
|
430 |
},
|
431 |
{
|
432 |
"add_bias": false,
|
|
|
435 |
"dropout": 0,
|
436 |
"num_key_value_heads": 4,
|
437 |
"sequence_mixer_type": "stickbreaking_attention",
|
438 |
+
|
439 |
},
|
440 |
{
|
441 |
"add_bias": false,
|
|
|
444 |
"dropout": 0,
|
445 |
"num_key_value_heads": 4,
|
446 |
"sequence_mixer_type": "stickbreaking_attention",
|
447 |
+
|
448 |
},
|
449 |
{
|
450 |
"add_bias": false,
|
|
|
453 |
"dropout": 0,
|
454 |
"num_key_value_heads": 4,
|
455 |
"sequence_mixer_type": "stickbreaking_attention",
|
456 |
+
|
457 |
},
|
458 |
{
|
459 |
"add_bias": false,
|
|
|
462 |
"dropout": 0,
|
463 |
"num_key_value_heads": 4,
|
464 |
"sequence_mixer_type": "stickbreaking_attention",
|
465 |
+
|
466 |
},
|
467 |
{
|
468 |
"add_bias": false,
|
|
|
471 |
"dropout": 0,
|
472 |
"num_key_value_heads": 4,
|
473 |
"sequence_mixer_type": "stickbreaking_attention",
|
474 |
+
|
475 |
},
|
476 |
{
|
477 |
"add_bias": false,
|
|
|
480 |
"dropout": 0,
|
481 |
"num_key_value_heads": 4,
|
482 |
"sequence_mixer_type": "stickbreaking_attention",
|
483 |
+
|
484 |
},
|
485 |
{
|
486 |
"add_bias": false,
|
|
|
489 |
"dropout": 0,
|
490 |
"num_key_value_heads": 4,
|
491 |
"sequence_mixer_type": "stickbreaking_attention",
|
492 |
+
|
493 |
},
|
494 |
{
|
495 |
"add_bias": false,
|
|
|
498 |
"dropout": 0,
|
499 |
"num_key_value_heads": 4,
|
500 |
"sequence_mixer_type": "stickbreaking_attention",
|
501 |
+
|
502 |
},
|
503 |
{
|
504 |
"add_bias": false,
|
|
|
507 |
"dropout": 0,
|
508 |
"num_key_value_heads": 4,
|
509 |
"sequence_mixer_type": "stickbreaking_attention",
|
510 |
+
|
511 |
},
|
512 |
{
|
513 |
"add_bias": false,
|
|
|
516 |
"dropout": 0,
|
517 |
"num_key_value_heads": 4,
|
518 |
"sequence_mixer_type": "stickbreaking_attention",
|
519 |
+
|
520 |
},
|
521 |
{
|
522 |
"add_bias": false,
|
|
|
525 |
"dropout": 0,
|
526 |
"num_key_value_heads": 4,
|
527 |
"sequence_mixer_type": "stickbreaking_attention",
|
528 |
+
|
529 |
},
|
530 |
{
|
531 |
"add_bias": false,
|
|
|
534 |
"dropout": 0,
|
535 |
"num_key_value_heads": 4,
|
536 |
"sequence_mixer_type": "stickbreaking_attention",
|
537 |
+
|
538 |
},
|
539 |
{
|
540 |
"add_bias": false,
|
|
|
543 |
"dropout": 0,
|
544 |
"num_key_value_heads": 4,
|
545 |
"sequence_mixer_type": "stickbreaking_attention",
|
546 |
+
|
547 |
},
|
548 |
{
|
549 |
"add_bias": false,
|
|
|
552 |
"dropout": 0,
|
553 |
"num_key_value_heads": 4,
|
554 |
"sequence_mixer_type": "stickbreaking_attention",
|
555 |
+
|
556 |
},
|
557 |
{
|
558 |
"add_bias": false,
|
|
|
561 |
"dropout": 0,
|
562 |
"num_key_value_heads": 4,
|
563 |
"sequence_mixer_type": "stickbreaking_attention",
|
564 |
+
|
565 |
},
|
566 |
{
|
567 |
"add_bias": false,
|
|
|
570 |
"dropout": 0,
|
571 |
"num_key_value_heads": 4,
|
572 |
"sequence_mixer_type": "stickbreaking_attention",
|
573 |
+
|
574 |
},
|
575 |
{
|
576 |
"add_bias": false,
|
|
|
579 |
"dropout": 0,
|
580 |
"num_key_value_heads": 4,
|
581 |
"sequence_mixer_type": "stickbreaking_attention",
|
582 |
+
|
583 |
},
|
584 |
{
|
585 |
"add_bias": false,
|
|
|
588 |
"dropout": 0,
|
589 |
"num_key_value_heads": 4,
|
590 |
"sequence_mixer_type": "stickbreaking_attention",
|
591 |
+
|
592 |
},
|
593 |
{
|
594 |
"add_bias": false,
|
|
|
597 |
"dropout": 0,
|
598 |
"num_key_value_heads": 4,
|
599 |
"sequence_mixer_type": "stickbreaking_attention",
|
600 |
+
|
601 |
},
|
602 |
{
|
603 |
"add_bias": false,
|
|
|
606 |
"dropout": 0,
|
607 |
"num_key_value_heads": 4,
|
608 |
"sequence_mixer_type": "stickbreaking_attention",
|
609 |
+
|
610 |
},
|
611 |
{
|
612 |
"add_bias": false,
|
|
|
615 |
"dropout": 0,
|
616 |
"num_key_value_heads": 4,
|
617 |
"sequence_mixer_type": "stickbreaking_attention",
|
618 |
+
|
619 |
},
|
620 |
{
|
621 |
"add_bias": false,
|
|
|
624 |
"dropout": 0,
|
625 |
"num_key_value_heads": 4,
|
626 |
"sequence_mixer_type": "stickbreaking_attention",
|
627 |
+
|
628 |
},
|
629 |
{
|
630 |
"add_bias": false,
|
|
|
633 |
"dropout": 0,
|
634 |
"num_key_value_heads": 4,
|
635 |
"sequence_mixer_type": "stickbreaking_attention",
|
636 |
+
|
637 |
},
|
638 |
{
|
639 |
"add_bias": false,
|
|
|
642 |
"dropout": 0,
|
643 |
"num_key_value_heads": 4,
|
644 |
"sequence_mixer_type": "stickbreaking_attention",
|
645 |
+
|
646 |
},
|
647 |
{
|
648 |
"add_bias": false,
|
|
|
651 |
"dropout": 0,
|
652 |
"num_key_value_heads": 4,
|
653 |
"sequence_mixer_type": "stickbreaking_attention",
|
654 |
+
|
655 |
},
|
656 |
{
|
657 |
"add_bias": false,
|
|
|
660 |
"dropout": 0,
|
661 |
"num_key_value_heads": 4,
|
662 |
"sequence_mixer_type": "stickbreaking_attention",
|
663 |
+
|
664 |
}
|
665 |
],
|
666 |
"transformers_version": "4.49.0.dev0",
|