Shawn Tan committed on
Commit
bf62d95
·
1 Parent(s): 28d5673

Remove extra config.

Browse files
Files changed (1) hide show
  1. config.json +40 -40
config.json CHANGED
@@ -309,7 +309,7 @@
309
  "dropout": 0,
310
  "num_key_value_heads": 4,
311
  "sequence_mixer_type": "stickbreaking_attention",
312
- "softmax_dropout": 0
313
  },
314
  {
315
  "add_bias": false,
@@ -318,7 +318,7 @@
318
  "dropout": 0,
319
  "num_key_value_heads": 4,
320
  "sequence_mixer_type": "stickbreaking_attention",
321
- "softmax_dropout": 0
322
  },
323
  {
324
  "add_bias": false,
@@ -327,7 +327,7 @@
327
  "dropout": 0,
328
  "num_key_value_heads": 4,
329
  "sequence_mixer_type": "stickbreaking_attention",
330
- "softmax_dropout": 0
331
  },
332
  {
333
  "add_bias": false,
@@ -336,7 +336,7 @@
336
  "dropout": 0,
337
  "num_key_value_heads": 4,
338
  "sequence_mixer_type": "stickbreaking_attention",
339
- "softmax_dropout": 0
340
  },
341
  {
342
  "add_bias": false,
@@ -345,7 +345,7 @@
345
  "dropout": 0,
346
  "num_key_value_heads": 4,
347
  "sequence_mixer_type": "stickbreaking_attention",
348
- "softmax_dropout": 0
349
  },
350
  {
351
  "add_bias": false,
@@ -354,7 +354,7 @@
354
  "dropout": 0,
355
  "num_key_value_heads": 4,
356
  "sequence_mixer_type": "stickbreaking_attention",
357
- "softmax_dropout": 0
358
  },
359
  {
360
  "add_bias": false,
@@ -363,7 +363,7 @@
363
  "dropout": 0,
364
  "num_key_value_heads": 4,
365
  "sequence_mixer_type": "stickbreaking_attention",
366
- "softmax_dropout": 0
367
  },
368
  {
369
  "add_bias": false,
@@ -372,7 +372,7 @@
372
  "dropout": 0,
373
  "num_key_value_heads": 4,
374
  "sequence_mixer_type": "stickbreaking_attention",
375
- "softmax_dropout": 0
376
  },
377
  {
378
  "add_bias": false,
@@ -381,7 +381,7 @@
381
  "dropout": 0,
382
  "num_key_value_heads": 4,
383
  "sequence_mixer_type": "stickbreaking_attention",
384
- "softmax_dropout": 0
385
  },
386
  {
387
  "add_bias": false,
@@ -390,7 +390,7 @@
390
  "dropout": 0,
391
  "num_key_value_heads": 4,
392
  "sequence_mixer_type": "stickbreaking_attention",
393
- "softmax_dropout": 0
394
  },
395
  {
396
  "add_bias": false,
@@ -399,7 +399,7 @@
399
  "dropout": 0,
400
  "num_key_value_heads": 4,
401
  "sequence_mixer_type": "stickbreaking_attention",
402
- "softmax_dropout": 0
403
  },
404
  {
405
  "add_bias": false,
@@ -408,7 +408,7 @@
408
  "dropout": 0,
409
  "num_key_value_heads": 4,
410
  "sequence_mixer_type": "stickbreaking_attention",
411
- "softmax_dropout": 0
412
  },
413
  {
414
  "add_bias": false,
@@ -417,7 +417,7 @@
417
  "dropout": 0,
418
  "num_key_value_heads": 4,
419
  "sequence_mixer_type": "stickbreaking_attention",
420
- "softmax_dropout": 0
421
  },
422
  {
423
  "add_bias": false,
@@ -426,7 +426,7 @@
426
  "dropout": 0,
427
  "num_key_value_heads": 4,
428
  "sequence_mixer_type": "stickbreaking_attention",
429
- "softmax_dropout": 0
430
  },
431
  {
432
  "add_bias": false,
@@ -435,7 +435,7 @@
435
  "dropout": 0,
436
  "num_key_value_heads": 4,
437
  "sequence_mixer_type": "stickbreaking_attention",
438
- "softmax_dropout": 0
439
  },
440
  {
441
  "add_bias": false,
@@ -444,7 +444,7 @@
444
  "dropout": 0,
445
  "num_key_value_heads": 4,
446
  "sequence_mixer_type": "stickbreaking_attention",
447
- "softmax_dropout": 0
448
  },
449
  {
450
  "add_bias": false,
@@ -453,7 +453,7 @@
453
  "dropout": 0,
454
  "num_key_value_heads": 4,
455
  "sequence_mixer_type": "stickbreaking_attention",
456
- "softmax_dropout": 0
457
  },
458
  {
459
  "add_bias": false,
@@ -462,7 +462,7 @@
462
  "dropout": 0,
463
  "num_key_value_heads": 4,
464
  "sequence_mixer_type": "stickbreaking_attention",
465
- "softmax_dropout": 0
466
  },
467
  {
468
  "add_bias": false,
@@ -471,7 +471,7 @@
471
  "dropout": 0,
472
  "num_key_value_heads": 4,
473
  "sequence_mixer_type": "stickbreaking_attention",
474
- "softmax_dropout": 0
475
  },
476
  {
477
  "add_bias": false,
@@ -480,7 +480,7 @@
480
  "dropout": 0,
481
  "num_key_value_heads": 4,
482
  "sequence_mixer_type": "stickbreaking_attention",
483
- "softmax_dropout": 0
484
  },
485
  {
486
  "add_bias": false,
@@ -489,7 +489,7 @@
489
  "dropout": 0,
490
  "num_key_value_heads": 4,
491
  "sequence_mixer_type": "stickbreaking_attention",
492
- "softmax_dropout": 0
493
  },
494
  {
495
  "add_bias": false,
@@ -498,7 +498,7 @@
498
  "dropout": 0,
499
  "num_key_value_heads": 4,
500
  "sequence_mixer_type": "stickbreaking_attention",
501
- "softmax_dropout": 0
502
  },
503
  {
504
  "add_bias": false,
@@ -507,7 +507,7 @@
507
  "dropout": 0,
508
  "num_key_value_heads": 4,
509
  "sequence_mixer_type": "stickbreaking_attention",
510
- "softmax_dropout": 0
511
  },
512
  {
513
  "add_bias": false,
@@ -516,7 +516,7 @@
516
  "dropout": 0,
517
  "num_key_value_heads": 4,
518
  "sequence_mixer_type": "stickbreaking_attention",
519
- "softmax_dropout": 0
520
  },
521
  {
522
  "add_bias": false,
@@ -525,7 +525,7 @@
525
  "dropout": 0,
526
  "num_key_value_heads": 4,
527
  "sequence_mixer_type": "stickbreaking_attention",
528
- "softmax_dropout": 0
529
  },
530
  {
531
  "add_bias": false,
@@ -534,7 +534,7 @@
534
  "dropout": 0,
535
  "num_key_value_heads": 4,
536
  "sequence_mixer_type": "stickbreaking_attention",
537
- "softmax_dropout": 0
538
  },
539
  {
540
  "add_bias": false,
@@ -543,7 +543,7 @@
543
  "dropout": 0,
544
  "num_key_value_heads": 4,
545
  "sequence_mixer_type": "stickbreaking_attention",
546
- "softmax_dropout": 0
547
  },
548
  {
549
  "add_bias": false,
@@ -552,7 +552,7 @@
552
  "dropout": 0,
553
  "num_key_value_heads": 4,
554
  "sequence_mixer_type": "stickbreaking_attention",
555
- "softmax_dropout": 0
556
  },
557
  {
558
  "add_bias": false,
@@ -561,7 +561,7 @@
561
  "dropout": 0,
562
  "num_key_value_heads": 4,
563
  "sequence_mixer_type": "stickbreaking_attention",
564
- "softmax_dropout": 0
565
  },
566
  {
567
  "add_bias": false,
@@ -570,7 +570,7 @@
570
  "dropout": 0,
571
  "num_key_value_heads": 4,
572
  "sequence_mixer_type": "stickbreaking_attention",
573
- "softmax_dropout": 0
574
  },
575
  {
576
  "add_bias": false,
@@ -579,7 +579,7 @@
579
  "dropout": 0,
580
  "num_key_value_heads": 4,
581
  "sequence_mixer_type": "stickbreaking_attention",
582
- "softmax_dropout": 0
583
  },
584
  {
585
  "add_bias": false,
@@ -588,7 +588,7 @@
588
  "dropout": 0,
589
  "num_key_value_heads": 4,
590
  "sequence_mixer_type": "stickbreaking_attention",
591
- "softmax_dropout": 0
592
  },
593
  {
594
  "add_bias": false,
@@ -597,7 +597,7 @@
597
  "dropout": 0,
598
  "num_key_value_heads": 4,
599
  "sequence_mixer_type": "stickbreaking_attention",
600
- "softmax_dropout": 0
601
  },
602
  {
603
  "add_bias": false,
@@ -606,7 +606,7 @@
606
  "dropout": 0,
607
  "num_key_value_heads": 4,
608
  "sequence_mixer_type": "stickbreaking_attention",
609
- "softmax_dropout": 0
610
  },
611
  {
612
  "add_bias": false,
@@ -615,7 +615,7 @@
615
  "dropout": 0,
616
  "num_key_value_heads": 4,
617
  "sequence_mixer_type": "stickbreaking_attention",
618
- "softmax_dropout": 0
619
  },
620
  {
621
  "add_bias": false,
@@ -624,7 +624,7 @@
624
  "dropout": 0,
625
  "num_key_value_heads": 4,
626
  "sequence_mixer_type": "stickbreaking_attention",
627
- "softmax_dropout": 0
628
  },
629
  {
630
  "add_bias": false,
@@ -633,7 +633,7 @@
633
  "dropout": 0,
634
  "num_key_value_heads": 4,
635
  "sequence_mixer_type": "stickbreaking_attention",
636
- "softmax_dropout": 0
637
  },
638
  {
639
  "add_bias": false,
@@ -642,7 +642,7 @@
642
  "dropout": 0,
643
  "num_key_value_heads": 4,
644
  "sequence_mixer_type": "stickbreaking_attention",
645
- "softmax_dropout": 0
646
  },
647
  {
648
  "add_bias": false,
@@ -651,7 +651,7 @@
651
  "dropout": 0,
652
  "num_key_value_heads": 4,
653
  "sequence_mixer_type": "stickbreaking_attention",
654
- "softmax_dropout": 0
655
  },
656
  {
657
  "add_bias": false,
@@ -660,7 +660,7 @@
660
  "dropout": 0,
661
  "num_key_value_heads": 4,
662
  "sequence_mixer_type": "stickbreaking_attention",
663
- "softmax_dropout": 0
664
  }
665
  ],
666
  "transformers_version": "4.49.0.dev0",
 
309
  "dropout": 0,
310
  "num_key_value_heads": 4,
311
  "sequence_mixer_type": "stickbreaking_attention",
312
+
313
  },
314
  {
315
  "add_bias": false,
 
318
  "dropout": 0,
319
  "num_key_value_heads": 4,
320
  "sequence_mixer_type": "stickbreaking_attention",
321
+
322
  },
323
  {
324
  "add_bias": false,
 
327
  "dropout": 0,
328
  "num_key_value_heads": 4,
329
  "sequence_mixer_type": "stickbreaking_attention",
330
+
331
  },
332
  {
333
  "add_bias": false,
 
336
  "dropout": 0,
337
  "num_key_value_heads": 4,
338
  "sequence_mixer_type": "stickbreaking_attention",
339
+
340
  },
341
  {
342
  "add_bias": false,
 
345
  "dropout": 0,
346
  "num_key_value_heads": 4,
347
  "sequence_mixer_type": "stickbreaking_attention",
348
+
349
  },
350
  {
351
  "add_bias": false,
 
354
  "dropout": 0,
355
  "num_key_value_heads": 4,
356
  "sequence_mixer_type": "stickbreaking_attention",
357
+
358
  },
359
  {
360
  "add_bias": false,
 
363
  "dropout": 0,
364
  "num_key_value_heads": 4,
365
  "sequence_mixer_type": "stickbreaking_attention",
366
+
367
  },
368
  {
369
  "add_bias": false,
 
372
  "dropout": 0,
373
  "num_key_value_heads": 4,
374
  "sequence_mixer_type": "stickbreaking_attention",
375
+
376
  },
377
  {
378
  "add_bias": false,
 
381
  "dropout": 0,
382
  "num_key_value_heads": 4,
383
  "sequence_mixer_type": "stickbreaking_attention",
384
+
385
  },
386
  {
387
  "add_bias": false,
 
390
  "dropout": 0,
391
  "num_key_value_heads": 4,
392
  "sequence_mixer_type": "stickbreaking_attention",
393
+
394
  },
395
  {
396
  "add_bias": false,
 
399
  "dropout": 0,
400
  "num_key_value_heads": 4,
401
  "sequence_mixer_type": "stickbreaking_attention",
402
+
403
  },
404
  {
405
  "add_bias": false,
 
408
  "dropout": 0,
409
  "num_key_value_heads": 4,
410
  "sequence_mixer_type": "stickbreaking_attention",
411
+
412
  },
413
  {
414
  "add_bias": false,
 
417
  "dropout": 0,
418
  "num_key_value_heads": 4,
419
  "sequence_mixer_type": "stickbreaking_attention",
420
+
421
  },
422
  {
423
  "add_bias": false,
 
426
  "dropout": 0,
427
  "num_key_value_heads": 4,
428
  "sequence_mixer_type": "stickbreaking_attention",
429
+
430
  },
431
  {
432
  "add_bias": false,
 
435
  "dropout": 0,
436
  "num_key_value_heads": 4,
437
  "sequence_mixer_type": "stickbreaking_attention",
438
+
439
  },
440
  {
441
  "add_bias": false,
 
444
  "dropout": 0,
445
  "num_key_value_heads": 4,
446
  "sequence_mixer_type": "stickbreaking_attention",
447
+
448
  },
449
  {
450
  "add_bias": false,
 
453
  "dropout": 0,
454
  "num_key_value_heads": 4,
455
  "sequence_mixer_type": "stickbreaking_attention",
456
+
457
  },
458
  {
459
  "add_bias": false,
 
462
  "dropout": 0,
463
  "num_key_value_heads": 4,
464
  "sequence_mixer_type": "stickbreaking_attention",
465
+
466
  },
467
  {
468
  "add_bias": false,
 
471
  "dropout": 0,
472
  "num_key_value_heads": 4,
473
  "sequence_mixer_type": "stickbreaking_attention",
474
+
475
  },
476
  {
477
  "add_bias": false,
 
480
  "dropout": 0,
481
  "num_key_value_heads": 4,
482
  "sequence_mixer_type": "stickbreaking_attention",
483
+
484
  },
485
  {
486
  "add_bias": false,
 
489
  "dropout": 0,
490
  "num_key_value_heads": 4,
491
  "sequence_mixer_type": "stickbreaking_attention",
492
+
493
  },
494
  {
495
  "add_bias": false,
 
498
  "dropout": 0,
499
  "num_key_value_heads": 4,
500
  "sequence_mixer_type": "stickbreaking_attention",
501
+
502
  },
503
  {
504
  "add_bias": false,
 
507
  "dropout": 0,
508
  "num_key_value_heads": 4,
509
  "sequence_mixer_type": "stickbreaking_attention",
510
+
511
  },
512
  {
513
  "add_bias": false,
 
516
  "dropout": 0,
517
  "num_key_value_heads": 4,
518
  "sequence_mixer_type": "stickbreaking_attention",
519
+
520
  },
521
  {
522
  "add_bias": false,
 
525
  "dropout": 0,
526
  "num_key_value_heads": 4,
527
  "sequence_mixer_type": "stickbreaking_attention",
528
+
529
  },
530
  {
531
  "add_bias": false,
 
534
  "dropout": 0,
535
  "num_key_value_heads": 4,
536
  "sequence_mixer_type": "stickbreaking_attention",
537
+
538
  },
539
  {
540
  "add_bias": false,
 
543
  "dropout": 0,
544
  "num_key_value_heads": 4,
545
  "sequence_mixer_type": "stickbreaking_attention",
546
+
547
  },
548
  {
549
  "add_bias": false,
 
552
  "dropout": 0,
553
  "num_key_value_heads": 4,
554
  "sequence_mixer_type": "stickbreaking_attention",
555
+
556
  },
557
  {
558
  "add_bias": false,
 
561
  "dropout": 0,
562
  "num_key_value_heads": 4,
563
  "sequence_mixer_type": "stickbreaking_attention",
564
+
565
  },
566
  {
567
  "add_bias": false,
 
570
  "dropout": 0,
571
  "num_key_value_heads": 4,
572
  "sequence_mixer_type": "stickbreaking_attention",
573
+
574
  },
575
  {
576
  "add_bias": false,
 
579
  "dropout": 0,
580
  "num_key_value_heads": 4,
581
  "sequence_mixer_type": "stickbreaking_attention",
582
+
583
  },
584
  {
585
  "add_bias": false,
 
588
  "dropout": 0,
589
  "num_key_value_heads": 4,
590
  "sequence_mixer_type": "stickbreaking_attention",
591
+
592
  },
593
  {
594
  "add_bias": false,
 
597
  "dropout": 0,
598
  "num_key_value_heads": 4,
599
  "sequence_mixer_type": "stickbreaking_attention",
600
+
601
  },
602
  {
603
  "add_bias": false,
 
606
  "dropout": 0,
607
  "num_key_value_heads": 4,
608
  "sequence_mixer_type": "stickbreaking_attention",
609
+
610
  },
611
  {
612
  "add_bias": false,
 
615
  "dropout": 0,
616
  "num_key_value_heads": 4,
617
  "sequence_mixer_type": "stickbreaking_attention",
618
+
619
  },
620
  {
621
  "add_bias": false,
 
624
  "dropout": 0,
625
  "num_key_value_heads": 4,
626
  "sequence_mixer_type": "stickbreaking_attention",
627
+
628
  },
629
  {
630
  "add_bias": false,
 
633
  "dropout": 0,
634
  "num_key_value_heads": 4,
635
  "sequence_mixer_type": "stickbreaking_attention",
636
+
637
  },
638
  {
639
  "add_bias": false,
 
642
  "dropout": 0,
643
  "num_key_value_heads": 4,
644
  "sequence_mixer_type": "stickbreaking_attention",
645
+
646
  },
647
  {
648
  "add_bias": false,
 
651
  "dropout": 0,
652
  "num_key_value_heads": 4,
653
  "sequence_mixer_type": "stickbreaking_attention",
654
+
655
  },
656
  {
657
  "add_bias": false,
 
660
  "dropout": 0,
661
  "num_key_value_heads": 4,
662
  "sequence_mixer_type": "stickbreaking_attention",
663
+
664
  }
665
  ],
666
  "transformers_version": "4.49.0.dev0",