Shawn Tan committed on
Commit
f5eaf34
·
1 Parent(s): bf62d95

Cleanup config.

Browse files
Files changed (1) hide show
  1. config.json +40 -80
config.json CHANGED
@@ -308,8 +308,7 @@
308
  "attention_multiplier": 0.0078125,
309
  "dropout": 0,
310
  "num_key_value_heads": 4,
311
- "sequence_mixer_type": "stickbreaking_attention",
312
-
313
  },
314
  {
315
  "add_bias": false,
@@ -317,8 +316,7 @@
317
  "attention_multiplier": 0.0078125,
318
  "dropout": 0,
319
  "num_key_value_heads": 4,
320
- "sequence_mixer_type": "stickbreaking_attention",
321
-
322
  },
323
  {
324
  "add_bias": false,
@@ -326,8 +324,7 @@
326
  "attention_multiplier": 0.0078125,
327
  "dropout": 0,
328
  "num_key_value_heads": 4,
329
- "sequence_mixer_type": "stickbreaking_attention",
330
-
331
  },
332
  {
333
  "add_bias": false,
@@ -335,8 +332,7 @@
335
  "attention_multiplier": 0.0078125,
336
  "dropout": 0,
337
  "num_key_value_heads": 4,
338
- "sequence_mixer_type": "stickbreaking_attention",
339
-
340
  },
341
  {
342
  "add_bias": false,
@@ -344,8 +340,7 @@
344
  "attention_multiplier": 0.0078125,
345
  "dropout": 0,
346
  "num_key_value_heads": 4,
347
- "sequence_mixer_type": "stickbreaking_attention",
348
-
349
  },
350
  {
351
  "add_bias": false,
@@ -353,8 +348,7 @@
353
  "attention_multiplier": 0.0078125,
354
  "dropout": 0,
355
  "num_key_value_heads": 4,
356
- "sequence_mixer_type": "stickbreaking_attention",
357
-
358
  },
359
  {
360
  "add_bias": false,
@@ -362,8 +356,7 @@
362
  "attention_multiplier": 0.0078125,
363
  "dropout": 0,
364
  "num_key_value_heads": 4,
365
- "sequence_mixer_type": "stickbreaking_attention",
366
-
367
  },
368
  {
369
  "add_bias": false,
@@ -371,8 +364,7 @@
371
  "attention_multiplier": 0.0078125,
372
  "dropout": 0,
373
  "num_key_value_heads": 4,
374
- "sequence_mixer_type": "stickbreaking_attention",
375
-
376
  },
377
  {
378
  "add_bias": false,
@@ -380,8 +372,7 @@
380
  "attention_multiplier": 0.0078125,
381
  "dropout": 0,
382
  "num_key_value_heads": 4,
383
- "sequence_mixer_type": "stickbreaking_attention",
384
-
385
  },
386
  {
387
  "add_bias": false,
@@ -389,8 +380,7 @@
389
  "attention_multiplier": 0.0078125,
390
  "dropout": 0,
391
  "num_key_value_heads": 4,
392
- "sequence_mixer_type": "stickbreaking_attention",
393
-
394
  },
395
  {
396
  "add_bias": false,
@@ -398,8 +388,7 @@
398
  "attention_multiplier": 0.0078125,
399
  "dropout": 0,
400
  "num_key_value_heads": 4,
401
- "sequence_mixer_type": "stickbreaking_attention",
402
-
403
  },
404
  {
405
  "add_bias": false,
@@ -407,8 +396,7 @@
407
  "attention_multiplier": 0.0078125,
408
  "dropout": 0,
409
  "num_key_value_heads": 4,
410
- "sequence_mixer_type": "stickbreaking_attention",
411
-
412
  },
413
  {
414
  "add_bias": false,
@@ -416,8 +404,7 @@
416
  "attention_multiplier": 0.0078125,
417
  "dropout": 0,
418
  "num_key_value_heads": 4,
419
- "sequence_mixer_type": "stickbreaking_attention",
420
-
421
  },
422
  {
423
  "add_bias": false,
@@ -425,8 +412,7 @@
425
  "attention_multiplier": 0.0078125,
426
  "dropout": 0,
427
  "num_key_value_heads": 4,
428
- "sequence_mixer_type": "stickbreaking_attention",
429
-
430
  },
431
  {
432
  "add_bias": false,
@@ -434,8 +420,7 @@
434
  "attention_multiplier": 0.0078125,
435
  "dropout": 0,
436
  "num_key_value_heads": 4,
437
- "sequence_mixer_type": "stickbreaking_attention",
438
-
439
  },
440
  {
441
  "add_bias": false,
@@ -443,8 +428,7 @@
443
  "attention_multiplier": 0.0078125,
444
  "dropout": 0,
445
  "num_key_value_heads": 4,
446
- "sequence_mixer_type": "stickbreaking_attention",
447
-
448
  },
449
  {
450
  "add_bias": false,
@@ -452,8 +436,7 @@
452
  "attention_multiplier": 0.0078125,
453
  "dropout": 0,
454
  "num_key_value_heads": 4,
455
- "sequence_mixer_type": "stickbreaking_attention",
456
-
457
  },
458
  {
459
  "add_bias": false,
@@ -461,8 +444,7 @@
461
  "attention_multiplier": 0.0078125,
462
  "dropout": 0,
463
  "num_key_value_heads": 4,
464
- "sequence_mixer_type": "stickbreaking_attention",
465
-
466
  },
467
  {
468
  "add_bias": false,
@@ -470,8 +452,7 @@
470
  "attention_multiplier": 0.0078125,
471
  "dropout": 0,
472
  "num_key_value_heads": 4,
473
- "sequence_mixer_type": "stickbreaking_attention",
474
-
475
  },
476
  {
477
  "add_bias": false,
@@ -479,8 +460,7 @@
479
  "attention_multiplier": 0.0078125,
480
  "dropout": 0,
481
  "num_key_value_heads": 4,
482
- "sequence_mixer_type": "stickbreaking_attention",
483
-
484
  },
485
  {
486
  "add_bias": false,
@@ -488,8 +468,7 @@
488
  "attention_multiplier": 0.0078125,
489
  "dropout": 0,
490
  "num_key_value_heads": 4,
491
- "sequence_mixer_type": "stickbreaking_attention",
492
-
493
  },
494
  {
495
  "add_bias": false,
@@ -497,8 +476,7 @@
497
  "attention_multiplier": 0.0078125,
498
  "dropout": 0,
499
  "num_key_value_heads": 4,
500
- "sequence_mixer_type": "stickbreaking_attention",
501
-
502
  },
503
  {
504
  "add_bias": false,
@@ -506,8 +484,7 @@
506
  "attention_multiplier": 0.0078125,
507
  "dropout": 0,
508
  "num_key_value_heads": 4,
509
- "sequence_mixer_type": "stickbreaking_attention",
510
-
511
  },
512
  {
513
  "add_bias": false,
@@ -515,8 +492,7 @@
515
  "attention_multiplier": 0.0078125,
516
  "dropout": 0,
517
  "num_key_value_heads": 4,
518
- "sequence_mixer_type": "stickbreaking_attention",
519
-
520
  },
521
  {
522
  "add_bias": false,
@@ -524,8 +500,7 @@
524
  "attention_multiplier": 0.0078125,
525
  "dropout": 0,
526
  "num_key_value_heads": 4,
527
- "sequence_mixer_type": "stickbreaking_attention",
528
-
529
  },
530
  {
531
  "add_bias": false,
@@ -533,8 +508,7 @@
533
  "attention_multiplier": 0.0078125,
534
  "dropout": 0,
535
  "num_key_value_heads": 4,
536
- "sequence_mixer_type": "stickbreaking_attention",
537
-
538
  },
539
  {
540
  "add_bias": false,
@@ -542,8 +516,7 @@
542
  "attention_multiplier": 0.0078125,
543
  "dropout": 0,
544
  "num_key_value_heads": 4,
545
- "sequence_mixer_type": "stickbreaking_attention",
546
-
547
  },
548
  {
549
  "add_bias": false,
@@ -551,8 +524,7 @@
551
  "attention_multiplier": 0.0078125,
552
  "dropout": 0,
553
  "num_key_value_heads": 4,
554
- "sequence_mixer_type": "stickbreaking_attention",
555
-
556
  },
557
  {
558
  "add_bias": false,
@@ -560,8 +532,7 @@
560
  "attention_multiplier": 0.0078125,
561
  "dropout": 0,
562
  "num_key_value_heads": 4,
563
- "sequence_mixer_type": "stickbreaking_attention",
564
-
565
  },
566
  {
567
  "add_bias": false,
@@ -569,8 +540,7 @@
569
  "attention_multiplier": 0.0078125,
570
  "dropout": 0,
571
  "num_key_value_heads": 4,
572
- "sequence_mixer_type": "stickbreaking_attention",
573
-
574
  },
575
  {
576
  "add_bias": false,
@@ -578,8 +548,7 @@
578
  "attention_multiplier": 0.0078125,
579
  "dropout": 0,
580
  "num_key_value_heads": 4,
581
- "sequence_mixer_type": "stickbreaking_attention",
582
-
583
  },
584
  {
585
  "add_bias": false,
@@ -587,8 +556,7 @@
587
  "attention_multiplier": 0.0078125,
588
  "dropout": 0,
589
  "num_key_value_heads": 4,
590
- "sequence_mixer_type": "stickbreaking_attention",
591
-
592
  },
593
  {
594
  "add_bias": false,
@@ -596,8 +564,7 @@
596
  "attention_multiplier": 0.0078125,
597
  "dropout": 0,
598
  "num_key_value_heads": 4,
599
- "sequence_mixer_type": "stickbreaking_attention",
600
-
601
  },
602
  {
603
  "add_bias": false,
@@ -605,8 +572,7 @@
605
  "attention_multiplier": 0.0078125,
606
  "dropout": 0,
607
  "num_key_value_heads": 4,
608
- "sequence_mixer_type": "stickbreaking_attention",
609
-
610
  },
611
  {
612
  "add_bias": false,
@@ -614,8 +580,7 @@
614
  "attention_multiplier": 0.0078125,
615
  "dropout": 0,
616
  "num_key_value_heads": 4,
617
- "sequence_mixer_type": "stickbreaking_attention",
618
-
619
  },
620
  {
621
  "add_bias": false,
@@ -623,8 +588,7 @@
623
  "attention_multiplier": 0.0078125,
624
  "dropout": 0,
625
  "num_key_value_heads": 4,
626
- "sequence_mixer_type": "stickbreaking_attention",
627
-
628
  },
629
  {
630
  "add_bias": false,
@@ -632,8 +596,7 @@
632
  "attention_multiplier": 0.0078125,
633
  "dropout": 0,
634
  "num_key_value_heads": 4,
635
- "sequence_mixer_type": "stickbreaking_attention",
636
-
637
  },
638
  {
639
  "add_bias": false,
@@ -641,8 +604,7 @@
641
  "attention_multiplier": 0.0078125,
642
  "dropout": 0,
643
  "num_key_value_heads": 4,
644
- "sequence_mixer_type": "stickbreaking_attention",
645
-
646
  },
647
  {
648
  "add_bias": false,
@@ -650,8 +612,7 @@
650
  "attention_multiplier": 0.0078125,
651
  "dropout": 0,
652
  "num_key_value_heads": 4,
653
- "sequence_mixer_type": "stickbreaking_attention",
654
-
655
  },
656
  {
657
  "add_bias": false,
@@ -659,8 +620,7 @@
659
  "attention_multiplier": 0.0078125,
660
  "dropout": 0,
661
  "num_key_value_heads": 4,
662
- "sequence_mixer_type": "stickbreaking_attention",
663
-
664
  }
665
  ],
666
  "transformers_version": "4.49.0.dev0",
 
308
  "attention_multiplier": 0.0078125,
309
  "dropout": 0,
310
  "num_key_value_heads": 4,
311
+ "sequence_mixer_type": "stickbreaking_attention"
 
312
  },
313
  {
314
  "add_bias": false,
 
316
  "attention_multiplier": 0.0078125,
317
  "dropout": 0,
318
  "num_key_value_heads": 4,
319
+ "sequence_mixer_type": "stickbreaking_attention"
 
320
  },
321
  {
322
  "add_bias": false,
 
324
  "attention_multiplier": 0.0078125,
325
  "dropout": 0,
326
  "num_key_value_heads": 4,
327
+ "sequence_mixer_type": "stickbreaking_attention"
 
328
  },
329
  {
330
  "add_bias": false,
 
332
  "attention_multiplier": 0.0078125,
333
  "dropout": 0,
334
  "num_key_value_heads": 4,
335
+ "sequence_mixer_type": "stickbreaking_attention"
 
336
  },
337
  {
338
  "add_bias": false,
 
340
  "attention_multiplier": 0.0078125,
341
  "dropout": 0,
342
  "num_key_value_heads": 4,
343
+ "sequence_mixer_type": "stickbreaking_attention"
 
344
  },
345
  {
346
  "add_bias": false,
 
348
  "attention_multiplier": 0.0078125,
349
  "dropout": 0,
350
  "num_key_value_heads": 4,
351
+ "sequence_mixer_type": "stickbreaking_attention"
 
352
  },
353
  {
354
  "add_bias": false,
 
356
  "attention_multiplier": 0.0078125,
357
  "dropout": 0,
358
  "num_key_value_heads": 4,
359
+ "sequence_mixer_type": "stickbreaking_attention"
 
360
  },
361
  {
362
  "add_bias": false,
 
364
  "attention_multiplier": 0.0078125,
365
  "dropout": 0,
366
  "num_key_value_heads": 4,
367
+ "sequence_mixer_type": "stickbreaking_attention"
 
368
  },
369
  {
370
  "add_bias": false,
 
372
  "attention_multiplier": 0.0078125,
373
  "dropout": 0,
374
  "num_key_value_heads": 4,
375
+ "sequence_mixer_type": "stickbreaking_attention"
 
376
  },
377
  {
378
  "add_bias": false,
 
380
  "attention_multiplier": 0.0078125,
381
  "dropout": 0,
382
  "num_key_value_heads": 4,
383
+ "sequence_mixer_type": "stickbreaking_attention"
 
384
  },
385
  {
386
  "add_bias": false,
 
388
  "attention_multiplier": 0.0078125,
389
  "dropout": 0,
390
  "num_key_value_heads": 4,
391
+ "sequence_mixer_type": "stickbreaking_attention"
 
392
  },
393
  {
394
  "add_bias": false,
 
396
  "attention_multiplier": 0.0078125,
397
  "dropout": 0,
398
  "num_key_value_heads": 4,
399
+ "sequence_mixer_type": "stickbreaking_attention"
 
400
  },
401
  {
402
  "add_bias": false,
 
404
  "attention_multiplier": 0.0078125,
405
  "dropout": 0,
406
  "num_key_value_heads": 4,
407
+ "sequence_mixer_type": "stickbreaking_attention"
 
408
  },
409
  {
410
  "add_bias": false,
 
412
  "attention_multiplier": 0.0078125,
413
  "dropout": 0,
414
  "num_key_value_heads": 4,
415
+ "sequence_mixer_type": "stickbreaking_attention"
 
416
  },
417
  {
418
  "add_bias": false,
 
420
  "attention_multiplier": 0.0078125,
421
  "dropout": 0,
422
  "num_key_value_heads": 4,
423
+ "sequence_mixer_type": "stickbreaking_attention"
 
424
  },
425
  {
426
  "add_bias": false,
 
428
  "attention_multiplier": 0.0078125,
429
  "dropout": 0,
430
  "num_key_value_heads": 4,
431
+ "sequence_mixer_type": "stickbreaking_attention"
 
432
  },
433
  {
434
  "add_bias": false,
 
436
  "attention_multiplier": 0.0078125,
437
  "dropout": 0,
438
  "num_key_value_heads": 4,
439
+ "sequence_mixer_type": "stickbreaking_attention"
 
440
  },
441
  {
442
  "add_bias": false,
 
444
  "attention_multiplier": 0.0078125,
445
  "dropout": 0,
446
  "num_key_value_heads": 4,
447
+ "sequence_mixer_type": "stickbreaking_attention"
 
448
  },
449
  {
450
  "add_bias": false,
 
452
  "attention_multiplier": 0.0078125,
453
  "dropout": 0,
454
  "num_key_value_heads": 4,
455
+ "sequence_mixer_type": "stickbreaking_attention"
 
456
  },
457
  {
458
  "add_bias": false,
 
460
  "attention_multiplier": 0.0078125,
461
  "dropout": 0,
462
  "num_key_value_heads": 4,
463
+ "sequence_mixer_type": "stickbreaking_attention"
 
464
  },
465
  {
466
  "add_bias": false,
 
468
  "attention_multiplier": 0.0078125,
469
  "dropout": 0,
470
  "num_key_value_heads": 4,
471
+ "sequence_mixer_type": "stickbreaking_attention"
 
472
  },
473
  {
474
  "add_bias": false,
 
476
  "attention_multiplier": 0.0078125,
477
  "dropout": 0,
478
  "num_key_value_heads": 4,
479
+ "sequence_mixer_type": "stickbreaking_attention"
 
480
  },
481
  {
482
  "add_bias": false,
 
484
  "attention_multiplier": 0.0078125,
485
  "dropout": 0,
486
  "num_key_value_heads": 4,
487
+ "sequence_mixer_type": "stickbreaking_attention"
 
488
  },
489
  {
490
  "add_bias": false,
 
492
  "attention_multiplier": 0.0078125,
493
  "dropout": 0,
494
  "num_key_value_heads": 4,
495
+ "sequence_mixer_type": "stickbreaking_attention"
 
496
  },
497
  {
498
  "add_bias": false,
 
500
  "attention_multiplier": 0.0078125,
501
  "dropout": 0,
502
  "num_key_value_heads": 4,
503
+ "sequence_mixer_type": "stickbreaking_attention"
 
504
  },
505
  {
506
  "add_bias": false,
 
508
  "attention_multiplier": 0.0078125,
509
  "dropout": 0,
510
  "num_key_value_heads": 4,
511
+ "sequence_mixer_type": "stickbreaking_attention"
 
512
  },
513
  {
514
  "add_bias": false,
 
516
  "attention_multiplier": 0.0078125,
517
  "dropout": 0,
518
  "num_key_value_heads": 4,
519
+ "sequence_mixer_type": "stickbreaking_attention"
 
520
  },
521
  {
522
  "add_bias": false,
 
524
  "attention_multiplier": 0.0078125,
525
  "dropout": 0,
526
  "num_key_value_heads": 4,
527
+ "sequence_mixer_type": "stickbreaking_attention"
 
528
  },
529
  {
530
  "add_bias": false,
 
532
  "attention_multiplier": 0.0078125,
533
  "dropout": 0,
534
  "num_key_value_heads": 4,
535
+ "sequence_mixer_type": "stickbreaking_attention"
 
536
  },
537
  {
538
  "add_bias": false,
 
540
  "attention_multiplier": 0.0078125,
541
  "dropout": 0,
542
  "num_key_value_heads": 4,
543
+ "sequence_mixer_type": "stickbreaking_attention"
 
544
  },
545
  {
546
  "add_bias": false,
 
548
  "attention_multiplier": 0.0078125,
549
  "dropout": 0,
550
  "num_key_value_heads": 4,
551
+ "sequence_mixer_type": "stickbreaking_attention"
 
552
  },
553
  {
554
  "add_bias": false,
 
556
  "attention_multiplier": 0.0078125,
557
  "dropout": 0,
558
  "num_key_value_heads": 4,
559
+ "sequence_mixer_type": "stickbreaking_attention"
 
560
  },
561
  {
562
  "add_bias": false,
 
564
  "attention_multiplier": 0.0078125,
565
  "dropout": 0,
566
  "num_key_value_heads": 4,
567
+ "sequence_mixer_type": "stickbreaking_attention"
 
568
  },
569
  {
570
  "add_bias": false,
 
572
  "attention_multiplier": 0.0078125,
573
  "dropout": 0,
574
  "num_key_value_heads": 4,
575
+ "sequence_mixer_type": "stickbreaking_attention"
 
576
  },
577
  {
578
  "add_bias": false,
 
580
  "attention_multiplier": 0.0078125,
581
  "dropout": 0,
582
  "num_key_value_heads": 4,
583
+ "sequence_mixer_type": "stickbreaking_attention"
 
584
  },
585
  {
586
  "add_bias": false,
 
588
  "attention_multiplier": 0.0078125,
589
  "dropout": 0,
590
  "num_key_value_heads": 4,
591
+ "sequence_mixer_type": "stickbreaking_attention"
 
592
  },
593
  {
594
  "add_bias": false,
 
596
  "attention_multiplier": 0.0078125,
597
  "dropout": 0,
598
  "num_key_value_heads": 4,
599
+ "sequence_mixer_type": "stickbreaking_attention"
 
600
  },
601
  {
602
  "add_bias": false,
 
604
  "attention_multiplier": 0.0078125,
605
  "dropout": 0,
606
  "num_key_value_heads": 4,
607
+ "sequence_mixer_type": "stickbreaking_attention"
 
608
  },
609
  {
610
  "add_bias": false,
 
612
  "attention_multiplier": 0.0078125,
613
  "dropout": 0,
614
  "num_key_value_heads": 4,
615
+ "sequence_mixer_type": "stickbreaking_attention"
 
616
  },
617
  {
618
  "add_bias": false,
 
620
  "attention_multiplier": 0.0078125,
621
  "dropout": 0,
622
  "num_key_value_heads": 4,
623
+ "sequence_mixer_type": "stickbreaking_attention"
 
624
  }
625
  ],
626
  "transformers_version": "4.49.0.dev0",