{ "architectures": [ "VJEPA2ForVideoClassification" ], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.0, "crop_size": 256, "drop_path_rate": 0.0, "frames_per_clip": 16, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 1024, "id2label": { "0": "Jump", "1": "ApplyEyeMakeup", "2": "ApplyLipstick", "3": "Biking", "4": "BlowDryHair", "5": "BrushingTeeth", "6": "CuttingInKitchen", "7": "Haircut", "8": "Hammering", "9": "HandstandPushups", "10": "HandstandWalking", "11": "HeadMassage", "12": "Knitting", "13": "PlayingDaf", "14": "Typing", "15": "WalkingWithDog", "16": "FallDown", "17": "LyingDown", "18": "SitDown", "19": "Sitting", "20": "StandUp", "21": "Standing", "22": "Walking", "23": "DrinkAngry", "24": "DrinkDisgust", "25": "DrinkHappy", "26": "DrinkNeutral", "27": "DrinkSad", "28": "PutonglassesAngry", "29": "PutonglassesHappy", "30": "PutonglassesNeutral", "31": "PutonglassesSad", "32": "PutonjacketAngry", "33": "PutonjacketHappy", "34": "PutonjacketNeutral", "35": "PutonjacketSad", "36": "ReadAngry", "37": "ReadHappy", "38": "ReadNeutral", "39": "ReadSad", "40": "SitdownAngry", "41": "SitdownHappy", "42": "SitdownNeutral", "43": "SitdownSad", "44": "StandupAngry", "45": "StandupHappy", "46": "StandupNeutral", "47": "StandupSad", "48": "TakeoffglassesAngry", "49": "TakeoffglassesHappy", "50": "TakeoffglassesNeutral", "51": "TakeoffglassesSad", "52": "TakeoffjacketAngry", "53": "TakeoffjacketHappy", "54": "TakeoffjacketNeutral", "55": "TakeoffjacketSad", "56": "WriteAngry", "57": "WriteHappy", "58": "WriteNeutral", "59": "Waving", "60": "Fighting", "61": "Clapping", "62": "WalkingWhileUsingPhone", "63": "ClosingABook", "64": "ClosingABox", "65": "ClosingAClosetCabinet", "66": "ClosingADoor", "67": "ClosingALaptop", "68": "ClosingARefrigerator", "69": "ClosingAWindow", "70": "DrinkingFromACupGlassBottle", "71": "EatingASandwich", "72": "FixingADoor", "73": "FixingADoorknob", "74": "FixingALight", "75": "FixingAVacuum", "76": "FixingTheirHair", "77": "GraspingOntoADoorknob", "78": "HoldingABag", "79": "HoldingABlanket", "80": "HoldingABook", "81": "HoldingABox", "82": "HoldingABroom", "83": "HoldingACupGlassBottleOfSomething", "84": "HoldingADish", "85": "HoldingALaptop", "86": "HoldingAMirror", "87": "HoldingAPhoneCamera", "88": "HoldingAPicture", "89": "HoldingAPillow", "90": "HoldingASandwich", "91": "SomeoneIsRunningSomewhere", "92": "SomeoneIsSmiling", "93": "SomeoneIsSneezing", "94": "SomeoneIsStandingUpFromSomewhere", "95": "HoldingAShoeShoes", "96": "HoldingATowelS", "97": "HoldingAVacuum", "98": "HoldingSomeClothes", "99": "HoldingSomeFood", "100": "HoldingSomeMedicine", "101": "LaughingAtAPicture", "102": "LaughingAtTelevision", "103": "LyingOnABed", "104": "LyingOnASofaCouch", "105": "LyingOnTheFloor", "106": "MakingASandwich", "107": "OpeningABag", "108": "OpeningABook", "109": "OpeningABox", "110": "OpeningAClosetCabinet", "111": "OpeningADoor", "112": "OpeningALaptop", "113": "OpeningARefrigerator", "114": "OpeningAWindow", "115": "PlayingWithAPhoneCamera", "116": "PouringSomethingIntoACupGlassBottle", "117": "PuttingABagSomewhere", "118": "PuttingABlanketSomewhere", "119": "PuttingABookSomewhere", "120": "PuttingABoxSomewhere", "121": "PuttingABroomSomewhere", "122": "PuttingACupGlassBottleSomewhere", "123": "PuttingADishEsSomewhere", "124": "PuttingALaptopSomewhere", "125": "PuttingAPhoneCameraSomewhere", "126": "PuttingAPictureSomewhere", "127": "PuttingAPillowSomewhere", "128": "PuttingASandwichSomewhere", "129": "PuttingATowelSSomewhere", "130": "PuttingClothesSomewhere", "131": "PuttingGroceriesSomewhere", "132": "PuttingOnShoeShoes", "133": "PuttingShoesSomewhere", "134": "PuttingSomeFoodSomewhere", "135": "PuttingSomethingOnAShelf", "136": "PuttingSomethingOnATable", "137": "PuttingTheirPaperNotebookSomewhere", "138": "ReachingForAndGrabbingAPicture", "139": "SittingAtATable", "140": "SittingInABed", "141": "SittingInAChair", "142": "SittingOnATable", "143": "SittingOnSofaCouch", "144": "SittingOnTheFloor", "145": "SmilingAtABook", "146": "SmilingInAMirror", "147": "SnugglingWithABlanket", "148": "SnugglingWithAPillow", "149": "SomeoneIsAwakeningInBed", "150": "SomeoneIsAwakeningSomewhere", "151": "SomeoneIsCookingSomething", "152": "SomeoneIsDressing", "153": "SomeoneIsEatingSomething", "154": "SomeoneIsGoingFromStandingToSitting", "155": "SomeoneIsHoldingAPaperNotebook", "156": "SomeoneIsLaughing", "157": "StandingOnAChair", "158": "SomeoneIsUndressing", "159": "TakingABlanketFromSomewhere", "160": "TakingABookFromSomewhere", "161": "TakingABoxFromSomewhere", "162": "TakingADishEsFromSomewhere", "163": "TakingALaptopFromSomewhere", "164": "TakingAPictureOfSomething", "165": "TakingABagFromSomewhere", "166": "TakingABroomFromSomewhere", "167": "TakingACupGlassBottleFromSomewhere", "168": "TakingAPhoneCameraFromSomewhere", "169": "TakingAPillowFromSomewhere", "170": "TakingASandwichFromSomewhere", "171": "TakingATowelSFromSomewhere", "172": "TakingAVacuumFromSomewhere", "173": "TakingConsumingSomeMedicine", "174": "TakingFoodFromSomewhere", "175": "TakingOffSomeShoes", "176": "TakingPaperNotebookFromSomewhere", "177": "TakingShoesFromSomewhere", "178": "TakingSomeClothesFromSomewhere", "179": "TakingSomethingFromABox", "180": "TalkingOnAPhoneCamera", "181": "ThrowingABagSomewhere", "182": "ThrowingABlanketSomewhere", "183": "ThrowingABookSomewhere", "184": "ThrowingABoxSomewhere", "185": "ThrowingABroomSomewhere", "186": "ThrowingAPillowSomewhere", "187": "ThrowingATowelSSomewhere", "188": "ThrowingClothesSomewhere", "189": "ThrowingFoodSomewhere", "190": "ThrowingShoesSomewhere", "191": "ThrowingSomethingOnTheFloor", "192": "TidyingAShelfOrSomethingOnAShelf", "193": "TidyingSomeClothes", "194": "TidyingSomethingOnTheFloor", "195": "TidyingUpABlanketS", "196": "TidyingUpAClosetCabinet", "197": "TidyingUpATable", "198": "TidyingUpATowelS", "199": "TidyingUpWithABroom", "200": "TurningOffALight", "201": "TurningOnALight", "202": "WalkingThroughADoorway", "203": "WashADishDishes", "204": "WashingACupGlassBottle", "205": "WashingAMirror", "206": "WashingATable", "207": "WashingAWindow", "208": "WashingSomeClothes", "209": "WashingSomethingWithATowel", "210": "WashingTheirHands", "211": "WatchingALaptopOrSomethingOnALaptop", "212": "WatchingLookingAtAPicture", "213": "WatchingLookingOutsideOfAWindow", "214": "WatchingReadingLookingAtABook", "215": "WatchingSomethingSomeoneThemselvesInAMirror", "216": "WatchingTelevision", "217": "WorkingAtATable", "218": "WorkingOnPaperNotebook", "219": "WorkingPlayingOnALaptop" }, "image_size": 256, "in_chans": 3, "initializer_range": 0.02, "label2id": { "ApplyEyeMakeup": 1, "ApplyLipstick": 2, "Biking": 3, "BlowDryHair": 4, "BrushingTeeth": 5, "Clapping": 61, "ClosingABook": 63, "ClosingABox": 64, "ClosingAClosetCabinet": 65, "ClosingADoor": 66, "ClosingALaptop": 67, "ClosingARefrigerator": 68, "ClosingAWindow": 69, "CuttingInKitchen": 6, "DrinkAngry": 23, "DrinkDisgust": 24, "DrinkHappy": 25, "DrinkNeutral": 26, "DrinkSad": 27, "DrinkingFromACupGlassBottle": 70, "EatingASandwich": 71, "FallDown": 16, "Fighting": 60, "FixingADoor": 72, "FixingADoorknob": 73, "FixingALight": 74, "FixingAVacuum": 75, "FixingTheirHair": 76, "GraspingOntoADoorknob": 77, "Haircut": 7, "Hammering": 8, "HandstandPushups": 9, "HandstandWalking": 10, "HeadMassage": 11, "HoldingABag": 78, "HoldingABlanket": 79, "HoldingABook": 80, "HoldingABox": 81, "HoldingABroom": 82, "HoldingACupGlassBottleOfSomething": 83, "HoldingADish": 84, "HoldingALaptop": 85, "HoldingAMirror": 86, "HoldingAPhoneCamera": 87, "HoldingAPicture": 88, "HoldingAPillow": 89, "HoldingASandwich": 90, "HoldingAShoeShoes": 95, "HoldingATowelS": 96, "HoldingAVacuum": 97, "HoldingSomeClothes": 98, "HoldingSomeFood": 99, "HoldingSomeMedicine": 100, "Jump": 0, "Knitting": 12, "LaughingAtAPicture": 101, "LaughingAtTelevision": 102, "LyingDown": 17, "LyingOnABed": 103, "LyingOnASofaCouch": 104, "LyingOnTheFloor": 105, "MakingASandwich": 106, "OpeningABag": 107, "OpeningABook": 108, "OpeningABox": 109, "OpeningAClosetCabinet": 110, "OpeningADoor": 111, "OpeningALaptop": 112, "OpeningARefrigerator": 113, "OpeningAWindow": 114, "PlayingDaf": 13, "PlayingWithAPhoneCamera": 115, "PouringSomethingIntoACupGlassBottle": 116, "PutonglassesAngry": 28, "PutonglassesHappy": 29, "PutonglassesNeutral": 30, "PutonglassesSad": 31, "PutonjacketAngry": 32, "PutonjacketHappy": 33, "PutonjacketNeutral": 34, "PutonjacketSad": 35, "PuttingABagSomewhere": 117, "PuttingABlanketSomewhere": 118, "PuttingABookSomewhere": 119, "PuttingABoxSomewhere": 120, "PuttingABroomSomewhere": 121, "PuttingACupGlassBottleSomewhere": 122, "PuttingADishEsSomewhere": 123, "PuttingALaptopSomewhere": 124, "PuttingAPhoneCameraSomewhere": 125, "PuttingAPictureSomewhere": 126, "PuttingAPillowSomewhere": 127, "PuttingASandwichSomewhere": 128, "PuttingATowelSSomewhere": 129, "PuttingClothesSomewhere": 130, "PuttingGroceriesSomewhere": 131, "PuttingOnShoeShoes": 132, "PuttingShoesSomewhere": 133, "PuttingSomeFoodSomewhere": 134, "PuttingSomethingOnAShelf": 135, "PuttingSomethingOnATable": 136, "PuttingTheirPaperNotebookSomewhere": 137, "ReachingForAndGrabbingAPicture": 138, "ReadAngry": 36, "ReadHappy": 37, "ReadNeutral": 38, "ReadSad": 39, "SitDown": 18, "SitdownAngry": 40, "SitdownHappy": 41, "SitdownNeutral": 42, "SitdownSad": 43, "Sitting": 19, "SittingAtATable": 139, "SittingInABed": 140, "SittingInAChair": 141, "SittingOnATable": 142, "SittingOnSofaCouch": 143, "SittingOnTheFloor": 144, "SmilingAtABook": 145, "SmilingInAMirror": 146, "SnugglingWithABlanket": 147, "SnugglingWithAPillow": 148, "SomeoneIsAwakeningInBed": 149, "SomeoneIsAwakeningSomewhere": 150, "SomeoneIsCookingSomething": 151, "SomeoneIsDressing": 152, "SomeoneIsEatingSomething": 153, "SomeoneIsGoingFromStandingToSitting": 154, "SomeoneIsHoldingAPaperNotebook": 155, "SomeoneIsLaughing": 156, "SomeoneIsRunningSomewhere": 91, "SomeoneIsSmiling": 92, "SomeoneIsSneezing": 93, "SomeoneIsStandingUpFromSomewhere": 94, "SomeoneIsUndressing": 158, "StandUp": 20, "Standing": 21, "StandingOnAChair": 157, "StandupAngry": 44, "StandupHappy": 45, "StandupNeutral": 46, "StandupSad": 47, "TakeoffglassesAngry": 48, "TakeoffglassesHappy": 49, "TakeoffglassesNeutral": 50, "TakeoffglassesSad": 51, "TakeoffjacketAngry": 52, "TakeoffjacketHappy": 53, "TakeoffjacketNeutral": 54, "TakeoffjacketSad": 55, "TakingABagFromSomewhere": 165, "TakingABlanketFromSomewhere": 159, "TakingABookFromSomewhere": 160, "TakingABoxFromSomewhere": 161, "TakingABroomFromSomewhere": 166, "TakingACupGlassBottleFromSomewhere": 167, "TakingADishEsFromSomewhere": 162, "TakingALaptopFromSomewhere": 163, "TakingAPhoneCameraFromSomewhere": 168, "TakingAPictureOfSomething": 164, "TakingAPillowFromSomewhere": 169, "TakingASandwichFromSomewhere": 170, "TakingATowelSFromSomewhere": 171, "TakingAVacuumFromSomewhere": 172, "TakingConsumingSomeMedicine": 173, "TakingFoodFromSomewhere": 174, "TakingOffSomeShoes": 175, "TakingPaperNotebookFromSomewhere": 176, "TakingShoesFromSomewhere": 177, "TakingSomeClothesFromSomewhere": 178, "TakingSomethingFromABox": 179, "TalkingOnAPhoneCamera": 180, "ThrowingABagSomewhere": 181, "ThrowingABlanketSomewhere": 182, "ThrowingABookSomewhere": 183, "ThrowingABoxSomewhere": 184, "ThrowingABroomSomewhere": 185, "ThrowingAPillowSomewhere": 186, "ThrowingATowelSSomewhere": 187, "ThrowingClothesSomewhere": 188, "ThrowingFoodSomewhere": 189, "ThrowingShoesSomewhere": 190, "ThrowingSomethingOnTheFloor": 191, "TidyingAShelfOrSomethingOnAShelf": 192, "TidyingSomeClothes": 193, "TidyingSomethingOnTheFloor": 194, "TidyingUpABlanketS": 195, "TidyingUpAClosetCabinet": 196, "TidyingUpATable": 197, "TidyingUpATowelS": 198, "TidyingUpWithABroom": 199, "TurningOffALight": 200, "TurningOnALight": 201, "Typing": 14, "Walking": 22, "WalkingThroughADoorway": 202, "WalkingWhileUsingPhone": 62, "WalkingWithDog": 15, "WashADishDishes": 203, "WashingACupGlassBottle": 204, "WashingAMirror": 205, "WashingATable": 206, "WashingAWindow": 207, "WashingSomeClothes": 208, "WashingSomethingWithATowel": 209, "WashingTheirHands": 210, "WatchingALaptopOrSomethingOnALaptop": 211, "WatchingLookingAtAPicture": 212, "WatchingLookingOutsideOfAWindow": 213, "WatchingReadingLookingAtABook": 214, "WatchingSomethingSomeoneThemselvesInAMirror": 215, "WatchingTelevision": 216, "Waving": 59, "WorkingAtATable": 217, "WorkingOnPaperNotebook": 218, "WorkingPlayingOnALaptop": 219, "WriteAngry": 56, "WriteHappy": 57, "WriteNeutral": 58 }, "layer_norm_eps": 1e-06, "mlp_ratio": 4, "model_type": "vjepa2", "num_attention_heads": 16, "num_hidden_layers": 24, "num_pooler_layers": 3, "patch_size": 16, "pred_hidden_size": 384, "pred_mlp_ratio": 4.0, "pred_num_attention_heads": 12, "pred_num_hidden_layers": 12, "pred_num_mask_tokens": 10, "pred_zero_init_mask_tokens": true, "problem_type": "single_label_classification", "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.54.0.dev0", "tubelet_size": 2, "use_SiLU": false, "wide_SiLU": true }