from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import numpy as np 

class VitBase:
    """Thin wrapper around a pretrained ViT feature extractor and classifier.

    The feature extractor and the model are both loaded from the same
    Hugging Face checkpoint when the object is constructed.
    """

    def __init__(self, model_name='google/vit-base-patch16-224'):
        """Load the feature extractor and the classification model.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``
                for both the feature extractor and the model. Defaults to
                the checkpoint that was previously hard-coded.
        """
        self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
        self.model = ViTForImageClassification.from_pretrained(model_name)

    def extract_feature(self, imgs):
        """Preprocess each image, run it through the model, return the inputs.

        Args:
            imgs: Iterable of images, each passed on its own to the feature
                extractor as ``images=img``.

        Returns:
            A list with one NumPy array per input image holding the
            extractor's ``pixel_values`` with all size-1 axes removed
            (presumably ``(channels, height, width)`` for a single image --
            TODO confirm). An empty input yields an empty list.
        """
        features = []
        for img in imgs:
            # ViTForImageClassification is the PyTorch model class, so the
            # extractor must produce PyTorch ("pt") tensors, not TensorFlow.
            inputs = self.feature_extractor(images=img, return_tensors="pt")
            print('keys: ', inputs.keys())
            # Unpack the dict-like extractor output into keyword arguments;
            # passing the container itself as ``pixel_values`` fails.
            f = self.model(**inputs)
            print('--> f', type(f))
            # NOTE(review): the model output ``f`` is only inspected above;
            # the value returned to callers is the preprocessed pixel tensor.
            # Confirm whether logits / hidden states were intended instead.
            # Convert to NumPy first so np.squeeze returns an ndarray rather
            # than deferring to the tensor's own ``squeeze`` method.
            features.append(np.squeeze(inputs['pixel_values'].numpy()))
            print('shape:::', features[0].shape)
        return features