+
+Mask image | Reference image | Generated image
+
+#### text_guided_image_inpainting-kandinsky2_2
+```python
+import numpy as np
+import paddle
+
+from ppdiffusers import KandinskyV22InpaintPipeline, KandinskyV22PriorPipeline
+from ppdiffusers.utils import load_image
+
+pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
+ "kandinsky-community/kandinsky-2-2-prior", paddle_dtype=paddle.float16
+)
+prompt = "a hat"
+image_emb, zero_image_emb = pipe_prior(prompt, return_dict=False)
+pipe = KandinskyV22InpaintPipeline.from_pretrained(
+ "kandinsky-community/kandinsky-2-2-decoder-inpaint", paddle_dtype=paddle.float16
+)
+init_image = load_image(
+ "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png"
+)
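+# pixels set to 1 mark the region to be repainted; here the top strip of the cat image is masked so the hat is generated there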
+mask = np.zeros((768, 768), dtype=np.float32)
+mask[:250, 250:-250] = 1
+out = pipe(
+ image=init_image,
+ mask_image=mask,
+ image_embeds=image_emb,
+ negative_image_embeds=zero_image_emb,
+ height=768,
+ width=768,
+ num_inference_steps=50,
+)
+image = out.images[0]
+image.save("text_guided_image_inpainting-kandinsky2_2-result-cat_with_hat.png")
+```
+
+Original image | Generated image
+
+Image-to-Image Text-Guided Generation
+
+#### image_to_image_text_guided_generation-stable_diffusion
+```python
+import paddle
+
+from ppdiffusers import StableDiffusionImg2ImgPipeline
+from ppdiffusers.utils import load_image
+
+# Load the pipeline
+pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+
+# Download the initial image
+url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/sketch-mountains-input.png"
+
+init_image = load_image(url).resize((768, 512))
+
+prompt = "A fantasy landscape, trending on artstation"
+# Use fp16 (AMP auto-cast) to speed up generation
+with paddle.amp.auto_cast(True):
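+    # strength=0.75 controls how far the result may deviate from the initial sketch (0 keeps it unchanged, 1 ignores it)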
+ image = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images[0]
+
+image.save("fantasy_landscape.png")
+```
+
+Original image | Generated image
+
+#### image_to_image_text_guided_generation-stable_diffusion_xl
+```python
+import paddle
+from ppdiffusers import StableDiffusionXLImg2ImgPipeline
+from ppdiffusers.utils import load_image
+
+pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-refiner-1.0",
+ paddle_dtype=paddle.float16,
+ # from_hf_hub=True,
+ # from_diffusers=True,
+ variant="fp16"
+)
+url = "https://paddlenlp.bj.bcebos.com/models/community/westfish/develop-0-19-3/000000009.png"
+init_image = load_image(url).convert("RGB")
+prompt = "a photo of an astronaut riding a horse on mars"
+image = pipe(prompt, image=init_image).images[0]
+image.save('sdxl_image2image.png')
+```
+
+Original image | Generated image
+
+#### image_to_image_text_guided_generation-kandinsky2_2
+```python
+import paddle
+
+from ppdiffusers import KandinskyV22Img2ImgPipeline, KandinskyV22PriorPipeline
+from ppdiffusers.utils import load_image
+
+pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
+ "kandinsky-community/kandinsky-2-2-prior", paddle_dtype=paddle.float16
+)
+prompt = "A red cartoon frog, 4k"
+image_emb, zero_image_emb = pipe_prior(prompt, return_dict=False)
+pipe = KandinskyV22Img2ImgPipeline.from_pretrained(
+ "kandinsky-community/kandinsky-2-2-decoder", paddle_dtype=paddle.float16
+)
+
+init_image = load_image(
+ "https://hf-mirror.com/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/frog.png"
+)
+image = pipe(
+ image=init_image,
+ image_embeds=image_emb,
+ negative_image_embeds=zero_image_emb,
+ height=768,
+ width=768,
+ num_inference_steps=100,
+ strength=0.2,
+).images
+image[0].save("image_to_image_text_guided_generation-kandinsky2_2-result-red_frog.png")
+```
+
+Original image | Generated image
+
+Dual Text and Image Guided Generation
+
+#### dual_text_and_image_guided_generation-versatile_diffusion
+```python
+from ppdiffusers import VersatileDiffusionDualGuidedPipeline
+from ppdiffusers.utils import load_image
+
+url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/benz.jpg"
+image = load_image(url)
+text = "a red car in the sun"
+
+pipe = VersatileDiffusionDualGuidedPipeline.from_pretrained("shi-labs/versatile-diffusion")
+pipe.remove_unused_weights()
+
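+# text_to_image_strength weights the two conditions: closer to 1 follows the text prompt more, closer to 0 follows the reference image more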
+text_to_image_strength = 0.75
+image = pipe(prompt=text, image=image, text_to_image_strength=text_to_image_strength).images[0]
+image.save("versatile-diffusion-red_car.png")
+```
+
+Original image | Generated image
+
+### Text-and-Video Multimodal
+
+Text-to-Video Generation
+
+#### text_to_video_generation-lvdm
+
+```python
+import paddle
+
+from ppdiffusers import LVDMTextToVideoPipeline
+
+# Load the model and scheduler
+pipe = LVDMTextToVideoPipeline.from_pretrained("westfish/lvdm_text2video_orig_webvid_2m")
+
+# Run the pipeline for inference
+seed = 2013
+generator = paddle.Generator().manual_seed(seed)
+samples = pipe(
+ prompt="cutting in kitchen",
+ num_frames=16,
+ height=256,
+ width=256,
+ num_inference_steps=50,
+ generator=generator,
+ guidance_scale=15,
+ eta=1,
+ save_dir=".",
+ save_name="text_to_video_generation-lvdm-result-ddim_lvdm_text_to_video_ucf",
+ encoder_type="2d",
+ scale_factor=0.18215,
+ shift_factor=0,
+)
+```
+
+#### text_to_video_generation-synth
+
+```python
+import imageio
+
+from ppdiffusers import DPMSolverMultistepScheduler, TextToVideoSDPipeline
+
+pipe = TextToVideoSDPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b")
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+
+prompt = "An astronaut riding a horse."
+video_frames = pipe(prompt, num_inference_steps=25).frames
+imageio.mimsave("text_to_video_generation-synth-result-astronaut_riding_a_horse.mp4", video_frames, fps=8)
+```
+
+#### text_to_video_generation-synth with zeroscope_v2_XL
+
+```python
+import imageio
+
+from ppdiffusers import DPMSolverMultistepScheduler, TextToVideoSDPipeline
+
+# from ppdiffusers.utils import export_to_video
+
+pipe = TextToVideoSDPipeline.from_pretrained("cerspense/zeroscope_v2_XL")
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+
+prompt = "An astronaut riding a horse."
+video_frames = pipe(prompt, num_inference_steps=50, height=320, width=576, num_frames=24).frames
+imageio.mimsave("text_to_video_generation-synth-result-astronaut_riding_a_horse.mp4", video_frames, fps=8)
+```
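+
+As an alternative to calling `imageio.mimsave` yourself, the `export_to_video` helper hinted at by the commented import above can write the frames to an .mp4 for you. A minimal sketch, assuming `ppdiffusers.utils.export_to_video` exposes the same interface as its diffusers counterpart:
+
+```python
+from ppdiffusers import DPMSolverMultistepScheduler, TextToVideoSDPipeline
+from ppdiffusers.utils import export_to_video  # assumed available, as hinted by the commented import above
+
+pipe = TextToVideoSDPipeline.from_pretrained("cerspense/zeroscope_v2_XL")
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+
+prompt = "An astronaut riding a horse."
+video_frames = pipe(prompt, num_inference_steps=50, height=320, width=576, num_frames=24).frames
+# export_to_video writes the frames to an .mp4 file and returns its path
+video_path = export_to_video(video_frames, "text_to_video_generation-synth-result-astronaut_riding_a_horse.mp4")
+print(video_path)
+```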
+
+#### text_to_video_generation-zero
+
+```python
+import imageio
+
+# pip install imageio[ffmpeg]
+import paddle
+
+from ppdiffusers import TextToVideoZeroPipeline
+
+model_id = "runwayml/stable-diffusion-v1-5"
+pipe = TextToVideoZeroPipeline.from_pretrained(model_id, paddle_dtype=paddle.float16)
+
+prompt = "A panda is playing guitar on times square"
+result = pipe(prompt=prompt).images
+result = [(r * 255).astype("uint8") for r in result]
+imageio.mimsave("text_to_video_generation-zero-result-panda.mp4", result, fps=4)
+```
+
+### Text-and-Audio Multimodal
+
+Text-to-Audio Generation
+
+#### text_to_audio_generation-audio_ldm
+
+```python
+import paddle
+import scipy
+
+from ppdiffusers import AudioLDM2Pipeline
+
+pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2", paddle_dtype=paddle.float16)
+
+prompt = "Musical constellations twinkling in the night sky, forming a cosmic melody."
+negative_prompt = "Low quality."
+audio = pipe(prompt, negative_prompt=negative_prompt, num_inference_steps=200, audio_length_in_s=10).audios[0]
+
+output_path = f"{prompt}.wav"
+# save the audio sample as a .wav file
+scipy.io.wavfile.write(output_path, rate=16000, data=audio)
+```
+
+You can convert the [huggingface](https://huggingface.co/docs/diffusers/api/pipelines/audioldm2) model with the following code and then use it directly in Paddle:
+```python
+pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2-music", from_hf_hub=True, from_diffusers=True).save_pretrained("cvssp/audioldm2-music")
+```
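+
+After this one-off conversion, the saved weights can be loaded from the local directory without the conversion flags. A minimal sketch (the path below is simply the directory written by `save_pretrained` above):
+
+```python
+import paddle
+
+from ppdiffusers import AudioLDM2Pipeline
+
+# load the converted Paddle weights from the local directory created above
+pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2-music", paddle_dtype=paddle.float16)
+```
+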
+### Image
+
+Unconditional Image Generation
+
+#### unconditional_image_generation-latent_diffusion_uncond
+
+```python
+from ppdiffusers import LDMPipeline
+
+# Load the model and scheduler
+pipe = LDMPipeline.from_pretrained("CompVis/ldm-celebahq-256")
+
+# Run the pipeline for inference
+image = pipe(num_inference_steps=200).images[0]
+
+# Save the image
+image.save("ldm_generated_image.png")
+```
+
+Super Resolution
+
+#### super_resolution-latent_diffusion
+```python
+import paddle
+
+from ppdiffusers import LDMSuperResolutionPipeline
+from ppdiffusers.utils import load_image
+
+# Load the pipeline
+pipe = LDMSuperResolutionPipeline.from_pretrained("CompVis/ldm-super-resolution-4x-openimages")
+
+# Download the initial image
+url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations.png"
+
+init_image = load_image(url).resize((128, 128))
+init_image.save("original-image.png")
+
+# Use fp16 (AMP auto-cast) to speed up generation
+with paddle.amp.auto_cast(True):
+ image = pipe(init_image, num_inference_steps=100, eta=1).images[0]
+
+image.save("super-resolution-image.png")
+```
+
+Original image | Generated image
+
+Image Inpainting
+
+#### image_inpainting-repaint
+```python
+from ppdiffusers import RePaintPipeline, RePaintScheduler
+from ppdiffusers.utils import load_image
+
+img_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/celeba_hq_256.png"
+mask_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/mask_256.png"
+
+# Load the original image and the mask as PIL images
+original_image = load_image(img_url).resize((256, 256))
+mask_image = load_image(mask_url).resize((256, 256))
+
+scheduler = RePaintScheduler.from_pretrained("google/ddpm-ema-celebahq-256", subfolder="scheduler")
+pipe = RePaintPipeline.from_pretrained("google/ddpm-ema-celebahq-256", scheduler=scheduler)
+
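+# jump_length and jump_n_sample set RePaint's resampling schedule: how far and how often the sampler jumps back in time to re-harmonize the inpainted region with the known pixels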
+output = pipe(
+ original_image=original_image,
+ mask_image=mask_image,
+ num_inference_steps=250,
+ eta=0.0,
+ jump_length=10,
+ jump_n_sample=10,
+)
+inpainted_image = output.images[0]
+
+inpainted_image.save("repaint-image.png")
+```
+
+Original image | Mask image | Generated image
+
+Image Variation
+
+#### image_variation-versatile_diffusion
+```python
+from ppdiffusers import VersatileDiffusionImageVariationPipeline
+from ppdiffusers.utils import load_image
+
+url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/benz.jpg"
+image = load_image(url)
+
+pipe = VersatileDiffusionImageVariationPipeline.from_pretrained("shi-labs/versatile-diffusion")
+
+image = pipe(image).images[0]
+image.save("versatile-diffusion-car_variation.png")
+```
+
+Original image | Generated image
+
+### Audio
+
+Unconditional Audio Generation
+
+#### unconditional_audio_generation-audio_diffusion
+
+```python
+from scipy.io.wavfile import write
+from ppdiffusers import AudioDiffusionPipeline
+import paddle
+
+# Load the model and scheduler
+pipe = AudioDiffusionPipeline.from_pretrained("teticio/audio-diffusion-ddim-256")
+pipe.set_progress_bar_config(disable=None)
+generator = paddle.Generator().manual_seed(42)
+
+output = pipe(generator=generator)
+audio = output.audios[0]
+image = output.images[0]
+
+# Save each audio channel to a local .wav file
+for i, channel in enumerate(audio):
+    write(f"audio_diffusion_test{i}.wav", pipe.mel.config.sample_rate, channel.transpose())
+
+# Save the generated mel-spectrogram image
+image.save("audio_diffusion_test.png")
+```
+
+#### unconditional_audio_generation-spectrogram_diffusion
+
+```python
+import paddle
+import scipy
+
+from ppdiffusers import MidiProcessor, SpectrogramDiffusionPipeline
+from ppdiffusers.utils.download_utils import ppdiffusers_url_download
+
+# Download MIDI from: wget https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/beethoven_hammerklavier_2.mid
+mid_file_path = ppdiffusers_url_download(
+ "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/beethoven_hammerklavier_2.mid", cache_dir="."
+)
+pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion", paddle_dtype=paddle.float16)
+processor = MidiProcessor()
+output = pipe(processor(mid_file_path))
+audio = output.audios[0]
+
+output_path = "unconditional_audio_generation-spectrogram_diffusion-result-beethoven_hammerklavier_2.wav"
+# save the audio sample as a .wav file
+scipy.io.wavfile.write(output_path, rate=16000, data=audio)
+```
+
+## License
+PPDiffusers is released under the [Apache-2.0 License](https://github.com/PaddlePaddle/PaddleMIX/blob/develop/ppdiffusers/LICENSE).
+
+Stable Diffusion is released under [The CreativeML OpenRAIL M license](https://huggingface.co/spaces/CompVis/stable-diffusion-license).
+> The CreativeML OpenRAIL M is an [Open RAIL M license](https://www.licenses.ai/blog/2022/8/18/naming-convention-of-responsible-ai-licenses), adapted from the work that [BigScience](https://bigscience.huggingface.co/) and [the RAIL Initiative](https://www.licenses.ai/) are jointly carrying in the area of responsible AI licensing. See also [the article about the BLOOM Open RAIL license](https://bigscience.huggingface.co/blog/the-bigscience-rail-license) on which this license is based.
+
+Stable Diffusion 3 is released under the [Stability Community License](https://stability.ai/license).
+> Community License: Free for research, non-commercial, and commercial use for organisations or individuals with less than $1M annual revenue. You only need a paid Enterprise license if your yearly revenues exceed USD$1M and you use Stability AI models in commercial products or services. Read more: https://stability.ai/license
+
+## Acknowledgements
+We drew on 🤗 Hugging Face's [Diffusers](https://github.com/huggingface/diffusers) and its excellent design for working with pretrained diffusion models; we thank the Hugging Face authors and their open-source community.
+
+## Citation
+
+```bibtex
+@misc{ppdiffusers,
+ author = {PaddlePaddle Authors},
+ title = {PPDiffusers: State-of-the-art diffusion model toolkit based on PaddlePaddle},
+ year = {2022},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ howpublished = {\url{https://github.com/PaddlePaddle/PaddleMIX/tree/develop/ppdiffusers}}
+}
+```
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/export_model.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..05a4c0e5335b5714dc1fc6f658431d0c36ec5a34
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/export_model.py
@@ -0,0 +1,263 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import os
+
+# set USE_PPXFORMERS=False to avoid using ppxformers
+os.environ["USE_PPXFORMERS"] = "False"
+from pathlib import Path
+from types import MethodType
+
+import paddle
+
+from ppdiffusers import (
+ ControlNetModel,
+ PaddleInferRuntimeModel,
+ PaddleInferStableDiffusionControlNetPipeline,
+ StableDiffusionControlNetPipeline,
+ UNet2DConditionModel,
+)
+
+
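+# Wrap ControlNet and UNet in a single Layer so paddle.jit.to_static can export them as one static graph; forward() feeds the ControlNet residuals straight into the UNet.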
+class ControlNetWithUnetModel(paddle.nn.Layer):
+ def __init__(
+ self,
+ unet,
+ controlnet,
+ ):
+ super().__init__()
+ self.unet = unet
+ self.controlnet = controlnet
+
+ def forward(
+ self,
+ sample,
+ timestep,
+ encoder_hidden_states,
+ controlnet_cond,
+ controlnet_conditioning_scale,
+ return_dict=True,
+ ):
+ down_block_res_samples, mid_block_res_sample = self.controlnet(
+ sample,
+ timestep,
+ encoder_hidden_states=encoder_hidden_states,
+ controlnet_cond=controlnet_cond,
+ conditioning_scale=controlnet_conditioning_scale,
+ return_dict=False,
+ )
+
+ noise_pred = self.unet(
+ sample,
+ timestep,
+ encoder_hidden_states=encoder_hidden_states,
+ down_block_additional_residuals=down_block_res_samples,
+ mid_block_additional_residual=mid_block_res_sample,
+ return_dict=return_dict,
+ )
+ return noise_pred
+
+
+def convert_ppdiffusers_pipeline_to_paddleinfer_pipeline(
+ model_path: str,
+ controlnet_model_path: str,
+ output_path: str,
+ sample: bool = False,
+ height: int = None,
+ width: int = None,
+):
+ unet_tmp = UNet2DConditionModel.from_pretrained(model_path, resnet_pre_temb_non_linearity=False, subfolder="unet")
+ controlnet_tmp = ControlNetModel.from_pretrained(controlnet_model_path, resnet_pre_temb_non_linearity=False)
+
+ pipeline = StableDiffusionControlNetPipeline.from_pretrained(
+ model_path,
+ unet=unet_tmp,
+ controlnet=controlnet_tmp,
+ safety_checker=None,
+ feature_extractor=None,
+ requires_safety_checker=False,
+ )
+ output_path = Path(output_path)
+ # calculate latent's H and W
+ latent_height = height // 8 if height is not None else None
+ latent_width = width // 8 if width is not None else None
+ # get arguments
+ cross_attention_dim = pipeline.unet.config.cross_attention_dim # 768 or 1024 or 1280
+ unet_channels = pipeline.unet.config.in_channels # 4
+ vae_in_channels = pipeline.vae.config.in_channels # 3
+ vae_latent_channels = pipeline.vae.config.latent_channels # 4
+ print(
+ f"cross_attention_dim: {cross_attention_dim}\n",
+ f"unet_in_channels: {unet_channels}\n",
+ f"vae_encoder_in_channels: {vae_in_channels}\n",
+ f"vae_decoder_latent_channels: {vae_latent_channels}",
+ )
+ # 1. Convert text_encoder
+ text_encoder = paddle.jit.to_static(
+ pipeline.text_encoder,
+ input_spec=[paddle.static.InputSpec(shape=[None, None], dtype="int64", name="input_ids")], # input_ids
+ )
+    save_path = os.path.join(output_path, "text_encoder", "inference")
+ paddle.jit.save(text_encoder, save_path)
+ print(f"Save text_encoder model in {save_path} successfully.")
+ del pipeline.text_encoder
+
+ # wrap unet + controlnet
+ new_unet = ControlNetWithUnetModel(unet=pipeline.unet, controlnet=pipeline.controlnet)
+
+ # 2. Convert unet
+ unet = paddle.jit.to_static(
+ new_unet,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, unet_channels, latent_height, latent_width],
+ dtype="float32",
+ name="sample",
+ ), # sample
+ paddle.static.InputSpec(shape=[1], dtype="float32", name="timestep"), # timestep
+ paddle.static.InputSpec(
+ shape=[None, None, cross_attention_dim],
+ dtype="float32",
+ name="encoder_hidden_states",
+ ), # encoder_hidden_states
+ paddle.static.InputSpec(
+ shape=[None, vae_in_channels, height, width],
+ dtype="float32",
+ name="controlnet_cond",
+ ), # controlnet_cond
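+            # one conditioning scale per ControlNet residual: 3 per down block plus 1 for the mid block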
+ paddle.static.InputSpec(
+ shape=[len(pipeline.unet.config.block_out_channels) * 3 + 1],
+ dtype="float32",
+ name="controlnet_conditioning_scale",
+ ), # controlnet_conditioning_scale
+ ],
+ )
+
+    save_path = os.path.join(output_path, "unet", "inference")
+ paddle.jit.save(unet, save_path)
+ print(f"Save unet model in {save_path} successfully.")
+ del pipeline.unet
+ del new_unet
+
+ def forward_vae_encoder_mode(self, z):
+ return self.encode(z, True).latent_dist.mode()
+
+ def forward_vae_encoder_sample(self, z):
+ return self.encode(z, True).latent_dist.sample()
+
+ # 3. Convert vae encoder
+ vae_encoder = pipeline.vae
+ if sample:
+ vae_encoder.forward = MethodType(forward_vae_encoder_sample, vae_encoder)
+ else:
+ vae_encoder.forward = MethodType(forward_vae_encoder_mode, vae_encoder)
+
+ vae_encoder = paddle.jit.to_static(
+ vae_encoder,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, vae_in_channels, height, width],
+ dtype="float32",
+ name="sample", # N, C, H, W
+ ), # latent
+ ],
+ )
+ # Save vae_encoder in static graph model.
+    save_path = os.path.join(output_path, "vae_encoder", "inference")
+ paddle.jit.save(vae_encoder, save_path)
+ print(f"Save vae_encoder model in {save_path} successfully.")
+
+    # 4. Convert vae decoder
+ vae_decoder = pipeline.vae
+
+ def forward_vae_decoder(self, z):
+ return self.decode(z, True).sample
+
+ vae_decoder.forward = MethodType(forward_vae_decoder, vae_decoder)
+ vae_decoder = paddle.jit.to_static(
+ vae_decoder,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, vae_latent_channels, latent_height, latent_width],
+ dtype="float32",
+ name="latent_sample",
+ ), # latent_sample
+ ],
+ )
+ # Save vae_decoder in static graph model.
+    save_path = os.path.join(output_path, "vae_decoder", "inference")
+ paddle.jit.save(vae_decoder, save_path)
+ print(f"Save vae_decoder model in {save_path} successfully.")
+ del pipeline.vae
+
+ paddleinfer_pipeline = PaddleInferStableDiffusionControlNetPipeline(
+ vae_encoder=PaddleInferRuntimeModel.from_pretrained(output_path / "vae_encoder"),
+ vae_decoder=PaddleInferRuntimeModel.from_pretrained(output_path / "vae_decoder"),
+ text_encoder=PaddleInferRuntimeModel.from_pretrained(output_path / "text_encoder"),
+ unet=PaddleInferRuntimeModel.from_pretrained(output_path / "unet"),
+ tokenizer=pipeline.tokenizer,
+ scheduler=pipeline.scheduler,
+ safety_checker=None,
+ feature_extractor=None,
+ image_encoder=None,
+ requires_safety_checker=False,
+ )
+ paddleinfer_pipeline.save_pretrained(str(output_path))
+ print("PaddleInfer pipeline saved to", output_path)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ default="runwayml/stable-diffusion-v1-5",
+ help="Path to the `ppdiffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument(
+ "--controlnet_pretrained_model_name_or_path",
+ type=str,
+ default="lllyasviel/sd-controlnet-canny",
+ help="Path to the `ppdiffusers` controlnet_pretrained_model_name_or_path checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument("--output_path", type=str, required=True, help="Path to the output model.")
+ parser.add_argument(
+ "--sample",
+ action="store_true",
+ default=False,
+        help="If set, export the vae encoder using latent_dist.sample(); otherwise latent_dist.mode() is used.",
+ )
+ parser.add_argument(
+ "--height",
+ type=int,
+ default=None,
+ help="The height of output images. Default: None",
+ )
+ parser.add_argument(
+ "--width",
+ type=int,
+ default=None,
+ help="The width of output images. Default: None",
+ )
+ args = parser.parse_args()
+
+ convert_ppdiffusers_pipeline_to_paddleinfer_pipeline(
+ args.pretrained_model_name_or_path,
+ args.controlnet_pretrained_model_name_or_path,
+ args.output_path,
+ args.sample,
+ args.height,
+ args.width,
+ )
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_paddle.sh b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_paddle.sh
new file mode 100644
index 0000000000000000000000000000000000000000..babde7cd92a54bcb31ab4e4c89e1c7c2017e33f4
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_paddle.sh
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw fp16
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention cutlass fp16
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type cutlass --use_fp16 True --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention flash fp16
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type flash --use_fp16 True --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention cutlass fp32
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type cutlass --use_fp16 False --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention flash fp32
+python infer_dygraph_paddle.py --scheduler "ddim" --task_name all --attention_type flash --use_fp16 False --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_torch.sh b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_torch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..40eb9bc45707a567eb68415727060bdf1344c5cc
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/benchmark_torch.sh
@@ -0,0 +1,26 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw fp16
+python infer_dygraph_torch.py --scheduler "ddim" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention sdp fp16
+python infer_dygraph_torch.py --scheduler "ddim" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_torch.py --scheduler "ddim" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
+
+# attention sdp fp32
+python infer_dygraph_torch.py --scheduler "ddim" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 512 --width 512 --benchmark_steps 10
\ No newline at end of file
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/inference.sh b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/inference.sh
new file mode 100644
index 0000000000000000000000000000000000000000..24541c8f5297b87a28c1c343f1addd9608a558e8
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/controlnet/scripts/inference.sh
@@ -0,0 +1,26 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ==============================================================================
+# using the plain paddle backend to run the static model is not fast;
+# this script is only meant to verify that the inference is correct.
+# ==============================================================================
+# text2img
+python infer.py --model_dir static_model/stable-diffusion-v1-5-canny/ --scheduler "ddim" --backend paddle --device gpu --task_name text2img
+
+# img2img
+python infer.py --model_dir static_model/stable-diffusion-v1-5-canny/ --scheduler "ddim" --backend paddle --device gpu --task_name img2img
+
+# inpaint
+python infer.py --model_dir static_model/stable-diffusion-v1-5-canny/ --scheduler "ddim" --backend paddle --device gpu --task_name inpaint_legacy
\ No newline at end of file
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/export_model.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..796b2c99ac368056563c192e6e92cf18c46ccb3e
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/export_model.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+# set USE_PPXFORMERS=False to avoid using ppxformers
+os.environ["USE_PPXFORMERS"] = "False"
+from pathlib import Path
+from types import MethodType
+
+import paddle
+from unet_2d_condition_housing import UNet2DConditionModelSDHousing
+
+from ppdiffusers import (
+ PaddleInferRuntimeModel,
+ PaddleInferStableDiffusionInpaintPipeline,
+ PaddleInferStableDiffusionMegaPipeline,
+ StableDiffusionPipeline,
+)
+
+
+def convert_ppdiffusers_pipeline_to_paddleinfer_pipeline(
+ model_path: str,
+ output_path: str,
+ sample: bool = False,
+ height: int = None,
+ width: int = None,
+):
+    # load the deployment-specific UNet variant (UNet2DConditionModelSDHousing); resnet_pre_temb_non_linearity is left disabled here.
+ unet_model = UNet2DConditionModelSDHousing.from_pretrained(
+ model_path, resnet_pre_temb_non_linearity=False, subfolder="unet"
+ )
+ pipeline = StableDiffusionPipeline.from_pretrained(
+ model_path,
+ unet=unet_model,
+ safety_checker=None,
+ )
+ output_path = Path(output_path)
+ # calculate latent's H and W
+ latent_height = height // 8 if height is not None else None
+ latent_width = width // 8 if width is not None else None
+ # get arguments
+ cross_attention_dim = pipeline.unet.config.cross_attention_dim # 768 or 1024 or 1280
+ unet_channels = pipeline.unet.config.in_channels # 4 or 9
+ vae_in_channels = pipeline.vae.config.in_channels # 3
+ vae_latent_channels = pipeline.vae.config.latent_channels # 4
+ print(
+ f"cross_attention_dim: {cross_attention_dim}\n",
+ f"unet_in_channels: {unet_channels}\n",
+ f"vae_encoder_in_channels: {vae_in_channels}\n",
+ f"vae_decoder_latent_channels: {vae_latent_channels}",
+ )
+ # 1. Convert text_encoder
+ text_encoder = paddle.jit.to_static(
+ pipeline.text_encoder,
+ input_spec=[paddle.static.InputSpec(shape=[None, None], dtype="int64", name="input_ids")], # input_ids
+ )
+    save_path = os.path.join(output_path, "text_encoder", "inference")
+ paddle.jit.save(text_encoder, save_path)
+ print(f"Save text_encoder model in {save_path} successfully.")
+ del pipeline.text_encoder
+
+ # 2. Convert unet
+ unet = paddle.jit.to_static(
+ pipeline.unet,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, unet_channels, latent_height, latent_width],
+ dtype="float32",
+ name="sample",
+ ), # sample
+ paddle.static.InputSpec(shape=[1], dtype="float32", name="timestep"), # timestep
+ paddle.static.InputSpec(
+ shape=[None, None, cross_attention_dim],
+ dtype="float32",
+ name="encoder_hidden_states",
+ ), # encoder_hidden_states
+ ],
+ )
+    save_path = os.path.join(output_path, "unet", "inference")
+ paddle.jit.save(unet, save_path)
+ print(f"Save unet model in {save_path} successfully.")
+ del pipeline.unet
+
+ def forward_vae_encoder_mode(self, z):
+ return self.encode(z, True).latent_dist.mode()
+
+ def forward_vae_encoder_sample(self, z):
+ return self.encode(z, True).latent_dist.sample()
+
+ # 3. Convert vae encoder
+ vae_encoder = pipeline.vae
+ if sample:
+ vae_encoder.forward = MethodType(forward_vae_encoder_sample, vae_encoder)
+ else:
+ vae_encoder.forward = MethodType(forward_vae_encoder_mode, vae_encoder)
+
+ vae_encoder = paddle.jit.to_static(
+ vae_encoder,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, vae_in_channels, height, width],
+ dtype="float32",
+ name="sample", # N, C, H, W
+ ), # latent
+ ],
+ )
+ # Save vae_encoder in static graph model.
+    save_path = os.path.join(output_path, "vae_encoder", "inference")
+ paddle.jit.save(vae_encoder, save_path)
+ print(f"Save vae_encoder model in {save_path} successfully.")
+
+    # 4. Convert vae decoder
+ vae_decoder = pipeline.vae
+
+ def forward_vae_decoder(self, z):
+ return self.decode(z, True).sample
+
+ vae_decoder.forward = MethodType(forward_vae_decoder, vae_decoder)
+ vae_decoder = paddle.jit.to_static(
+ vae_decoder,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None, vae_latent_channels, latent_height, latent_width],
+ dtype="float32",
+ name="latent_sample",
+ ), # latent_sample
+ ],
+ )
+ # Save vae_decoder in static graph model.
+    save_path = os.path.join(output_path, "vae_decoder", "inference")
+ paddle.jit.save(vae_decoder, save_path)
+ print(f"Save vae_decoder model in {save_path} successfully.")
+ del pipeline.vae
+
+ if "inpainting" in model_path:
+ fd_pipe_cls = PaddleInferStableDiffusionInpaintPipeline
+ else:
+ fd_pipe_cls = PaddleInferStableDiffusionMegaPipeline
+
+ paddleinfer_pipeline = fd_pipe_cls(
+ vae_encoder=PaddleInferRuntimeModel.from_pretrained(output_path / "vae_encoder"),
+ vae_decoder=PaddleInferRuntimeModel.from_pretrained(output_path / "vae_decoder"),
+ text_encoder=PaddleInferRuntimeModel.from_pretrained(output_path / "text_encoder"),
+ unet=PaddleInferRuntimeModel.from_pretrained(output_path / "unet"),
+ tokenizer=pipeline.tokenizer,
+ scheduler=pipeline.scheduler,
+ feature_extractor=pipeline.feature_extractor,
+ image_encoder=None,
+ safety_checker=None,
+ requires_safety_checker=False,
+ )
+ paddleinfer_pipeline.save_pretrained(str(output_path))
+ print("PaddleInfer pipeline saved to", output_path)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ required=True,
+ help="Path to the `ppdiffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument("--output_path", type=str, required=True, help="Path to the output model.")
+ parser.add_argument(
+ "--sample",
+ action="store_true",
+ default=False,
+        help="If set, export the vae encoder using latent_dist.sample(); otherwise latent_dist.mode() is used.",
+ )
+ parser.add_argument(
+ "--height",
+ type=int,
+ default=None,
+ help="The height of output images. Default: None",
+ )
+ parser.add_argument(
+ "--width",
+ type=int,
+ default=None,
+ help="The width of output images. Default: None",
+ )
+ args = parser.parse_args()
+
+ convert_ppdiffusers_pipeline_to_paddleinfer_pipeline(
+ args.pretrained_model_name_or_path,
+ args.output_path,
+ args.sample,
+ args.height,
+ args.width,
+ )
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ad63f98f50ed7cdd9f4f9c23476db3346fff131
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer.py
@@ -0,0 +1,408 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+
+# isort: split
+import paddle
+import paddle.inference as paddle_infer
+
+# isort: split
+import numpy as np
+from paddlenlp.trainer.argparser import strtobool
+from tqdm.auto import trange
+
+from ppdiffusers import ( # noqa
+ DiffusionPipeline,
+ PaddleInferStableDiffusionMegaPipeline,
+)
+from ppdiffusers.utils import load_image
+
+
+def parse_arguments():
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir",
+ default="runwayml/stable-diffusion-v1-5@paddleinfer",
+ help="The model directory of diffusion_model.",
+ )
+ parser.add_argument(
+ "--inference_steps",
+ type=int,
+ default=50,
+ help="The number of unet inference steps.",
+ )
+ parser.add_argument(
+ "--benchmark_steps",
+ type=int,
+ default=10,
+ help="The number of performance benchmark steps.",
+ )
+ parser.add_argument(
+ "--backend",
+ type=str,
+ default="paddle_tensorrt",
+ choices=["paddle", "paddle_tensorrt"],
+ help="The inference runtime backend of unet model and text encoder model.",
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="gpu",
+ choices=[
+ "cpu",
+ "gpu",
+ "huawei_ascend_npu",
+ "kunlunxin_xpu",
+ ],
+ help="The inference runtime device of models.",
+ )
+ parser.add_argument(
+ "--task_name",
+ type=str,
+ default="text2img",
+ choices=[
+ "text2img",
+ "img2img",
+ "inpaint_legacy",
+ "all",
+ ],
+ help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+ )
+ parser.add_argument(
+ "--parse_prompt_type",
+ type=str,
+ default="lpw",
+ choices=[
+ "raw",
+ "lpw",
+ ],
+ help="The parse_prompt_type can be one of [raw, lpw]. ",
+ )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Whether to use FP16 mode")
+ parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+ parser.add_argument(
+ "--scheduler",
+ type=str,
+ default="preconfig-euler-ancestral",
+ choices=[
+ "pndm",
+ "lms",
+ "euler",
+ "euler-ancestral",
+ "preconfig-euler-ancestral",
+ "dpm-multi",
+ "dpm-single",
+ "unipc-multi",
+ "ddim",
+ "ddpm",
+ "deis-multi",
+ "heun",
+ "kdpm2-ancestral",
+ "kdpm2",
+ ],
+ help="The scheduler type of stable diffusion.",
+ )
+ parser.add_argument("--height", type=int, default=512, help="Height of input image")
+ parser.add_argument("--width", type=int, default=512, help="Width of input image")
+ parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+ parser.add_argument("--hr_resize_height", type=int, default=768, help="HR Height of input image")
+ parser.add_argument("--hr_resize_width", type=int, default=768, help="HR Width of input image")
+ parser.add_argument("--is_sd2_0", type=strtobool, default=False, help="Is sd2_0 model?")
+ parser.add_argument(
+ "--tune",
+ type=strtobool,
+ default=False,
+ help="Whether to tune the shape of tensorrt engine.",
+ )
+
+ return parser.parse_args()
+
+
+def create_paddle_inference_runtime(
+ model_dir="",
+ model_name="",
+ use_trt=False,
+ precision_mode=paddle_infer.PrecisionType.Half,
+ device_id=0,
+ disable_paddle_trt_ops=[],
+ disable_paddle_pass=[],
+ workspace=24 * 1024 * 1024 * 1024,
+ tune=False,
+):
+ config = paddle_infer.Config()
+ config.enable_memory_optim()
+ shape_file = f"{model_dir}/{model_name}/shape_range_info.pbtxt"
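+    # when tune=True, collect dynamic shape ranges into shape_file; a later TensorRT run reuses them via enable_tuned_tensorrt_dynamic_shape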
+ if tune:
+ config.collect_shape_range_info(shape_file)
+ config.switch_ir_optim(False)
+ else:
+ config.enable_new_executor()
+ if str(os.environ.get("FLAGS_enable_pir_in_executor")).lower() in ("true", "1"):
+ config.enable_new_ir()
+ if str(os.environ.get("FLAGS_use_cinn")).lower() in ("true", "1"):
+ config.enable_cinn()
+
+ if device_id != -1:
+ config.use_gpu()
+ config.enable_use_gpu(memory_pool_init_size_mb=2000, device_id=device_id, precision_mode=precision_mode)
+ for pass_name in disable_paddle_pass:
+ config.delete_pass(pass_name)
+ if use_trt:
+ config.enable_tensorrt_engine(
+ workspace_size=workspace,
+ precision_mode=precision_mode,
+ max_batch_size=1,
+ min_subgraph_size=3,
+ use_static=True,
+ )
+ config.enable_tensorrt_memory_optim()
+ config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
+ cache_file = os.path.join(model_dir, model_name, "_opt_cache/")
+ config.set_optim_cache_dir(cache_file)
+ if precision_mode != paddle_infer.PrecisionType.Half:
+ only_fp16_passes = [
+ "trt_cross_multihead_matmul_fuse_pass",
+ "trt_flash_multihead_matmul_fuse_pass",
+ "preln_elementwise_groupnorm_act_pass",
+ "elementwise_groupnorm_act_pass",
+ ]
+ for curr_pass in only_fp16_passes:
+ config.delete_pass(curr_pass)
+ return config
+
+
+def main(args):
+ if args.device_id == -1:
+ paddle.set_device("cpu")
+ else:
+ paddle.set_device(f"gpu:{args.device_id}")
+
+ seed = 1024
+ min_image_size = 512
+ max_image_size = 768
+ max_image_size = max(min_image_size, max_image_size)
+
+    # init paddle inference runtime configs
+ only_fp16_passes = [
+ "trt_cross_multihead_matmul_fuse_pass",
+ "trt_flash_multihead_matmul_fuse_pass",
+ "preln_elementwise_groupnorm_act_pass",
+ "elementwise_groupnorm_act_pass",
+ ]
+ no_need_passes = [
+ "trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass",
+ "add_support_int8_pass",
+ "elementwise_groupnorm_act_pass",
+ "groupnorm_act_pass",
+ "preln_elementwise_groupnorm_act_pass",
+ ]
+ paddle_delete_passes = dict(
+ text_encoder=only_fp16_passes + no_need_passes if not args.use_fp16 else no_need_passes,
+ text_encoder_2=only_fp16_passes + no_need_passes if not args.use_fp16 else no_need_passes,
+ vae_encoder=only_fp16_passes + [] if args.use_fp16 else [],
+ vae_decoder=only_fp16_passes + no_need_passes if not args.use_fp16 else no_need_passes,
+ unet=only_fp16_passes + no_need_passes if not args.use_fp16 else no_need_passes,
+ image_encoder=only_fp16_passes + no_need_passes if not args.use_fp16 else no_need_passes,
+ )
+ args.use_trt = args.backend == "paddle_tensorrt"
+ precision_mode = paddle_infer.PrecisionType.Half if args.use_fp16 else paddle_infer.PrecisionType.Float32
+ infer_configs = dict(
+ text_encoder=create_paddle_inference_runtime(
+ model_dir=args.model_dir,
+ use_trt=False,
+ model_name="text_encoder",
+ precision_mode=paddle_infer.PrecisionType.Half,
+ device_id=args.device_id,
+ disable_paddle_trt_ops=["range", "lookup_table_v2"],
+ disable_paddle_pass=paddle_delete_passes.get("text_encoder", []),
+ tune=False,
+ ),
+ vae_encoder=create_paddle_inference_runtime(
+ model_dir=args.model_dir,
+ model_name="vae_encoder",
+ use_trt=False,
+ precision_mode=paddle_infer.PrecisionType.Half,
+ device_id=args.device_id,
+ disable_paddle_pass=paddle_delete_passes.get("vae_encoder", []),
+ tune=False,
+ ),
+ vae_decoder=create_paddle_inference_runtime(
+ model_dir=args.model_dir,
+ model_name="vae_decoder",
+ use_trt=False,
+ precision_mode=paddle_infer.PrecisionType.Float32,
+ device_id=args.device_id,
+ disable_paddle_pass=paddle_delete_passes.get("vae_decoder", []),
+ tune=False,
+ ),
+ unet=create_paddle_inference_runtime(
+ model_dir=args.model_dir,
+ model_name="unet",
+ use_trt=args.use_trt,
+ precision_mode=precision_mode,
+ device_id=args.device_id,
+ disable_paddle_pass=no_need_passes,
+ tune=args.tune,
+ ),
+ )
+ pipe = PaddleInferStableDiffusionMegaPipeline.from_pretrained(
+ args.model_dir,
+ infer_configs=infer_configs,
+ use_optim_cache=False,
+ )
+ pipe.set_progress_bar_config(disable=False)
+ pipe.change_scheduler(args.scheduler)
+ parse_prompt_type = args.parse_prompt_type
+ width = args.width
+ height = args.height
+
+ folder = f"results-{args.backend}"
+ os.makedirs(folder, exist_ok=True)
+ if args.task_name in ["text2img", "all"]:
+ # text2img
+ prompt = "a photo of an astronaut riding a horse on mars"
+ time_costs = []
+ # warmup
+ pipe.text2img(
+ prompt,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ # parse_prompt_type=parse_prompt_type,
+ )
+ print("==> Test text2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe.text2img(
+ prompt,
+ output_type="pil",
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ # parse_prompt_type=parse_prompt_type,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/text2img.png")
+
+ if args.task_name in ["img2img", "all"]:
+ # img2img
+ img_url = (
+ "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/sketch-mountains-input.png"
+ )
+ init_image = load_image(img_url)
+ prompt = "A fantasy landscape, trending on artstation"
+ time_costs = []
+ # warmup
+ pipe.img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ # parse_prompt_type=parse_prompt_type,
+ )
+ print("==> Test img2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe.img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ # parse_prompt_type=parse_prompt_type,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/img2img.png")
+
+ if args.task_name in ["inpaint", "inpaint_legacy", "all"]:
+ img_url = (
+ "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations.png"
+ )
+ mask_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations-mask.png"
+ init_image = load_image(img_url)
+ mask_image = load_image(mask_url)
+ prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
+ time_costs = []
+ # warmup
+ if args.task_name in ["inpaint_legacy", "all"]:
+ call_fn = pipe.inpaint_legacy
+ task_name = "inpaint_legacy"
+ else:
+ call_fn = pipe.inpaint
+ task_name = "inpaint"
+ call_fn(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ parse_prompt_type=parse_prompt_type,
+ )
+ print(f"==> Test {task_name} performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = call_fn(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ parse_prompt_type=parse_prompt_type,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+
+ images[0].save(f"{folder}/{task_name}.png")
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+ main(args)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_paddle.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_paddle.py
new file mode 100644
index 0000000000000000000000000000000000000000..06ffde0f7ddd1b75c3ada2a5f62c8e6165ae9056
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_paddle.py
@@ -0,0 +1,357 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+import warnings
+
+import cv2
+import numpy as np
+import paddle
+from PIL import Image
+from tqdm.auto import trange
+
+from ppdiffusers import (
+ DDIMScheduler,
+ DDPMScheduler,
+ DEISMultistepScheduler,
+ DPMSolverMultistepScheduler,
+ DPMSolverSinglestepScheduler,
+ EulerAncestralDiscreteScheduler,
+ EulerDiscreteScheduler,
+ HeunDiscreteScheduler,
+ KDPM2AncestralDiscreteScheduler,
+ KDPM2DiscreteScheduler,
+ LMSDiscreteScheduler,
+ PNDMScheduler,
+ StableDiffusionImg2ImgPipeline,
+ StableDiffusionInpaintPipeline,
+ StableDiffusionPipeline,
+ UniPCMultistepScheduler,
+)
+from ppdiffusers.utils import load_image
+
+
+def get_canny_image(image, args):
+ if isinstance(image, Image.Image):
+ image = np.array(image)
+ image = cv2.Canny(image, args.low_threshold, args.high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ canny_image = Image.fromarray(image)
+ return canny_image
+
+
+def strtobool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ("yes", "true", "t", "y", "1"):
+ return True
+ elif v.lower() in ("no", "false", "f", "n", "0"):
+ return False
+ else:
+ raise ValueError(
+ f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+ )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
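+    # rebuild a scheduler of the requested type from the pipeline's original scheduler config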
+ self.orginal_scheduler_config = self.scheduler.config
+ scheduler_type = scheduler_type.lower()
+ if scheduler_type == "pndm":
+ scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+ elif scheduler_type == "lms":
+ scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "heun":
+ scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler":
+ scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler-ancestral":
+ scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-multi":
+ scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-single":
+ scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2-ancestral":
+ scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2":
+ scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "unipc-multi":
+ scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "ddim":
+ scheduler = DDIMScheduler.from_config(
+ self.orginal_scheduler_config,
+ steps_offset=1,
+ clip_sample=False,
+ set_alpha_to_one=False,
+ )
+ elif scheduler_type == "ddpm":
+ scheduler = DDPMScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ elif scheduler_type == "deis-multi":
+ scheduler = DEISMultistepScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ else:
+ raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+ return scheduler
+
+
+def parse_arguments():
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ default="runwayml/stable-diffusion-v1-5",
+ help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument(
+ "--inference_steps",
+ type=int,
+ default=50,
+ help="The number of unet inference steps.",
+ )
+ parser.add_argument(
+ "--benchmark_steps",
+ type=int,
+ default=10,
+ help="The number of performance benchmark steps.",
+ )
+ parser.add_argument(
+ "--task_name",
+ type=str,
+ default="all",
+ choices=[
+ "text2img",
+ "img2img",
+ "inpaint_legacy",
+ "all",
+ ],
+ help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+ )
+ parser.add_argument(
+ "--parse_prompt_type",
+ type=str,
+ default="raw",
+ choices=[
+ "raw",
+ "lpw",
+ ],
+ help="The parse_prompt_type can be one of [raw, lpw]. ",
+ )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Whether to use FP16 mode")
+ parser.add_argument(
+ "--attention_type", type=str, default="raw", choices=["raw", "cutlass", "flash", "all"], help="attention_type."
+ )
+ parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+ parser.add_argument(
+ "--scheduler",
+ type=str,
+ default="euler-ancestral",
+ choices=[
+ "pndm",
+ "lms",
+ "euler",
+ "euler-ancestral",
+ "dpm-multi",
+ "dpm-single",
+ "unipc-multi",
+ "ddim",
+ "ddpm",
+ "deis-multi",
+ "heun",
+ "kdpm2-ancestral",
+ "kdpm2",
+ ],
+ help="The scheduler type of stable diffusion.",
+ )
+ parser.add_argument("--height", type=int, default=512, help="Height of input image")
+ parser.add_argument("--width", type=int, default=512, help="Width of input image")
+ parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+ return parser.parse_args()
+
+
+def main(args):
+
+ seed = 1024
+ paddle_dtype = paddle.float16 if args.use_fp16 else paddle.float32
+ pipe = StableDiffusionPipeline.from_pretrained(
+ args.pretrained_model_name_or_path,
+ safety_checker=None,
+ feature_extractor=None,
+ requires_safety_checker=False,
+ paddle_dtype=paddle_dtype,
+ )
+ scheduler = change_scheduler(pipe, args.scheduler)
+ pipe.scheduler = scheduler
+
+ if args.attention_type == "all":
+ args.attention_type = ["raw", "cutlass", "flash"]
+ else:
+ args.attention_type = [args.attention_type]
+
+ for attention_type in args.attention_type:
+ if attention_type == "raw":
+ pipe.disable_xformers_memory_efficient_attention()
+ else:
+ try:
+ pipe.enable_xformers_memory_efficient_attention(attention_type)
+ except Exception as e:
+ if attention_type == "flash":
+ warnings.warn(
+                        "Attention type flash is not supported on your GPU! It requires a GPU such as a 3060/3070/3080/3090/4060/4070/4080/4090/A30/A100."
+ )
+ continue
+ else:
+ raise ValueError(e)
+
+ if not args.use_fp16 and attention_type == "flash":
+            print("Flash attention does not support dtype=float32! Please use float16 or bfloat16. Skipping this attention type.")
+ continue
+
+ width = args.width
+ height = args.height
+ pipe.set_progress_bar_config(disable=False)
+
+ folder = f"paddle_attn_{attention_type}_fp16" if args.use_fp16 else f"paddle_attn_{attention_type}_fp32"
+ os.makedirs(folder, exist_ok=True)
+ if args.task_name in ["text2img", "all"]:
+            # text2img
+ prompt = "bird"
+ time_costs = []
+ # warmup
+ pipe(
+ prompt,
+ num_inference_steps=10,
+ height=height,
+ width=width,
+ )
+ print("==> Test text2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe(
+ prompt,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/text2img.png")
+
+ if args.task_name in ["img2img", "all"]:
+ pipe_img2img = StableDiffusionImg2ImgPipeline(**pipe.components)
+ pipe_img2img.set_progress_bar_config(disable=False)
+ img_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/sketch-mountains-input.png"
+ init_image = load_image(img_url).resize((width, height))
+ prompt = "A fantasy landscape, trending on artstation"
+ time_costs = []
+ # warmup
+ pipe_img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ )
+ print("==> Test img2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe_img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/img2img.png")
+
+ if args.task_name in ["inpaint_legacy", "all"]:
+ pipe_inpaint = StableDiffusionInpaintPipeline(**pipe.components)
+ pipe_inpaint.set_progress_bar_config(disable=False)
+ img_url = (
+ "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations.png"
+ )
+ mask_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations-mask.png"
+ init_image = load_image(img_url).resize((width, height))
+ mask_image = load_image(mask_url).resize((width, height))
+ prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
+ time_costs = []
+ task_name = "inpaint_legacy"
+ pipe_inpaint(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ )
+ print(f"==> Test {task_name} performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe_inpaint(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/{task_name}.png")
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+ main(args)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_torch.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..febc46610eca3d524d182c8bc39495a202fdaaca
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd15/infer_dygraph_torch.py
@@ -0,0 +1,417 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+
+import torch
+
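+# Stash PyTorch's native scaled_dot_product_attention and remove it so diffusers falls back to the
+# "raw" attention processor; the "sdp" branch below restores it before benchmarking.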
+torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention
+delattr(torch.nn.functional, "scaled_dot_product_attention")
+
+import cv2
+import numpy as np
+from diffusers import (
+ DDIMScheduler,
+ DDPMScheduler,
+ DEISMultistepScheduler,
+ DPMSolverMultistepScheduler,
+ DPMSolverSinglestepScheduler,
+ EulerAncestralDiscreteScheduler,
+ EulerDiscreteScheduler,
+ HeunDiscreteScheduler,
+ KDPM2AncestralDiscreteScheduler,
+ KDPM2DiscreteScheduler,
+ LMSDiscreteScheduler,
+ PNDMScheduler,
+ StableDiffusionImg2ImgPipeline,
+ StableDiffusionInpaintPipeline,
+ StableDiffusionPipeline,
+ UniPCMultistepScheduler,
+)
+from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0
+from diffusers.utils import load_image
+from PIL import Image
+from tqdm.auto import trange
+
+
+def get_canny_image(image, args):
+ if isinstance(image, Image.Image):
+ image = np.array(image)
+ image = cv2.Canny(image, args.low_threshold, args.high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ canny_image = Image.fromarray(image)
+ return canny_image
+
+
+def strtobool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ("yes", "true", "t", "y", "1"):
+ return True
+ elif v.lower() in ("no", "false", "f", "n", "0"):
+ return False
+ else:
+ raise ValueError(
+ f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+ )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
+ self.orginal_scheduler_config = self.scheduler.config
+ scheduler_type = scheduler_type.lower()
+ if scheduler_type == "pndm":
+ scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+ elif scheduler_type == "lms":
+ scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "heun":
+ scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler":
+ scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler-ancestral":
+ scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-multi":
+ scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-single":
+ scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2-ancestral":
+ scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2":
+ scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "unipc-multi":
+ scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "ddim":
+ scheduler = DDIMScheduler.from_config(
+ self.orginal_scheduler_config,
+ steps_offset=1,
+ clip_sample=False,
+ set_alpha_to_one=False,
+ )
+ elif scheduler_type == "ddpm":
+ scheduler = DDPMScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ elif scheduler_type == "deis-multi":
+ scheduler = DEISMultistepScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ else:
+ raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+ return scheduler
+
+
+def parse_arguments():
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ default="runwayml/stable-diffusion-v1-5",
+ help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument(
+ "--inference_steps",
+ type=int,
+ default=50,
+ help="The number of unet inference steps.",
+ )
+ parser.add_argument(
+ "--benchmark_steps",
+ type=int,
+ default=10,
+ help="The number of performance benchmark steps.",
+ )
+ parser.add_argument(
+ "--task_name",
+ type=str,
+ default="all",
+ choices=[
+ "text2img",
+ "img2img",
+ "inpaint_legacy",
+ "all",
+ ],
+ help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+ )
+ parser.add_argument(
+ "--parse_prompt_type",
+ type=str,
+ default="raw",
+ choices=[
+ "raw",
+ "lpw",
+ ],
+ help="The parse_prompt_type can be one of [raw, lpw]. ",
+ )
+ parser.add_argument(
+ "--channels_last",
+ type=strtobool,
+ default=False,
+        help="Whether to use channels_last",
+ )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Whether to use FP16 mode")
+    parser.add_argument("--tf32", type=strtobool, default=True, help="Whether to allow TF32 matmul")
+    parser.add_argument("--compile", type=strtobool, default=False, help="Whether to run torch.compile on the unet")
+ parser.add_argument(
+ "--attention_type",
+ type=str,
+ default="sdp",
+ choices=[
+ "raw",
+ "sdp",
+ ],
+ help="attention_type.",
+ )
+ parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+ parser.add_argument(
+ "--scheduler",
+ type=str,
+ default="euler-ancestral",
+ choices=[
+ "pndm",
+ "lms",
+ "euler",
+ "euler-ancestral",
+ "dpm-multi",
+ "dpm-single",
+ "unipc-multi",
+ "ddim",
+ "ddpm",
+ "deis-multi",
+ "heun",
+ "kdpm2-ancestral",
+ "kdpm2",
+ ],
+ help="The scheduler type of stable diffusion.",
+ )
+ parser.add_argument("--height", type=int, default=512, help="Height of input image")
+ parser.add_argument("--width", type=int, default=512, help="Width of input image")
+ parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+ return parser.parse_args()
+
+
+def attn_processors(self):
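+    # Recursively collect every attention processor registered on this module and its submodules.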
+ processors = {}
+
+ def fn_recursive_add_processors(name: str, module, processors):
+ if hasattr(module, "set_processor"):
+ processors[f"{name}.processor"] = module.processor
+
+ for sub_name, child in module.named_children():
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+
+ return processors
+
+ for name, module in self.named_children():
+ fn_recursive_add_processors(name, module, processors)
+
+ return processors
+
+
+def set_attn_processor(self, processor):
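+    # Recursively set the attention processor(s) on this module; accepts a single processor instance
+    # or a dict keyed by the names returned from attn_processors().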
+ count = len(attn_processors(self).keys())
+
+ if isinstance(processor, dict) and len(processor) != count:
+ raise ValueError(
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
+ f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
+ )
+
+ def fn_recursive_attn_processor(name: str, module, processor):
+ if hasattr(module, "set_processor"):
+ if not isinstance(processor, dict):
+ module.set_processor(processor)
+ else:
+ module.set_processor(processor.pop(f"{name}.processor"))
+
+ for sub_name, child in module.named_children():
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+ for name, module in self.named_children():
+ fn_recursive_attn_processor(name, module, processor)
+
+
+def main(args):
+ if args.tf32:
+ torch.backends.cuda.matmul.allow_tf32 = True
+ else:
+ torch.backends.cuda.matmul.allow_tf32 = False
+
+ seed = 1024
+ torch_dtype = torch.float16 if args.use_fp16 else torch.float32
+ pipe = StableDiffusionPipeline.from_pretrained(
+ args.pretrained_model_name_or_path,
+ safety_checker=None,
+ feature_extractor=None,
+ requires_safety_checker=False,
+ torch_dtype=torch_dtype,
+ )
+ scheduler = change_scheduler(pipe, args.scheduler)
+ pipe.scheduler = scheduler
+ if args.device_id >= 0:
+ pipe.to(f"cuda:{args.device_id}")
+
+ if args.attention_type == "all":
+ args.attention_type = ["raw", "sdp"]
+ else:
+ args.attention_type = [args.attention_type]
+
+ for attention_type in args.attention_type:
+        attn_processor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0
+        if attention_type == "sdp":
+            torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_
+        set_attn_processor(pipe.unet, attn_processor_cls())
+        set_attn_processor(pipe.vae, attn_processor_cls())
+
+ if args.channels_last:
+ pipe.unet.to(memory_format=torch.channels_last)
+
+ if args.compile:
+ print("Run torch compile")
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+
+ width = args.width
+ height = args.height
+ pipe.set_progress_bar_config(disable=False)
+
+ folder = f"torch_attn_{attention_type}_fp16" if args.use_fp16 else f"torch_attn_{attention_type}_fp32"
+ os.makedirs(folder, exist_ok=True)
+ if args.task_name in ["text2img", "all"]:
+ init_image = load_image(
+ "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png"
+ )
+ # text2img
+ prompt = "bird"
+ time_costs = []
+ # warmup
+ pipe(
+ prompt,
+ num_inference_steps=10,
+ height=height,
+ width=width,
+ )
+ print("==> Test text2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ torch.cuda.manual_seed(seed)
+ images = pipe(
+ prompt,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/text2img.png")
+
+ if args.task_name in ["img2img", "all"]:
+ pipe_img2img = StableDiffusionImg2ImgPipeline(**pipe.components)
+ pipe_img2img.set_progress_bar_config(disable=False)
+ img_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/sketch-mountains-input.png"
+ init_image = load_image(img_url).resize((width, height))
+ prompt = "A fantasy landscape, trending on artstation"
+ time_costs = []
+ # warmup
+ pipe_img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ )
+ print("==> Test img2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ torch.cuda.manual_seed(seed)
+ images = pipe_img2img(
+ prompt,
+ image=init_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/img2img.png")
+
+ if args.task_name in ["inpaint_legacy", "all"]:
+ pipe_inpaint = StableDiffusionInpaintPipeline(**pipe.components)
+ pipe_inpaint.set_progress_bar_config(disable=False)
+ img_url = (
+ "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations.png"
+ )
+ mask_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations-mask.png"
+ init_image = load_image(img_url).resize((width, height))
+ mask_image = load_image(mask_url).resize((width, height))
+ prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
+ time_costs = []
+ task_name = "inpaint_legacy"
+ pipe_inpaint(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=20,
+ height=height,
+ width=width,
+ strength=args.strength,
+ )
+ print(f"==> Test {task_name} performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ torch.cuda.manual_seed(seed)
+ images = pipe_inpaint(
+ prompt,
+ image=init_image,
+ mask_image=mask_image,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ strength=args.strength,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/{task_name}.png")
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+ main(args)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/README.md b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b2804832d904d250bf1806d52c6f285f0652555d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/README.md
@@ -0,0 +1,77 @@
+# Stable Diffusion 3 High-Performance Inference
+
+- Paddle Inference provides a high-performance inference implementation of the Stable Diffusion 3 model, delivering a 70%+ inference speedup.
+
+Environment setup:
+```shell
+# Install triton and make it compatible with paddle
+python -m pip install triton
+python -m pip install git+https://github.com/zhoutianzi666/UseTritonInPaddle.git
+python -c "import use_triton_in_paddle; use_triton_in_paddle.make_triton_compatible_with_paddle()"
+
+# Install the develop (nightly) build of paddle; pick the wheel matching your CUDA version (CUDA 12.3 here)
+python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/
+
+# Install the paddlemix package to use the custom operators integrated in it
+python -m pip install paddlemix
+
+# Point LD_LIBRARY_PATH at libCutlassGemmEpilogue.so
+# See https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/README.md for details
+export LD_LIBRARY_PATH=/your_dir/Paddle/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/build:$LD_LIBRARY_PATH
+```
+- Note: this step lets static-graph inference use the fused Cutlass operators for better performance, but it is optional.
+If you do not use Cutlass, set `exp_enable_use_cutlass` to False in `./text_to_image_generation-stable_diffusion_3.py`.
+
+High-performance inference command:
+```shell
+# Run FP16 inference
+python text_to_image_generation-stable_diffusion_3.py --dtype float16 --height 512 --width 512 \
+--num-inference-steps 50 --inference_optimize 1 \
+--benchmark 1
+```
+Note: `--inference_optimize 1` enables the inference optimizations and `--benchmark 1` enables the performance benchmark.
+
+
+- Performance measured on an NVIDIA A100-SXM4-40GB:
+
+| Paddle Inference | PyTorch | Paddle dygraph |
+| --------------- | ------------ | ------------ |
+| 1.2 s | 1.78 s | 4.202 s |
+
+
+## Multi-GPU Inference for the Paddle Stable Diffusion 3 Model
+### How Data Parallel Works
+- In SD3, a single prompt with CFG requires generating both the unconditional and the text-guided branches, so the MM-DiT blocks see an input with batch_size=2.
+In the multi-GPU scheme we therefore split this batch of 2 across two cards, halving the floating-point computation each card has to carry.
+Once both halves are computed, the results from the two cards are gathered back together, and the output is identical to the single-card computation (see the sketch below).
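+
+A minimal, hypothetical sketch of this idea follows (it is not the actual PaddleMIX implementation; the tensor shapes and the stand-in forward pass are made up). Each rank keeps one half of the CFG batch and `all_gather` restores the full batch afterwards:
+
+```python
+# Hypothetical data-parallel sketch, assuming two GPUs launched via
+#   python -m paddle.distributed.launch --gpus "0,1" this_script.py
+import paddle
+import paddle.distributed as dist
+
+dist.init_parallel_env()
+rank = dist.get_rank()
+
+# CFG batch of 2: row 0 = unconditional branch, row 1 = text-guided branch
+latents = paddle.randn([2, 16, 128, 128])
+
+local_in = latents[rank:rank + 1]           # each card keeps one sample -> half the FLOPs
+local_out = local_in * 2.0                  # placeholder for the per-card MM-DiT forward pass
+
+gathered = []
+dist.all_gather(gathered, local_out)        # collect both halves again
+full_out = paddle.concat(gathered, axis=0)  # same result as running batch_size=2 on one card
+```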
+
+### How Model Parallel Works
+- In SD3, the Linear and Attention layers contain a large number of GEMMs (General Matrix Multiplies); when generating high-resolution images the GEMM computation grows rapidly, and these layers also hold most of the model's pretrained weights.
+In the multi-GPU scheme we therefore split these GEMMs across two cards, so each card holds half the weights and performs half the computation, which lowers both the floating-point load and the memory footprint per card (see the sketch below).
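+
+A minimal, hypothetical sketch of the column-split idea (the shapes are made up; the real implementation shards the pipeline's Linear layers across devices):
+
+```python
+# Hypothetical model-parallel sketch: split a Linear weight by output columns
+# across two shards; concatenating the partial GEMMs reproduces the full GEMM.
+import paddle
+
+paddle.seed(42)
+x = paddle.randn([4, 1024])             # activations entering a Linear in an MM-DiT block
+w = paddle.randn([1024, 4096])          # full weight: 4096 output features
+
+w0, w1 = paddle.split(w, 2, axis=1)     # each shard stores half the weight
+y0 = paddle.matmul(x, w0)               # computed on card 0
+y1 = paddle.matmul(x, w1)               # computed on card 1 (simulated here on one device)
+
+y = paddle.concat([y0, y1], axis=1)     # gather along the feature dimension
+print(bool(paddle.allclose(y, paddle.matmul(x, w), atol=1e-4)))  # True
+```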
+
+### Enabling Multi-GPU Inference
+- Paddle Inference provides multi-GPU inference for the SD3 model: set `mp_size 2` to enable Model Parallel and `dp_size 2` to enable Data Parallel.
+Use `python -m paddle.distributed.launch --gpus "0,1,2,3"` to choose the cards used for inference, where `--gpus "0,1,2,3"` lists the GPU ids to enable.
+If two-card inference is enough, simply specify two cards, e.g. `python -m paddle.distributed.launch --gpus "0,1"`, together with the parallel method and degree, e.g. `mp_size 2` or `dp_size 2`.
+
+- Note that `mp_size` must not exceed the input batch_size, and `mp_size` × `dp_size` must not exceed the total number of GPUs in the machine.
+- High-performance multi-GPU inference command:
+```shell
+# Run multi-GPU inference
+python -m paddle.distributed.launch --gpus "0,1,2,3" text_to_image_generation-stable_diffusion_3.py \
+--dtype float16 \
+--height 1024 \
+--width 1024 \
+--num-inference-steps 20 \
+--inference_optimize 1 \
+--mp_size 2 \
+--dp_size 2 \
+--benchmark 1
+```
+Note: `--inference_optimize 1` enables the inference optimizations and `--benchmark 1` enables the performance benchmark.
+
+## Performance Measured on an NVIDIA A800-SXM4-80GB
+
+| Paddle mp_size=2 & dp_size=2 | Paddle mp_size=2 | Paddle dp_size=2 | Paddle Single Card | Paddle dygraph |
+| ---------------------------- | ---------------- | ---------------- | ------------------ | -------------- |
+| 0.99 s | 1.581 s | 1.319 s | 2.376 s | 3.2 s |
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
new file mode 100644
index 0000000000000000000000000000000000000000..14d1f5f24683cf98ff48ce2978666e3e7f91fb5d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_paddle.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+import warnings
+
+import cv2
+import numpy as np
+import paddle
+from PIL import Image
+from tqdm.auto import trange
+
+from ppdiffusers import (
+ FlowMatchEulerDiscreteScheduler,
+ DDIMScheduler,
+ DDPMScheduler,
+ DEISMultistepScheduler,
+ DPMSolverMultistepScheduler,
+ DPMSolverSinglestepScheduler,
+ EulerAncestralDiscreteScheduler,
+ EulerDiscreteScheduler,
+ HeunDiscreteScheduler,
+ KDPM2AncestralDiscreteScheduler,
+ KDPM2DiscreteScheduler,
+ LMSDiscreteScheduler,
+ PNDMScheduler,
+ StableDiffusion3Pipeline,
+ UniPCMultistepScheduler,
+)
+from ppdiffusers.utils import load_image
+
+
+
+def strtobool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ("yes", "true", "t", "y", "1"):
+ return True
+ elif v.lower() in ("no", "false", "f", "n", "0"):
+ return False
+ else:
+ raise ValueError(
+ f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+ )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
+ self.orginal_scheduler_config = self.scheduler.config
+ scheduler_type = scheduler_type.lower()
+ if scheduler_type == "flow":
+        scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "pndm":
+ scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+ elif scheduler_type == "lms":
+ scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "heun":
+ scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler":
+ scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler-ancestral":
+ scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-multi":
+ scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-single":
+ scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2-ancestral":
+ scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2":
+ scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "unipc-multi":
+ scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "ddim":
+ scheduler = DDIMScheduler.from_config(
+ self.orginal_scheduler_config,
+ steps_offset=1,
+ clip_sample=False,
+ set_alpha_to_one=False,
+ )
+ elif scheduler_type == "ddpm":
+ scheduler = DDPMScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ elif scheduler_type == "deis-multi":
+ scheduler = DEISMultistepScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ else:
+ raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+ return scheduler
+
+
+def parse_arguments():
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ default="stabilityai/stable-diffusion-3-medium-diffusers",
+ help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument(
+ "--inference_steps",
+ type=int,
+ default=50,
+        help="The number of inference steps for the diffusion transformer.",
+ )
+ parser.add_argument(
+ "--benchmark_steps",
+ type=int,
+ default=10,
+ help="The number of performance benchmark steps.",
+ )
+ parser.add_argument(
+ "--task_name",
+ type=str,
+ default="all",
+ choices=[
+ "text2img",
+ "img2img",
+ "inpaint_legacy",
+ "all",
+ ],
+ help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+ )
+ parser.add_argument(
+ "--parse_prompt_type",
+ type=str,
+ default="raw",
+ choices=[
+ "raw",
+ "lpw",
+ ],
+ help="The parse_prompt_type can be one of [raw, lpw]. ",
+ )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Whether to use FP16 mode")
+ parser.add_argument(
+ "--attention_type", type=str, default="raw", choices=["raw", "cutlass", "flash", "all"], help="attention_type."
+ )
+ parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+ parser.add_argument(
+ "--scheduler",
+ type=str,
+ default="euler-ancestral",
+ choices=[
+ "flow",
+ "pndm",
+ "lms",
+ "euler",
+ "euler-ancestral",
+ "dpm-multi",
+ "dpm-single",
+ "unipc-multi",
+ "ddim",
+ "ddpm",
+ "deis-multi",
+ "heun",
+ "kdpm2-ancestral",
+ "kdpm2",
+ ],
+ help="The scheduler type of stable diffusion.",
+ )
+ parser.add_argument("--height", type=int, default=512, help="Height of input image")
+ parser.add_argument("--width", type=int, default=512, help="Width of input image")
+ parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+ return parser.parse_args()
+
+
+def main(args):
+
+ seed = 1024
+ paddle_dtype = paddle.float16 if args.use_fp16 else paddle.float32
+ pipe = StableDiffusion3Pipeline.from_pretrained(
+ args.pretrained_model_name_or_path,
+ safety_checker=None,
+ feature_extractor=None,
+ requires_safety_checker=False,
+ paddle_dtype=paddle_dtype,
+ )
+ scheduler = change_scheduler(pipe, args.scheduler)
+ pipe.scheduler = scheduler
+
+ if args.attention_type == "all":
+ args.attention_type = ["raw", "cutlass", "flash"]
+ else:
+ args.attention_type = [args.attention_type]
+
+ for attention_type in args.attention_type:
+ if attention_type == "raw":
+ pipe.disable_xformers_memory_efficient_attention()
+ else:
+ try:
+ pipe.enable_xformers_memory_efficient_attention(attention_type)
+ except Exception as e:
+ if attention_type == "flash":
+ warnings.warn(
+                        "Attention type flash is not supported on your GPU! It requires a GPU such as the RTX 3060/3070/3080/3090, RTX 4060/4070/4080/4090, A30, or A100."
+ )
+ continue
+ else:
+ raise ValueError(e)
+
+ if not args.use_fp16 and attention_type == "flash":
+            print("Flash attention does not support dtype=float32! Please use float16 or bfloat16. Skipping this attention type.")
+ continue
+
+ width = args.width
+ height = args.height
+ pipe.set_progress_bar_config(disable=False)
+
+ folder = f"paddle_attn_{attention_type}_fp16" if args.use_fp16 else f"paddle_attn_{attention_type}_fp32"
+ os.makedirs(folder, exist_ok=True)
+ if args.task_name in ["text2img", "all"]:
+ init_image = load_image(
+ "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png"
+ )
+ # text2img
+ prompt = "bird"
+ time_costs = []
+ # warmup
+ pipe(
+ prompt,
+ num_inference_steps=10,
+ height=height,
+ width=width,
+ )
+ print("==> Test text2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ paddle.seed(seed)
+ images = pipe(
+ prompt,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/text2img.png")
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+ main(args)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_torch.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..14c547b5605833d2c25b775136cea0b4112ee94d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/infer_dygraph_torch.py
@@ -0,0 +1,325 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import time
+
+import torch
+
+# torch.nn.functional.scaled_dot_product_attention_ = torch.nn.functional.scaled_dot_product_attention
+# delattr(torch.nn.functional, "scaled_dot_product_attention")
+
+import cv2
+import numpy as np
+from diffusers import (
+ FlowMatchEulerDiscreteScheduler,
+ DDIMScheduler,
+ DDPMScheduler,
+ DEISMultistepScheduler,
+ DPMSolverMultistepScheduler,
+ DPMSolverSinglestepScheduler,
+ EulerAncestralDiscreteScheduler,
+ EulerDiscreteScheduler,
+ HeunDiscreteScheduler,
+ KDPM2AncestralDiscreteScheduler,
+ KDPM2DiscreteScheduler,
+ LMSDiscreteScheduler,
+ PNDMScheduler,
+ StableDiffusion3Pipeline,
+ UniPCMultistepScheduler,
+)
+from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0
+from diffusers.utils import load_image
+from PIL import Image
+from tqdm.auto import trange
+
+
+
+def strtobool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ("yes", "true", "t", "y", "1"):
+ return True
+ elif v.lower() in ("no", "false", "f", "n", "0"):
+ return False
+ else:
+ raise ValueError(
+ f"Truthy value expected: got {v} but expected one of yes/no, true/false, t/f, y/n, 1/0 (case insensitive)."
+ )
+
+
+def change_scheduler(self, scheduler_type="ddim"):
+ self.orginal_scheduler_config = self.scheduler.config
+ scheduler_type = scheduler_type.lower()
+ if scheduler_type == "flow":
+        scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "pndm":
+ scheduler = PNDMScheduler.from_config(self.orginal_scheduler_config, skip_prk_steps=True)
+ elif scheduler_type == "lms":
+ scheduler = LMSDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "heun":
+ scheduler = HeunDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler":
+ scheduler = EulerDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "euler-ancestral":
+ scheduler = EulerAncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-multi":
+ scheduler = DPMSolverMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "dpm-single":
+ scheduler = DPMSolverSinglestepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2-ancestral":
+ scheduler = KDPM2AncestralDiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "kdpm2":
+ scheduler = KDPM2DiscreteScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "unipc-multi":
+ scheduler = UniPCMultistepScheduler.from_config(self.orginal_scheduler_config)
+ elif scheduler_type == "ddim":
+ scheduler = DDIMScheduler.from_config(
+ self.orginal_scheduler_config,
+ steps_offset=1,
+ clip_sample=False,
+ set_alpha_to_one=False,
+ )
+ elif scheduler_type == "ddpm":
+ scheduler = DDPMScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ elif scheduler_type == "deis-multi":
+ scheduler = DEISMultistepScheduler.from_config(
+ self.orginal_scheduler_config,
+ )
+ else:
+ raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+ return scheduler
+
+
+def parse_arguments():
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--pretrained_model_name_or_path",
+ type=str,
+ default="stabilityai/stable-diffusion-3-medium-diffusers",
+ help="Path to the `diffusers` checkpoint to convert (either a local directory or on the bos).",
+ )
+ parser.add_argument(
+ "--inference_steps",
+ type=int,
+ default=50,
+        help="The number of inference steps for the diffusion transformer.",
+ )
+ parser.add_argument(
+ "--benchmark_steps",
+ type=int,
+ default=10,
+ help="The number of performance benchmark steps.",
+ )
+ parser.add_argument(
+ "--task_name",
+ type=str,
+ default="all",
+ choices=[
+ "text2img",
+ "img2img",
+ "inpaint_legacy",
+ "all",
+ ],
+ help="The task can be one of [text2img, img2img, inpaint_legacy, all]. ",
+ )
+ parser.add_argument(
+ "--parse_prompt_type",
+ type=str,
+ default="raw",
+ choices=[
+ "raw",
+ "lpw",
+ ],
+ help="The parse_prompt_type can be one of [raw, lpw]. ",
+ )
+ parser.add_argument(
+ "--channels_last",
+ type=strtobool,
+ default=False,
+        help="Whether to use channels_last",
+ )
+    parser.add_argument("--use_fp16", type=strtobool, default=True, help="Whether to use FP16 mode")
+    parser.add_argument("--tf32", type=strtobool, default=True, help="Whether to allow TF32 matmul")
+    parser.add_argument("--compile", type=strtobool, default=False, help="Whether to use torch.compile")
+ parser.add_argument(
+ "--attention_type",
+ type=str,
+ default="sdp",
+ choices=[
+ "raw",
+ "sdp",
+ ],
+ help="attention_type.",
+ )
+ parser.add_argument("--device_id", type=int, default=0, help="The selected gpu id. -1 means use cpu")
+ parser.add_argument(
+ "--scheduler",
+ type=str,
+ default="euler-ancestral",
+ choices=[
+ "flow",
+ "pndm",
+ "lms",
+ "euler",
+ "euler-ancestral",
+ "dpm-multi",
+ "dpm-single",
+ "unipc-multi",
+ "ddim",
+ "ddpm",
+ "deis-multi",
+ "heun",
+ "kdpm2-ancestral",
+ "kdpm2",
+ ],
+ help="The scheduler type of stable diffusion.",
+ )
+ parser.add_argument("--height", type=int, default=512, help="Height of input image")
+ parser.add_argument("--width", type=int, default=512, help="Width of input image")
+ parser.add_argument("--strength", type=float, default=1.0, help="Strength for img2img / inpaint")
+ return parser.parse_args()
+
+
+def attn_processors(self):
+ processors = {}
+
+ def fn_recursive_add_processors(name: str, module, processors):
+ if hasattr(module, "set_processor"):
+ processors[f"{name}.processor"] = module.processor
+
+ for sub_name, child in module.named_children():
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+
+ return processors
+
+ for name, module in self.named_children():
+ fn_recursive_add_processors(name, module, processors)
+
+ return processors
+
+
+def set_attn_processor(self, processor):
+ count = len(attn_processors(self).keys())
+
+ if isinstance(processor, dict) and len(processor) != count:
+ raise ValueError(
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
+ f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
+ )
+
+ def fn_recursive_attn_processor(name: str, module, processor):
+ if hasattr(module, "set_processor"):
+ if not isinstance(processor, dict):
+ module.set_processor(processor)
+ else:
+ module.set_processor(processor.pop(f"{name}.processor"))
+
+ for sub_name, child in module.named_children():
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+ for name, module in self.named_children():
+ fn_recursive_attn_processor(name, module, processor)
+
+
+def main(args):
+ if args.tf32:
+ torch.backends.cuda.matmul.allow_tf32 = True
+ else:
+ torch.backends.cuda.matmul.allow_tf32 = False
+
+ seed = 1024
+ torch_dtype = torch.float16 if args.use_fp16 else torch.float32
+ pipe = StableDiffusion3Pipeline.from_pretrained(
+ args.pretrained_model_name_or_path,
+ safety_checker=None,
+ feature_extractor=None,
+ requires_safety_checker=False,
+ torch_dtype=torch_dtype,
+ )
+ scheduler = change_scheduler(pipe, args.scheduler)
+ pipe.scheduler = scheduler
+ if args.device_id >= 0:
+ pipe.to(f"cuda:{args.device_id}")
+
+ if args.attention_type == "all":
+ args.attention_type = ["raw", "sdp"]
+ else:
+ args.attention_type = [args.attention_type]
+
+ for attention_type in args.attention_type:
+ # attn_prrocessor_cls = AttnProcessor if attention_type == "raw" else AttnProcessor2_0
+ # if attention_type == "sdp":
+ # torch.nn.functional.scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention_
+ # set_attn_processor(pipe.transformer, attn_prrocessor_cls())
+ # set_attn_processor(pipe.vae, attn_prrocessor_cls())
+
+ # if args.channels_last:
+ # pipe.transformer.to(memory_format=torch.channels_last)
+
+ # if args.compile:
+ # print("Run torch compile")
+ # pipe.unet = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
+
+ width = args.width
+ height = args.height
+ pipe.set_progress_bar_config(disable=False)
+
+ folder = f"torch_attn_{attention_type}_fp16" if args.use_fp16 else f"torch_attn_{attention_type}_fp32"
+ os.makedirs(folder, exist_ok=True)
+ if args.task_name in ["text2img", "all"]:
+ init_image = load_image(
+ "https://paddlenlp.bj.bcebos.com/models/community/junnyu/develop/control_bird_canny_demo.png"
+ )
+ # text2img
+ prompt = "bird"
+ time_costs = []
+ # warmup
+ pipe(
+ prompt,
+ num_inference_steps=10,
+ height=height,
+ width=width,
+ )
+ print("==> Test text2img performance.")
+ for step in trange(args.benchmark_steps):
+ start = time.time()
+ torch.cuda.manual_seed(seed)
+ images = pipe(
+ prompt,
+ num_inference_steps=args.inference_steps,
+ height=height,
+ width=width,
+ ).images
+ latency = time.time() - start
+ time_costs += [latency]
+ # print(f"No {step:3d} time cost: {latency:2f} s")
+ print(
+ f"Attention type: {attention_type}, "
+ f"Use fp16: {'true' if args.use_fp16 else 'false'}, "
+ f"Mean iter/sec: {1 / (np.mean(time_costs) / args.inference_steps):2f} it/s, "
+ f"Mean latency: {np.mean(time_costs):2f} s, p50 latency: {np.percentile(time_costs, 50):2f} s, "
+ f"p90 latency: {np.percentile(time_costs, 90):2f} s, p95 latency: {np.percentile(time_costs, 95):2f} s."
+ )
+ images[0].save(f"{folder}/text2img.png")
+
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+ main(args)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a0c2d8d45763db9d01e9a0245c02d55c6c0925ae
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_paddle.sh
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# attention raw fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp16
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+
+# attention raw fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type raw --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention cutlass fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type cutlass --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention flash fp32
+python infer_dygraph_paddle.py --scheduler "flow" --task_name all --attention_type flash --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..020c54969a75651f919585dab0e67beaf016306e
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/scripts/benchmark_torch.sh
@@ -0,0 +1,21 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# sd3 does not support attention raw
+
+# attention sdp fp16
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 True --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
+
+# attention sdp fp32
+python infer_dygraph_torch.py --scheduler "flow" --task_name all --attention_type sdp --use_fp16 False --inference_steps 50 --height 1024 --width 1024 --benchmark_steps 10
\ No newline at end of file
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/text_to_image_generation-stable_diffusion_3.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/text_to_image_generation-stable_diffusion_3.py
new file mode 100644
index 0000000000000000000000000000000000000000..61d490d683af75b2fc0af87435f7656e9e6d9b42
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sd3/text_to_image_generation-stable_diffusion_3.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import os
+
+import paddle
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+        description="Use PaddleMIX to accelerate the Stable Diffusion 3 image generation model."
+ )
+ parser.add_argument(
+ "--benchmark",
+ type=(lambda x: str(x).lower() in ["true", "1", "yes"]),
+ default=False,
+ help="if set to True, measure inference performance",
+ )
+ parser.add_argument(
+ "--inference_optimize",
+ type=(lambda x: str(x).lower() in ["true", "1", "yes"]),
+ default=False,
+        help="If set to True, the inference optimizations (including the Triton kernels) are enabled.",
+ )
+
+ parser.add_argument("--height", type=int, default=512, help="Height of the generated image.")
+ parser.add_argument("--width", type=int, default=512, help="Width of the generated image.")
+ parser.add_argument("--num-inference-steps", type=int, default=50, help="Number of inference steps.")
+ parser.add_argument("--dtype", type=str, default="float32", help="Inference data types.")
+ parser.add_argument(
+ "--mp_size", type=int, default=1, help="This size refers to the degree of parallelism using model parallel."
+ )
+ parser.add_argument(
+ "--dp_size", type=int, default=1, help="This size refers to the degree of parallelism using data parallel."
+ )
+
+ return parser.parse_args()
+
+
+args = parse_args()
+
+if args.inference_optimize:
+ os.environ["INFERENCE_OPTIMIZE"] = "True"
+ os.environ["INFERENCE_OPTIMIZE_TRITON"] = "True"
+ os.environ["INFERENCE_MP_SIZE"] = str(args.mp_size)
+ os.environ["INFERENCE_DP_SIZE"] = str(args.dp_size)
+if args.dtype == "float32":
+ inference_dtype = paddle.float32
+elif args.dtype == "float16":
+ inference_dtype = paddle.float16
+
+
+import paddle.distributed as dist
+import paddle.distributed.fleet as fleet
+
+if args.mp_size > 1 or args.dp_size > 1:
+ strategy = fleet.DistributedStrategy()
+ model_parallel_size = args.mp_size
+ data_parallel_size = args.dp_size
+ strategy.hybrid_configs = {"dp_degree": data_parallel_size, "mp_degree": model_parallel_size, "pp_degree": 1}
+ fleet.init(is_collective=True, strategy=strategy)
+ hcg = fleet.get_hybrid_communicate_group()
+ mp_id = hcg.get_model_parallel_rank()
+ dp_id = hcg.get_data_parallel_rank()
+ rank_id = dist.get_rank()
+ mp_degree = hcg.get_model_parallel_world_size()
+ dp_degree = hcg.get_data_parallel_world_size()
+ assert mp_degree == args.mp_size
+ assert dp_degree == args.dp_size
+
+ # this is for triton kernel cache for dynamic graph
+ # os.environ["TRITON_KERNEL_CACHE_DIR"] = f"./tmp/sd3_parallel/{rank_id}"
+
+import datetime
+
+from ppdiffusers import StableDiffusion3Pipeline
+
+pipe = StableDiffusion3Pipeline.from_pretrained(
+ "stabilityai/stable-diffusion-3-medium-diffusers",
+ paddle_dtype=inference_dtype,
+)
+
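+# Convert the MM-DiT transformer into a cached static graph for inference. The
+# exp_enable_use_cutlass flag enables the Cutlass gemm_epilogue fused kernels (optional, see the
+# README), and add_norm_fuse_pass is removed from the pass list.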
+pipe.transformer = paddle.incubate.jit.inference(
+ pipe.transformer,
+ save_model_dir="./tmp/sd3",
+ enable_new_ir=True,
+ cache_static_model=True,
+ exp_enable_use_cutlass=True,
+ delete_pass_lists=["add_norm_fuse_pass"],
+)
+
+generator = paddle.Generator().manual_seed(42)
+prompt = "A cat holding a sign that says hello world"
+
+
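+# The first call triggers static-graph capture/compilation; subsequent calls reuse the cached model.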
+image = pipe(
+ prompt, num_inference_steps=args.num_inference_steps, width=args.width, height=args.height, generator=generator
+).images[0]
+
+if args.benchmark:
+ # warmup
+ for i in range(3):
+ image = pipe(
+ prompt,
+ num_inference_steps=args.num_inference_steps,
+ width=args.width,
+ height=args.height,
+ generator=generator,
+ ).images[0]
+
+ repeat_times = 10
+ sumtime = 0.0
+ for i in range(repeat_times):
+ paddle.device.synchronize()
+ starttime = datetime.datetime.now()
+ image = pipe(
+ prompt,
+ num_inference_steps=args.num_inference_steps,
+ width=args.width,
+ height=args.height,
+ generator=generator,
+ ).images[0]
+ paddle.device.synchronize()
+ endtime = datetime.datetime.now()
+ duringtime = endtime - starttime
+ duringtime = duringtime.seconds * 1000 + duringtime.microseconds / 1000.0
+ sumtime += duringtime
+ print("SD3 end to end time : ", duringtime, "ms")
+
+ print("SD3 ave end to end time : ", sumtime / repeat_times, "ms")
+
+ cuda_mem_after_used = paddle.device.cuda.max_memory_allocated() / (1024**3)
+ print(f"Max used CUDA memory : {cuda_mem_after_used:.3f} GiB")
+
+
+rank_id = dist.get_rank()
+if rank_id == 0:
+ image.save("text_to_image_generation-stable_diffusion_3-result.png")
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sdxl/README.md b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sdxl/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..991712e0582c5dad1598e450eaef1c0b09873be1
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/deploy/sdxl/README.md
@@ -0,0 +1,44 @@
+# High-Performance Deployment of the Stable Diffusion XL Model with PaddleInfer
+
+ **Table of Contents**
+ * [Environment Requirements](#environment-requirements)
+ * [Static Graph Model Export](#static-graph-model-export)
+ * [Text-to-Image Generation](#text-to-image-generation)
+ * [Image-to-Image Text-Guided Generation](#image-to-image-text-guided-generation)
+ * [Text-Guided Image Inpainting](#text-guided-image-inpainting)
+
+⚡️[PaddleInfer] is an all-scenario, easy-to-use, flexible and highly efficient AI inference deployment tool that lets developers target multiple hardware platforms and inference-engine backends, switching between them with a single line of code. This example shows how to use PaddleInfer to deploy a Stable Diffusion XL model trained with PPDiffusers for high-performance inference across multiple hardware platforms and inference-engine backends.
+
+
+
+## Environment Requirements
+
+This example uses PaddleInfer; run the following command to install the dependencies.
+
+```shell
+python -m pip install paddlepaddle-gpu==2.6.0.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
+```
+
+
+
+## Static Graph Model Export
+```
+export USE_PPXFORMERS=False
+python export_model.py --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --output_path static_model/stable-diffusion-xl-base-1.0
+```
+The exported model is placed under the static_model/stable-diffusion-xl-base-1.0 directory.
+
+### Text-to-Image Generation
+```
+python infer.py --model_dir static_model/stable-diffusion-xl-base-1.0 --scheduler "preconfig-euler-ancestral" --backend paddle --device gpu --task_name text2img
+```
+
+### Image-to-Image Text-Guided Generation
+```
+python infer.py --model_dir static_model/stable-diffusion-xl-base-1.0 --scheduler "preconfig-euler-ancestral" --backend paddle --device gpu --task_name img2img
+```
+
+### Text-Guided Image Inpainting
+```
+python infer.py --model_dir static_model/stable-diffusion-xl-base-1.0 --scheduler "preconfig-euler-ancestral" --backend paddle --device gpu --task_name inpaint
+```
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..afe6428281af43f57efb59b68bd1f918bf3bbd4c
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .value_guided_sampling import ValueGuidedRLPipeline
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/value_guided_sampling.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/value_guided_sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..7024c5c94358fb40b62f653b1d7891dff12cd762
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/experimental/rl/value_guided_sampling.py
@@ -0,0 +1,153 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+
+from ...models.unet_1d import UNet1DModel
+from ...pipelines import DiffusionPipeline
+from ...utils.dummy_paddle_objects import DDPMScheduler
+from ...utils.paddle_utils import randn_tensor
+
+
+class ValueGuidedRLPipeline(DiffusionPipeline):
+ r"""
+ Pipeline for value-guided sampling from a diffusion model trained to predict sequences of states.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
+ implemented for all pipelines (downloading, saving, running on a particular device, etc.).
+
+ Parameters:
+ value_function ([`UNet1DModel`]):
+            A specialized UNet for fine-tuning trajectories based on reward.
+ unet ([`UNet1DModel`]):
+ UNet architecture to denoise the encoded trajectories.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded trajectories. Default for this
+ application is [`DDPMScheduler`].
+ env ():
+ An environment following the OpenAI gym API to act in. For now only Hopper has pretrained models.
+ """
+
+ def __init__(
+ self,
+ value_function: UNet1DModel,
+ unet: UNet1DModel,
+ scheduler: DDPMScheduler,
+ env,
+ ):
+ super().__init__()
+ self.value_function = value_function
+ self.unet = unet
+ self.scheduler = scheduler
+ self.env = env
+ self.data = env.get_dataset()
+ self.means = {}
+ for key in self.data.keys():
+ try:
+ self.means[key] = self.data[key].mean()
+ except Exception:
+ pass
+ self.stds = {}
+ for key in self.data.keys():
+ try:
+ self.stds[key] = self.data[key].std()
+ except Exception:
+ pass
+ self.state_dim = env.observation_space.shape[0]
+ self.action_dim = env.action_space.shape[0]
+
+ def normalize(self, x_in, key):
+ return (x_in - self.means[key]) / self.stds[key]
+
+ def de_normalize(self, x_in, key):
+ return x_in * self.stds[key] + self.means[key]
+
+ def to_paddle(self, x_in):
+ if isinstance(x_in, dict):
+ return {k: self.to_paddle(v) for k, v in x_in.items()}
+ elif paddle.is_tensor(x_in):
+ return x_in
+ return paddle.to_tensor(x_in)
+
+ def reset_x0(self, x_in, cond, act_dim):
+ for key, val in cond.items():
+ x_in[:, key, act_dim:] = val.clone()
+ return x_in
+
+ def run_diffusion(self, x, conditions, n_guide_steps, scale):
+ batch_size = x.shape[0]
+ y = None
+ for i in self.progress_bar(self.scheduler.timesteps):
+ # create batch of timesteps to pass into model
+ timesteps = paddle.full((batch_size,), i, dtype=paddle.int64)
+ for _ in range(n_guide_steps):
+ with paddle.set_grad_enabled(True):
+ x.stop_gradient = False
+
+ # permute to match dimension for pre-trained models
+ y = self.value_function(x.transpose([0, 2, 1]), timesteps).sample
+ grad = paddle.autograd.grad([y.sum()], [x])[0]
+
+ posterior_variance = self.scheduler._get_variance(i)
+ model_std = paddle.exp(0.5 * posterior_variance)
+ grad = model_std * grad
+
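+                    # zero out the guidance gradient for the final timesteps (t < 2)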
+ grad[timesteps < 2] = 0
+ x = x.detach()
+ x = x + scale * grad
+ x = self.reset_x0(x, conditions, self.action_dim)
+
+ prev_x = self.unet(x.transpose([0, 2, 1]), timesteps).sample.transpose([0, 2, 1])
+
+ # TODO: verify deprecation of this kwarg
+ x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"]
+
+ # apply conditions to the trajectory (set the initial state)
+ x = self.reset_x0(x, conditions, self.action_dim)
+ x = self.to_paddle(x)
+ return x, y
+
+ def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1):
+ # normalize the observations and create batch dimension
+ obs = self.normalize(obs, "observations")
+ obs = obs[None].repeat(batch_size, axis=0)
+
+ conditions = {0: self.to_paddle(obs)}
+ shape = (batch_size, planning_horizon, self.state_dim + self.action_dim)
+
+ # generate initial noise and apply our conditions (to make the trajectories start at current state)
+ x1 = randn_tensor(shape, dtype=self.unet.dtype)
+ x = self.reset_x0(x1, conditions, self.action_dim)
+ x = self.to_paddle(x)
+
+ # run the diffusion process
+ x, y = self.run_diffusion(x, conditions, n_guide_steps, scale)
+
+ # sort output trajectories by value
+ sorted_idx = paddle.argsort(y, 0, descending=True).squeeze()
+ sorted_values = x[sorted_idx]
+ actions = sorted_values[:, :, : self.action_dim]
+ actions = actions.detach().cpu().numpy()
+ denorm_actions = self.de_normalize(actions, key="actions")
+
+ # select the action with the highest value
+ if y is not None:
+ selected_index = 0
+ else:
+ # if we didn't run value guiding, select a random action
+ selected_index = np.random.randint(0, batch_size)
+
+ denorm_actions = denorm_actions[selected_index, 0]
+ return denorm_actions
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/pose_guider.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/pose_guider.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bd36a6caa677ae6910f01acbb87777d8cfc1430
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/pose_guider.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Tuple
+
+import paddle
+
+from ppdiffusers.models.animate_anyone.motion_module import zero_module
+from ppdiffusers.models.animate_anyone.resnet import InflatedConv3d
+from ppdiffusers.models.modeling_utils import ContextManagers, ModelMixin
+
+
+class PoseGuider(ModelMixin):
+ def __init__(
+ self,
+ conditioning_embedding_channels: int,
+ conditioning_channels: int = 3,
+ block_out_channels: Tuple[int] = (16, 32, 64, 128),
+ weight_dtype=None,
+ ):
+ super().__init__()
+
+ init_contexts = []
+ if weight_dtype is not None:
+ init_contexts.append(paddle.dtype_guard(weight_dtype))
+
+ with ContextManagers(init_contexts):
+ self.conv_in = InflatedConv3d(conditioning_channels, block_out_channels[0], kernel_size=3, padding=1)
+
+ self.blocks = paddle.nn.LayerList(sublayers=[])
+
+ for i in range(len(block_out_channels) - 1):
+ channel_in = block_out_channels[i]
+ channel_out = block_out_channels[i + 1]
+ self.blocks.append(InflatedConv3d(channel_in, channel_in, kernel_size=3, padding=1))
+ self.blocks.append(InflatedConv3d(channel_in, channel_out, kernel_size=3, padding=1, stride=2))
+
+ self.conv_out = zero_module(
+ InflatedConv3d(
+ block_out_channels[-1],
+ conditioning_embedding_channels,
+ kernel_size=3,
+ padding=1,
+ )
+ )
+
+ def forward(self, conditioning):
+ embedding = self.conv_in(conditioning)
+ embedding = paddle.nn.functional.silu(x=embedding)
+
+ for block in self.blocks:
+ embedding = block(embedding)
+ embedding = paddle.nn.functional.silu(x=embedding)
+
+ embedding = self.conv_out(embedding)
+
+ return embedding
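+
+
+# Shape sketch (hedged): with the default block_out_channels of (16, 32, 64, 128), the
+# three stride-2 InflatedConv3d layers downsample a pose sequence of shape
+# (batch, conditioning_channels, frames, H, W) to a guidance embedding of shape
+# (batch, conditioning_embedding_channels, frames, H / 8, W / 8), which is intended to
+# match the latent resolution consumed by the denoising UNet.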
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/resnet.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aff93940aef9e99752f86fada1ce7cf8f96d69d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/resnet.py
@@ -0,0 +1,235 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Adapted from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/resnet.py
+
+import paddle
+from einops import rearrange
+
+
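+# The "inflated" layers below reuse 2D image operators on 5D video tensors by folding the
+# frame axis into the batch axis, applying the 2D op, and unfolding afterwards: e.g. an
+# activation of shape (2, 320, 16, 64, 64) is reshaped to (32, 320, 64, 64), convolved or
+# normalized in 2D, then reshaped back to (2, 320, 16, 64, 64).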
+class InflatedConv3d(paddle.nn.Conv2D):
+ def forward(self, x):
+ video_length = x.shape[2]
+ x = rearrange(x, "b c f h w -> (b f) c h w")
+ x = super().forward(x)
+
+ x = rearrange(x, "(b f) c h w -> b c f h w", f=video_length)
+
+ return x
+
+
+class InflatedGroupNorm(paddle.nn.GroupNorm):
+ def forward(self, x):
+ video_length = x.shape[2]
+
+ x = rearrange(x, "b c f h w -> (b f) c h w")
+ x = super().forward(x)
+ x = rearrange(x, "(b f) c h w -> b c f h w", f=video_length)
+
+ return x
+
+
+class Upsample3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ channels,
+ use_conv=False,
+ use_conv_transpose=False,
+ out_channels=None,
+ name="conv",
+ ):
+ super().__init__()
+ self.channels = channels
+ self.out_channels = out_channels or channels
+ self.use_conv = use_conv
+ self.use_conv_transpose = use_conv_transpose
+ self.name = name
+
+ if use_conv_transpose:
+ raise NotImplementedError
+ elif use_conv:
+ self.conv = InflatedConv3d(self.channels, self.out_channels, 3, padding=1)
+
+ def forward(self, hidden_states, output_size=None):
+ assert hidden_states.shape[1] == self.channels
+
+ if self.use_conv_transpose:
+ raise NotImplementedError
+
+        # Cast to float32 as the 'upsample_nearest2d_out_frame' op does not support bfloat16
+ dtype = hidden_states.dtype
+ if dtype == "bfloat16":
+ hidden_states = hidden_states.to("float32")
+
+        # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984
+ if hidden_states.shape[0] >= 64:
+ hidden_states = hidden_states.contiguous()
+
+ if output_size is None:
+ hidden_states = paddle.nn.functional.interpolate(
+ x=hidden_states, scale_factor=[1.0, 2.0, 2.0], mode="nearest", data_format="NCDHW"
+ )
+ else:
+ hidden_states = paddle.nn.functional.interpolate(
+ x=hidden_states, size=output_size, mode="nearest", data_format="NCDHW"
+ )
+
+ # If the input is bfloat16, we cast back to bfloat16
+ if dtype == "bfloat16":
+ hidden_states = hidden_states.to(dtype)
+
+ hidden_states = self.conv(hidden_states)
+
+ return hidden_states
+
+
+class Downsample3D(paddle.nn.Layer):
+ def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"):
+ super().__init__()
+ self.channels = channels
+ self.out_channels = out_channels or channels
+ self.use_conv = use_conv
+ self.padding = padding
+ stride = 2
+ self.name = name
+
+ if use_conv:
+ self.conv = InflatedConv3d(self.channels, self.out_channels, 3, stride=stride, padding=padding)
+ else:
+ raise NotImplementedError
+
+ def forward(self, hidden_states):
+ assert hidden_states.shape[1] == self.channels
+ if self.use_conv and self.padding == 0:
+ raise NotImplementedError
+
+ assert hidden_states.shape[1] == self.channels
+ hidden_states = self.conv(hidden_states)
+
+ return hidden_states
+
+
+class ResnetBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ *,
+ in_channels,
+ out_channels=None,
+ conv_shortcut=False,
+ dropout=0.0,
+ temb_channels=512,
+ groups=32,
+ groups_out=None,
+ pre_norm=True,
+ eps=1e-6,
+ non_linearity="swish",
+ time_embedding_norm="default",
+ output_scale_factor=1.0,
+ use_in_shortcut=None,
+ use_inflated_groupnorm=None,
+ ):
+ super().__init__()
+ self.pre_norm = pre_norm
+ self.pre_norm = True
+ self.in_channels = in_channels
+ out_channels = in_channels if out_channels is None else out_channels
+ self.out_channels = out_channels
+ self.use_conv_shortcut = conv_shortcut
+ self.time_embedding_norm = time_embedding_norm
+ self.output_scale_factor = output_scale_factor
+
+ if groups_out is None:
+ groups_out = groups
+
+ assert use_inflated_groupnorm is not None
+ if use_inflated_groupnorm:
+ self.norm1 = InflatedGroupNorm(num_groups=groups, num_channels=in_channels, epsilon=eps)
+ else:
+ self.norm1 = paddle.nn.GroupNorm(
+ num_groups=groups, num_channels=in_channels, epsilon=eps, weight_attr=True, bias_attr=True
+ )
+
+ self.conv1 = InflatedConv3d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+
+ if temb_channels is not None:
+ if self.time_embedding_norm == "default":
+ time_emb_proj_out_channels = out_channels
+ elif self.time_embedding_norm == "scale_shift":
+ time_emb_proj_out_channels = out_channels * 2
+ else:
+ raise ValueError(f"unknown time_embedding_norm : {self.time_embedding_norm} ")
+
+ self.time_emb_proj = paddle.nn.Linear(in_features=temb_channels, out_features=time_emb_proj_out_channels)
+ else:
+ self.time_emb_proj = None
+
+ if use_inflated_groupnorm:
+ self.norm2 = InflatedGroupNorm(num_groups=groups_out, num_channels=out_channels, epsilon=eps)
+ else:
+ self.norm2 = paddle.nn.GroupNorm(
+ num_groups=groups_out, num_channels=out_channels, epsilon=eps, weight_attr=True, bias_attr=True
+ )
+ self.dropout = paddle.nn.Dropout(p=dropout)
+ self.conv2 = InflatedConv3d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+
+ if non_linearity == "swish":
+ self.nonlinearity = lambda x: paddle.nn.functional.silu(x=x)
+ elif non_linearity == "mish":
+ self.nonlinearity = Mish()
+ elif non_linearity == "silu":
+ self.nonlinearity = paddle.nn.Silu()
+
+ self.use_in_shortcut = self.in_channels != self.out_channels if use_in_shortcut is None else use_in_shortcut
+
+ self.conv_shortcut = None
+ if self.use_in_shortcut:
+ self.conv_shortcut = InflatedConv3d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+
+ def forward(self, input_tensor, temb):
+ hidden_states = input_tensor
+
+ hidden_states = self.norm1(hidden_states)
+ hidden_states = self.nonlinearity(hidden_states)
+
+ hidden_states = self.conv1(hidden_states)
+
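+        # Time-embedding injection: with time_embedding_norm == "default" the projected
+        # embedding is added to the features before norm2; with "scale_shift" it is split
+        # into (scale, shift) and applied after norm2 as a FiLM-style modulation.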
+ if temb is not None:
+ temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None, None]
+
+ if temb is not None and self.time_embedding_norm == "default":
+ hidden_states = hidden_states + temb
+
+ hidden_states = self.norm2(hidden_states)
+
+ if temb is not None and self.time_embedding_norm == "scale_shift":
+ scale, shift = paddle.chunk(x=temb, chunks=2, axis=1)
+ hidden_states = hidden_states * (1 + scale) + shift
+
+ hidden_states = self.nonlinearity(hidden_states)
+
+ hidden_states = self.dropout(hidden_states)
+ hidden_states = self.conv2(hidden_states)
+
+ if self.conv_shortcut is not None:
+ input_tensor = self.conv_shortcut(input_tensor)
+
+ output_tensor = (input_tensor + hidden_states) / self.output_scale_factor
+
+ return output_tensor
+
+
+class Mish(paddle.nn.Layer):
+ def forward(self, hidden_states):
+ return hidden_states * paddle.nn.functional.tanh(x=paddle.nn.functional.softplus(x=hidden_states))
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/transformer_3d.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/transformer_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f294f9afec90a6699bda8b9f7dc994cbad46654
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/transformer_3d.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Optional
+
+import paddle
+from einops import rearrange, repeat
+
+from ppdiffusers.configuration_utils import ConfigMixin, register_to_config
+from ppdiffusers.models import ModelMixin
+from ppdiffusers.utils import BaseOutput
+
+from .attention import TemporalBasicTransformerBlock
+
+
+@dataclass
+class Transformer3DModelOutput(BaseOutput):
+ sample: paddle.Tensor
+
+
+class Transformer3DModel(ModelMixin, ConfigMixin):
+ _supports_gradient_checkpointing = True
+
+ @register_to_config
+ def __init__(
+ self,
+ num_attention_heads: int = 16,
+ attention_head_dim: int = 88,
+ in_channels: Optional[int] = None,
+ num_layers: int = 1,
+ dropout: float = 0.0,
+ norm_num_groups: int = 32,
+ cross_attention_dim: Optional[int] = None,
+ attention_bias: bool = False,
+ activation_fn: str = "geglu",
+ num_embeds_ada_norm: Optional[int] = None,
+ use_linear_projection: bool = False,
+ only_cross_attention: bool = False,
+ upcast_attention: bool = False,
+ unet_use_cross_frame_attention=None,
+ unet_use_temporal_attention=None,
+ ):
+ super().__init__()
+ self.use_linear_projection = use_linear_projection
+ self.num_attention_heads = num_attention_heads
+ self.attention_head_dim = attention_head_dim
+ inner_dim = num_attention_heads * attention_head_dim
+
+ # Define input layers
+ self.in_channels = in_channels
+
+ self.norm = paddle.nn.GroupNorm(
+ num_groups=norm_num_groups, num_channels=in_channels, epsilon=1e-06, weight_attr=True, bias_attr=True
+ )
+ if use_linear_projection:
+ self.proj_in = paddle.nn.Linear(in_features=in_channels, out_features=inner_dim)
+ else:
+ self.proj_in = paddle.nn.Conv2D(
+ in_channels=in_channels, out_channels=inner_dim, kernel_size=1, stride=1, padding=0
+ )
+ self.transformer_blocks = paddle.nn.LayerList(
+ sublayers=[
+ TemporalBasicTransformerBlock(
+ inner_dim,
+ num_attention_heads,
+ attention_head_dim,
+ dropout=dropout,
+ cross_attention_dim=cross_attention_dim,
+ activation_fn=activation_fn,
+ num_embeds_ada_norm=num_embeds_ada_norm,
+ attention_bias=attention_bias,
+ only_cross_attention=only_cross_attention,
+ upcast_attention=upcast_attention,
+ unet_use_cross_frame_attention=unet_use_cross_frame_attention,
+ unet_use_temporal_attention=unet_use_temporal_attention,
+ )
+ for d in range(num_layers)
+ ]
+ )
+ if use_linear_projection:
+ self.proj_out = paddle.nn.Linear(in_features=in_channels, out_features=inner_dim)
+ else:
+ self.proj_out = paddle.nn.Conv2D(
+ in_channels=inner_dim, out_channels=in_channels, kernel_size=1, stride=1, padding=0
+ )
+
+ self.gradient_checkpointing = False
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if hasattr(module, "gradient_checkpointing"):
+ module.gradient_checkpointing = value
+
+ def forward(
+ self,
+ hidden_states,
+ encoder_hidden_states=None,
+ timestep=None,
+ return_dict: bool = True,
+ ):
+ # Input
+ assert hidden_states.dim() == 5, f"Expected hidden_states to have ndim=5, but got ndim={hidden_states.dim()}."
+ video_length = hidden_states.shape[2]
+ hidden_states = rearrange(hidden_states, "b c f h w -> (b f) c h w")
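+        # Frames are folded into the batch axis above, so the text conditioning is broadcast
+        # to one copy per frame: every (b f) pseudo-batch element attends to the same prompt.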
+ if encoder_hidden_states.shape[0] != hidden_states.shape[0]:
+ encoder_hidden_states = repeat(encoder_hidden_states, "b n c -> (b f) n c", f=video_length)
+
+ batch, channel, height, weight = hidden_states.shape
+ residual = hidden_states
+
+ hidden_states = self.norm(hidden_states)
+ if not self.use_linear_projection:
+ hidden_states = self.proj_in(hidden_states)
+ inner_dim = hidden_states.shape[1]
+ hidden_states = hidden_states.transpose(perm=[0, 2, 3, 1]).reshape((batch, height * weight, inner_dim))
+ else:
+ inner_dim = hidden_states.shape[1]
+ hidden_states = hidden_states.transpose(perm=[0, 2, 3, 1]).reshape((batch, height * weight, inner_dim))
+ hidden_states = self.proj_in(hidden_states)
+
+ # Blocks
+ for i, block in enumerate(self.transformer_blocks):
+ hidden_states = block(
+ hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ timestep=timestep,
+ video_length=video_length,
+ )
+
+ # Output
+ if not self.use_linear_projection:
+ hidden_states = hidden_states.reshape((batch, height, weight, inner_dim)).transpose(perm=[0, 3, 1, 2])
+ hidden_states = self.proj_out(hidden_states)
+ else:
+ hidden_states = self.proj_out(hidden_states)
+ hidden_states = hidden_states.reshape((batch, height, weight, inner_dim)).transpose(perm=[0, 3, 1, 2])
+
+ output = hidden_states + residual
+
+ output = rearrange(output, "(b f) c h w -> b c f h w", f=video_length)
+ if not return_dict:
+ return (output,)
+
+ return Transformer3DModelOutput(sample=output)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/unet_3d.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/unet_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5e5645e7abe55191e5dfe004a3446270aa22df1
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/animate_anyone/unet_3d.py
@@ -0,0 +1,615 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Adapted from https://github.com/guoyww/AnimateDiff/blob/main/animatediff/models/unet_blocks.py
+
+from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union
+
+import paddle
+
+from ppdiffusers.configuration_utils import ConfigMixin, register_to_config
+from ppdiffusers.models.attention_processor import AttentionProcessor
+from ppdiffusers.models.embeddings import TimestepEmbedding, Timesteps
+from ppdiffusers.models.modeling_utils import ContextManagers, ModelMixin
+from ppdiffusers.utils import BaseOutput, logging
+
+from .resnet import InflatedConv3d, InflatedGroupNorm
+from .unet_3d_blocks import UNetMidBlock3DCrossAttn, get_down_block, get_up_block
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+
+@dataclass
+class UNet3DConditionOutput(BaseOutput):
+ sample: paddle.Tensor
+
+
+class UNet3DConditionModel(ModelMixin, ConfigMixin):
+ _supports_gradient_checkpointing = True
+
+ @register_to_config
+ def __init__(
+ self,
+ sample_size: Optional[int] = None,
+ in_channels: int = 4,
+ out_channels: int = 4,
+ center_input_sample: bool = False,
+ flip_sin_to_cos: bool = True,
+ freq_shift: int = 0,
+ down_block_types: Tuple[str] = (
+ "CrossAttnDownBlock3D",
+ "CrossAttnDownBlock3D",
+ "CrossAttnDownBlock3D",
+ "DownBlock3D",
+ ),
+ mid_block_type: str = "UNetMidBlock3DCrossAttn",
+ up_block_types: Tuple[str] = (
+ "UpBlock3D",
+ "CrossAttnUpBlock3D",
+ "CrossAttnUpBlock3D",
+ "CrossAttnUpBlock3D",
+ ),
+ only_cross_attention: Union[bool, Tuple[bool]] = False,
+ block_out_channels: Tuple[int] = (320, 640, 1280, 1280),
+ layers_per_block: int = 2,
+ downsample_padding: int = 1,
+ mid_block_scale_factor: float = 1,
+ act_fn: str = "silu",
+ norm_num_groups: int = 32,
+ norm_eps: float = 1e-5,
+ cross_attention_dim: int = 1280,
+ attention_head_dim: Union[int, Tuple[int]] = 8,
+ dual_cross_attention: bool = False,
+ use_linear_projection: bool = False,
+ class_embed_type: Optional[str] = None,
+ num_class_embeds: Optional[int] = None,
+ upcast_attention: bool = False,
+ resnet_time_scale_shift: str = "default",
+ use_inflated_groupnorm=False,
+ # Additional
+ use_motion_module=False,
+ motion_module_resolutions=(1, 2, 4, 8),
+ motion_module_mid_block=False,
+ motion_module_decoder_only=False,
+ motion_module_type=None,
+ motion_module_kwargs={},
+ unet_use_cross_frame_attention=None,
+ unet_use_temporal_attention=None,
+ ):
+ super().__init__()
+
+ self.sample_size = sample_size
+ time_embed_dim = block_out_channels[0] * 4
+
+ # input
+ self.conv_in = InflatedConv3d(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1))
+
+ # time
+ self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift)
+ timestep_input_dim = block_out_channels[0]
+
+ self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim)
+
+ # class embedding
+ if class_embed_type is None and num_class_embeds is not None:
+ self.class_embedding = paddle.nn.Embedding(num_embeddings=num_class_embeds, embedding_dim=time_embed_dim)
+ elif class_embed_type == "timestep":
+ self.class_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim)
+ elif class_embed_type == "identity":
+ self.class_embedding = paddle.nn.Identity(time_embed_dim, time_embed_dim)
+ else:
+ self.class_embedding = None
+
+ self.down_blocks = paddle.nn.LayerList(sublayers=[])
+ self.mid_block = None
+ self.up_blocks = paddle.nn.LayerList(sublayers=[])
+
+ if isinstance(only_cross_attention, bool):
+ only_cross_attention = [only_cross_attention] * len(down_block_types)
+
+ if isinstance(attention_head_dim, int):
+ attention_head_dim = (attention_head_dim,) * len(down_block_types)
+
+ # down
+ output_channel = block_out_channels[0]
+ for i, down_block_type in enumerate(down_block_types):
+ res = 2**i
+ input_channel = output_channel
+ output_channel = block_out_channels[i]
+ is_final_block = i == len(block_out_channels) - 1
+
+ down_block = get_down_block(
+ down_block_type,
+ num_layers=layers_per_block,
+ in_channels=input_channel,
+ out_channels=output_channel,
+ temb_channels=time_embed_dim,
+ add_downsample=not is_final_block,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ resnet_groups=norm_num_groups,
+ cross_attention_dim=cross_attention_dim,
+ attn_num_head_channels=attention_head_dim[i],
+ downsample_padding=downsample_padding,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention[i],
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ unet_use_cross_frame_attention=unet_use_cross_frame_attention,
+ unet_use_temporal_attention=unet_use_temporal_attention,
+ use_inflated_groupnorm=use_inflated_groupnorm,
+ use_motion_module=use_motion_module
+ and (res in motion_module_resolutions)
+ and (not motion_module_decoder_only),
+ motion_module_type=motion_module_type,
+ motion_module_kwargs=motion_module_kwargs,
+ )
+ self.down_blocks.append(down_block)
+
+ # mid
+ if mid_block_type == "UNetMidBlock3DCrossAttn":
+ self.mid_block = UNetMidBlock3DCrossAttn(
+ in_channels=block_out_channels[-1],
+ temb_channels=time_embed_dim,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ output_scale_factor=mid_block_scale_factor,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ cross_attention_dim=cross_attention_dim,
+ attn_num_head_channels=attention_head_dim[-1],
+ resnet_groups=norm_num_groups,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ upcast_attention=upcast_attention,
+ unet_use_cross_frame_attention=unet_use_cross_frame_attention,
+ unet_use_temporal_attention=unet_use_temporal_attention,
+ use_inflated_groupnorm=use_inflated_groupnorm,
+ use_motion_module=use_motion_module and motion_module_mid_block,
+ motion_module_type=motion_module_type,
+ motion_module_kwargs=motion_module_kwargs,
+ )
+ else:
+ raise ValueError(f"unknown mid_block_type : {mid_block_type}")
+
+ # count how many layers upsample the videos
+ self.num_upsamplers = 0
+
+ # up
+ reversed_block_out_channels = list(reversed(block_out_channels))
+ reversed_attention_head_dim = list(reversed(attention_head_dim))
+ only_cross_attention = list(reversed(only_cross_attention))
+ output_channel = reversed_block_out_channels[0]
+ for i, up_block_type in enumerate(up_block_types):
+ res = 2 ** (3 - i)
+ is_final_block = i == len(block_out_channels) - 1
+
+ prev_output_channel = output_channel
+ output_channel = reversed_block_out_channels[i]
+ input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)]
+
+ # add upsample block for all BUT final layer
+ if not is_final_block:
+ add_upsample = True
+ self.num_upsamplers += 1
+ else:
+ add_upsample = False
+
+ up_block = get_up_block(
+ up_block_type,
+ num_layers=layers_per_block + 1,
+ in_channels=input_channel,
+ out_channels=output_channel,
+ prev_output_channel=prev_output_channel,
+ temb_channels=time_embed_dim,
+ add_upsample=add_upsample,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ resnet_groups=norm_num_groups,
+ cross_attention_dim=cross_attention_dim,
+ attn_num_head_channels=reversed_attention_head_dim[i],
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention[i],
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ unet_use_cross_frame_attention=unet_use_cross_frame_attention,
+ unet_use_temporal_attention=unet_use_temporal_attention,
+ use_inflated_groupnorm=use_inflated_groupnorm,
+ use_motion_module=use_motion_module and (res in motion_module_resolutions),
+ motion_module_type=motion_module_type,
+ motion_module_kwargs=motion_module_kwargs,
+ )
+ self.up_blocks.append(up_block)
+ prev_output_channel = output_channel
+
+ # out
+ if use_inflated_groupnorm:
+ self.conv_norm_out = InflatedGroupNorm(
+ num_channels=block_out_channels[0],
+ num_groups=norm_num_groups,
+ epsilon=norm_eps,
+ )
+ else:
+ self.conv_norm_out = paddle.nn.GroupNorm(
+ num_channels=block_out_channels[0], num_groups=norm_num_groups, epsilon=norm_eps
+ )
+ self.conv_act = paddle.nn.Silu()
+ self.conv_out = InflatedConv3d(block_out_channels[0], out_channels, kernel_size=3, padding=1)
+
+ @property
+ # Copied from ppdiffusers.models.unet_2d_condition.UNet2DConditionModel.attn_processors
+ def attn_processors(self) -> Dict[str, AttentionProcessor]:
+ r"""
+ Returns:
+            `dict` of attention processors: A dictionary containing all attention processors used in the model,
+            indexed by their weight names.
+ """
+ # set recursively
+ processors = {}
+
+ def fn_recursive_add_processors(
+ name: str,
+ module: paddle.nn.Layer,
+ processors: Dict[str, AttentionProcessor],
+ ):
+ if hasattr(module, "set_processor"):
+ processors[f"{name}.processor"] = module.processor
+
+ for sub_name, child in module.named_children():
+ if "temporal_transformer" not in sub_name:
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+
+ return processors
+
+ for name, module in self.named_children():
+ if "temporal_transformer" not in name:
+ fn_recursive_add_processors(name, module, processors)
+
+ return processors
+
+ def set_attention_slice(self, slice_size):
+ r"""
+ Enable sliced attention computation.
+
+ When this option is enabled, the attention module will split the input tensor in slices, to compute attention
+ in several steps. This is useful to save some memory in exchange for a small speed decrease.
+
+ Args:
+ slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
+ When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If
+                `"max"`, the maximum amount of memory will be saved by running only one slice at a time. If a number is
+ provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
+ must be a multiple of `slice_size`.
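+
+        Example (hedged sketch; `unet` names an already constructed `UNet3DConditionModel`):
+
+            unet.set_attention_slice("auto")  # split each attention head dimension in half
+            unet.set_attention_slice("max")   # one slice at a time, lowest peak memory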
+ """
+ sliceable_head_dims = []
+
+ def fn_recursive_retrieve_slicable_dims(module: paddle.nn.Layer):
+ if hasattr(module, "set_attention_slice"):
+ sliceable_head_dims.append(module.sliceable_head_dim)
+
+ for child in module.children():
+ fn_recursive_retrieve_slicable_dims(child)
+
+ # retrieve number of attention layers
+ for module in self.children():
+ fn_recursive_retrieve_slicable_dims(module)
+
+ num_slicable_layers = len(sliceable_head_dims)
+
+ if slice_size == "auto":
+ # half the attention head size is usually a good trade-off between
+ # speed and memory
+ slice_size = [dim // 2 for dim in sliceable_head_dims]
+ elif slice_size == "max":
+ # make smallest slice possible
+ slice_size = num_slicable_layers * [1]
+
+ slice_size = num_slicable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
+
+ if len(slice_size) != len(sliceable_head_dims):
+ raise ValueError(
+ f"You have provided {len(slice_size)}, but {self.config} has {len(sliceable_head_dims)} different"
+ f" attention layers. Make sure to match `len(slice_size)` to be {len(sliceable_head_dims)}."
+ )
+
+ for i in range(len(slice_size)):
+ size = slice_size[i]
+ dim = sliceable_head_dims[i]
+ if size is not None and size > dim:
+ raise ValueError(f"size {size} has to be smaller or equal to {dim}.")
+
+ # Recursively walk through all the children.
+ # Any children which exposes the set_attention_slice method
+ # gets the message
+ def fn_recursive_set_attention_slice(module: paddle.nn.Layer, slice_size: List[int]):
+ if hasattr(module, "set_attention_slice"):
+ module.set_attention_slice(slice_size.pop())
+
+ for child in module.children():
+ fn_recursive_set_attention_slice(child, slice_size)
+
+ reversed_slice_size = list(reversed(slice_size))
+ for module in self.children():
+ fn_recursive_set_attention_slice(module, reversed_slice_size)
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if hasattr(module, "gradient_checkpointing"):
+ module.gradient_checkpointing = value
+
+ # Copied from ppdiffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
+ def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
+ r"""
+ Sets the attention processor to use to compute attention.
+
+ Parameters:
+ processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
+ The instantiated processor class or a dictionary of processor classes that will be set as the processor
+ for **all** `Attention` layers.
+
+ If `processor` is a dict, the key needs to define the path to the corresponding cross attention
+ processor. This is strongly recommended when setting trainable attention processors.
+
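+        Example (hedged sketch, assuming `AttnProcessor` is exported by
+        `ppdiffusers.models.attention_processor` as in upstream diffusers):
+
+            from ppdiffusers.models.attention_processor import AttnProcessor
+            unet.set_attn_processor(AttnProcessor())
+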
+ """
+ count = len(self.attn_processors.keys())
+
+ if isinstance(processor, dict) and len(processor) != count:
+ raise ValueError(
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
+ f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
+ )
+
+ def fn_recursive_attn_processor(name: str, module: paddle.nn.Layer, processor):
+ if hasattr(module, "set_processor"):
+ if not isinstance(processor, dict):
+ module.set_processor(processor)
+ else:
+ module.set_processor(processor.pop(f"{name}.processor"))
+
+ for sub_name, child in module.named_children():
+ if "temporal_transformer" not in sub_name:
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+ for name, module in self.named_children():
+ if "temporal_transformer" not in name:
+ fn_recursive_attn_processor(name, module, processor)
+
+ def forward(
+ self,
+ sample: paddle.Tensor,
+ timestep: Union[paddle.Tensor, float, int],
+ encoder_hidden_states: paddle.Tensor,
+ class_labels: Optional[paddle.Tensor] = None,
+ pose_cond_fea: Optional[paddle.Tensor] = None,
+ attention_mask: Optional[paddle.Tensor] = None,
+ down_block_additional_residuals: Optional[Tuple[paddle.Tensor]] = None,
+ mid_block_additional_residual: Optional[paddle.Tensor] = None,
+ return_dict: bool = True,
+ ) -> Union[UNet3DConditionOutput, Tuple]:
+ r"""
+ Args:
+            sample (`paddle.Tensor`): (batch, channel, num_frames, height, width) noisy inputs tensor
+ timestep (`paddle.Tensor` or `float` or `int`): (batch) timesteps
+ encoder_hidden_states (`paddle.Tensor`): (batch, sequence_length, feature_dim) encoder hidden states
+ return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`UNet3DConditionOutput`] instead of a plain tuple.
+
+ Returns:
+            [`UNet3DConditionOutput`] or `tuple`:
+            [`UNet3DConditionOutput`] if `return_dict` is True, otherwise a `tuple`. When
+            returning a tuple, the first element is the sample tensor.
+ """
+        # By default, samples have to be at least a multiple of the overall upsampling factor.
+        # The overall upsampling factor is equal to 2 ** (number of upsampling layers).
+ # However, the upsampling interpolation output size can be forced to fit any upsampling size
+ # on the fly if necessary.
+ default_overall_up_factor = 2**self.num_upsamplers
+
+ # upsample size should be forwarded when sample is not a multiple of `default_overall_up_factor`
+ forward_upsample_size = False
+ upsample_size = None
+
+ if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]):
+ logger.info("Forward upsample size to force interpolation output size.")
+ forward_upsample_size = True
+
+ # prepare attention_mask
+ if attention_mask is not None:
+ attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0
+ attention_mask = attention_mask.unsqueeze(1)
+
+ # center input if necessary
+ if self.config.center_input_sample:
+ sample = 2 * sample - 1.0
+
+ # time
+ timesteps = timestep
+ if not paddle.is_tensor(timesteps):
+            # Paddle has no MPS backend, so a plain float32/int64 scalar tensor is sufficient here.
+            dtype = "float32" if isinstance(timestep, float) else "int64"
+            timesteps = paddle.to_tensor([timesteps], dtype=dtype)
+ elif len(timesteps.shape) == 0:
+ timesteps = timesteps[None]
+
+ # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+        timesteps = timesteps.expand([sample.shape[0]])
+
+ t_emb = self.time_proj(timesteps)
+
+ # timesteps does not contain any weights and will always return f32 tensors
+ # but time_embedding might actually be running in fp16. so we need to cast here.
+ # there might be better ways to encapsulate this.
+ t_emb = t_emb.to(dtype=self.dtype)
+ emb = self.time_embedding(t_emb)
+
+ if self.class_embedding is not None:
+ if class_labels is None:
+ raise ValueError("class_labels should be provided when num_class_embeds > 0")
+
+ if self.config.class_embed_type == "timestep":
+ class_labels = self.time_proj(class_labels)
+
+ class_emb = self.class_embedding(class_labels).to(dtype=self.dtype)
+ emb = emb + class_emb
+
+ # pre-process
+
+ sample = self.conv_in(sample)
+
+ if pose_cond_fea is not None:
+ sample = sample + pose_cond_fea
+
+ # down
+ down_block_res_samples = (sample,)
+ for downsample_block in self.down_blocks:
+ if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
+
+ sample, res_samples = downsample_block(
+ hidden_states=sample,
+ temb=emb,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask,
+ )
+
+ else:
+ sample, res_samples = downsample_block(
+ hidden_states=sample,
+ temb=emb,
+ encoder_hidden_states=encoder_hidden_states,
+ )
+
+ down_block_res_samples += res_samples
+
+ if down_block_additional_residuals is not None:
+ new_down_block_res_samples = ()
+
+ for down_block_res_sample, down_block_additional_residual in zip(
+ down_block_res_samples, down_block_additional_residuals
+ ):
+ down_block_res_sample = down_block_res_sample + down_block_additional_residual
+ new_down_block_res_samples += (down_block_res_sample,)
+
+ down_block_res_samples = new_down_block_res_samples
+
+ # mid
+ sample = self.mid_block(
+ sample,
+ emb,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask,
+ )
+
+ if mid_block_additional_residual is not None:
+ sample = sample + mid_block_additional_residual
+
+ # up
+ for i, upsample_block in enumerate(self.up_blocks):
+ is_final_block = i == len(self.up_blocks) - 1
+
+ res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
+ down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
+
+ # if we have not reached the final block and need to forward the
+ # upsample size, we do it here
+ if not is_final_block and forward_upsample_size:
+ upsample_size = down_block_res_samples[-1].shape[2:]
+
+ if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
+ sample = upsample_block(
+ hidden_states=sample,
+ temb=emb,
+ res_hidden_states_tuple=res_samples,
+ encoder_hidden_states=encoder_hidden_states,
+ upsample_size=upsample_size,
+ attention_mask=attention_mask,
+ )
+ else:
+ sample = upsample_block(
+ hidden_states=sample,
+ temb=emb,
+ res_hidden_states_tuple=res_samples,
+ upsample_size=upsample_size,
+ encoder_hidden_states=encoder_hidden_states,
+ )
+
+ # post-process
+ sample = self.conv_norm_out(sample)
+ sample = self.conv_act(sample)
+ sample = self.conv_out(sample)
+
+ if not return_dict:
+ return (sample,)
+
+ return UNet3DConditionOutput(sample=sample)
+
+ @classmethod
+ def from_pretrained_2d(
+ cls,
+ denoising_unet_config_path: Optional[Union[str, PathLike]],
+ base_model_path: Optional[Union[str, PathLike]] = None,
+ motion_module_path: Optional[Union[str, PathLike]] = None,
+ weight_dtype=None,
+ unet_additional_kwargs=None,
+ ):
+
+ config_file = denoising_unet_config_path
+ if not (Path(config_file).exists() and Path(config_file).is_file()):
+ raise RuntimeError(f"{config_file} does not exist or is not a file")
+
+ unet_config = cls.load_config(config_file)
+ unet_config["_class_name"] = cls.__name__
+ unet_config["down_block_types"] = [
+ "CrossAttnDownBlock3D",
+ "CrossAttnDownBlock3D",
+ "CrossAttnDownBlock3D",
+ "DownBlock3D",
+ ]
+ unet_config["up_block_types"] = [
+ "UpBlock3D",
+ "CrossAttnUpBlock3D",
+ "CrossAttnUpBlock3D",
+ "CrossAttnUpBlock3D",
+ ]
+ unet_config["mid_block_type"] = "UNetMidBlock3DCrossAttn"
+
+ init_contexts = []
+ if weight_dtype is not None:
+ init_contexts.append(paddle.dtype_guard(weight_dtype))
+
+ with ContextManagers(init_contexts):
+            model = cls.from_config(unet_config, **(unet_additional_kwargs or {}))
+
+ state_dict = paddle.load(base_model_path)
+
+ # motion module updating
+ if motion_module_path is not None:
+ motion_state_dict = paddle.load(motion_module_path)
+ state_dict.update(motion_state_dict)
+
+ if weight_dtype is not None:
+ for k in state_dict.keys():
+ state_dict[k] = state_dict[k].astype(weight_dtype)
+
+ m, u = model.set_state_dict(state_dict)
+ print(f"### missing keys: {len(m)}; \n### unexpected keys: {len(u)};")
+
+ return model
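+
+
+# Hedged usage sketch; the paths below are placeholders for a 2D denoising-UNet config,
+# its base weights, and an optional motion-module checkpoint:
+#
+#   unet = UNet3DConditionModel.from_pretrained_2d(
+#       denoising_unet_config_path="./config.json",
+#       base_model_path="./denoising_unet.pdparams",
+#       motion_module_path="./motion_module.pdparams",
+#       weight_dtype=paddle.float16,
+#       unet_additional_kwargs={"use_motion_module": True},
+#   )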
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef05224cf6aff170028d4e2e50ce4f3572bc9387
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/__init__.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Union
+
+import numpy as np
+import paddle
+
+import ppdiffusers
+
+from .unet import UNet3DConditionModel # noqa: *
+
+
+@dataclass
+class HotshotPipelineXLOutput(ppdiffusers.utils.BaseOutput):
+ videos: Union[paddle.Tensor, np.ndarray]
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/resnet.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..85f2f60e155b2094be815f83b900548368027939
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/resnet.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from einops import rearrange
+
+import ppdiffusers
+from ppdiffusers.models import resnet
+
+
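+# The 3D layers below wrap their 2D ppdiffusers counterparts: the frame axis is folded
+# into the batch axis via einops.rearrange, the unchanged 2D forward pass runs, and the
+# result is unfolded back to (b, c, f, h, w), so pretrained 2D weights load directly.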
+class Upsample3D(resnet.Upsample2D):
+ def forward(self, hidden_states, output_size=None, scale: float = 1.0):
+ f = tuple(hidden_states.shape)[2]
+ hidden_states = rearrange(hidden_states, "b c f h w -> (b f) c h w")
+ hidden_states = super(Upsample3D, self).forward(hidden_states, output_size, scale)
+ return rearrange(hidden_states, "(b f) c h w -> b c f h w", f=f)
+
+
+class Downsample3D(ppdiffusers.models.resnet.Downsample2D):
+ def forward(self, hidden_states, scale: float = 1.0):
+ f = tuple(hidden_states.shape)[2]
+ hidden_states = rearrange(hidden_states, "b c f h w -> (b f) c h w")
+ hidden_states = super(Downsample3D, self).forward(hidden_states, scale)
+ return rearrange(hidden_states, "(b f) c h w -> b c f h w", f=f)
+
+
+class Conv3d(ppdiffusers.models.resnet.LoRACompatibleConv):
+ def forward(self, hidden_states, scale: float = 1.0):
+ f = tuple(hidden_states.shape)[2]
+ hidden_states = rearrange(hidden_states, "b c f h w -> (b f) c h w")
+ hidden_states = super().forward(hidden_states, scale)
+ return rearrange(hidden_states, "(b f) c h w -> b c f h w", f=f)
+
+
+class ResnetBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ *,
+ in_channels,
+ out_channels=None,
+ conv_shortcut=False,
+ dropout=0.0,
+ temb_channels=512,
+ groups=32,
+ groups_out=None,
+ pre_norm=True,
+ eps=1e-06,
+ non_linearity="silu",
+ time_embedding_norm="default",
+ output_scale_factor=1.0,
+ use_in_shortcut=None,
+ conv_shortcut_bias: bool = True
+ ):
+ super().__init__()
+ self.pre_norm = pre_norm
+ self.pre_norm = True
+ self.in_channels = in_channels
+ out_channels = in_channels if out_channels is None else out_channels
+ self.out_channels = out_channels
+ self.use_conv_shortcut = conv_shortcut
+ self.time_embedding_norm = time_embedding_norm
+ self.output_scale_factor = output_scale_factor
+ if groups_out is None:
+ groups_out = groups
+ self.norm1 = paddle.nn.GroupNorm(
+ num_groups=groups, num_channels=in_channels, epsilon=eps, weight_attr=True, bias_attr=True
+ )
+ self.conv1 = Conv3d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+ if temb_channels is not None:
+ if self.time_embedding_norm == "default":
+ time_emb_proj_out_channels = out_channels
+ elif self.time_embedding_norm == "scale_shift":
+ time_emb_proj_out_channels = out_channels * 2
+ else:
+ raise ValueError(f"unknown time_embedding_norm : {self.time_embedding_norm} ")
+ self.time_emb_proj = paddle.nn.Linear(in_features=temb_channels, out_features=time_emb_proj_out_channels)
+ else:
+ self.time_emb_proj = None
+ self.norm2 = paddle.nn.GroupNorm(
+ num_groups=groups_out, num_channels=out_channels, epsilon=eps, weight_attr=True, bias_attr=True
+ )
+ self.dropout = paddle.nn.Dropout(p=dropout)
+ self.conv2 = Conv3d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+ assert non_linearity == "silu"
+ self.nonlinearity = paddle.nn.Silu()
+ self.use_in_shortcut = self.in_channels != self.out_channels if use_in_shortcut is None else use_in_shortcut
+ self.conv_shortcut = None
+ if self.use_in_shortcut:
+ self.conv_shortcut = Conv3d(
+ in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias_attr=conv_shortcut_bias
+ )
+
+ def forward(self, input_tensor, temb):
+ hidden_states = input_tensor
+ hidden_states = self.norm1(hidden_states)
+ hidden_states = self.nonlinearity(hidden_states)
+ hidden_states = self.conv1(hidden_states)
+ if temb is not None:
+ temb = self.nonlinearity(temb)
+ temb = self.time_emb_proj(temb)[:, :, None, None, None]
+ if temb is not None and self.time_embedding_norm == "default":
+ hidden_states = hidden_states + temb
+ hidden_states = self.norm2(hidden_states)
+ if temb is not None and self.time_embedding_norm == "scale_shift":
+ scale, shift = paddle.chunk(x=temb, chunks=2, axis=1)
+ hidden_states = hidden_states * (1 + scale) + shift
+ hidden_states = self.nonlinearity(hidden_states)
+ hidden_states = self.dropout(hidden_states)
+ hidden_states = self.conv2(hidden_states)
+ if self.conv_shortcut is not None:
+ input_tensor = self.conv_shortcut(input_tensor)
+ output_tensor = (input_tensor + hidden_states) / self.output_scale_factor
+ return output_tensor
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/transformer_3d.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/transformer_3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c387c4a905e5f7207cf576fc4a06bc88066d9ba
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/transformer_3d.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+import paddle
+from einops import rearrange, repeat
+
+import ppdiffusers
+
+
+@dataclass
+class Transformer3DModelOutput(ppdiffusers.utils.BaseOutput):
+ """
+ The output of [`Transformer3DModel`].
+
+ Args:
+        sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)`):
+ The hidden states output conditioned on the `encoder_hidden_states` input.
+ """
+
+    sample: paddle.Tensor
+
+
+class Transformer3DModel(ppdiffusers.models.transformer_2d.Transformer2DModel):
+ def __init__(self, *args, **kwargs):
+ super(Transformer3DModel, self).__init__(*args, **kwargs)
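+        # Zero-initialize the output projection so this temporal transformer starts as an
+        # identity mapping (its residual contribution is zero) and does not perturb the
+        # pretrained 2D behaviour at the start of fine-tuning.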
+ init_Constant = paddle.nn.initializer.Constant(value=0.0)
+ init_Constant(self.proj_out.weight.data)
+ init_Constant = paddle.nn.initializer.Constant(value=0.0)
+ init_Constant(self.proj_out.bias.data)
+
+ def forward(
+ self,
+ hidden_states: paddle.Tensor,
+ encoder_hidden_states: Optional[paddle.Tensor] = None,
+ timestep: Optional[int] = None,
+ class_labels: Optional[int] = None,
+ cross_attention_kwargs: Dict[str, Any] = None,
+ attention_mask: Optional[paddle.Tensor] = None,
+ encoder_attention_mask: Optional[paddle.Tensor] = None,
+ enable_temporal_layers: bool = True,
+ positional_embedding: Optional[paddle.Tensor] = None,
+ return_dict: bool = True,
+ ):
+ is_video = len(tuple(hidden_states.shape)) == 5
+ if is_video:
+ f = tuple(hidden_states.shape)[2]
+ hidden_states = rearrange(hidden_states, "b c f h w -> (b f) c h w")
+ encoder_hidden_states = repeat(encoder_hidden_states, "b n c -> (b f) n c", f=f)
+ hidden_states = super(Transformer3DModel, self).forward(
+ hidden_states,
+ encoder_hidden_states,
+ timestep,
+ class_labels,
+ cross_attention_kwargs,
+ attention_mask,
+ encoder_attention_mask,
+ return_dict=False,
+ )[0]
+ if is_video:
+ hidden_states = rearrange(hidden_states, "(b f) c h w -> b c f h w", f=f)
+ if not return_dict:
+ return (hidden_states,)
+ return Transformer3DModelOutput(sample=hidden_states)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet.py
new file mode 100644
index 0000000000000000000000000000000000000000..39fae6fe6ecdd4a6e619a415836d0aa57c0196ac
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet.py
@@ -0,0 +1,778 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import paddle
+
+import ppdiffusers
+from ppdiffusers import loaders, transformers # noqa: *
+
+from .resnet import Conv3d
+from .unet_blocks import (
+ CrossAttnDownBlock3D,
+ CrossAttnUpBlock3D,
+ DownBlock3D,
+ UNetMidBlock3DCrossAttn,
+ UpBlock3D,
+ get_down_block,
+ get_up_block,
+)
+
+logger = ppdiffusers.utils.logging.get_logger(__name__)
+
+
+@dataclass
+class UNet3DConditionOutput(ppdiffusers.utils.BaseOutput):
+ """
+    The output of [`UNet3DConditionModel`].
+
+    Args:
+        sample (`paddle.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`):
+            The hidden states output conditioned on `encoder_hidden_states` input. Output of the last layer of the model.
+    """
+
+    sample: paddle.Tensor = None
+
+
+class UNet3DConditionModel(
+ ppdiffusers.models.modeling_utils.ModelMixin,
+ ppdiffusers.configuration_utils.ConfigMixin,
+ loaders.UNet2DConditionLoadersMixin,
+):
+ _supports_gradient_checkpointing = True
+
+ @ppdiffusers.configuration_utils.register_to_config
+ def __init__(
+ self,
+ sample_size: Optional[int] = None,
+ in_channels: int = 4,
+ out_channels: int = 4,
+ center_input_sample: bool = False,
+ flip_sin_to_cos: bool = True,
+ freq_shift: int = 0,
+ down_block_types: Tuple[str] = ("CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "DownBlock3D"),
+ mid_block_type: Optional[str] = "UNetMidBlock3DCrossAttn",
+ up_block_types: Tuple[str] = ("UpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D"),
+ only_cross_attention: Union[bool, Tuple[bool]] = False,
+ block_out_channels: Tuple[int] = (320, 640, 1280, 1280),
+ layers_per_block: Union[int, Tuple[int]] = 2,
+ downsample_padding: int = 1,
+ mid_block_scale_factor: float = 1,
+ act_fn: str = "silu",
+ norm_num_groups: Optional[int] = 32,
+ norm_eps: float = 1e-05,
+ cross_attention_dim: Union[int, Tuple[int]] = 1280,
+ transformer_layers_per_block: Union[int, Tuple[int]] = 1,
+ encoder_hid_dim: Optional[int] = None,
+ encoder_hid_dim_type: Optional[str] = None,
+ attention_head_dim: Union[int, Tuple[int]] = 8,
+ num_attention_heads: Optional[Union[int, Tuple[int]]] = None,
+ dual_cross_attention: bool = False,
+ use_linear_projection: bool = False,
+ class_embed_type: Optional[str] = None,
+ addition_embed_type: Optional[str] = None,
+ addition_time_embed_dim: Optional[int] = None,
+ num_class_embeds: Optional[int] = None,
+ upcast_attention: bool = False,
+ resnet_time_scale_shift: str = "default",
+ resnet_skip_time_act: bool = False,
+ resnet_out_scale_factor: int = 1.0,
+ time_embedding_type: str = "positional",
+ time_embedding_dim: Optional[int] = None,
+ time_embedding_act_fn: Optional[str] = None,
+ timestep_post_act: Optional[str] = None,
+ time_cond_proj_dim: Optional[int] = None,
+ conv_in_kernel: int = 3,
+ conv_out_kernel: int = 3,
+ projection_class_embeddings_input_dim: Optional[int] = None,
+ class_embeddings_concat: bool = False,
+ mid_block_only_cross_attention: Optional[bool] = None,
+ cross_attention_norm: Optional[str] = None,
+ addition_embed_type_num_heads=64,
+ ):
+ super().__init__()
+ self.sample_size = sample_size
+ if num_attention_heads is not None:
+ raise ValueError(
+ "At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19."
+ )
+ num_attention_heads = num_attention_heads or attention_head_dim
+ if len(down_block_types) != len(up_block_types):
+ raise ValueError(
+ f"Must provide the same number of `down_block_types` as `up_block_types`. `down_block_types`: {down_block_types}. `up_block_types`: {up_block_types}."
+ )
+ if len(block_out_channels) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}."
+ )
+ if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}."
+ )
+ if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}."
+ )
+ if not isinstance(attention_head_dim, int) and len(attention_head_dim) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `attention_head_dim` as `down_block_types`. `attention_head_dim`: {attention_head_dim}. `down_block_types`: {down_block_types}."
+ )
+ if isinstance(cross_attention_dim, list) and len(cross_attention_dim) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `cross_attention_dim` as `down_block_types`. `cross_attention_dim`: {cross_attention_dim}. `down_block_types`: {down_block_types}."
+ )
+ if not isinstance(layers_per_block, int) and len(layers_per_block) != len(down_block_types):
+ raise ValueError(
+ f"Must provide the same number of `layers_per_block` as `down_block_types`. `layers_per_block`: {layers_per_block}. `down_block_types`: {down_block_types}."
+ )
+ conv_in_padding = (conv_in_kernel - 1) // 2
+ self.conv_in = Conv3d(in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding)
+ if time_embedding_type == "fourier":
+ time_embed_dim = time_embedding_dim or block_out_channels[0] * 2
+ if time_embed_dim % 2 != 0:
+ raise ValueError(f"`time_embed_dim` should be divisible by 2, but is {time_embed_dim}.")
+ self.time_proj = ppdiffusers.models.embeddings.GaussianFourierProjection(
+ time_embed_dim // 2, set_W_to_weight=False, log=False, flip_sin_to_cos=flip_sin_to_cos
+ )
+ timestep_input_dim = time_embed_dim
+ elif time_embedding_type == "positional":
+ time_embed_dim = time_embedding_dim or block_out_channels[0] * 4
+ self.time_proj = ppdiffusers.models.embeddings.Timesteps(
+ block_out_channels[0], flip_sin_to_cos, freq_shift
+ )
+ timestep_input_dim = block_out_channels[0]
+ else:
+ raise ValueError(
+ f"{time_embedding_type} does not exist. Please make sure to use one of `fourier` or `positional`."
+ )
+ self.time_embedding = ppdiffusers.models.embeddings.TimestepEmbedding(
+ timestep_input_dim,
+ time_embed_dim,
+ act_fn=act_fn,
+ post_act_fn=timestep_post_act,
+ cond_proj_dim=time_cond_proj_dim,
+ )
+ if encoder_hid_dim_type is None and encoder_hid_dim is not None:
+ encoder_hid_dim_type = "text_proj"
+ self.register_to_config(encoder_hid_dim_type=encoder_hid_dim_type)
+ logger.info("encoder_hid_dim_type defaults to 'text_proj' as `encoder_hid_dim` is defined.")
+ if encoder_hid_dim is None and encoder_hid_dim_type is not None:
+ raise ValueError(
+ f"`encoder_hid_dim` has to be defined when `encoder_hid_dim_type` is set to {encoder_hid_dim_type}."
+ )
+ if encoder_hid_dim_type == "text_proj":
+ self.encoder_hid_proj = paddle.nn.Linear(in_features=encoder_hid_dim, out_features=cross_attention_dim)
+ elif encoder_hid_dim_type == "text_image_proj":
+ self.encoder_hid_proj = ppdiffusers.models.embeddings.TextImageProjection(
+ text_embed_dim=encoder_hid_dim,
+ image_embed_dim=cross_attention_dim,
+ cross_attention_dim=cross_attention_dim,
+ )
+ elif encoder_hid_dim_type == "image_proj":
+ self.encoder_hid_proj = ppdiffusers.models.embeddings.ImageProjection(
+ image_embed_dim=encoder_hid_dim, cross_attention_dim=cross_attention_dim
+ )
+ elif encoder_hid_dim_type is not None:
+ raise ValueError(
+ f"encoder_hid_dim_type: {encoder_hid_dim_type} must be None, 'text_proj' or 'text_image_proj'."
+ )
+ else:
+ self.encoder_hid_proj = None
+ if class_embed_type is None and num_class_embeds is not None:
+ self.class_embedding = paddle.nn.Embedding(num_embeddings=num_class_embeds, embedding_dim=time_embed_dim)
+ elif class_embed_type == "timestep":
+ self.class_embedding = ppdiffusers.models.embeddings.TimestepEmbedding(
+ timestep_input_dim, time_embed_dim, act_fn=act_fn
+ )
+ elif class_embed_type == "identity":
+ self.class_embedding = paddle.nn.Identity(time_embed_dim, time_embed_dim)
+ elif class_embed_type == "projection":
+ if projection_class_embeddings_input_dim is None:
+ raise ValueError(
+ "`class_embed_type`: 'projection' requires `projection_class_embeddings_input_dim` be set"
+ )
+ self.class_embedding = ppdiffusers.models.embeddings.TimestepEmbedding(
+ projection_class_embeddings_input_dim, time_embed_dim
+ )
+ elif class_embed_type == "simple_projection":
+ if projection_class_embeddings_input_dim is None:
+ raise ValueError(
+ "`class_embed_type`: 'simple_projection' requires `projection_class_embeddings_input_dim` be set"
+ )
+ self.class_embedding = paddle.nn.Linear(
+ in_features=projection_class_embeddings_input_dim, out_features=time_embed_dim
+ )
+ else:
+ self.class_embedding = None
+ if addition_embed_type == "text":
+ if encoder_hid_dim is not None:
+ text_time_embedding_from_dim = encoder_hid_dim
+ else:
+ text_time_embedding_from_dim = cross_attention_dim
+ self.add_embedding = ppdiffusers.models.embeddings.TextTimeEmbedding(
+ text_time_embedding_from_dim, time_embed_dim, num_heads=addition_embed_type_num_heads
+ )
+ elif addition_embed_type == "text_image":
+ self.add_embedding = ppdiffusers.models.embeddings.TextImageTimeEmbedding(
+ text_embed_dim=cross_attention_dim, image_embed_dim=cross_attention_dim, time_embed_dim=time_embed_dim
+ )
+ elif addition_embed_type == "text_time":
+ self.add_time_proj = ppdiffusers.models.embeddings.Timesteps(
+ addition_time_embed_dim, flip_sin_to_cos, freq_shift
+ )
+ self.add_embedding = ppdiffusers.models.embeddings.TimestepEmbedding(
+ projection_class_embeddings_input_dim, time_embed_dim
+ )
+ elif addition_embed_type == "image":
+ self.add_embedding = ppdiffusers.models.embeddings.ImageTimeEmbedding(
+ image_embed_dim=encoder_hid_dim, time_embed_dim=time_embed_dim
+ )
+ elif addition_embed_type == "image_hint":
+ self.add_embedding = ppdiffusers.models.embeddings.ImageHintTimeEmbedding(
+ image_embed_dim=encoder_hid_dim, time_embed_dim=time_embed_dim
+ )
+ elif addition_embed_type is not None:
+            raise ValueError(
+                f"`addition_embed_type`: {addition_embed_type} must be None, 'text', 'text_image', 'text_time', 'image' or 'image_hint'."
+            )
+ if time_embedding_act_fn is None:
+ self.time_embed_act = None
+ else:
+ self.time_embed_act = ppdiffusers.models.activations.get_activation(time_embedding_act_fn)
+ self.down_blocks = paddle.nn.LayerList(sublayers=[])
+ self.up_blocks = paddle.nn.LayerList(sublayers=[])
+ if isinstance(only_cross_attention, bool):
+ if mid_block_only_cross_attention is None:
+ mid_block_only_cross_attention = only_cross_attention
+ only_cross_attention = [only_cross_attention] * len(down_block_types)
+ if mid_block_only_cross_attention is None:
+ mid_block_only_cross_attention = False
+ if isinstance(num_attention_heads, int):
+ num_attention_heads = (num_attention_heads,) * len(down_block_types)
+ if isinstance(attention_head_dim, int):
+ attention_head_dim = (attention_head_dim,) * len(down_block_types)
+ if isinstance(cross_attention_dim, int):
+ cross_attention_dim = (cross_attention_dim,) * len(down_block_types)
+ if isinstance(layers_per_block, int):
+ layers_per_block = [layers_per_block] * len(down_block_types)
+ if isinstance(transformer_layers_per_block, int):
+ transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types)
+ if class_embeddings_concat:
+ blocks_time_embed_dim = time_embed_dim * 2
+ else:
+ blocks_time_embed_dim = time_embed_dim
+ output_channel = block_out_channels[0]
+ for i, down_block_type in enumerate(down_block_types):
+ res = 2**i
+ input_channel = output_channel
+ output_channel = block_out_channels[i]
+ is_final_block = i == len(block_out_channels) - 1
+ down_block = get_down_block(
+ down_block_type,
+ num_layers=layers_per_block[i],
+ transformer_layers_per_block=transformer_layers_per_block[i],
+ in_channels=input_channel,
+ out_channels=output_channel,
+ temb_channels=blocks_time_embed_dim,
+ add_downsample=not is_final_block,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ resnet_groups=norm_num_groups,
+ cross_attention_dim=cross_attention_dim[i],
+ num_attention_heads=num_attention_heads[i],
+ downsample_padding=downsample_padding,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention[i],
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ resnet_skip_time_act=resnet_skip_time_act,
+ resnet_out_scale_factor=resnet_out_scale_factor,
+ cross_attention_norm=cross_attention_norm,
+ attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel,
+ )
+ self.down_blocks.append(down_block)
+ if mid_block_type == "UNetMidBlock3DCrossAttn":
+ self.mid_block = UNetMidBlock3DCrossAttn(
+ transformer_layers_per_block=transformer_layers_per_block[-1],
+ in_channels=block_out_channels[-1],
+ temb_channels=blocks_time_embed_dim,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ output_scale_factor=mid_block_scale_factor,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ cross_attention_dim=cross_attention_dim[-1],
+ num_attention_heads=num_attention_heads[-1],
+ resnet_groups=norm_num_groups,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ upcast_attention=upcast_attention,
+ )
+ elif mid_block_type == "UNetMidBlock2DSimpleCrossAttn":
+ raise ValueError("UNetMidBlock2DSimpleCrossAttn not supported")
+ elif mid_block_type is None:
+ self.mid_block = None
+ else:
+ raise ValueError(f"unknown mid_block_type : {mid_block_type}")
+ self.num_upsamplers = 0
+ reversed_block_out_channels = list(reversed(block_out_channels))
+ reversed_num_attention_heads = list(reversed(num_attention_heads))
+ reversed_layers_per_block = list(reversed(layers_per_block))
+ reversed_cross_attention_dim = list(reversed(cross_attention_dim))
+ reversed_transformer_layers_per_block = list(reversed(transformer_layers_per_block))
+ only_cross_attention = list(reversed(only_cross_attention))
+ output_channel = reversed_block_out_channels[0]
+ for i, up_block_type in enumerate(up_block_types):
+            res = 2 ** (len(up_block_types) - 1 - i)  # noqa: F841
+ is_final_block = i == len(block_out_channels) - 1
+ prev_output_channel = output_channel
+ output_channel = reversed_block_out_channels[i]
+ input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)]
+ if not is_final_block:
+ add_upsample = True
+ self.num_upsamplers += 1
+ else:
+ add_upsample = False
+ up_block = get_up_block(
+ up_block_type,
+ num_layers=reversed_layers_per_block[i] + 1,
+ transformer_layers_per_block=reversed_transformer_layers_per_block[i],
+ in_channels=input_channel,
+ out_channels=output_channel,
+ prev_output_channel=prev_output_channel,
+ temb_channels=blocks_time_embed_dim,
+ add_upsample=add_upsample,
+ resnet_eps=norm_eps,
+ resnet_act_fn=act_fn,
+ resnet_groups=norm_num_groups,
+ cross_attention_dim=reversed_cross_attention_dim[i],
+ num_attention_heads=reversed_num_attention_heads[i],
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention[i],
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ resnet_skip_time_act=resnet_skip_time_act,
+ resnet_out_scale_factor=resnet_out_scale_factor,
+ cross_attention_norm=cross_attention_norm,
+ attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel,
+ )
+ self.up_blocks.append(up_block)
+ prev_output_channel = output_channel
+ if norm_num_groups is not None:
+ self.conv_norm_out = paddle.nn.GroupNorm(
+ num_channels=block_out_channels[0], num_groups=norm_num_groups, epsilon=norm_eps
+ )
+ self.conv_act = ppdiffusers.models.activations.get_activation(act_fn)
+ else:
+ self.conv_norm_out = None
+ self.conv_act = None
+ conv_out_padding = (conv_out_kernel - 1) // 2
+ self.conv_out = Conv3d(
+ block_out_channels[0], out_channels, kernel_size=conv_out_kernel, padding=conv_out_padding
+ )
+
+ def temporal_parameters(self) -> list:
+ output = []
+ all_blocks = list(self.down_blocks) + list(self.up_blocks) + [self.mid_block]
+ for block in all_blocks:
+ output.extend(block.temporal_parameters())
+ return output
+
+ @property
+ def attn_processors(self) -> Dict[str, ppdiffusers.models.attention_processor.AttentionProcessor]:
+ return self.get_attn_processors(include_temporal_layers=False)
+
+ def get_attn_processors(
+ self, include_temporal_layers=True
+ ) -> Dict[str, ppdiffusers.models.attention_processor.AttentionProcessor]:
+ """
+ Returns:
+            `dict` of attention processors: A dictionary containing all attention processors used in the model,
+            indexed by their weight names.
+ """
+ processors = {}
+
+ def fn_recursive_add_processors(
+ name: str,
+ module: paddle.nn.Layer,
+ processors: Dict[str, ppdiffusers.models.attention_processor.AttentionProcessor],
+ ):
+ if not include_temporal_layers:
+ if "temporal" in name:
+ return processors
+ if hasattr(module, "set_processor"):
+ processors[f"{name}.processor"] = module.processor
+ for sub_name, child in module.named_children():
+ fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
+ return processors
+
+ for name, module in self.named_children():
+ fn_recursive_add_processors(name, module, processors)
+ return processors
+
+ def set_attn_processor(
+ self,
+ processor: Union[
+ ppdiffusers.models.attention_processor.AttentionProcessor,
+ Dict[str, ppdiffusers.models.attention_processor.AttentionProcessor],
+ ],
+ include_temporal_layers=False,
+ ):
+ """
+ Sets the attention processor to use to compute attention.
+
+ Parameters:
+ processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
+ The instantiated processor class or a dictionary of processor classes that will be set as the processor
+ for **all** `Attention` layers.
+
+ If `processor` is a dict, the key needs to define the path to the corresponding cross attention
+ processor. This is strongly recommended when setting trainable attention processors.
+
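+        Example:
+            A minimal sketch; `unet` is assumed to be an already constructed model:
+
+            ```python
+            from ppdiffusers.models.attention_processor import AttnProcessor
+
+            # use the default processor for every spatial attention layer
+            unet.set_attn_processor(AttnProcessor())
+            ```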
+ """
+ count = len(self.get_attn_processors(include_temporal_layers=include_temporal_layers).keys())
+ if isinstance(processor, dict) and len(processor) != count:
+ raise ValueError(
+ f"A dict of processors was passed, but the number of processors {len(processor)} does not match the number of attention layers: {count}. Please make sure to pass {count} processor classes."
+ )
+
+ def fn_recursive_attn_processor(name: str, module: paddle.nn.Layer, processor):
+ if not include_temporal_layers:
+ if "temporal" in name:
+ return
+ if hasattr(module, "set_processor"):
+ if not isinstance(processor, dict):
+ module.set_processor(processor)
+ else:
+ module.set_processor(processor.pop(f"{name}.processor"))
+ for sub_name, child in module.named_children():
+ fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
+
+ for name, module in self.named_children():
+ fn_recursive_attn_processor(name, module, processor)
+
+ def set_default_attn_processor(self):
+ """
+ Disables custom attention processors and sets the default attention implementation.
+ """
+ self.set_attn_processor(ppdiffusers.models.attention_processor.AttnProcessor())
+
+ def set_attention_slice(self, slice_size):
+ """
+ Enable sliced attention computation.
+
+ When this option is enabled, the attention module splits the input tensor in slices to compute attention in
+ several steps. This is useful for saving some memory in exchange for a small decrease in speed.
+
+ Args:
+ slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`):
+ When `"auto"`, input to the attention heads is halved, so attention is computed in two steps. If
+ `"max"`, maximum amount of memory is saved by running only one slice at a time. If a number is
+ provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim`
+ must be a multiple of `slice_size`.
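+
+        Example:
+            Illustrative only; `unet` is assumed to be an already constructed model:
+
+            ```python
+            # halve the head dimension of every sliceable attention layer
+            unet.set_attention_slice("auto")
+            ```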
+ """
+ sliceable_head_dims = []
+
+ def fn_recursive_retrieve_sliceable_dims(module: paddle.nn.Layer):
+ if hasattr(module, "set_attention_slice"):
+ sliceable_head_dims.append(module.sliceable_head_dim)
+ for child in module.children():
+ fn_recursive_retrieve_sliceable_dims(child)
+
+ for module in self.children():
+ fn_recursive_retrieve_sliceable_dims(module)
+ num_sliceable_layers = len(sliceable_head_dims)
+ if slice_size == "auto":
+ slice_size = [(dim // 2) for dim in sliceable_head_dims]
+ elif slice_size == "max":
+ slice_size = num_sliceable_layers * [1]
+ slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size
+ if len(slice_size) != len(sliceable_head_dims):
+ raise ValueError(
+ f"You have provided {len(slice_size)}, but {self.config} has {len(sliceable_head_dims)} different attention layers. Make sure to match `len(slice_size)` to be {len(sliceable_head_dims)}."
+ )
+ for i in range(len(slice_size)):
+ size = slice_size[i]
+ dim = sliceable_head_dims[i]
+ if size is not None and size > dim:
+ raise ValueError(f"size {size} has to be smaller or equal to {dim}.")
+
+ def fn_recursive_set_attention_slice(module: paddle.nn.Layer, slice_size: List[int]):
+ if hasattr(module, "set_attention_slice"):
+ module.set_attention_slice(slice_size.pop())
+ for child in module.children():
+ fn_recursive_set_attention_slice(child, slice_size)
+
+ reversed_slice_size = list(reversed(slice_size))
+ for module in self.children():
+ fn_recursive_set_attention_slice(module, reversed_slice_size)
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if isinstance(module, (CrossAttnDownBlock3D, DownBlock3D, CrossAttnUpBlock3D, UpBlock3D)):
+ module.gradient_checkpointing = value
+
+ def forward(
+ self,
+        sample: paddle.Tensor,
+ timestep: Union[paddle.Tensor, float, int],
+ encoder_hidden_states: paddle.Tensor,
+ class_labels: Optional[paddle.Tensor] = None,
+ timestep_cond: Optional[paddle.Tensor] = None,
+ attention_mask: Optional[paddle.Tensor] = None,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ added_cond_kwargs: Optional[Dict[str, paddle.Tensor]] = None,
+ down_block_additional_residuals: Optional[Tuple[paddle.Tensor]] = None,
+ mid_block_additional_residual: Optional[paddle.Tensor] = None,
+ encoder_attention_mask: Optional[paddle.Tensor] = None,
+ return_dict: bool = True,
+ enable_temporal_attentions: bool = True,
+ ) -> Union[UNet3DConditionOutput, Tuple]:
+ """
+        The [`UNet3DConditionModel`] forward method.
+
+ Args:
+ sample (`paddle.FloatTensor`):
+                The noisy input tensor with the following shape `(batch, channel, num_frames, height, width)`.
+ timestep (`paddle.FloatTensor` or `float` or `int`): The number of timesteps to denoise an input.
+ encoder_hidden_states (`paddle.FloatTensor`):
+ The encoder hidden states with shape `(batch, sequence_length, feature_dim)`.
+ encoder_attention_mask (`paddle.Tensor`):
+ A cross-attention mask of shape `(batch, sequence_length)` is applied to `encoder_hidden_states`. If
+ `True` the mask is kept, otherwise if `False` it is discarded. Mask will be converted into a bias,
+ which adds large negative values to the attention scores corresponding to "discard" tokens.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`UNet3DConditionOutput`] instead of a plain tuple.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the [`AttnProcessor`].
+ added_cond_kwargs: (`dict`, *optional*):
+                A kwargs dictionary containing additional embeddings that if specified are added to the embeddings that
+ are passed along to the UNet blocks.
+
+ Returns:
+            [`UNet3DConditionOutput`] or `tuple`:
+                If `return_dict` is True, a [`UNet3DConditionOutput`] is returned, otherwise a `tuple` is returned
+                where the first element is the sample tensor.
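+
+        Example:
+            A minimal sketch; `unet` is assumed to be an already constructed `UNet3DConditionModel`, and the
+            shapes below are illustrative (they must match the model's config):
+
+            ```python
+            import paddle
+
+            sample = paddle.randn([1, 4, 8, 64, 64])  # (batch, channel, num_frames, height, width)
+            encoder_hidden_states = paddle.randn([1, 77, 1024])  # last dim must equal cross_attention_dim
+            out = unet(sample, timestep=10, encoder_hidden_states=encoder_hidden_states)
+            video_latents = out.sample
+            ```
+
+            SDXL-style configs (`addition_embed_type="text_time"`) additionally require `added_cond_kwargs`
+            with `text_embeds` and `time_ids`.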
+ """
+ default_overall_up_factor = 2**self.num_upsamplers
+ forward_upsample_size = False
+ upsample_size = None
+ if any(s % default_overall_up_factor != 0 for s in tuple(sample.shape)[-2:]):
+ logger.info("Forward upsample size to force interpolation output size.")
+ forward_upsample_size = True
+ if attention_mask is not None:
+ attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0
+ attention_mask = attention_mask.unsqueeze(axis=1)
+ if encoder_attention_mask is not None:
+ encoder_attention_mask = (1 - encoder_attention_mask.to(sample.dtype)) * -10000.0
+ encoder_attention_mask = encoder_attention_mask.unsqueeze(axis=1)
+ if self.config.center_input_sample:
+ sample = 2 * sample - 1.0
+ timesteps = timestep
+ if not paddle.is_tensor(x=timesteps):
+            # Paddle has no MPS backend and its tensors expose `.place` rather than torch's
+            # `.device`, so the dtype is chosen from the Python type of `timestep` alone.
+            dtype = "float64" if isinstance(timestep, float) else "int64"
+            timesteps = paddle.to_tensor(data=[timesteps], dtype=dtype, place=sample.place)
+ elif len(tuple(timesteps.shape)) == 0:
+ timesteps = timesteps[None].to(sample.place)
+        timesteps = timesteps.expand(shape=[tuple(sample.shape)[0]])
+ t_emb = self.time_proj(timesteps)
+ t_emb = t_emb.to(dtype=sample.dtype)
+ emb = self.time_embedding(t_emb, timestep_cond)
+ aug_emb = None
+ if self.class_embedding is not None:
+ if class_labels is None:
+ raise ValueError("class_labels should be provided when num_class_embeds > 0")
+ if self.config.class_embed_type == "timestep":
+ class_labels = self.time_proj(class_labels)
+ class_labels = class_labels.to(dtype=sample.dtype)
+ class_emb = self.class_embedding(class_labels).to(dtype=sample.dtype)
+ if self.config.class_embeddings_concat:
+ emb = paddle.concat(x=[emb, class_emb], axis=-1)
+ else:
+ emb = emb + class_emb
+ if self.config.addition_embed_type == "text":
+ aug_emb = self.add_embedding(encoder_hidden_states)
+ elif self.config.addition_embed_type == "text_image":
+ if "image_embeds" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `addition_embed_type` set to 'text_image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
+ )
+ image_embs = added_cond_kwargs.get("image_embeds")
+ text_embs = added_cond_kwargs.get("text_embeds", encoder_hidden_states)
+ aug_emb = self.add_embedding(text_embs, image_embs)
+ elif self.config.addition_embed_type == "text_time":
+ if "text_embeds" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
+ )
+ text_embeds = added_cond_kwargs.get("text_embeds")
+ if "time_ids" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
+ )
+ time_ids = added_cond_kwargs.get("time_ids")
+ time_embeds = self.add_time_proj(time_ids.flatten())
+ time_embeds = time_embeds.reshape((tuple(text_embeds.shape)[0], -1))
+ add_embeds = paddle.concat(x=[text_embeds, time_embeds], axis=-1)
+ add_embeds = add_embeds.to(emb.dtype)
+ aug_emb = self.add_embedding(add_embeds)
+ elif self.config.addition_embed_type == "image":
+ if "image_embeds" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `addition_embed_type` set to 'image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
+ )
+ image_embs = added_cond_kwargs.get("image_embeds")
+ aug_emb = self.add_embedding(image_embs)
+ elif self.config.addition_embed_type == "image_hint":
+ if "image_embeds" not in added_cond_kwargs or "hint" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `addition_embed_type` set to 'image_hint' which requires the keyword arguments `image_embeds` and `hint` to be passed in `added_cond_kwargs`"
+ )
+ image_embs = added_cond_kwargs.get("image_embeds")
+ hint = added_cond_kwargs.get("hint")
+ aug_emb, hint = self.add_embedding(image_embs, hint)
+ sample = paddle.concat(x=[sample, hint], axis=1)
+ emb = emb + aug_emb if aug_emb is not None else emb
+ if self.time_embed_act is not None:
+ emb = self.time_embed_act(emb)
+ if self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_proj":
+ encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states)
+ elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_image_proj":
+ if "image_embeds" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'text_image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
+ )
+ image_embeds = added_cond_kwargs.get("image_embeds")
+ encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states, image_embeds)
+ elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "image_proj":
+ if "image_embeds" not in added_cond_kwargs:
+ raise ValueError(
+ f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
+ )
+ image_embeds = added_cond_kwargs.get("image_embeds")
+ encoder_hidden_states = self.encoder_hid_proj(image_embeds)
+ sample = self.conv_in(sample)
+ down_block_res_samples = (sample,)
+ for downsample_block in self.down_blocks:
+ if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
+ sample, res_samples = downsample_block(
+ hidden_states=sample,
+ temb=emb,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask,
+ cross_attention_kwargs=cross_attention_kwargs,
+ enable_temporal_attentions=enable_temporal_attentions,
+ )
+ else:
+ sample, res_samples = downsample_block(
+ hidden_states=sample,
+ temb=emb,
+ encoder_hidden_states=encoder_hidden_states,
+ enable_temporal_attentions=enable_temporal_attentions,
+ )
+ down_block_res_samples += res_samples
+ if down_block_additional_residuals is not None:
+ new_down_block_res_samples = ()
+ for down_block_res_sample, down_block_additional_residual in zip(
+ down_block_res_samples, down_block_additional_residuals
+ ):
+ down_block_res_sample = down_block_res_sample + down_block_additional_residual
+ new_down_block_res_samples = new_down_block_res_samples + (down_block_res_sample,)
+ down_block_res_samples = new_down_block_res_samples
+ if self.mid_block is not None:
+ sample = self.mid_block(
+ sample,
+ emb,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask,
+ cross_attention_kwargs=cross_attention_kwargs,
+ enable_temporal_attentions=enable_temporal_attentions,
+ )
+ if mid_block_additional_residual is not None:
+ sample = sample + mid_block_additional_residual
+ for i, upsample_block in enumerate(self.up_blocks):
+ is_final_block = i == len(self.up_blocks) - 1
+ res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
+ down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
+ if not is_final_block and forward_upsample_size:
+ upsample_size = tuple(down_block_res_samples[-1].shape)[2:]
+ if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
+ sample = upsample_block(
+ hidden_states=sample,
+ temb=emb,
+ res_hidden_states_tuple=res_samples,
+ encoder_hidden_states=encoder_hidden_states,
+ cross_attention_kwargs=cross_attention_kwargs,
+ upsample_size=upsample_size,
+ attention_mask=attention_mask,
+ enable_temporal_attentions=enable_temporal_attentions,
+ )
+ else:
+ sample = upsample_block(
+ hidden_states=sample,
+ temb=emb,
+ res_hidden_states_tuple=res_samples,
+ upsample_size=upsample_size,
+ encoder_hidden_states=encoder_hidden_states,
+ enable_temporal_attentions=enable_temporal_attentions,
+ )
+ if self.conv_norm_out:
+ sample = self.conv_norm_out(sample)
+ sample = self.conv_act(sample)
+ sample = self.conv_out(sample)
+ if not return_dict:
+ return (sample,)
+ return UNet3DConditionOutput(sample=sample)
+
+ @classmethod
+ def from_pretrained_spatial(cls, pretrained_model_path, subfolder=None):
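+        """
+        Instantiate a `UNet3DConditionModel` from a spatial (2D) UNet checkpoint directory.
+
+        The checkpoint's `config.json` is rewritten to use the 3D block classes, the spatial weights are
+        loaded with `set_state_dict`, and the temporal layers keep their freshly initialized parameters
+        (trainable via `temporal_parameters()`).
+
+        A minimal sketch (the local path is illustrative):
+
+        ```python
+        unet = UNet3DConditionModel.from_pretrained_spatial("./sdxl-base-unet", subfolder="unet")
+        ```
+        """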
+ import json
+
+ if subfolder is not None:
+ pretrained_model_path = os.path.join(pretrained_model_path, subfolder)
+ config_file = os.path.join(pretrained_model_path, "config.json")
+ with open(config_file, "r") as f:
+ config = json.load(f)
+ config["_class_name"] = "UNet3DConditionModel"
+ config["down_block_types"] = ["DownBlock3D", "CrossAttnDownBlock3D", "CrossAttnDownBlock3D"]
+ config["up_block_types"] = ["CrossAttnUpBlock3D", "CrossAttnUpBlock3D", "UpBlock3D"]
+ config["mid_block_type"] = "UNetMidBlock3DCrossAttn"
+ model = cls.from_config(config)
+ model_files = [
+ os.path.join(pretrained_model_path, "diffusion_paddle_model.bin"),
+ os.path.join(pretrained_model_path, "diffusion_paddle_model.safetensors"),
+ ]
+ model_file = None
+ for fp in model_files:
+ if os.path.exists(fp):
+ model_file = fp
+        if model_file is None:
+            raise RuntimeError(f"no model weights found under {pretrained_model_path}; expected one of {model_files}")
+ if model_file.split(".")[-1] == "safetensors":
+ from safetensors import safe_open
+
+ state_dict = {}
+            # Read tensors as numpy arrays so they can be fed directly to Paddle's set_state_dict.
+            with safe_open(model_file, framework="np") as f:
+ for key in f.keys():
+ state_dict[key] = f.get_tensor(key)
+ else:
+ state_dict = paddle.load(path=model_file)
+ model.set_state_dict(state_dict=state_dict, use_structured_name=False)
+ return model
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet_blocks.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet_blocks.py
new file mode 100644
index 0000000000000000000000000000000000000000..093c3b912d7c5d4e382848fba1a984d7450bd1ad
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/hotshot_xl/unet_blocks.py
@@ -0,0 +1,717 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle.distributed.fleet.utils import recompute
+
+from .resnet import Downsample3D, ResnetBlock3D, Upsample3D
+from .transformer_3d import Transformer3DModel
+from .transformer_temporal import TransformerTemporal
+
+
+def get_down_block(
+ down_block_type,
+ num_layers,
+ in_channels,
+ out_channels,
+ temb_channels,
+ add_downsample,
+ resnet_eps,
+ resnet_act_fn,
+ transformer_layers_per_block=1,
+ num_attention_heads=None,
+ resnet_groups=None,
+ cross_attention_dim=None,
+ downsample_padding=None,
+ dual_cross_attention=False,
+ use_linear_projection=False,
+ only_cross_attention=False,
+ upcast_attention=False,
+ resnet_time_scale_shift="default",
+ resnet_skip_time_act=False,
+ resnet_out_scale_factor=1.0,
+ cross_attention_norm=None,
+ attention_head_dim=None,
+ downsample_type=None,
+):
+ down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type
+ if down_block_type == "DownBlock3D":
+ return DownBlock3D(
+ num_layers=num_layers,
+ in_channels=in_channels,
+ out_channels=out_channels,
+ temb_channels=temb_channels,
+ add_downsample=add_downsample,
+ resnet_eps=resnet_eps,
+ resnet_act_fn=resnet_act_fn,
+ resnet_groups=resnet_groups,
+ downsample_padding=downsample_padding,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ )
+ elif down_block_type == "CrossAttnDownBlock3D":
+ if cross_attention_dim is None:
+ raise ValueError("cross_attention_dim must be specified for CrossAttnDownBlock3D")
+ return CrossAttnDownBlock3D(
+ num_layers=num_layers,
+ in_channels=in_channels,
+ out_channels=out_channels,
+ transformer_layers_per_block=transformer_layers_per_block,
+ temb_channels=temb_channels,
+ add_downsample=add_downsample,
+ resnet_eps=resnet_eps,
+ resnet_act_fn=resnet_act_fn,
+ resnet_groups=resnet_groups,
+ downsample_padding=downsample_padding,
+ cross_attention_dim=cross_attention_dim,
+ num_attention_heads=num_attention_heads,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention,
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ )
+ raise ValueError(f"{down_block_type} does not exist.")
+
+
+def get_up_block(
+ up_block_type,
+ num_layers,
+ in_channels,
+ out_channels,
+ prev_output_channel,
+ temb_channels,
+ add_upsample,
+ resnet_eps,
+ resnet_act_fn,
+ transformer_layers_per_block=1,
+ num_attention_heads=None,
+ resnet_groups=None,
+ cross_attention_dim=None,
+ dual_cross_attention=False,
+ use_linear_projection=False,
+ only_cross_attention=False,
+ upcast_attention=False,
+ resnet_time_scale_shift="default",
+ resnet_skip_time_act=False,
+ resnet_out_scale_factor=1.0,
+ cross_attention_norm=None,
+ attention_head_dim=None,
+ upsample_type=None,
+):
+ up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type
+ if up_block_type == "UpBlock3D":
+ return UpBlock3D(
+ num_layers=num_layers,
+ in_channels=in_channels,
+ out_channels=out_channels,
+ prev_output_channel=prev_output_channel,
+ temb_channels=temb_channels,
+ add_upsample=add_upsample,
+ resnet_eps=resnet_eps,
+ resnet_act_fn=resnet_act_fn,
+ resnet_groups=resnet_groups,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ )
+ elif up_block_type == "CrossAttnUpBlock3D":
+ if cross_attention_dim is None:
+ raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock3D")
+ return CrossAttnUpBlock3D(
+ num_layers=num_layers,
+ in_channels=in_channels,
+ transformer_layers_per_block=transformer_layers_per_block,
+ out_channels=out_channels,
+ prev_output_channel=prev_output_channel,
+ temb_channels=temb_channels,
+ add_upsample=add_upsample,
+ resnet_eps=resnet_eps,
+ resnet_act_fn=resnet_act_fn,
+ resnet_groups=resnet_groups,
+ cross_attention_dim=cross_attention_dim,
+ num_attention_heads=num_attention_heads,
+ dual_cross_attention=dual_cross_attention,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention,
+ upcast_attention=upcast_attention,
+ resnet_time_scale_shift=resnet_time_scale_shift,
+ )
+ raise ValueError(f"{up_block_type} does not exist.")
+
+
+class UNetMidBlock3DCrossAttn(paddle.nn.Layer):
+ def __init__(
+ self,
+ in_channels: int,
+ temb_channels: int,
+ dropout: float = 0.0,
+ num_layers: int = 1,
+ transformer_layers_per_block: int = 1,
+ resnet_eps: float = 1e-06,
+ resnet_time_scale_shift: str = "default",
+ resnet_act_fn: str = "swish",
+ resnet_groups: int = 32,
+ resnet_pre_norm: bool = True,
+ num_attention_heads=1,
+ output_scale_factor=1.0,
+ cross_attention_dim=1280,
+ dual_cross_attention=False,
+ use_linear_projection=False,
+ upcast_attention=False,
+ ):
+ super().__init__()
+ self.has_cross_attention = True
+ self.num_attention_heads = num_attention_heads
+ resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32)
+ resnets = [
+ ResnetBlock3D(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ ]
+ attentions = []
+ for _ in range(num_layers):
+ if dual_cross_attention:
+ raise NotImplementedError
+ attentions.append(
+ Transformer3DModel(
+ num_attention_heads,
+ in_channels // num_attention_heads,
+ in_channels=in_channels,
+ num_layers=transformer_layers_per_block,
+ cross_attention_dim=cross_attention_dim,
+ norm_num_groups=resnet_groups,
+ use_linear_projection=use_linear_projection,
+ upcast_attention=upcast_attention,
+ )
+ )
+ resnets.append(
+ ResnetBlock3D(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ )
+ self.attentions = paddle.nn.LayerList(sublayers=attentions)
+ self.resnets = paddle.nn.LayerList(sublayers=resnets)
+
+ def forward(
+ self,
+ hidden_states,
+ temb=None,
+ encoder_hidden_states=None,
+ attention_mask=None,
+ cross_attention_kwargs=None,
+ enable_temporal_attentions: bool = True,
+ ):
+ hidden_states = self.resnets[0](hidden_states, temb)
+ for attn, resnet in zip(self.attentions, self.resnets[1:]):
+ hidden_states = attn(hidden_states, encoder_hidden_states=encoder_hidden_states).sample
+ hidden_states = resnet(hidden_states, temb)
+ return hidden_states
+
+ def temporal_parameters(self) -> list:
+ return []
+
+
+class CrossAttnDownBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ temb_channels: int,
+ dropout: float = 0.0,
+ num_layers: int = 1,
+ transformer_layers_per_block: int = 1,
+ resnet_eps: float = 1e-06,
+ resnet_time_scale_shift: str = "default",
+ resnet_act_fn: str = "swish",
+ resnet_groups: int = 32,
+ resnet_pre_norm: bool = True,
+ num_attention_heads=1,
+ cross_attention_dim=1280,
+ output_scale_factor=1.0,
+ downsample_padding=1,
+ add_downsample=True,
+ dual_cross_attention=False,
+ use_linear_projection=False,
+ only_cross_attention=False,
+ upcast_attention=False,
+ ):
+ super().__init__()
+ resnets = []
+ attentions = []
+ temporal_attentions = []
+ self.has_cross_attention = True
+ self.num_attention_heads = num_attention_heads
+ for i in range(num_layers):
+ in_channels = in_channels if i == 0 else out_channels
+ resnets.append(
+ ResnetBlock3D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ )
+ if dual_cross_attention:
+ raise NotImplementedError
+ attentions.append(
+ Transformer3DModel(
+ num_attention_heads,
+ out_channels // num_attention_heads,
+ in_channels=out_channels,
+ num_layers=transformer_layers_per_block,
+ cross_attention_dim=cross_attention_dim,
+ norm_num_groups=resnet_groups,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention,
+ upcast_attention=upcast_attention,
+ )
+ )
+ temporal_attentions.append(
+ TransformerTemporal(
+ num_attention_heads=8,
+ attention_head_dim=out_channels // 8,
+ in_channels=out_channels,
+ cross_attention_dim=None,
+ )
+ )
+ self.attentions = paddle.nn.LayerList(sublayers=attentions)
+ self.resnets = paddle.nn.LayerList(sublayers=resnets)
+ self.temporal_attentions = paddle.nn.LayerList(sublayers=temporal_attentions)
+ if add_downsample:
+ self.downsamplers = paddle.nn.LayerList(
+ sublayers=[
+ Downsample3D(
+ out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op"
+ )
+ ]
+ )
+ else:
+ self.downsamplers = None
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ hidden_states,
+ temb=None,
+ encoder_hidden_states=None,
+ attention_mask=None,
+ cross_attention_kwargs=None,
+ enable_temporal_attentions: bool = True,
+ ):
+ output_states = ()
+ for resnet, attn, temporal_attention in zip(self.resnets, self.attentions, self.temporal_attentions):
+ if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ hidden_states = recompute(create_custom_forward(resnet), hidden_states, temb, use_reentrant=False)
+ hidden_states = recompute(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )[0]
+ if enable_temporal_attentions and temporal_attention is not None:
+ hidden_states = recompute(
+ create_custom_forward(temporal_attention),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ hidden_states = attn(hidden_states, encoder_hidden_states=encoder_hidden_states).sample
+ if temporal_attention and enable_temporal_attentions:
+ hidden_states = temporal_attention(hidden_states, encoder_hidden_states=encoder_hidden_states)
+ output_states += (hidden_states,)
+ if self.downsamplers is not None:
+ for downsampler in self.downsamplers:
+ hidden_states = downsampler(hidden_states)
+ output_states += (hidden_states,)
+ return hidden_states, output_states
+
+ def temporal_parameters(self) -> list:
+ output = []
+ for block in self.temporal_attentions:
+ if block:
+ output.extend(block.parameters())
+ return output
+
+
+class DownBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ temb_channels: int,
+ dropout: float = 0.0,
+ num_layers: int = 1,
+ resnet_eps: float = 1e-06,
+ resnet_time_scale_shift: str = "default",
+ resnet_act_fn: str = "swish",
+ resnet_groups: int = 32,
+ resnet_pre_norm: bool = True,
+ output_scale_factor=1.0,
+ add_downsample=True,
+ downsample_padding=1,
+ ):
+ super().__init__()
+ resnets = []
+ temporal_attentions = []
+ for i in range(num_layers):
+ in_channels = in_channels if i == 0 else out_channels
+ resnets.append(
+ ResnetBlock3D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ )
+ temporal_attentions.append(
+ TransformerTemporal(
+ num_attention_heads=8,
+ attention_head_dim=out_channels // 8,
+ in_channels=out_channels,
+ cross_attention_dim=None,
+ )
+ )
+ self.resnets = paddle.nn.LayerList(sublayers=resnets)
+ self.temporal_attentions = paddle.nn.LayerList(sublayers=temporal_attentions)
+ if add_downsample:
+ self.downsamplers = paddle.nn.LayerList(
+ sublayers=[
+ Downsample3D(
+ out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op"
+ )
+ ]
+ )
+ else:
+ self.downsamplers = None
+ self.gradient_checkpointing = False
+
+ def forward(self, hidden_states, temb=None, encoder_hidden_states=None, enable_temporal_attentions: bool = True):
+ output_states = ()
+ for resnet, temporal_attention in zip(self.resnets, self.temporal_attentions):
+ if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ hidden_states = recompute(create_custom_forward(resnet), hidden_states, temb, use_reentrant=False)
+ if enable_temporal_attentions and temporal_attention is not None:
+ hidden_states = recompute(
+ create_custom_forward(temporal_attention),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ if enable_temporal_attentions and temporal_attention:
+ hidden_states = temporal_attention(hidden_states, encoder_hidden_states=encoder_hidden_states)
+ output_states += (hidden_states,)
+ if self.downsamplers is not None:
+ for downsampler in self.downsamplers:
+ hidden_states = downsampler(hidden_states)
+ output_states += (hidden_states,)
+ return hidden_states, output_states
+
+ def temporal_parameters(self) -> list:
+ output = []
+ for block in self.temporal_attentions:
+ if block:
+ output.extend(block.parameters())
+ return output
+
+
+class CrossAttnUpBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ prev_output_channel: int,
+ temb_channels: int,
+ dropout: float = 0.0,
+ num_layers: int = 1,
+ transformer_layers_per_block: int = 1,
+ resnet_eps: float = 1e-06,
+ resnet_time_scale_shift: str = "default",
+ resnet_act_fn: str = "swish",
+ resnet_groups: int = 32,
+ resnet_pre_norm: bool = True,
+ num_attention_heads=1,
+ cross_attention_dim=1280,
+ output_scale_factor=1.0,
+ add_upsample=True,
+ dual_cross_attention=False,
+ use_linear_projection=False,
+ only_cross_attention=False,
+ upcast_attention=False,
+ ):
+ super().__init__()
+ resnets = []
+ attentions = []
+ temporal_attentions = []
+ self.has_cross_attention = True
+ self.num_attention_heads = num_attention_heads
+ for i in range(num_layers):
+ res_skip_channels = in_channels if i == num_layers - 1 else out_channels
+ resnet_in_channels = prev_output_channel if i == 0 else out_channels
+ resnets.append(
+ ResnetBlock3D(
+ in_channels=resnet_in_channels + res_skip_channels,
+ out_channels=out_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ )
+ if dual_cross_attention:
+ raise NotImplementedError
+ attentions.append(
+ Transformer3DModel(
+ num_attention_heads,
+ out_channels // num_attention_heads,
+ in_channels=out_channels,
+ num_layers=transformer_layers_per_block,
+ cross_attention_dim=cross_attention_dim,
+ norm_num_groups=resnet_groups,
+ use_linear_projection=use_linear_projection,
+ only_cross_attention=only_cross_attention,
+ upcast_attention=upcast_attention,
+ )
+ )
+ temporal_attentions.append(
+ TransformerTemporal(
+ num_attention_heads=8,
+ attention_head_dim=out_channels // 8,
+ in_channels=out_channels,
+ cross_attention_dim=None,
+ )
+ )
+ self.attentions = paddle.nn.LayerList(sublayers=attentions)
+ self.resnets = paddle.nn.LayerList(sublayers=resnets)
+ self.temporal_attentions = paddle.nn.LayerList(sublayers=temporal_attentions)
+ if add_upsample:
+ self.upsamplers = paddle.nn.LayerList(
+ sublayers=[Upsample3D(out_channels, use_conv=True, out_channels=out_channels)]
+ )
+ else:
+ self.upsamplers = None
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ hidden_states,
+ res_hidden_states_tuple,
+ temb=None,
+ encoder_hidden_states=None,
+ upsample_size=None,
+ cross_attention_kwargs=None,
+ attention_mask=None,
+ enable_temporal_attentions: bool = True,
+ ):
+ for resnet, attn, temporal_attention in zip(self.resnets, self.attentions, self.temporal_attentions):
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ hidden_states = paddle.concat(x=[hidden_states, res_hidden_states], axis=1)
+ if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ hidden_states = recompute(create_custom_forward(resnet), hidden_states, temb, use_reentrant=False)
+ hidden_states = recompute(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )[0]
+ if enable_temporal_attentions and temporal_attention is not None:
+ hidden_states = recompute(
+ create_custom_forward(temporal_attention),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ hidden_states = attn(hidden_states, encoder_hidden_states=encoder_hidden_states).sample
+ if enable_temporal_attentions and temporal_attention:
+ hidden_states = temporal_attention(hidden_states, encoder_hidden_states=encoder_hidden_states)
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+ return hidden_states
+
+ def temporal_parameters(self) -> list:
+ output = []
+ for block in self.temporal_attentions:
+ if block:
+ output.extend(block.parameters())
+ return output
+
+
+class UpBlock3D(paddle.nn.Layer):
+ def __init__(
+ self,
+ in_channels: int,
+ prev_output_channel: int,
+ out_channels: int,
+ temb_channels: int,
+ dropout: float = 0.0,
+ num_layers: int = 1,
+ resnet_eps: float = 1e-06,
+ resnet_time_scale_shift: str = "default",
+ resnet_act_fn: str = "swish",
+ resnet_groups: int = 32,
+ resnet_pre_norm: bool = True,
+ output_scale_factor=1.0,
+ add_upsample=True,
+ ):
+ super().__init__()
+ resnets = []
+ temporal_attentions = []
+ for i in range(num_layers):
+ res_skip_channels = in_channels if i == num_layers - 1 else out_channels
+ resnet_in_channels = prev_output_channel if i == 0 else out_channels
+ resnets.append(
+ ResnetBlock3D(
+ in_channels=resnet_in_channels + res_skip_channels,
+ out_channels=out_channels,
+ temb_channels=temb_channels,
+ eps=resnet_eps,
+ groups=resnet_groups,
+ dropout=dropout,
+ time_embedding_norm=resnet_time_scale_shift,
+ non_linearity=resnet_act_fn,
+ output_scale_factor=output_scale_factor,
+ pre_norm=resnet_pre_norm,
+ )
+ )
+ temporal_attentions.append(
+ TransformerTemporal(
+ num_attention_heads=8,
+ attention_head_dim=out_channels // 8,
+ in_channels=out_channels,
+ cross_attention_dim=None,
+ )
+ )
+ self.resnets = paddle.nn.LayerList(sublayers=resnets)
+ self.temporal_attentions = paddle.nn.LayerList(sublayers=temporal_attentions)
+ if add_upsample:
+ self.upsamplers = paddle.nn.LayerList(
+ sublayers=[Upsample3D(out_channels, use_conv=True, out_channels=out_channels)]
+ )
+ else:
+ self.upsamplers = None
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ hidden_states,
+ res_hidden_states_tuple,
+ temb=None,
+ upsample_size=None,
+ encoder_hidden_states=None,
+ enable_temporal_attentions: bool = True,
+ ):
+ for resnet, temporal_attention in zip(self.resnets, self.temporal_attentions):
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ hidden_states = paddle.concat(x=[hidden_states, res_hidden_states], axis=1)
+ if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ hidden_states = recompute(create_custom_forward(resnet), hidden_states, temb, use_reentrant=False)
+ if enable_temporal_attentions and temporal_attention is not None:
+ hidden_states = recompute(
+ create_custom_forward(temporal_attention),
+ hidden_states,
+ encoder_hidden_states,
+ use_reentrant=False,
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ hidden_states = (
+ temporal_attention(hidden_states, encoder_hidden_states=encoder_hidden_states)
+ if enable_temporal_attentions and temporal_attention is not None
+ else hidden_states
+ )
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+ return hidden_states
+
+ def temporal_parameters(self) -> list:
+ output = []
+ for block in self.temporal_attentions:
+ if block:
+ output.extend(block.parameters())
+ return output
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2cc31a03a59f05a9bc2b53fb21829b2dbd83cbe
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/__init__.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+import paddle
+import paddle_aux
+
+import ppdiffusers
+
+from .loss_weights import *
+from .noise_conditions import *
+from .samplers import *
+from .scalers import *
+from .schedulers import *
+from .targets import *
+
+
+class GDF:
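+    """
+    Bundles the pieces of a diffusion process (noise schedule, input scaler, prediction target,
+    noise conditioning and loss weighting) behind `diffuse` for training and `undiffuse`/`sample`
+    for generation. `sample` is a generator: it yields `(x0_prediction, x, model_prediction)` at
+    every step and accepts updated settings (e.g. `cfg`, `sampler`) via `generator.send(...)`.
+    """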
+ def __init__(self, schedule, input_scaler, target, noise_cond, loss_weight, offset_noise=0):
+ self.schedule = schedule
+ self.input_scaler = input_scaler
+ self.target = target
+ self.noise_cond = noise_cond
+ self.loss_weight = loss_weight
+ self.offset_noise = offset_noise
+
+ def setup_limits(self, stretch_max=True, stretch_min=True, shift=1):
+ stretched_limits = self.input_scaler.setup_limits(
+ self.schedule, self.input_scaler, stretch_max, stretch_min, shift
+ )
+ return stretched_limits
+
+ def diffuse(self, x0, epsilon=None, t=None, shift=1, loss_shift=1, offset=None):
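+        """
+        Draw a noised training sample.
+
+        Returns `(noised, epsilon, target, logSNR, noise_cond, loss_weight)`, where
+        `noised = a * x0 + b * epsilon` with `a, b` given by the input scaler at the sampled `logSNR`.
+        """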
+ if epsilon is None:
+ epsilon = paddle.randn(shape=x0.shape, dtype=x0.dtype)
+
+ if self.offset_noise > 0:
+ if offset is None:
+ offset = paddle.randn(
+ shape=[x0.shape[0], x0.shape[1]] + [1] * (len(x0.shape) - 2),
+ )
+ epsilon = epsilon + offset * self.offset_noise
+ logSNR = self.schedule(x0.shape[0] if t is None else t, shift=shift)
+ a, b = self.input_scaler(logSNR)
+ if len(a.shape) == 1:
+ a, b = a.reshape([-1, *([1] * (len(x0.shape) - 1))]), b.reshape([-1, *([1] * (len(x0.shape) - 1))])
+ target = self.target(x0, epsilon, logSNR, a, b)
+ return (
+ x0 * a + epsilon * b,
+ epsilon,
+ target,
+ logSNR,
+ self.noise_cond(logSNR),
+ self.loss_weight(logSNR, shift=loss_shift),
+ )
+
+ def undiffuse(self, x, logSNR, pred):
+ a, b = self.input_scaler(logSNR)
+ if len(a.shape) == 1:
+ a, b = a.reshape([-1, *([1] * (len(x.shape) - 1))]), b.reshape([-1, *([1] * (len(x.shape) - 1))])
+ return self.target.x0(x, pred, logSNR, a, b), self.target.epsilon(x, pred, logSNR, a, b)
+
+ def sample(
+ self,
+ model,
+ model_inputs,
+ shape,
+ unconditional_inputs=None,
+ sampler=None,
+ schedule=None,
+ t_start=1.0,
+ t_end=0.0,
+ timesteps=20,
+ x_init=None,
+ cfg=3.0,
+ cfg_t_stop=None,
+ cfg_t_start=None,
+ cfg_rho=0.7,
+ sampler_params=None,
+ shift=1,
+ device="cpu",
+ ):
+ sampler_params = {} if sampler_params is None else sampler_params
+ if sampler is None:
+ sampler = DDPMSampler(self) # noqa
+ r_range = paddle.linspace(start=t_start, stop=t_end, num=timesteps + 1)
+ schedule = self.schedule if schedule is None else schedule
+ logSNR_range = (
+ schedule(r_range, shift=shift)[:, None]
+ .expand(shape=[-1, shape[0] if x_init is None else x_init.shape[0]])
+ .to(device)
+ )
+ x = sampler.init_x(shape).to(device) if x_init is None else x_init.clone()
+ if cfg is not None:
+ if unconditional_inputs is None:
+ unconditional_inputs = {k: paddle.zeros_like(x=v) for k, v in model_inputs.items()}
+ model_inputs = {
+ k: (
+ paddle.concat(x=[v, v_u], axis=0)
+ if isinstance(v, paddle.Tensor)
+ else [
+ (
+ paddle.concat(x=[vi, vi_u], axis=0)
+ if isinstance(vi, paddle.Tensor) and isinstance(vi_u, paddle.Tensor)
+ else None
+ )
+ for vi, vi_u in zip(v, v_u)
+ ]
+ if isinstance(v, list)
+ else {vk: paddle.concat(x=[v[vk], v_u.get(vk, paddle.zeros_like(x=v[vk]))], axis=0) for vk in v}
+ if isinstance(v, dict)
+ else None
+ )
+ for (k, v), (k_u, v_u) in zip(model_inputs.items(), unconditional_inputs.items())
+ }
+ for i in range(0, timesteps):
+ noise_cond = self.noise_cond(logSNR_range[i])
+ if (
+ cfg is not None
+ and (cfg_t_stop is None or r_range[i].item() >= cfg_t_stop)
+ and (cfg_t_start is None or r_range[i].item() <= cfg_t_start)
+ ):
+ cfg_val = cfg
+ if isinstance(cfg_val, (list, tuple)):
+ assert len(cfg_val) == 2, "cfg must be a float or a list/tuple of length 2"
+ cfg_val = cfg_val[0] * r_range[i].item() + cfg_val[1] * (1 - r_range[i].item())
+
+ pred, pred_unconditional = model(
+ paddle.concat(x=[x, x], axis=0), noise_cond.repeat(2), **model_inputs
+ ).chunk(chunks=2)
+
+ pred_cfg = paddle.lerp(pred_unconditional, pred, paddle.to_tensor(cfg_val, dtype=paddle.float32))
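+                # cfg_rho > 0 blends the guided prediction with a copy rescaled to the conditional
+                # branch's standard deviation, taming over-saturation at high guidance scales.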
+ if cfg_rho > 0:
+ std_pos, std_cfg = pred.std(), pred_cfg.std()
+ pred = cfg_rho * (pred_cfg * std_pos / (std_cfg + 1e-9)) + pred_cfg * (1 - cfg_rho)
+ else:
+ pred = pred_cfg
+ else:
+ pred = model(x, noise_cond, **model_inputs)
+
+ x0, epsilon = self.undiffuse(x, logSNR_range[i], pred)
+ x = sampler(x, x0, epsilon, logSNR_range[i], logSNR_range[i + 1], **sampler_params)
+ altered_vars = yield x0, x, pred
+ if altered_vars is not None:
+ cfg = altered_vars.get("cfg", cfg)
+ cfg_rho = altered_vars.get("cfg_rho", cfg_rho)
+ sampler = altered_vars.get("sampler", sampler)
+ model_inputs = altered_vars.get("model_inputs", model_inputs)
+ x = altered_vars.get("x", x)
+ x_init = altered_vars.get("x_init", x_init)
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/loss_weights.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/loss_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2fefb2dd19a63300881b315e085661da7ca16a2
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/loss_weights.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+import paddle_aux # noqa
+
+
+class BaseLossWeight:
+ def weight(self, logSNR):
+ raise NotImplementedError("this method needs to be overridden")
+
+ def __call__(self, logSNR, *args, shift=1, clamp_range=None, **kwargs):
+ clamp_range = [-1000000000.0, 1000000000.0] if clamp_range is None else clamp_range
+ if shift != 1:
+ logSNR = logSNR.clone() + 2 * np.log(shift)
+ return self.weight(logSNR, *args, **kwargs).clip(*clamp_range)
+
+
+class ComposedLossWeight(BaseLossWeight):
+ def __init__(self, div, mul):
+ self.mul = [mul] if isinstance(mul, BaseLossWeight) else mul
+ self.div = [div] if isinstance(div, BaseLossWeight) else div
+
+ def weight(self, logSNR):
+ prod, div = 1, 1
+ for m in self.mul:
+ prod *= m.weight(logSNR)
+ for d in self.div:
+ div *= d.weight(logSNR)
+ return prod / div
+
+
+class ConstantLossWeight(BaseLossWeight):
+ def __init__(self, v=1):
+ self.v = v
+
+ def weight(self, logSNR):
+ return paddle.ones_like(x=logSNR) * self.v
+
+
+class SNRLossWeight(BaseLossWeight):
+ def weight(self, logSNR):
+ return logSNR.exp()
+
+
+class P2LossWeight(BaseLossWeight):
+ def __init__(self, k=1.0, gamma=1.0, s=1.0):
+ self.k, self.gamma, self.s = k, gamma, s
+
+ def weight(self, logSNR):
+ return (self.k + (logSNR * self.s).exp()) ** -self.gamma
+
+
+class SNRPlusOneLossWeight(BaseLossWeight):
+ def weight(self, logSNR):
+ return logSNR.exp() + 1
+
+
+class MinSNRLossWeight(BaseLossWeight):
+ def __init__(self, max_snr=5):
+ self.max_snr = max_snr
+
+ def weight(self, logSNR):
+ return logSNR.exp().clip(max=self.max_snr)
+
+
+class MinSNRPlusOneLossWeight(BaseLossWeight):
+ def __init__(self, max_snr=5):
+ self.max_snr = max_snr
+
+ def weight(self, logSNR):
+ return (logSNR.exp() + 1).clip(max=self.max_snr)
+
+
+class TruncatedSNRLossWeight(BaseLossWeight):
+ def __init__(self, min_snr=1):
+ self.min_snr = min_snr
+
+ def weight(self, logSNR):
+ return logSNR.exp().clip(min=self.min_snr)
+
+
+class SechLossWeight(BaseLossWeight):
+ def __init__(self, div=2):
+ self.div = div
+
+ def weight(self, logSNR):
+ return 1 / (logSNR / self.div).cosh()
+
+
+class DebiasedLossWeight(BaseLossWeight):
+ def weight(self, logSNR):
+ return 1 / logSNR.exp().sqrt()
+
+
+class SigmoidLossWeight(BaseLossWeight):
+ def __init__(self, s=1):
+ self.s = s
+
+ def weight(self, logSNR):
+ return (logSNR * self.s).sigmoid()
+
+
+class AdaptiveLossWeight(BaseLossWeight):
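+    """
+    Tracks an exponential moving average of the training loss per logSNR bucket (`update_buckets`)
+    and weights each sample by the inverse of its bucket's average, clipped to `weight_range`.
+    """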
+ def __init__(self, logsnr_range=[-10, 10], buckets=300, weight_range=[1e-07, 10000000.0]):
+ self.bucket_ranges = paddle.linspace(start=logsnr_range[0], stop=logsnr_range[1], num=buckets - 1)
+        self.bucket_losses = paddle.ones(shape=[buckets])
+ self.weight_range = weight_range
+
+ def weight(self, logSNR):
+ indices = paddle.searchsorted(sorted_sequence=self.bucket_ranges.to(logSNR.place), values=logSNR)
+        return (1 / self.bucket_losses.to(logSNR.place)[indices]).clip(*self.weight_range)
+
+ def update_buckets(self, logSNR, loss, beta=0.99):
+ indices = paddle.searchsorted(sorted_sequence=self.bucket_ranges.to(logSNR.place), values=logSNR).cpu()
+ self.bucket_losses[indices] = self.bucket_losses[indices] * beta + loss.detach().cpu() * (1 - beta)
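+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, added for illustration only (not part of the upstream
+    # module): every weighting is called on a tensor of per-sample logSNR values and
+    # optionally clamped.
+    logSNR = paddle.linspace(-6.0, 6.0, 5)
+    print(ConstantLossWeight()(logSNR))
+    print(MinSNRLossWeight(max_snr=5)(logSNR))
+    print(P2LossWeight()(logSNR, clamp_range=[0.0, 100.0]))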
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/scalers.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/scalers.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ea70592b8882b8261a52a8e6d2717fb7c28c3cb
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/scalers.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+
+
+class BaseScaler:
+ def __init__(self):
+ self.stretched_limits = None
+
+ def setup_limits(self, schedule, input_scaler, stretch_max=True, stretch_min=True, shift=1):
+ min_logSNR = schedule(paddle.ones(shape=[1]), shift=shift)
+ max_logSNR = schedule(paddle.zeros(shape=[1]), shift=shift)
+ min_a, max_b = [v.item() for v in input_scaler(min_logSNR)] if stretch_max else [0, 1]
+ max_a, min_b = [v.item() for v in input_scaler(max_logSNR)] if stretch_min else [1, 0]
+ self.stretched_limits = [min_a, max_a, min_b, max_b]
+ return self.stretched_limits
+
+ def stretch_limits(self, a, b):
+ min_a, max_a, min_b, max_b = self.stretched_limits
+ return (a - min_a) / (max_a - min_a), (b - min_b) / (max_b - min_b)
+
+ def scalers(self, logSNR):
+ raise NotImplementedError("this method needs to be overridden")
+
+ def __call__(self, logSNR):
+ a, b = self.scalers(logSNR)
+ if self.stretched_limits is not None:
+ a, b = self.stretch_limits(a, b)
+ return a, b
+
+
+class VPScaler(BaseScaler):
+ def scalers(self, logSNR):
+ a_squared = logSNR.sigmoid()
+ a = a_squared.sqrt()
+ b = (1 - a_squared).sqrt()
+ return a, b
+
+
+class LERPScaler(BaseScaler):
+ def scalers(self, logSNR):
+ _a = logSNR.exp() - 1
+ _a[_a == 0] = 0.001
+ a = 1 + (2 - (2**2 + 4 * _a) ** 0.5) / (2 * _a)
+ b = 1 - a
+ return a, b
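+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, added for illustration only (not part of the upstream
+    # module): the variance-preserving scaler returns coefficients with
+    # a**2 + b**2 == 1 for every logSNR.
+    logSNR = paddle.linspace(-6.0, 6.0, 5)
+    a, b = VPScaler()(logSNR)
+    print(a**2 + b**2)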
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/targets.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/targets.py
new file mode 100644
index 0000000000000000000000000000000000000000..51fb2e2e4601cbff4910892b861f06b2040d6e2d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/gdf/targets.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class EpsilonTarget:
+ def __call__(self, x0, epsilon, logSNR, a, b):
+ return epsilon
+
+ def x0(self, noised, pred, logSNR, a, b):
+ return (noised - pred * b) / a
+
+ def epsilon(self, noised, pred, logSNR, a, b):
+ return pred
+
+
+class X0Target:
+ def __call__(self, x0, epsilon, logSNR, a, b):
+ return x0
+
+ def x0(self, noised, pred, logSNR, a, b):
+ return pred
+
+ def epsilon(self, noised, pred, logSNR, a, b):
+ return (noised - pred * a) / b
+
+
+class VTarget:
+ def __call__(self, x0, epsilon, logSNR, a, b):
+ return a * epsilon - b * x0
+
+ def x0(self, noised, pred, logSNR, a, b):
+ squared_sum = a**2 + b**2
+ return a / squared_sum * noised - b / squared_sum * pred
+
+ def epsilon(self, noised, pred, logSNR, a, b):
+ squared_sum = a**2 + b**2
+ return b / squared_sum * noised + a / squared_sum * pred
+
+
+class RectifiedFlowsTarget:
+ def __call__(self, x0, epsilon, logSNR, a, b):
+ return epsilon - x0
+
+ def x0(self, noised, pred, logSNR, a, b):
+ return noised - pred * b
+
+ def epsilon(self, noised, pred, logSNR, a, b):
+ return noised + pred * a
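+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, added for illustration only (not part of the upstream
+    # module), using plain floats: with the noising convention
+    # noised = a * x0 + b * epsilon, each target recovers x0 and epsilon from its own
+    # prediction type.
+    a, b, x0, eps = 0.8, 0.6, 1.5, -0.3
+    noised = a * x0 + b * eps
+    v_pred = VTarget()(x0, eps, None, a, b)
+    print(VTarget().x0(noised, v_pred, None, a, b))  # ~1.5
+    print(VTarget().epsilon(noised, v_pred, None, a, b))  # ~-0.3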
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..063951a2f34e2da6d2ac9dd82221183876e22354
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .effnet import EfficientNetEncoder
+from .previewer import Previewer
+from .stage_c import AttnBlock, FeedForwardBlock, ResBlock, StageC, TimestepBlock
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/common.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..93724d128cab9e8b7d34438c1ae1f0bc467cc963
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/common.py
@@ -0,0 +1,151 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+
+
+def load(path="../x.npy"):
+ return paddle.to_tensor(np.load(path))
+
+
+def diff(a, b):
+ return (a - b).abs().mean()
+
+
+class Linear(nn.Linear):
+ def reset_parameters(self):
+ return None
+
+
+class Conv2d(nn.Conv2D):
+ def reset_parameters(self):
+ return None
+
+
+class Attention2D(nn.Layer):
+ def __init__(self, c, nhead, dropout=0.0):
+ super().__init__()
+ self.attn = nn.MultiHeadAttention(c, nhead, dropout=dropout)
+
+ def forward(self, x, kv, self_attn=False):
+ orig_shape = x.shape
+ x = x.reshape([x.shape[0], x.shape[1], -1]).transpose([0, 2, 1])
+ if self_attn:
+ kv = paddle.concat([x, kv], axis=1)
+ x = self.attn(x, kv, kv)
+ x = x.transpose([0, 2, 1]).reshape(orig_shape)
+ return x
+
+
+class LayerNorm2d(nn.LayerNorm):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def forward(self, x):
+ return super().forward(x.transpose(perm=[0, 2, 3, 1])).transpose(perm=[0, 3, 1, 2])
+
+
+class GlobalResponseNorm(nn.Layer):
+ def __init__(self, dim):
+ super(GlobalResponseNorm, self).__init__()
+ self.gamma = self.create_parameter(
+ shape=[1, 1, 1, dim], default_initializer=paddle.nn.initializer.Constant(value=0.0)
+ )
+ self.beta = self.create_parameter(
+ shape=[1, 1, 1, dim], default_initializer=paddle.nn.initializer.Constant(value=0.0)
+ )
+ self.gamma.stop_gradient = False
+ self.beta.stop_gradient = False
+
+ def forward(self, x):
+ Gx = paddle.norm(x, p=2, axis=(1, 2), keepdim=True)
+ Nx = Gx / (paddle.mean(Gx, axis=-1, keepdim=True) + 1e-6)
+ x = self.gamma * (x * Nx) + self.beta + x
+ return x
+
+
+class ResBlock(nn.Layer):
+ def __init__(self, c, c_skip=0, kernel_size=3, dropout=0.0):
+ super().__init__()
+ self.depthwise = Conv2d(c, c, kernel_size=kernel_size, padding=kernel_size // 2, groups=c)
+ self.norm = LayerNorm2d(c, weight_attr=False, bias_attr=False, epsilon=1e-06)
+ self.channelwise = nn.Sequential(
+ Linear(c + c_skip, c * 4),
+ nn.GELU(),
+ GlobalResponseNorm(c * 4),
+ nn.Dropout(p=dropout),
+ Linear(c * 4, c),
+ )
+
+ def forward(self, x, x_skip=None):
+ x_res = x
+ x = self.depthwise(x)
+ x = self.norm(x)
+ if x_skip is not None:
+ x = paddle.concat(x=[x, x_skip], axis=1)
+
+ x = self.channelwise(x.transpose(perm=[0, 2, 3, 1])).transpose(perm=[0, 3, 1, 2])
+ return x + x_res
+
+
+class AttnBlock(nn.Layer):
+ def __init__(self, c, c_cond, nhead, self_attn=True, dropout=0.0):
+ super().__init__()
+ self.self_attn = self_attn
+ self.norm = LayerNorm2d(c, weight_attr=False, bias_attr=False, epsilon=1e-06)
+ self.attention = Attention2D(c, nhead, dropout)
+ self.kv_mapper = nn.Sequential(nn.Silu(), Linear(c_cond, c))
+
+ def forward(self, x, kv):
+ kv = self.kv_mapper(kv)
+ x = x + self.attention(self.norm(x), kv, self_attn=self.self_attn)
+ return x
+
+
+class FeedForwardBlock(nn.Layer):
+ def __init__(self, c, dropout=0.0):
+ super().__init__()
+ self.norm = LayerNorm2d(c, weight_attr=False, bias_attr=False, epsilon=1e-06)
+ self.channelwise = nn.Sequential(
+ Linear(c, c * 4),
+ nn.GELU(),
+ GlobalResponseNorm(c * 4),
+ nn.Dropout(p=dropout),
+ Linear(c * 4, c),
+ )
+
+ def forward(self, x):
+ x = x + self.channelwise(self.norm(x).transpose(perm=[0, 2, 3, 1])).transpose(perm=[0, 3, 1, 2])
+ return x
+
+
+class TimestepBlock(nn.Layer):
+ def __init__(self, c, c_timestep, conds=["sca"], trainable=True):
+ super(TimestepBlock, self).__init__()
+ self.mapper = nn.Linear(c_timestep, c * 2, bias_attr=trainable)
+ self.conds = conds
+ for cname in conds:
+ setattr(self, f"mapper_{cname}", nn.Linear(c_timestep, c * 2, bias_attr=trainable))
+
+ def forward(self, x, t):
+ t = paddle.split(t, num_or_sections=len(self.conds) + 1, axis=1)
+ a_b = self.mapper(t[0])
+ a, b = a_b[:, : a_b.shape[1] // 2, None, None], a_b[:, a_b.shape[1] // 2 :, None, None]
+ for i, c in enumerate(self.conds):
+ ac_bc = getattr(self, f"mapper_{c}")(t[i + 1])
+ ac, bc = ac_bc[:, : ac_bc.shape[1] // 2, None, None], ac_bc[:, ac_bc.shape[1] // 2 :, None, None]
+ a, b = a + ac, b + bc
+ return x * (1 + a) + b
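+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, added for illustration only (not part of the upstream
+    # module): TimestepBlock splits the conditioning vector into len(conds) + 1 chunks
+    # and applies the per-channel affine modulation x * (1 + a) + b.
+    block = TimestepBlock(c=8, c_timestep=4, conds=["sca"])
+    x = paddle.randn([1, 8, 4, 4])
+    t = paddle.randn([1, 8])  # one chunk for the timestep, one for "sca"
+    print(block(x, t).shape)  # [1, 8, 4, 4]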
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/efficientnet_v2_s.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/efficientnet_v2_s.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9497b6373f4b8f289fbadc9b318ff4bd14a1741
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/efficientnet_v2_s.py
@@ -0,0 +1,561 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import math
+from dataclasses import dataclass
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
+
+import paddle
+import paddle.nn as nn
+from paddle import Tensor
+from paddle.nn import (
+ AdaptiveAvgPool2D,
+ BatchNorm,
+ BatchNorm2D,
+ Conv2D,
+ Dropout,
+ GroupNorm,
+ Layer,
+ Linear,
+ ReLU,
+ Sequential,
+ Sigmoid,
+ Silu,
+)
+from paddle.nn.initializer import Constant, KaimingNormal, Uniform
+from paddle.utils.download import get_weights_path_from_url
+
+__all__ = ["EfficientNet", "EfficientNet_V2_S_Weights", "efficientnet_v2_s"]
+
+
+class SqueezeExcitation(paddle.nn.Layer):
+ """
+ This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
+ Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
+
+ Args:
+ input_channels (int): Number of channels in the input feature maps
+ squeeze_channels (int): Number of squeeze channels
+ activation (Callable[[Tensor], Tensor], optional): ``delta`` activation. Default: ReLU
+ scale_activation (Callable[[Tensor], Tensor], optional): ``sigma`` activation. Default: Sigmoid
+ """
+
+ def __init__(
+ self,
+ input_channels: int,
+ squeeze_channels: int,
+ activation: Callable[[Tensor], Tensor] = ReLU(),
+ scale_activation: Callable[[Tensor], Tensor] = Sigmoid(),
+ ) -> None:
+ super(SqueezeExcitation, self).__init__()
+ self.avgpool = AdaptiveAvgPool2D(1)
+ self.fc1 = Conv2D(in_channels=input_channels, out_channels=squeeze_channels, kernel_size=1)
+ self.fc2 = Conv2D(in_channels=squeeze_channels, out_channels=input_channels, kernel_size=1)
+ self.activation = activation
+ self.scale_activation = scale_activation
+
+ def forward(self, input: paddle.Tensor) -> paddle.Tensor:
+ scale = self.avgpool(input)
+ scale = self.fc1(scale)
+ scale = self.activation(scale)
+ scale = self.fc2(scale)
+ scale = self.scale_activation(scale)
+ return scale * input
+
+
+def stochastic_depth(input, p, mode, training=True):
+ """
+    Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth"
+    <https://arxiv.org/abs/1603.09382>`_ used for randomly dropping residual
+    branches of residual architectures.
+
+ Args:
+ input (paddle.Tensor): The input tensor or arbitrary dimensions with the first one
+ being its batch i.e. a batch with ``N`` rows.
+ p (float): probability of the input to be zeroed.
+ mode (str): ``"batch"`` or ``"row"``.
+ ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes
+ randomly selected rows from the batch.
+ training (bool): apply stochastic depth if is ``True``. Default: ``True``
+
+ Returns:
+ paddle.Tensor: The randomly zeroed tensor.
+ """
+ if p < 0.0 or p > 1.0:
+ raise ValueError(f"drop probability has to be between 0 and 1, but got {p}")
+ if mode not in ["batch", "row"]:
+ raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}")
+ if not training or p == 0.0:
+ return input
+
+ survival_rate = 1.0 - p
+ if mode == "row":
+ size = [input.shape[0]] + [1] * (input.ndim - 1)
+ else:
+ size = [1] * input.ndim
+ noise = paddle.empty(size, dtype=input.dtype)
+ survival_rate = paddle.to_tensor(survival_rate, dtype=input.dtype)
+ paddle.assign(paddle.bernoulli(paddle.broadcast_to(survival_rate, noise.shape)), noise)
+ if survival_rate > 0.0:
+ noise /= survival_rate
+ return input * noise
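+
+
+# Example (illustrative): stochastic_depth(x, p=0.25, mode="row") zeroes, on average, a
+# quarter of the samples' residual branches during training and rescales the surviving
+# rows by 1 / 0.75 so the expected value is preserved; with training=False it is an
+# identity.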
+
+
+class StochasticDepth(Layer):
+ """
+ See :func:`stochastic_depth`.
+ """
+
+ def __init__(self, p: float, mode: str) -> None:
+ super(StochasticDepth, self).__init__()
+ self.p = p
+ self.mode = mode
+
+ def forward(self, input):
+ return stochastic_depth(input, self.p, self.mode, self.training)
+
+ def __repr__(self):
+ s = f"{self.__class__.__name__}(p={self.p}, mode={self.mode})"
+ return s
+
+
+def _make_ntuple(value, n):
+ """Helper function to create a tuple of size n with the given value."""
+ if isinstance(value, int):
+ return (value,) * n
+ return value
+
+
+class ConvNormActivation(Sequential):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: Union[int, Sequence[int]] = 3,
+ stride: Union[int, Sequence[int]] = 1,
+ padding: Optional[Union[int, Sequence[int], str]] = None,
+ groups: int = 1,
+ norm_layer: Optional[Callable[..., paddle.nn.Layer]] = BatchNorm,
+ activation_layer: Optional[Callable[..., paddle.nn.Layer]] = ReLU,
+ dilation: Union[int, Sequence[int]] = 1,
+ inplace: Optional[bool] = True,
+ bias: Optional[bool] = None,
+ conv_layer: Callable[..., Conv2D] = Conv2D,
+ ) -> None:
+ if padding is None:
+ padding = (kernel_size - 1) // 2 * dilation
+ else:
+            padding = _make_ntuple(padding, 2 if isinstance(kernel_size, int) else len(kernel_size))
+
+ layers = [
+ conv_layer(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ dilation=dilation,
+ groups=groups,
+ bias_attr=False if bias is None else bias,
+ )
+ ]
+
+ if norm_layer is not None:
+ norm_layer_instance = norm_layer(out_channels, use_global_stats=True)
+ layers.append(norm_layer_instance)
+
+ if activation_layer is not None:
+ layers.append(activation_layer)
+
+ super(ConvNormActivation, self).__init__(*layers)
+ self.out_channels = out_channels
+
+
+class Conv2DNormActivation(ConvNormActivation):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: Union[int, Tuple[int, int]] = 3,
+ stride: Union[int, Tuple[int, int]] = 1,
+ padding: Optional[Union[int, Tuple[int, int], str]] = None,
+ groups: int = 1,
+ norm_layer: Optional[Callable[..., paddle.nn.Layer]] = BatchNorm,
+ activation_layer: Optional[Callable[..., paddle.nn.Layer]] = ReLU,
+ dilation: Union[int, Tuple[int, int]] = 1,
+ inplace: Optional[bool] = True,
+ bias: Optional[bool] = None,
+ ) -> None:
+ super().__init__(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ groups,
+ norm_layer,
+ activation_layer,
+ dilation,
+ inplace,
+ bias,
+ Conv2D,
+ )
+
+
+class EfficientNet_V2_S_Weights:
+ IMAGENET1K_V1 = "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth"
+
+ def __init__(self, url: str, transforms: Callable[..., Any], meta: Dict[str, Any]) -> None:
+ self.url = url
+ self.transforms = transforms
+ self.meta = meta
+
+ def state_dict(self, progress: bool = True, check_hash: bool = False) -> Dict[str, Any]:
+        path = get_weights_path_from_url(self.url)
+ return paddle.load(path)
+
+ @classmethod
+ def verify(cls, weights):
+ if weights is None:
+ return None
+ if not isinstance(weights, EfficientNet_V2_S_Weights):
+ raise ValueError(f"weights must be an instance of EfficientNet_V2_S_Weights, but got {type(weights)}")
+ return weights
+
+
+@dataclass
+class _MBConvConfig:
+ expand_ratio: float
+ kernel: int
+ stride: int
+ input_channels: int
+ out_channels: int
+ num_layers: int
+ block: Callable[..., paddle.nn.Layer]
+
+ @staticmethod
+ def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
+ return _make_divisible(channels * width_mult, 8, min_value)
+
+
+class MBConvConfig(_MBConvConfig):
+ def __init__(
+ self,
+ expand_ratio: float,
+ kernel: int,
+ stride: int,
+ input_channels: int,
+ out_channels: int,
+ num_layers: int,
+ width_mult: float = 1.0,
+ depth_mult: float = 1.0,
+ block: Optional[Callable[..., paddle.nn.Layer]] = None,
+ ) -> None:
+ input_channels = self.adjust_channels(input_channels, width_mult)
+ out_channels = self.adjust_channels(out_channels, width_mult)
+ num_layers = self.adjust_depth(num_layers, depth_mult)
+ if block is None:
+ block = MBConv
+ super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)
+
+ @staticmethod
+ def adjust_depth(num_layers: int, depth_mult: float):
+ return int(math.ceil(num_layers * depth_mult))
+
+
+class FusedMBConvConfig(_MBConvConfig):
+ def __init__(
+ self,
+ expand_ratio: float,
+ kernel: int,
+ stride: int,
+ input_channels: int,
+ out_channels: int,
+ num_layers: int,
+ block: Optional[Callable[..., paddle.nn.Layer]] = None,
+ ) -> None:
+ if block is None:
+ block = FusedMBConv
+ super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)
+
+
+class MBConv(Layer):
+ def __init__(
+ self,
+ cnf,
+ stochastic_depth_prob: float,
+ norm_layer: Callable[..., Layer],
+ se_layer: Callable[..., Layer] = SqueezeExcitation,
+ ) -> None:
+ super(MBConv, self).__init__()
+
+ if not (1 <= cnf.stride <= 2):
+ raise ValueError("illegal stride value")
+
+ self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+ layers = []
+ activation_layer = nn.Silu()
+
+ # expand
+ expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
+ if expanded_channels != cnf.input_channels:
+ layers.append(
+ Conv2DNormActivation(
+ cnf.input_channels,
+ expanded_channels,
+ kernel_size=1,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+
+ # depthwise
+ layers.append(
+ Conv2DNormActivation(
+ expanded_channels,
+ expanded_channels,
+ kernel_size=cnf.kernel,
+ stride=cnf.stride,
+ groups=expanded_channels,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+
+ # squeeze and excitation
+ squeeze_channels = max(1, cnf.input_channels // 4)
+ layers.append(se_layer(expanded_channels, squeeze_channels, activation=nn.Silu()))
+
+ # project
+ layers.append(
+ Conv2DNormActivation(
+ expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
+ )
+ )
+
+ self.block = Sequential(*layers)
+ self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
+ self.out_channels = cnf.out_channels
+
+ def forward(self, input) -> paddle.Tensor:
+ result = self.block(input)
+ if self.use_res_connect:
+ result = self.stochastic_depth(result)
+ result += input
+ return result
+
+
+class FusedMBConv(Layer):
+ def __init__(
+ self,
+ cnf: "FusedMBConvConfig",
+ stochastic_depth_prob: float,
+ norm_layer: Callable[..., Layer],
+ ) -> None:
+ super(FusedMBConv, self).__init__()
+
+ if not (1 <= cnf.stride <= 2):
+ raise ValueError("illegal stride value")
+
+ self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+ layers: List[Layer] = []
+ activation_layer = nn.Silu()
+
+ expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
+ if expanded_channels != cnf.input_channels:
+ # fused expand and project
+ layers.append(
+ Conv2DNormActivation(
+ cnf.input_channels,
+ expanded_channels,
+ kernel_size=cnf.kernel,
+ stride=cnf.stride,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+ # project
+ layers.append(
+ Conv2DNormActivation(
+ expanded_channels,
+ cnf.out_channels,
+ kernel_size=1,
+ norm_layer=norm_layer,
+ activation_layer=None,
+ )
+ )
+ else:
+ layers.append(
+ Conv2DNormActivation(
+ cnf.input_channels,
+ cnf.out_channels,
+ kernel_size=cnf.kernel,
+ stride=cnf.stride,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+
+ self.block = Sequential(*layers)
+ self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
+ self.out_channels = cnf.out_channels
+
+ def forward(self, input: Tensor) -> Tensor:
+ result = self.block(input)
+ if self.use_res_connect:
+ result = self.stochastic_depth(result)
+ result += input
+ return result
+
+
+class EfficientNet(Layer):
+ def __init__(
+ self,
+ inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
+ dropout: float,
+ stochastic_depth_prob: float = 0.2,
+ num_classes: int = 1000,
+ norm_layer: Optional[Callable[..., paddle.nn.Layer]] = None,
+ last_channel: Optional[int] = None,
+ ) -> None:
+ super().__init__()
+ if not inverted_residual_setting:
+ raise ValueError("The inverted_residual_setting should not be empty")
+ elif not (
+ isinstance(inverted_residual_setting, Sequence)
+ and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting])
+ ):
+ raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")
+ if norm_layer is None:
+ norm_layer = BatchNorm2D
+ layers: List[paddle.nn.Layer] = []
+ firstconv_output_channels = inverted_residual_setting[0].input_channels
+ layers.append(
+ Conv2DNormActivation(
+ 3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, activation_layer=Silu()
+ )
+ )
+ total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
+ stage_block_id = 0
+ for cnf in inverted_residual_setting:
+ stage: List[paddle.nn.Layer] = []
+ for _ in range(cnf.num_layers):
+ block_cnf = copy.copy(cnf)
+ if stage:
+ block_cnf.input_channels = block_cnf.out_channels
+ block_cnf.stride = 1
+ sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
+ stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
+ stage_block_id += 1
+ layers.append(Sequential(*stage))
+ lastconv_input_channels = inverted_residual_setting[-1].out_channels
+ lastconv_output_channels = last_channel if last_channel is not None else 4 * lastconv_input_channels
+ layers.append(
+ Conv2DNormActivation(
+ lastconv_input_channels,
+ lastconv_output_channels,
+ kernel_size=1,
+ norm_layer=norm_layer,
+ activation_layer=Silu(),
+ )
+ )
+ self.features = Sequential(*layers)
+ self.avgpool = AdaptiveAvgPool2D(output_size=1)
+ self.classifier = Sequential(
+ Dropout(p=dropout), Linear(in_features=lastconv_output_channels, out_features=num_classes)
+ )
+
+ for m in self.sublayers():
+ if isinstance(m, Conv2D):
+ KaimingNormal()(m.weight)
+ if m.bias is not None:
+ Constant(value=0.0)(m.bias)
+ elif isinstance(m, (BatchNorm2D, GroupNorm)):
+ Constant(value=1.0)(m.weight)
+ Constant(value=0.0)(m.bias)
+ elif isinstance(m, Linear):
+ init_range = 1.0 / math.sqrt(m.weight.shape[1])
+ Uniform(low=-init_range, high=init_range)(m.weight)
+ Constant(value=0.0)(m.bias)
+
+ def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+ x = self.features(x)
+ x = self.avgpool(x)
+ x = paddle.flatten(x=x, start_axis=1)
+ x = self.classifier(x)
+ return x
+
+
+def _make_divisible(value: float, divisor: int, min_value: Optional[int] = None) -> int:
+ if min_value is None:
+ min_value = divisor
+ new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
+ if new_value < 0.9 * value:
+ new_value += divisor
+ return new_value
+
+
+def _efficientnet(
+ inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
+ dropout: float,
+ last_channel: Optional[int],
+ weights: Optional[EfficientNet_V2_S_Weights],
+ progress: bool,
+ **kwargs: Any
+) -> EfficientNet:
+ if weights is not None:
+ kwargs["num_classes"] = len(weights.meta["categories"])
+ model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)
+ if weights is not None:
+ model.set_state_dict(weights.state_dict(progress=progress, check_hash=True))
+ return model
+
+
+def _efficientnet_conf(
+ arch: str, **kwargs: Any
+) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
+ inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]]
+ if arch.startswith("efficientnet_v2_s"):
+ inverted_residual_setting = [
+ FusedMBConvConfig(1, 3, 1, 24, 24, 2),
+ FusedMBConvConfig(4, 3, 2, 24, 48, 4),
+ FusedMBConvConfig(4, 3, 2, 48, 64, 4),
+ MBConvConfig(4, 3, 2, 64, 128, 6),
+ MBConvConfig(6, 3, 1, 128, 160, 9),
+ MBConvConfig(6, 3, 2, 160, 256, 15),
+ ]
+ last_channel = 1280
+ else:
+ raise ValueError(f"Unsupported model type {arch}")
+ return inverted_residual_setting, last_channel
+
+
+def efficientnet_v2_s(
+ *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any
+) -> EfficientNet:
+ weights = EfficientNet_V2_S_Weights.verify(weights)
+ inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
+ return _efficientnet(
+ inverted_residual_setting,
+ kwargs.pop("dropout", 0.2),
+ last_channel,
+ weights,
+ progress,
+ norm_layer=partial(BatchNorm2D, epsilon=0.001),
+ **kwargs,
+ )
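+
+
+# Note added for illustration (not in the upstream file): with the configuration above,
+# `efficientnet_v2_s().features` maps an [N, 3, H, W] input to [N, 1280, H / 32, W / 32];
+# the Stable Cascade encoder below only consumes `.features`, so the classifier head and
+# the optional pretrained-weight path are unused there.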
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/effnet.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/effnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..84bc3fb0f907f802a807e51102ecd6bbba7ea338
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/effnet.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+
+from .efficientnet_v2_s import efficientnet_v2_s
+
+
+class BatchNorm2D(nn.Layer):
+ def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True):
+ super(BatchNorm2D, self).__init__()
+ self.num_features = num_features
+ self.eps = eps
+ self.momentum = momentum
+ self.affine = affine
+ self.track_running_stats = track_running_stats
+
+ if self.affine:
+ self.weight = self.create_parameter(
+ shape=[num_features], default_initializer=paddle.nn.initializer.Constant(value=1.0)
+ )
+ self.bias = self.create_parameter(
+ shape=[num_features], default_initializer=paddle.nn.initializer.Constant(value=0.0)
+ )
+ else:
+ self.weight = None
+ self.bias = None
+
+ if self.track_running_stats:
+ self._mean = self.create_parameter(
+ shape=[num_features], default_initializer=paddle.nn.initializer.Constant(value=0.0), is_bias=False
+ )
+ self._variance = self.create_parameter(
+ shape=[num_features], default_initializer=paddle.nn.initializer.Constant(value=1.0), is_bias=False
+ )
+ self._mean.stop_gradient = True
+ self._variance.stop_gradient = True
+ else:
+ self._mean = None
+ self._variance = None
+
+ def forward(self, input):
+ mean = self._mean
+ variance = self._variance
+
+ output = (input - paddle.unsqueeze(mean, axis=[0, 2, 3])) / paddle.unsqueeze(
+ paddle.sqrt(variance + self.eps), axis=[0, 2, 3]
+ )
+ if self.affine:
+ output = output * paddle.unsqueeze(self.weight, axis=[0, 2, 3]) + paddle.unsqueeze(
+ self.bias, axis=[0, 2, 3]
+ )
+ return output
+
+
+class EfficientNetEncoder(nn.Layer):
+ def __init__(self, c_latent=16):
+ super().__init__()
+ self.backbone = efficientnet_v2_s().features
+ self.backbone.eval()
+ self.mapper = nn.Sequential(
+ nn.Conv2D(1280, c_latent, kernel_size=1, bias_attr=False),
+ BatchNorm2D(c_latent, affine=False),
+ )
+ self.mapper.eval()
+
+ def forward(self, x):
+ x = self.backbone(x)
+ x = self.mapper(x)
+ return x
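+
+
+# Note added for illustration (not in the upstream file): given the EfficientNetV2-S
+# backbone configuration, the encoder turns an [N, 3, H, W] image into an
+# [N, c_latent, H / 32, W / 32] latent, e.g. a 768x768 crop becomes a 16x24x24 latent
+# with the default c_latent=16.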
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/previewer.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/previewer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d26ef68dd319d993bf3bc51881441fb657170a62
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/previewer.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+
+
+class Previewer(paddle.nn.Layer):
+ def __init__(self, c_in=16, c_hidden=512, c_out=3):
+ super().__init__()
+ self.blocks = paddle.nn.Sequential(
+ paddle.nn.Conv2D(in_channels=c_in, out_channels=c_hidden, kernel_size=1),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden),
+ paddle.nn.Conv2D(in_channels=c_hidden, out_channels=c_hidden, kernel_size=3, padding=1),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden),
+ paddle.nn.Conv2DTranspose(
+ in_channels=c_hidden,
+ out_channels=c_hidden // 2,
+ kernel_size=2,
+ stride=2,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 2),
+ paddle.nn.Conv2D(
+ in_channels=c_hidden // 2,
+ out_channels=c_hidden // 2,
+ kernel_size=3,
+ padding=1,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 2),
+ paddle.nn.Conv2DTranspose(
+ in_channels=c_hidden // 2,
+ out_channels=c_hidden // 4,
+ kernel_size=2,
+ stride=2,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 4),
+ paddle.nn.Conv2D(
+ in_channels=c_hidden // 4,
+ out_channels=c_hidden // 4,
+ kernel_size=3,
+ padding=1,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 4),
+ paddle.nn.Conv2DTranspose(
+ in_channels=c_hidden // 4,
+ out_channels=c_hidden // 4,
+ kernel_size=2,
+ stride=2,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 4),
+ paddle.nn.Conv2D(
+ in_channels=c_hidden // 4,
+ out_channels=c_hidden // 4,
+ kernel_size=3,
+ padding=1,
+ ),
+ paddle.nn.GELU(),
+ paddle.nn.BatchNorm2D(num_features=c_hidden // 4),
+ paddle.nn.Conv2D(in_channels=c_hidden // 4, out_channels=c_out, kernel_size=1),
+ )
+
+ def forward(self, x):
+ return self.blocks(x)
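+
+
+# Note added for illustration (not in the upstream file): the three stride-2 transposed
+# convolutions upsample by a factor of 8 overall, so an [N, 16, 24, 24] latent is
+# previewed as an [N, 3, 192, 192] RGB image.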
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_a.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_a.py
new file mode 100644
index 0000000000000000000000000000000000000000..24861c58f4ddf14f4ac88af18d6d8d59f6f6edc6
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_a.py
@@ -0,0 +1,206 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from torchtools.nn import VectorQuantize
+
+
+class ResBlock(paddle.nn.Layer):
+ def __init__(self, c, c_hidden):
+ super().__init__()
+ self.norm1 = paddle.nn.LayerNorm(normalized_shape=c, weight_attr=False, bias_attr=False, epsilon=1e-06)
+ self.depthwise = paddle.nn.Sequential(
+ paddle.nn.Pad2D(padding=1, mode="replicate"),
+ paddle.nn.Conv2D(in_channels=c, out_channels=c, kernel_size=3, groups=c),
+ )
+ self.norm2 = paddle.nn.LayerNorm(normalized_shape=c, weight_attr=False, bias_attr=False, epsilon=1e-06)
+ self.channelwise = paddle.nn.Sequential(
+ paddle.nn.Linear(in_features=c, out_features=c_hidden),
+ paddle.nn.GELU(),
+ paddle.nn.Linear(in_features=c_hidden, out_features=c),
+ )
+        gammas = paddle.create_parameter(
+            shape=[6],
+            dtype=paddle.get_default_dtype(),
+            default_initializer=paddle.nn.initializer.Constant(value=0.0),
+        )
+        gammas.stop_gradient = False
+        self.gammas = gammas
+
+ def _basic_init(module):
+ if isinstance(module, paddle.nn.Linear) or isinstance(module, paddle.nn.Conv2D):
+ init_XavierUniform = paddle.nn.initializer.XavierUniform()
+ init_XavierUniform(module.weight)
+ if module.bias is not None:
+ init_Constant = paddle.nn.initializer.Constant(value=0)
+ init_Constant(module.bias)
+
+ self.apply(_basic_init)
+
+ def _norm(self, x, norm):
+ return norm(x.transpose(perm=[0, 2, 3, 1])).transpose(perm=[0, 3, 1, 2])
+
+ def forward(self, x):
+ mods = self.gammas
+ x_temp = self._norm(x, self.norm1) * (1 + mods[0]) + mods[1]
+ x = x + self.depthwise(x_temp) * mods[2]
+ x_temp = self._norm(x, self.norm2) * (1 + mods[3]) + mods[4]
+ x = x + self.channelwise(x_temp.transpose(perm=[0, 2, 3, 1])).transpose(perm=[0, 3, 1, 2]) * mods[5]
+ return x
+
+
+class StageA(paddle.nn.Layer):
+ def __init__(
+ self,
+ levels=2,
+ bottleneck_blocks=12,
+ c_hidden=384,
+ c_latent=4,
+ codebook_size=8192,
+ scale_factor=0.43,
+ ):
+ super().__init__()
+ self.c_latent = c_latent
+ self.scale_factor = scale_factor
+ c_levels = [(c_hidden // 2**i) for i in reversed(range(levels))]
+ self.in_block = paddle.nn.Sequential(
+ paddle.nn.PixelUnshuffle(downscale_factor=2),
+ paddle.nn.Conv2D(in_channels=3 * 4, out_channels=c_levels[0], kernel_size=1),
+ )
+ down_blocks = []
+ for i in range(levels):
+ if i > 0:
+ down_blocks.append(
+ paddle.nn.Conv2D(
+ in_channels=c_levels[i - 1],
+ out_channels=c_levels[i],
+ kernel_size=4,
+ stride=2,
+ padding=1,
+ )
+ )
+ block = ResBlock(c_levels[i], c_levels[i] * 4)
+ down_blocks.append(block)
+ down_blocks.append(
+ paddle.nn.Sequential(
+ paddle.nn.Conv2D(
+ in_channels=c_levels[-1],
+ out_channels=c_latent,
+ kernel_size=1,
+ bias_attr=False,
+ ),
+ paddle.nn.BatchNorm2D(num_features=c_latent),
+ )
+ )
+ self.down_blocks = paddle.nn.Sequential(*down_blocks)
+ self.codebook_size = codebook_size
+ self.vquantizer = VectorQuantize(c_latent, k=codebook_size)
+ up_blocks = [
+ paddle.nn.Sequential(paddle.nn.Conv2D(in_channels=c_latent, out_channels=c_levels[-1], kernel_size=1))
+ ]
+ for i in range(levels):
+ for j in range(bottleneck_blocks if i == 0 else 1):
+ block = ResBlock(c_levels[levels - 1 - i], c_levels[levels - 1 - i] * 4)
+ up_blocks.append(block)
+ if i < levels - 1:
+ up_blocks.append(
+ paddle.nn.Conv2DTranspose(
+ in_channels=c_levels[levels - 1 - i],
+ out_channels=c_levels[levels - 2 - i],
+ kernel_size=4,
+ stride=2,
+ padding=1,
+ )
+ )
+ self.up_blocks = paddle.nn.Sequential(*up_blocks)
+ self.out_block = paddle.nn.Sequential(
+ paddle.nn.Conv2D(in_channels=c_levels[0], out_channels=3 * 4, kernel_size=1),
+ paddle.nn.PixelShuffle(upscale_factor=2),
+ )
+
+ def encode(self, x, quantize=False):
+ x = self.in_block(x)
+ x = self.down_blocks(x)
+ if quantize:
+ qe, (vq_loss, commit_loss), indices = self.vquantizer.forward(x, dim=1)
+ return (
+ qe / self.scale_factor,
+ x / self.scale_factor,
+ indices,
+ vq_loss + commit_loss * 0.25,
+ )
+ else:
+ return x / self.scale_factor, None, None, None
+
+ def decode(self, x):
+ x = x * self.scale_factor
+ x = self.up_blocks(x)
+ x = self.out_block(x)
+ return x
+
+ def forward(self, x, quantize=False):
+ qe, x, _, vq_loss = self.encode(x, quantize)
+ x = self.decode(qe)
+ return x, vq_loss
+
+
+class Discriminator(paddle.nn.Layer):
+ def __init__(self, c_in=3, c_cond=0, c_hidden=512, depth=6):
+ super().__init__()
+ d = max(depth - 3, 3)
+ layers = [
+ paddle.nn.utils.spectral_norm(
+ layer=paddle.nn.Conv2D(
+ in_channels=c_in,
+ out_channels=c_hidden // 2**d,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ )
+ ),
+ paddle.nn.LeakyReLU(negative_slope=0.2),
+ ]
+ for i in range(depth - 1):
+ c_in = c_hidden // 2 ** max(d - i, 0)
+ c_out = c_hidden // 2 ** max(d - 1 - i, 0)
+ layers.append(
+ paddle.nn.utils.spectral_norm(
+ layer=paddle.nn.Conv2D(
+ in_channels=c_in,
+ out_channels=c_out,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ )
+ )
+ )
+ layers.append(paddle.nn.InstanceNorm2D(num_features=c_out, momentum=1 - 0.1))
+ layers.append(paddle.nn.LeakyReLU(negative_slope=0.2))
+ self.encoder = paddle.nn.Sequential(*layers)
+ self.shuffle = paddle.nn.Conv2D(
+ in_channels=c_hidden + c_cond if c_cond > 0 else c_hidden,
+ out_channels=1,
+ kernel_size=1,
+ )
+ self.logits = paddle.nn.Sigmoid()
+
+ def forward(self, x, cond=None):
+ x = self.encoder(x)
+ if cond is not None:
+ cond = cond.reshape([cond.shape[0], cond.shape[1], 1, 1]).expand(shape=[-1, -1, x.shape[-2], x.shape[-1]])
+ x = paddle.concat(x=[x, cond], axis=1)
+ x = self.shuffle(x)
+ x = self.logits(x)
+ return x
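+
+
+# Note added for illustration (not in the upstream file): with the default levels=2,
+# StageA's encoder applies PixelUnshuffle(2) plus one stride-2 convolution (a 4x spatial
+# downsample), so an [N, 3, H, W] image becomes an [N, 4, H / 4, W / 4] latent scaled by
+# 1 / scale_factor; decode() reverses the path through the up blocks and PixelShuffle(2).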
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_b.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_b.py
new file mode 100644
index 0000000000000000000000000000000000000000..34a9fd7abc8b43658437d367b56ef064dab746fc
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_b.py
@@ -0,0 +1,349 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import numpy as np
+import paddle
+import paddle_aux # noqa
+
+from .common import AttnBlock, FeedForwardBlock, LayerNorm2d, ResBlock, TimestepBlock
+
+
+class StageB(paddle.nn.Layer):
+ def __init__(
+ self,
+ c_in=4,
+ c_out=4,
+ c_r=64,
+ patch_size=2,
+ c_cond=1280,
+ c_hidden=[320, 640, 1280, 1280],
+ nhead=[-1, -1, 20, 20],
+ blocks=[[2, 6, 28, 6], [6, 28, 6, 2]],
+ block_repeat=[[1, 1, 1, 1], [3, 3, 2, 2]],
+ level_config=["CT", "CT", "CTA", "CTA"],
+ c_clip=1280,
+ c_clip_seq=4,
+ c_effnet=16,
+ c_pixels=3,
+ kernel_size=3,
+ dropout=[0, 0, 0.1, 0.1],
+ self_attn=True,
+ t_conds=["sca"],
+ ):
+ super().__init__()
+ self.c_r = c_r
+ self.t_conds = t_conds
+ self.c_clip_seq = c_clip_seq
+ if not isinstance(dropout, list):
+ dropout = [dropout] * len(c_hidden)
+ if not isinstance(self_attn, list):
+ self_attn = [self_attn] * len(c_hidden)
+ self.effnet_mapper = paddle.nn.Sequential(
+ paddle.nn.Conv2D(in_channels=c_effnet, out_channels=c_hidden[0] * 4, kernel_size=1),
+ paddle.nn.GELU(),
+ paddle.nn.Conv2D(in_channels=c_hidden[0] * 4, out_channels=c_hidden[0], kernel_size=1),
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-06),
+ )
+ self.pixels_mapper = paddle.nn.Sequential(
+ paddle.nn.Conv2D(in_channels=c_pixels, out_channels=c_hidden[0] * 4, kernel_size=1),
+ paddle.nn.GELU(),
+ paddle.nn.Conv2D(in_channels=c_hidden[0] * 4, out_channels=c_hidden[0], kernel_size=1),
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-06),
+ )
+ self.clip_mapper = paddle.nn.Linear(in_features=c_clip, out_features=c_cond * c_clip_seq)
+ self.clip_norm = paddle.nn.LayerNorm(
+ normalized_shape=c_cond, weight_attr=False, bias_attr=False, epsilon=1e-06
+ )
+ self.embedding = paddle.nn.Sequential(
+ paddle.nn.PixelUnshuffle(downscale_factor=patch_size),
+ paddle.nn.Conv2D(
+ in_channels=c_in * patch_size**2,
+ out_channels=c_hidden[0],
+ kernel_size=1,
+ ),
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-06),
+ )
+
+ def get_block(block_type, c_hidden, nhead, c_skip=0, dropout=0, self_attn=True):
+ if block_type == "C":
+ return ResBlock(c_hidden, c_skip, kernel_size=kernel_size, dropout=dropout)
+ elif block_type == "A":
+ return AttnBlock(c_hidden, c_cond, nhead, self_attn=self_attn, dropout=dropout)
+ elif block_type == "F":
+ return FeedForwardBlock(c_hidden, dropout=dropout)
+ elif block_type == "T":
+ return TimestepBlock(c_hidden, c_r, conds=t_conds)
+ else:
+ raise Exception(f"Block type {block_type} not supported")
+
+ self.down_blocks = paddle.nn.LayerList()
+ self.down_downscalers = paddle.nn.LayerList()
+ self.down_repeat_mappers = paddle.nn.LayerList()
+ for i in range(len(c_hidden)):
+ if i > 0:
+ self.down_downscalers.append(
+ paddle.nn.Sequential(
+ LayerNorm2d(
+ c_hidden[i - 1],
+ weight_attr=False,
+ bias_attr=False,
+ epsilon=1e-06,
+ ),
+ paddle.nn.Conv2D(
+ in_channels=c_hidden[i - 1],
+ out_channels=c_hidden[i],
+ kernel_size=2,
+ stride=2,
+ ),
+ )
+ )
+ else:
+ self.down_downscalers.append(paddle.nn.Identity())
+ down_block = paddle.nn.LayerList()
+ for _ in range(blocks[0][i]):
+ for block_type in level_config[i]:
+ block = get_block(
+ block_type,
+ c_hidden[i],
+ nhead[i],
+ dropout=dropout[i],
+ self_attn=self_attn[i],
+ )
+ down_block.append(block)
+ self.down_blocks.append(down_block)
+ if block_repeat is not None:
+ block_repeat_mappers = paddle.nn.LayerList()
+ for _ in range(block_repeat[0][i] - 1):
+ block_repeat_mappers.append(
+ paddle.nn.Conv2D(
+ in_channels=c_hidden[i],
+ out_channels=c_hidden[i],
+ kernel_size=1,
+ )
+ )
+ self.down_repeat_mappers.append(block_repeat_mappers)
+ self.up_blocks = paddle.nn.LayerList()
+ self.up_upscalers = paddle.nn.LayerList()
+ self.up_repeat_mappers = paddle.nn.LayerList()
+ for i in reversed(range(len(c_hidden))):
+ if i > 0:
+ self.up_upscalers.append(
+ paddle.nn.Sequential(
+ LayerNorm2d(
+ c_hidden[i],
+ weight_attr=False,
+ bias_attr=False,
+ epsilon=1e-06,
+ ),
+ paddle.nn.Conv2DTranspose(
+ in_channels=c_hidden[i],
+ out_channels=c_hidden[i - 1],
+ kernel_size=2,
+ stride=2,
+ ),
+ )
+ )
+ else:
+ self.up_upscalers.append(paddle.nn.Identity())
+ up_block = paddle.nn.LayerList()
+ for j in range(blocks[1][::-1][i]):
+ for k, block_type in enumerate(level_config[i]):
+ c_skip = c_hidden[i] if i < len(c_hidden) - 1 and j == k == 0 else 0
+ block = get_block(
+ block_type,
+ c_hidden[i],
+ nhead[i],
+ c_skip=c_skip,
+ dropout=dropout[i],
+ self_attn=self_attn[i],
+ )
+ up_block.append(block)
+ self.up_blocks.append(up_block)
+ if block_repeat is not None:
+ block_repeat_mappers = paddle.nn.LayerList()
+ for _ in range(block_repeat[1][::-1][i] - 1):
+ block_repeat_mappers.append(
+ paddle.nn.Conv2D(
+ in_channels=c_hidden[i],
+ out_channels=c_hidden[i],
+ kernel_size=1,
+ )
+ )
+ self.up_repeat_mappers.append(block_repeat_mappers)
+ self.clf = paddle.nn.Sequential(
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-06),
+ paddle.nn.Conv2D(
+ in_channels=c_hidden[0],
+ out_channels=c_out * patch_size**2,
+ kernel_size=1,
+ ),
+ paddle.nn.PixelShuffle(upscale_factor=patch_size),
+ )
+ self.apply(self._init_weights)
+ init_Normal = paddle.nn.initializer.Normal(std=0.02)
+ init_Normal(self.clip_mapper.weight)
+ init_Normal = paddle.nn.initializer.Normal(std=0.02)
+ init_Normal(self.effnet_mapper[0].weight)
+ init_Normal = paddle.nn.initializer.Normal(std=0.02)
+ init_Normal(self.effnet_mapper[2].weight)
+ init_Normal = paddle.nn.initializer.Normal(std=0.02)
+ init_Normal(self.pixels_mapper[0].weight)
+ init_Normal = paddle.nn.initializer.Normal(std=0.02)
+ init_Normal(self.pixels_mapper[2].weight)
+ paddle.nn.initializer.XavierUniform()(self.embedding[1].weight)
+ init_Constant = paddle.nn.initializer.Constant(value=0)
+ init_Constant(self.clf[1].weight)
+ for level_list in (self.down_blocks, self.up_blocks):
+ for level_block in level_list:
+ for block in level_block:
+ if isinstance(block, ResBlock) or isinstance(block, FeedForwardBlock):
+                        block.channelwise[-1].weight.set_value(
+                            block.channelwise[-1].weight * float(np.sqrt(1 / sum(blocks[0])))
+                        )
+ elif isinstance(block, TimestepBlock):
+ for layer in block.sublayers():
+ if isinstance(layer, paddle.nn.Linear):
+ init_Constant = paddle.nn.initializer.Constant(value=0)
+ init_Constant(layer.weight)
+
+ def _init_weights(self, m):
+ if isinstance(m, (paddle.nn.Conv2D, paddle.nn.Linear)):
+ init_XavierUniform = paddle.nn.initializer.XavierUniform()
+ init_XavierUniform(m.weight)
+ if m.bias is not None:
+ init_Constant = paddle.nn.initializer.Constant(value=0)
+ init_Constant(m.bias)
+
+ def gen_r_embedding(self, r, max_positions=10000):
+ r = r * max_positions
+ half_dim = self.c_r // 2
+ emb = math.log(max_positions) / (half_dim - 1)
+ emb = paddle.arange(end=half_dim).astype(dtype="float32").mul(-emb).exp()
+ emb = r[:, None] * emb[None, :]
+ emb = paddle.concat(x=[emb.sin(), emb.cos()], axis=1)
+ if self.c_r % 2 == 1:
+ emb = paddle.nn.functional.pad(emb, [0, 1], mode="constant")
+ return emb
+
+ def gen_c_embeddings(self, clip):
+ if len(clip.shape) == 2:
+ clip = clip.unsqueeze(axis=1)
+ clip = self.clip_mapper(clip).reshape([clip.shape[0], clip.shape[1] * self.c_clip_seq, -1])
+
+ clip = self.clip_norm(clip)
+ return clip
+
+ def _down_encode(self, x, r_embed, clip):
+ level_outputs = []
+ block_group = zip(self.down_blocks, self.down_downscalers, self.down_repeat_mappers)
+ for down_block, downscaler, repmap in block_group:
+ x = downscaler(x)
+ for i in range(len(repmap) + 1):
+ for block in down_block:
+ if (
+ isinstance(block, ResBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, ResBlock)
+ ):
+ x = block(x)
+ elif (
+ isinstance(block, AttnBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, AttnBlock)
+ ):
+ x = block(x, clip)
+ elif (
+ isinstance(block, TimestepBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, TimestepBlock)
+ ):
+ x = block(x, r_embed)
+ else:
+ x = block(x)
+ if i < len(repmap):
+ x = repmap[i](x)
+ level_outputs.insert(0, x)
+ return level_outputs
+
+ def _up_decode(self, level_outputs, r_embed, clip):
+ x = level_outputs[0]
+ block_group = zip(self.up_blocks, self.up_upscalers, self.up_repeat_mappers)
+ for i, (up_block, upscaler, repmap) in enumerate(block_group):
+ for j in range(len(repmap) + 1):
+ for k, block in enumerate(up_block):
+ if (
+ isinstance(block, ResBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, ResBlock)
+ ):
+ skip = level_outputs[i] if k == 0 and i > 0 else None
+ if skip is not None and (x.shape[-1] != skip.shape[-1] or x.shape[-2] != skip.shape[-2]):
+ x = paddle.nn.functional.interpolate(
+ x=x.astype(dtype="float32"),
+ size=skip.shape[-2:],
+ mode="bilinear",
+ align_corners=True,
+ )
+ x = block(x, skip)
+ elif (
+ isinstance(block, AttnBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, AttnBlock)
+ ):
+ x = block(x, clip)
+ elif (
+ isinstance(block, TimestepBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, TimestepBlock)
+ ):
+ x = block(x, r_embed)
+ else:
+ x = block(x)
+ if j < len(repmap):
+ x = repmap[j](x)
+ x = upscaler(x)
+ return x
+
+ def forward(self, x, r, effnet, clip, pixels=None, **kwargs):
+ if pixels is None:
+ pixels = paddle.zeros(shape=[x.shape[0], 3, 8, 8], dtype=x.dtype)
+ r_embed = self.gen_r_embedding(r)
+ for c in self.t_conds:
+ t_cond = kwargs.get(c, paddle.zeros_like(x=r))
+ r_embed = paddle.concat(x=[r_embed, self.gen_r_embedding(t_cond)], axis=1)
+ clip = self.gen_c_embeddings(clip)
+ x = self.embedding(x)
+ x = x + self.effnet_mapper(
+ paddle.nn.functional.interpolate(
+ x=effnet.astype(dtype="float32"),
+ size=x.shape[-2:],
+ mode="bilinear",
+ align_corners=True,
+ )
+ )
+ x = x + paddle.nn.functional.interpolate(
+ x=self.pixels_mapper(pixels).astype(dtype="float32"),
+ size=x.shape[-2:],
+ mode="bilinear",
+ align_corners=True,
+ )
+ level_outputs = self._down_encode(x, r_embed, clip)
+ x = self._up_decode(level_outputs, r_embed, clip)
+ return self.clf(x)
+
+ def update_weights_ema(self, src_model, beta=0.999):
+ for self_params, src_params in zip(self.parameters(), src_model.parameters()):
+ self_params.data = self_params.data * beta + src_params.data.clone() * (1 - beta)
+ for self_buffers, src_buffers in zip(self.buffers(), src_model.buffers()):
+ self_buffers.data = self_buffers.data * beta + src_buffers.data.clone() * (1 - beta)
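+
+
+# Note added for illustration (not in the upstream file): StageB.forward takes x (the
+# [N, 4, H, W] latent being denoised), r (the per-sample timestep fed to the sinusoidal
+# embedding), effnet (the [N, 16, h, w] EfficientNet latent, bilinearly resized to x's
+# resolution), clip (pooled CLIP embeddings expanded to c_clip_seq tokens each) and an
+# optional low-resolution pixels tensor; any condition listed in t_conds ("sca" by
+# default) falls back to zeros when not provided via kwargs.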
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_c.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_c.py
new file mode 100644
index 0000000000000000000000000000000000000000..c868be56c475de877c6cc02b44c03a47e81db102
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/models/stable_cascade/modules/stage_c.py
@@ -0,0 +1,368 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle_aux # noqa
+
+from .common import AttnBlock, FeedForwardBlock, LayerNorm2d, ResBlock, TimestepBlock
+
+
+def load(path="../x.npy"):
+ return paddle.to_tensor(np.load(path))
+
+
+def diff(a, b):
+ return (a - b).abs().mean()
+
+
+class UpDownBlock2d(nn.Layer):
+ def __init__(self, c_in, c_out, mode, enabled=True):
+ super().__init__()
+ assert mode in ["up", "down"]
+ interpolation = (
+ nn.Upsample(
+ scale_factor=2 if mode == "up" else 0.5,
+ mode="bilinear",
+ align_corners=True,
+ )
+ if enabled
+ else nn.Identity()
+ )
+ mapping = nn.Conv2D(in_channels=c_in, out_channels=c_out, kernel_size=1)
+ self.blocks = nn.LayerList(sublayers=[interpolation, mapping] if mode == "up" else [mapping, interpolation])
+
+ def forward(self, x):
+ for block in self.blocks:
+ x = block(x.astype(paddle.float32))
+ return x
+
+
+class StageC(nn.Layer):
+ def __init__(
+ self,
+ c_in=16,
+ c_out=16,
+ c_r=64,
+ patch_size=1,
+ c_cond=2048,
+ c_hidden=[2048, 2048],
+ nhead=[32, 32],
+ blocks=[[8, 24], [24, 8]],
+ block_repeat=[[1, 1], [1, 1]],
+ level_config=["CTA", "CTA"],
+ c_clip_text=1280,
+ c_clip_text_pooled=1280,
+ c_clip_img=768,
+ c_clip_seq=4,
+ kernel_size=3,
+ dropout=[0.1, 0.1],
+ # dropout=[0, 0],
+ self_attn=True,
+ t_conds=["sca", "crp"],
+ switch_level=[False],
+ ):
+ super().__init__()
+ self.c_r = c_r
+ self.t_conds = t_conds
+ self.c_clip_seq = c_clip_seq
+ if not isinstance(dropout, list):
+ dropout = [dropout] * len(c_hidden)
+ if not isinstance(self_attn, list):
+ self_attn = [self_attn] * len(c_hidden)
+ # CONDITIONING
+ self.clip_txt_mapper = nn.Linear(c_clip_text, c_cond)
+ self.clip_txt_pooled_mapper = nn.Linear(c_clip_text_pooled, c_cond * c_clip_seq)
+ self.clip_img_mapper = nn.Linear(c_clip_img, c_cond * c_clip_seq)
+ self.clip_norm = nn.LayerNorm(c_cond, weight_attr=False, bias_attr=False, epsilon=1e-6)
+
+ self.embedding = nn.Sequential(
+ nn.PixelUnshuffle(patch_size),
+ nn.Conv2D(c_in * (patch_size**2), c_hidden[0], kernel_size=1),
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-6),
+ )
+
+ def get_block(block_type, c_hidden, nhead, c_skip=0, dropout=0, self_attn=True):
+ if block_type == "C":
+ return ResBlock(c_hidden, c_skip, kernel_size=kernel_size, dropout=dropout)
+ elif block_type == "A":
+ return AttnBlock(c_hidden, c_cond, nhead, self_attn=self_attn, dropout=dropout)
+ elif block_type == "F":
+ return FeedForwardBlock(c_hidden, dropout=dropout)
+ elif block_type == "T":
+ return TimestepBlock(c_hidden, c_r, conds=t_conds)
+ else:
+ raise Exception(f"Block type {block_type} not supported")
+
+ self.down_blocks = nn.LayerList()
+ self.down_downscalers = nn.LayerList()
+ self.down_repeat_mappers = nn.LayerList()
+ for i in range(len(c_hidden)):
+ if i > 0:
+ self.down_downscalers.append(
+ nn.Sequential(
+ LayerNorm2d(
+ c_hidden[i - 1],
+ weight_attr=False,
+ bias_attr=False,
+ epsilon=1e-06,
+ ),
+ UpDownBlock2d(
+ c_hidden[i - 1],
+ c_hidden[i],
+ mode="down",
+ enabled=switch_level[i - 1],
+ ),
+ )
+ )
+ else:
+ self.down_downscalers.append(nn.Identity())
+ down_block = nn.LayerList()
+ for _ in range(blocks[0][i]):
+ for block_type in level_config[i]:
+ block = get_block(
+ block_type,
+ c_hidden[i],
+ nhead[i],
+ dropout=dropout[i],
+ self_attn=self_attn[i],
+ )
+ down_block.append(block)
+ self.down_blocks.append(down_block)
+ if block_repeat is not None:
+ block_repeat_mappers = nn.LayerList()
+ for _ in range(block_repeat[0][i] - 1):
+ block_repeat_mappers.append(nn.Conv2D(c_hidden[i], c_hidden[i], kernel_size=1))
+ self.down_repeat_mappers.append(block_repeat_mappers)
+ self.up_blocks = nn.LayerList()
+ self.up_upscalers = nn.LayerList()
+ self.up_repeat_mappers = nn.LayerList()
+ for i in reversed(range(len(c_hidden))):
+ if i > 0:
+ self.up_upscalers.append(
+ nn.Sequential(
+ LayerNorm2d(c_hidden[i], weight_attr=False, bias_attr=False, epsilon=1e-6),
+ UpDownBlock2d(
+ c_hidden[i],
+ c_hidden[i - 1],
+ mode="up",
+ enabled=switch_level[i - 1],
+ ),
+ )
+ )
+ else:
+ self.up_upscalers.append(nn.Identity())
+ up_block = nn.LayerList()
+ for j in range(blocks[1][::-1][i]):
+ for k, block_type in enumerate(level_config[i]):
+ c_skip = c_hidden[i] if i < len(c_hidden) - 1 and j == k == 0 else 0
+ block = get_block(
+ block_type,
+ c_hidden[i],
+ nhead[i],
+ c_skip=c_skip,
+ dropout=dropout[i],
+ self_attn=self_attn[i],
+ )
+ up_block.append(block)
+ self.up_blocks.append(up_block)
+ if block_repeat is not None:
+ block_repeat_mappers = nn.LayerList()
+ for _ in range(block_repeat[1][::-1][i] - 1):
+ block_repeat_mappers.append(nn.Conv2D(c_hidden[i], c_hidden[i], kernel_size=1))
+ self.up_repeat_mappers.append(block_repeat_mappers)
+ self.clf = nn.Sequential(
+ LayerNorm2d(c_hidden[0], weight_attr=False, bias_attr=False, epsilon=1e-06),
+ nn.Conv2D(c_hidden[0], c_out * (patch_size**2), kernel_size=1),
+ nn.PixelShuffle(upscale_factor=patch_size),
+ )
+ self.apply(self._init_weights)
+        init_Normal = nn.initializer.Normal(std=0.02)
+        init_Normal(self.clip_txt_mapper.weight)
+        init_Normal(self.clip_txt_pooled_mapper.weight)
+        init_Normal(self.clip_img_mapper.weight)
+ init_Xavier = nn.initializer.XavierUniform()
+ self.embedding[1].weight = self.create_parameter(
+ shape=self.embedding[1].weight.shape, default_initializer=init_Xavier
+ )
+ init_Constant = nn.initializer.Constant(value=0)
+ init_Constant(self.clf[1].weight)
+
+ for level_list in (self.down_blocks, self.up_blocks):
+ for level_block in level_list:
+ for block in level_block:
+ if isinstance(block, ResBlock) or isinstance(block, FeedForwardBlock):
+                    # Scale the final channelwise weight in place; a bare `multiply` call returns a
+                    # new tensor and would leave the parameter unchanged.
+                    block.channelwise[-1].weight.set_value(
+                        block.channelwise[-1].weight * float(np.sqrt(1 / sum(blocks[0])))
+                    )
+ elif isinstance(block, TimestepBlock):
+ for layer in block.sublayers():
+ if isinstance(layer, nn.Linear):
+ init_Constant = nn.initializer.Constant(value=0)
+ init_Constant(layer.weight)
+
+ def _init_weights(self, m):
+ if isinstance(m, (nn.Conv2D, nn.Linear)):
+ init_XavierUniform = nn.initializer.XavierUniform()
+ init_XavierUniform(m.weight)
+ if m.bias is not None:
+ init_Constant = nn.initializer.Constant(value=0)
+ init_Constant(m.bias)
+
+ def gen_r_embedding(self, r, max_positions=10000):
+ r = r * max_positions
+ half_dim = self.c_r // 2
+ emb = math.log(max_positions) / (half_dim - 1)
+ emb = paddle.arange(end=half_dim).astype(dtype="float32").mul(-emb).exp()
+ emb = r[:, None] * emb[None, :]
+ emb = paddle.concat(x=[emb.sin(), emb.cos()], axis=1)
+ if self.c_r % 2 == 1:
+ emb = nn.functional.pad(emb, [0, 1], mode="constant")
+ return emb
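+
+    # Note (illustrative): with the default c_r=64 and a batch of timestep ratios `r` of shape [B],
+    # gen_r_embedding returns a [B, 64] tensor whose first 32 columns are sines and last 32 are
+    # cosines of `r` at geometrically spaced frequencies (standard sinusoidal embedding).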
+
+ def gen_c_embeddings(self, clip_txt, clip_txt_pooled, clip_img):
+ clip_txt = self.clip_txt_mapper(clip_txt)
+ if len(clip_txt_pooled.shape) == 2:
+            clip_txt_pooled = clip_txt_pooled.unsqueeze(axis=1)
+ if len(clip_img.shape) == 2:
+ clip_img = paddle.unsqueeze(clip_img, axis=1)
+
+ clip_txt_pool = self.clip_txt_pooled_mapper(clip_txt_pooled).reshape(
+ [clip_txt_pooled.shape[0], clip_txt_pooled.shape[1] * self.c_clip_seq, -1]
+ )
+
+ clip_img = self.clip_img_mapper(clip_img).reshape([clip_img.shape[0], clip_img.shape[1] * self.c_clip_seq, -1])
+
+ clip = paddle.concat(x=[clip_txt, clip_txt_pool, clip_img], axis=1)
+ clip = self.clip_norm(clip)
+
+ return clip
+
+ def _down_encode(self, x, r_embed, clip, cnet=None):
+ level_outputs = []
+ block_group = zip(self.down_blocks, self.down_downscalers, self.down_repeat_mappers)
+ for down_block, downscaler, repmap in block_group:
+ x = downscaler(x)
+ for i in range(len(repmap) + 1):
+ for block in down_block:
+ if (
+ isinstance(block, ResBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, ResBlock)
+ ):
+ if cnet is not None:
+ next_cnet = cnet()
+ if next_cnet is not None:
+ x = x + nn.functional.interpolate(
+ next_cnet,
+ size=x.shape[-2:],
+ mode="bilinear",
+ align_corners=True,
+ )
+ x = block(x)
+
+ elif (
+ isinstance(block, AttnBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, AttnBlock)
+ ):
+ x = block(x, clip)
+
+ elif (
+ isinstance(block, TimestepBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, TimestepBlock)
+ ):
+ x = block(x, r_embed)
+ else:
+ x = block(x)
+
+ if i < len(repmap):
+ x = repmap[i](x)
+ level_outputs.insert(0, x)
+ return level_outputs
+
+ def _up_decode(self, level_outputs, r_embed, clip, cnet=None):
+ x = level_outputs[0]
+ block_group = zip(self.up_blocks, self.up_upscalers, self.up_repeat_mappers)
+        for i, (up_block, upscaler, repmap) in enumerate(block_group):
+            for j in range(len(repmap) + 1):
+                for k, block in enumerate(up_block):
+
+ if (
+ isinstance(block, ResBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, ResBlock)
+ ):
+ skip = level_outputs[i] if k == 0 and i > 0 else None
+ if skip is not None and (x.shape[-1] != skip.shape[-1] or x.shape[-2] != skip.shape[-2]):
+ x = nn.functional.interpolate(
+ x=x.astype(paddle.float32),
+ size=skip.shape[-2:],
+ mode="bilinear",
+ align_corners=True,
+ )
+ x = block(x, skip)
+ elif (
+ isinstance(block, AttnBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, AttnBlock)
+ ):
+ x = block(x, clip)
+ elif (
+ isinstance(block, TimestepBlock)
+ or hasattr(block, "_fsdp_wrapped_module")
+ and isinstance(block._fsdp_wrapped_module, TimestepBlock)
+ ):
+ x = block(x, r_embed)
+ else:
+ x = block(x)
+
+ if j < len(repmap):
+ x = repmap[j](x)
+
+ x = upscaler(x)
+
+ return x
+
+ def forward(self, x, r, clip_text, clip_text_pooled, clip_img, cnet=None, **kwargs):
+
+ r_embed = self.gen_r_embedding(r)
+ for c in self.t_conds:
+ t_cond = kwargs.get(c, paddle.zeros_like(r))
+ r_embed = paddle.concat(x=[r_embed, self.gen_r_embedding(t_cond)], axis=1)
+ clip = self.gen_c_embeddings(clip_text, clip_text_pooled, clip_img)
+
+ x = self.embedding(x)
+ level_outputs = self._down_encode(x, r_embed, clip, cnet)
+ x = self._up_decode(level_outputs, r_embed, clip, cnet)
+ x = self.clf(x)
+ # x.register_hook(lambda grad: print("@@@ before-clf-x @@@", grad.shape, grad.abs().mean()))
+
+ return x
+
+    def update_weights_ema(self, src_model, beta=0.999):
+        # Exponential moving average update of parameters and buffers from `src_model`.
+        with paddle.no_grad():
+            for self_params, src_params in zip(self.parameters(), src_model.parameters()):
+                self_params.set_value(self_params * beta + src_params * (1 - beta))
+            for self_buffers, src_buffers in zip(self.buffers(), src_model.buffers()):
+                self_buffers.set_value(self_buffers * beta + src_buffers * (1 - beta))
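+
+
+# Minimal usage sketch for StageC (illustrative only; the batch size, spatial size, and random
+# inputs below are assumptions, not part of this module):
+# >>> model = StageC()
+# >>> x = paddle.randn([1, 16, 24, 24])                # latent input (c_in=16)
+# >>> r = paddle.rand([1])                             # timestep ratio in [0, 1]
+# >>> clip_text = paddle.randn([1, 77, 1280])          # per-token text embeddings
+# >>> clip_text_pooled = paddle.randn([1, 1, 1280])    # pooled text embedding
+# >>> clip_img = paddle.randn([1, 1, 768])             # CLIP image embedding
+# >>> out = model(x, r, clip_text, clip_text_pooled, clip_img)  # same shape as x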
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b68e33e511a5ed3eee62e9397a9abd6c05d54086
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/__init__.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import List, Optional, Union
+
+import numpy as np
+import PIL
+from PIL import Image
+
+from ...utils import (
+ OptionalDependencyNotAvailable,
+ is_paddle_available,
+ is_paddlenlp_available,
+)
+
+try:
+ if not (is_paddlenlp_available() and is_paddle_available()):
+ raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+ from ...utils.dummy_paddle_and_paddlenlp_objects import ShapEPipeline
+else:
+ from .blip_image_processing import BlipImageProcessor
+ from .modeling_blip2 import Blip2QFormerModel
+ from .modeling_ctx_clip import ContextCLIPTextModel
+ from .pipeline_blip_diffusion import BlipDiffusionPipeline
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_blip2.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_blip2.py
new file mode 100644
index 0000000000000000000000000000000000000000..484577c2d8ec3c86d85cc0afb335db649d88fa14
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_blip2.py
@@ -0,0 +1,659 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional, Tuple, Union
+
+import paddle
+from paddle import nn
+from paddlenlp.transformers.activations import QuickGELUActivation as QuickGELU
+from paddlenlp.transformers.blip_2.configuration import Blip2Config, Blip2VisionConfig
+from paddlenlp.transformers.blip_2.modeling import (
+ Blip2Encoder,
+ Blip2QFormerAttention,
+ Blip2QFormerIntermediate,
+ Blip2QFormerOutput,
+)
+from paddlenlp.transformers.model_outputs import (
+ BaseModelOutputWithPastAndCrossAttentions,
+ BaseModelOutputWithPooling,
+ BaseModelOutputWithPoolingAndCrossAttentions,
+)
+from paddlenlp.transformers.model_utils import apply_chunking_to_forward
+
+from ppdiffusers.transformers import BertTokenizer, PretrainedModel
+
+from ...utils import logging
+
+logger = logging.get_logger(__name__)
+
+
+class Blip2PretrainedModel(PretrainedModel):
+ """
+ An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
+ models.
+ """
+
+ config_class = Blip2Config
+ base_model_prefix = "blip"
+ supports_gradient_checkpointing = True
+ _keys_to_ignore_on_load_missing = [
+ r"position_ids",
+ r"language_model.encoder.embed_tokens.weight",
+ r"language_model.decoder.embed_tokens.weight",
+ ]
+ _no_split_modules = ["Blip2Attention", "T5Block", "OPTDecoderLayer"]
+ _keep_in_fp32_modules = ["wo"]
+
+    def _init_weights(self, module):
+        """Initialize the weights"""
+        factor = self.config.initializer_range
+        init_normal = nn.initializer.Normal(mean=0.0, std=factor)
+        init_zeros = nn.initializer.Constant(value=0.0)
+        init_ones = nn.initializer.Constant(value=1.0)
+        if isinstance(module, (nn.Conv2D, nn.Embedding, nn.Linear)):
+            init_normal(module.weight)
+            if hasattr(module, "padding_idx") and module.padding_idx is not None:
+                with paddle.no_grad():
+                    module.weight[module.padding_idx] = 0.0
+            if hasattr(module, "bias") and module.bias is not None:
+                init_zeros(module.bias)
+        if isinstance(module, Blip2VisionEmbeddings):
+            if hasattr(self.config, "vision_config"):
+                factor = self.config.vision_config.initializer_range
+            trunc_normal_ = nn.initializer.TruncatedNormal(mean=0.0, std=factor)
+            trunc_normal_(module.position_embedding)
+            trunc_normal_(module.class_embedding)
+        elif isinstance(module, nn.LayerNorm):
+            init_zeros(module.bias)
+            init_ones(module.weight)
+        elif isinstance(module, nn.Linear) and module.bias is not None:
+            init_zeros(module.bias)
+
+
+# There is an implementation of Blip2 in `transformers`: https://github.com/huggingface/transformers/blob/main/src/transformers/models/blip_2/modeling_blip_2.py.
+# But it doesn't support getting multimodal embeddings, so this module can be
+# replaced once a future `transformers` version supports that.
+class Blip2TextEmbeddings(nn.Layer):
+ """Construct the embeddings from word and position embeddings."""
+
+ def __init__(self, config):
+ super().__init__()
+ self.word_embeddings = nn.Embedding(
+ config.vocab_size, config.hidden_size
+        )  # NOTE: padding_idx is not passed to nn.Embedding here; it is assigned manually below
+ self.word_embeddings.padding_idx = config.pad_token_id
+ self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
+
+ # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
+ # any TensorFlow checkpoint file
+ self.LayerNorm = nn.LayerNorm(config.hidden_size, epsilon=config.layer_norm_eps)
+ self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+ # position_ids (1, len position emb) is contiguous in memory and exported when serialized
+ self.register_buffer(
+ "position_ids", paddle.arange(config.max_position_embeddings, dtype=paddle.int64).expand((1, -1))
+ )
+ self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
+
+ self.config = config
+
+ def forward(
+ self,
+ input_ids=None,
+ position_ids=None,
+ query_embeds=None,
+ past_key_values_length=0,
+ ):
+ if input_ids is not None:
+ seq_length = input_ids.shape[1]
+ else:
+ seq_length = 0
+
+ if position_ids is None:
+ position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length].clone()
+
+ if input_ids is not None:
+ embeddings = self.word_embeddings(input_ids)
+ if self.position_embedding_type == "absolute":
+ position_embeddings = self.position_embeddings(position_ids)
+ embeddings = embeddings + position_embeddings
+
+ if query_embeds is not None:
+ batch_size = embeddings.shape[0]
+ # repeat the query embeddings for batch size
+ query_embeds = query_embeds.tile([batch_size, 1, 1])
+ embeddings = paddle.concat((query_embeds, embeddings), axis=1)
+ else:
+ embeddings = query_embeds
+ embeddings = embeddings.cast(query_embeds.dtype)
+ embeddings = self.LayerNorm(embeddings)
+ embeddings = self.dropout(embeddings)
+ return embeddings
+
+
+# Copy-pasted from transformers.models.blip.modeling_blip.BlipVisionEmbeddings with Blip->Blip2
+class Blip2VisionEmbeddings(nn.Layer):
+ def __init__(self, config: Blip2VisionConfig):
+ super().__init__()
+ self.config = config
+ self.embed_dim = config.hidden_size
+ self.image_size = config.image_size
+ self.patch_size = config.patch_size
+
+        # Learnable class token, created via Layer.create_parameter with a random-normal start.
+        self.class_embedding = self.create_parameter(
+            shape=[1, 1, self.embed_dim],
+            default_initializer=nn.initializer.Assign(paddle.randn([1, 1, self.embed_dim])),
+        )
+
+ self.patch_embedding = nn.Conv2D(
+ in_channels=3,
+ out_channels=self.embed_dim,
+ kernel_size=self.patch_size,
+ stride=self.patch_size,
+ bias_attr=False,
+ )
+
+ self.num_patches = (self.image_size // self.patch_size) ** 2
+ self.num_positions = self.num_patches + 1
+
+        self.position_embedding = self.create_parameter(
+            shape=[1, self.num_positions, self.embed_dim],
+            default_initializer=nn.initializer.Assign(paddle.randn([1, self.num_positions, self.embed_dim])),
+        )
+
+ def forward(self, pixel_values: paddle.Tensor) -> paddle.Tensor:
+ batch_size = pixel_values.shape[0]
+ target_dtype = self.patch_embedding.weight.dtype
+ patch_embeds = self.patch_embedding(pixel_values.cast(dtype=target_dtype)) # shape = [*, width, grid, grid]
+ patch_embeds = patch_embeds.flatten(2).transpose([0, 2, 1])
+
+ class_embeds = self.class_embedding.expand([batch_size, 1, -1]).cast(target_dtype)
+ embeddings = paddle.concat([class_embeds, patch_embeds], axis=1)
+ embeddings = embeddings + self.position_embedding[:, : embeddings.shape[1], :].cast(target_dtype)
+ return embeddings
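+
+    # Shape note (illustrative): pixel_values of shape [B, 3, image_size, image_size] is split into
+    # (image_size // patch_size) ** 2 patches, so the returned embeddings have shape
+    # [B, num_patches + 1, hidden_size], including the prepended class token.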
+
+
+# The Qformer encoder, which takes the visual embeddings, and the text input, to get multimodal embeddings
+class Blip2QFormerEncoder(nn.Layer):
+ def __init__(self, config):
+ super().__init__()
+ self.config = config
+ self.layer = nn.LayerList(
+ [Blip2QFormerLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
+ )
+ self.gradient_checkpointing = False
+
+ def forward(
+ self,
+ hidden_states,
+ attention_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_values=None,
+ use_cache=None,
+ output_attentions=False,
+ output_hidden_states=False,
+ return_dict=True,
+ query_length=0,
+ ):
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attentions = () if output_attentions else None
+ all_cross_attentions = () if output_attentions else None
+
+ next_decoder_cache = () if use_cache else None
+
+ if getattr(self.config, "gradient_checkpointing", False) and self.training:
+ if use_cache:
+ logger.warning_once(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+ )
+ use_cache = False
+
+ for i in range(self.config.num_hidden_layers):
+ layer_module = self.layer[i]
+ if output_hidden_states:
+ all_hidden_states = all_hidden_states + (hidden_states,)
+
+ past_key_value = past_key_values[i] if past_key_values is not None else None
+
+ if self.gradient_checkpointing and not hidden_states.stop_gradient:
+ layer_outputs = self._gradient_checkpointing_func(
+ layer_module.__call__,
+ hidden_states,
+ attention_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ past_key_value,
+ output_attentions,
+ query_length,
+ )
+
+ else:
+ layer_outputs = layer_module(
+ hidden_states,
+ attention_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ past_key_value,
+ output_attentions,
+ query_length,
+ )
+
+ hidden_states = layer_outputs[0]
+ if use_cache:
+ next_decoder_cache += (layer_outputs[-1],)
+ if output_attentions:
+ all_self_attentions = all_self_attentions + (layer_outputs[1],)
+ if layer_module.has_cross_attention:
+ all_cross_attentions = all_cross_attentions + (layer_outputs[2],)
+
+ if output_hidden_states:
+ all_hidden_states = all_hidden_states + (hidden_states,)
+
+ if not return_dict:
+ return tuple(
+ v
+ for v in [
+ hidden_states,
+ next_decoder_cache,
+ all_hidden_states,
+ all_self_attentions,
+ all_cross_attentions,
+ ]
+ if v is not None
+ )
+ return BaseModelOutputWithPastAndCrossAttentions(
+ last_hidden_state=hidden_states,
+ past_key_values=next_decoder_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attentions,
+ cross_attentions=all_cross_attentions,
+ )
+
+
+# The layers making up the Qformer encoder
+class Blip2QFormerLayer(nn.Layer):
+ def __init__(self, config, layer_idx):
+ super().__init__()
+ self.chunk_size_feed_forward = config.chunk_size_feed_forward
+ self.seq_len_dim = 1
+ self.attention = Blip2QFormerAttention(config)
+
+ self.layer_idx = layer_idx
+
+ if layer_idx % config.cross_attention_frequency == 0:
+ self.crossattention = Blip2QFormerAttention(config, is_cross_attention=True)
+ self.has_cross_attention = True
+ else:
+ self.has_cross_attention = False
+
+ self.intermediate = Blip2QFormerIntermediate(config)
+ self.intermediate_query = Blip2QFormerIntermediate(config)
+ self.output_query = Blip2QFormerOutput(config)
+ self.output = Blip2QFormerOutput(config)
+
+ def forward(
+ self,
+ hidden_states,
+ attention_mask=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_value=None,
+ output_attentions=False,
+ query_length=0,
+ ):
+ # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
+ self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
+ self_attention_outputs = self.attention(
+ hidden_states,
+ attention_mask,
+ output_attentions=output_attentions,
+ past_key_value=self_attn_past_key_value,
+ )
+ attention_output = self_attention_outputs[0]
+ outputs = self_attention_outputs[1:-1]
+
+ present_key_value = self_attention_outputs[-1]
+
+ if query_length > 0:
+ query_attention_output = attention_output[:, :query_length, :]
+
+ if self.has_cross_attention:
+ if encoder_hidden_states is None:
+ raise ValueError("encoder_hidden_states must be given for cross-attention layers")
+ cross_attention_outputs = self.crossattention(
+ query_attention_output,
+ attention_mask,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ output_attentions=output_attentions,
+ )
+ query_attention_output = cross_attention_outputs[0]
+ # add cross attentions if we output attention weights
+ outputs = outputs + cross_attention_outputs[1:-1]
+
+ layer_output = apply_chunking_to_forward(
+ self.feed_forward_chunk_query,
+ self.chunk_size_feed_forward,
+ self.seq_len_dim,
+ query_attention_output,
+ )
+
+ if attention_output.shape[1] > query_length:
+ layer_output_text = apply_chunking_to_forward(
+ self.feed_forward_chunk,
+ self.chunk_size_feed_forward,
+ self.seq_len_dim,
+ attention_output[:, query_length:, :],
+ )
+ layer_output = paddle.concat([layer_output, layer_output_text], axis=1)
+ else:
+ layer_output = apply_chunking_to_forward(
+ self.feed_forward_chunk,
+ self.chunk_size_feed_forward,
+ self.seq_len_dim,
+ attention_output,
+ )
+ outputs = (layer_output,) + outputs
+
+ outputs = outputs + (present_key_value,)
+
+ return outputs
+
+ def feed_forward_chunk(self, attention_output):
+ intermediate_output = self.intermediate(attention_output)
+ layer_output = self.output(intermediate_output, attention_output)
+ return layer_output
+
+ def feed_forward_chunk_query(self, attention_output):
+ intermediate_output = self.intermediate_query(attention_output)
+ layer_output = self.output_query(intermediate_output, attention_output)
+ return layer_output
+
+
+# ProjLayer used to project the multimodal Blip2 embeddings to be used in the text encoder
+class ProjLayer(nn.Layer):
+ def __init__(self, in_dim, out_dim, hidden_dim, drop_p=0.1, eps=1e-12):
+ super().__init__()
+
+        # Norm -> Dense1 -> Act -> Dense2 -> Drop -> Residual (pre-norm residual MLP)
+ self.dense1 = nn.Linear(in_dim, hidden_dim)
+ self.act_fn = QuickGELU()
+ self.dense2 = nn.Linear(hidden_dim, out_dim)
+ self.dropout = nn.Dropout(drop_p)
+
+ self.LayerNorm = nn.LayerNorm(out_dim, epsilon=eps)
+
+ def forward(self, x):
+ x_in = x
+
+ x = self.LayerNorm(x)
+ x = self.dropout(self.dense2(self.act_fn(self.dense1(x)))) + x_in
+
+ return x
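+
+    # Note: the residual connection adds the input back, so this layer is shape-preserving and
+    # assumes in_dim == out_dim (as configured in Blip2QFormerModel, where both equal the Q-Former
+    # hidden size).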
+
+
+# Copy-pasted from transformers.models.blip.modeling_blip.BlipVisionModel with Blip->Blip2, BLIP->BLIP_2
+class Blip2VisionModel(Blip2PretrainedModel):
+ main_input_name = "pixel_values"
+ config_class = Blip2VisionConfig
+
+ def __init__(self, config: Blip2VisionConfig):
+ super().__init__(config)
+ self.config = config
+ embed_dim = config.hidden_size
+ self.embeddings = Blip2VisionEmbeddings(config)
+ self.pre_layernorm = nn.LayerNorm(embed_dim, epsilon=config.layer_norm_eps)
+ self.encoder = Blip2Encoder(config)
+ self.post_layernorm = nn.LayerNorm(embed_dim, epsilon=config.layer_norm_eps)
+
+ self.post_init()
+
+ def forward(
+ self,
+ pixel_values: Optional[paddle.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPooling]:
+ r"""
+ Returns:
+
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ if pixel_values is None:
+ raise ValueError("You have to specify pixel_values")
+
+ hidden_states = self.embeddings(pixel_values)
+ hidden_states = self.pre_layernorm(hidden_states)
+ encoder_outputs = self.encoder(
+ inputs_embeds=hidden_states,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ last_hidden_state = encoder_outputs[0]
+ last_hidden_state = self.post_layernorm(last_hidden_state)
+
+ pooled_output = last_hidden_state[:, 0, :]
+ pooled_output = self.post_layernorm(pooled_output)
+
+ if not return_dict:
+ return (last_hidden_state, pooled_output) + encoder_outputs[1:]
+
+ return BaseModelOutputWithPooling(
+ last_hidden_state=last_hidden_state,
+ pooler_output=pooled_output,
+ hidden_states=encoder_outputs.hidden_states,
+ attentions=encoder_outputs.attentions,
+ )
+
+ def get_input_embeddings(self):
+ return self.embeddings
+
+
+# Qformer model, used to get multimodal embeddings from the text and image inputs
+class Blip2QFormerModel(Blip2PretrainedModel):
+ """
+ Querying Transformer (Q-Former), used in BLIP-2.
+ """
+
+ def __init__(self, config: Blip2Config):
+ super().__init__(config)
+ self.config = config
+ self.embeddings = Blip2TextEmbeddings(config.qformer_config)
+ self.visual_encoder = Blip2VisionModel(config.vision_config)
+        self.query_tokens = self.create_parameter(
+            shape=[1, config.num_query_tokens, config.qformer_config.hidden_size],
+            default_initializer=nn.initializer.Constant(value=0.0),
+        )
+ if not hasattr(config, "tokenizer") or config.tokenizer is None:
+ self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", truncation_side="right")
+ else:
+ self.tokenizer = BertTokenizer.from_pretrained(config.tokenizer, truncation_side="right")
+ self.tokenizer.add_special_tokens({"bos_token": "[DEC]"})
+ self.proj_layer = ProjLayer(
+ in_dim=config.qformer_config.hidden_size,
+ out_dim=config.qformer_config.hidden_size,
+ hidden_dim=config.qformer_config.hidden_size * 4,
+ drop_p=0.1,
+ eps=1e-12,
+ )
+
+ self.encoder = Blip2QFormerEncoder(config.qformer_config)
+
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.embeddings.word_embeddings
+
+ def set_input_embeddings(self, value):
+ self.embeddings.word_embeddings = value
+
+ def get_extended_attention_mask(
+ self,
+ attention_mask: paddle.Tensor,
+ input_shape: Tuple[int],
+ has_query: bool = False,
+ ) -> paddle.Tensor:
+ """
+ Makes broadcastable attention and causal masks so that future and masked tokens are ignored.
+
+ Arguments:
+ attention_mask (`paddle.Tensor`):
+ Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
+ input_shape (`Tuple[int]`):
+ The shape of the input to the model.
+
+ Returns:
+            `paddle.Tensor`: The extended attention mask with the same dtype as `attention_mask.dtype`.
+ """
+ # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+ # ourselves in which case we just need to make it broadcastable to all heads.
+ if attention_mask.dim() == 3:
+ extended_attention_mask = attention_mask[:, None, :, :]
+ elif attention_mask.dim() == 2:
+ # Provided a padding mask of dimensions [batch_size, seq_length]
+ # - the model is an encoder, so make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
+ extended_attention_mask = attention_mask[:, None, None, :]
+ else:
+ raise ValueError(
+ "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
+ input_shape, attention_mask.shape
+ )
+ )
+
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
+ # masked positions, this operation will create a tensor which is 0.0 for
+ # positions we want to attend and -10000.0 for masked positions.
+ # Since we are adding it to the raw scores before the softmax, this is
+ # effectively the same as removing these entirely.
+ extended_attention_mask = extended_attention_mask.cast(dtype=self.dtype) # fp16 compatibility
+ extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+ return extended_attention_mask
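+
+    # Worked example (illustrative): a padding mask paddle.to_tensor([[1, 1, 0]]) is expanded to
+    # shape [1, 1, 1, 3] and mapped to [[[[0.0, 0.0, -10000.0]]]]: zeros for visible tokens and a
+    # large negative bias for masked ones, added to the raw attention scores before softmax.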
+
+ def forward(
+ self,
+ text_input=None,
+ image_input=None,
+ encoder_hidden_states=None,
+ encoder_attention_mask=None,
+ past_key_values=None,
+ use_cache=None,
+ output_attentions=None,
+ output_hidden_states=None,
+ return_dict=None,
+ ):
+ r"""
+ encoder_hidden_states (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, `optional`):
+ Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
+ the model is configured as a decoder.
+ encoder_attention_mask (`paddle.Tensor` of shape `(batch_size, sequence_length)`, `optional`):
+ Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
+ the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+ past_key_values (`tuple(tuple(paddle.Tensor))` of length `config.n_layers` with each tuple having 4 tensors of:
+ shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): Contains precomputed key and
+ value hidden states of the attention blocks. Can be used to speed up decoding. If `past_key_values` are
+ used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key
+ value states given to this model) of shape `(batch_size, 1)` instead of all `decoder_input_ids` of shape
+ `(batch_size, sequence_length)`.
+ use_cache (`bool`, `optional`):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
+ `past_key_values`).
+ """
+
+ text = self.tokenizer(text_input, return_tensors="pd", padding=True, return_attention_mask=True)
+ input_ids = text.input_ids
+ batch_size = input_ids.shape[0]
+ query_atts = paddle.ones((batch_size, self.query_tokens.shape[1]), dtype=paddle.int64)
+ attention_mask = paddle.concat([query_atts, text.attention_mask], axis=1)
+
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # past_key_values_length
+ past_key_values_length = (
+ past_key_values[0][0].shape[2] - self.config.query_length if past_key_values is not None else 0
+ )
+
+ query_length = self.query_tokens.shape[1]
+
+ embedding_output = self.embeddings(
+ input_ids=input_ids,
+ query_embeds=self.query_tokens,
+ past_key_values_length=past_key_values_length,
+ )
+
+ # embedding_output = self.layernorm(query_embeds)
+ # embedding_output = self.dropout(embedding_output)
+
+ input_shape = embedding_output.shape[:-1]
+ batch_size, seq_length = input_shape
+
+ image_embeds_frozen = self.visual_encoder(image_input).last_hidden_state
+ # image_embeds_frozen = paddle.ones_like(image_embeds_frozen)
+ encoder_hidden_states = image_embeds_frozen
+
+ if attention_mask is None:
+ attention_mask = paddle.ones(
+ ((batch_size, seq_length + past_key_values_length)),
+ )
+
+ # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
+ # ourselves in which case we just need to make it broadcastable to all heads.
+ extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape)
+
+ # If a 2D or 3D attention mask is provided for the cross-attention
+ # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
+ if encoder_hidden_states is not None:
+ if isinstance(encoder_hidden_states, list):
+ encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].shape
+ else:
+ encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.shape
+ encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
+
+ if isinstance(encoder_attention_mask, list):
+ encoder_extended_attention_mask = [self.invert_attention_mask(mask) for mask in encoder_attention_mask]
+ elif encoder_attention_mask is None:
+ encoder_attention_mask = paddle.ones(encoder_hidden_shape)
+ encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
+ else:
+ encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
+ else:
+ encoder_extended_attention_mask = None
+
+ encoder_outputs = self.encoder(
+ embedding_output,
+ attention_mask=extended_attention_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_extended_attention_mask,
+ past_key_values=past_key_values,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ query_length=query_length,
+ )
+ sequence_output = encoder_outputs[0]
+ pooled_output = sequence_output[:, 0, :]
+
+ if not return_dict:
+ return self.proj_layer(sequence_output[:, :query_length, :])
+
+ return BaseModelOutputWithPoolingAndCrossAttentions(
+ last_hidden_state=sequence_output,
+ pooler_output=pooled_output,
+ past_key_values=encoder_outputs.past_key_values,
+ hidden_states=encoder_outputs.hidden_states,
+ attentions=encoder_outputs.attentions,
+ cross_attentions=encoder_outputs.cross_attentions,
+ )
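+
+
+# Minimal usage sketch (illustrative; the checkpoint path, image size, and inputs are assumptions —
+# in practice this model is loaded as the `qformer` component of BlipDiffusionPipeline):
+# >>> qformer = Blip2QFormerModel.from_pretrained("some/blip-diffusion-qformer")  # hypothetical path
+# >>> pixel_values = paddle.randn([1, 3, 224, 224])    # preprocessed reference image
+# >>> ctx = qformer(text_input=["a dog"], image_input=pixel_values, return_dict=False)
+# >>> ctx.shape                                         # [1, num_query_tokens, qformer hidden size]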
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_ctx_clip.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_ctx_clip.py
new file mode 100644
index 0000000000000000000000000000000000000000..b78442c52e0b777e6a601f0b205cf9ccb5c75991
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/blip_diffusion/modeling_ctx_clip.py
@@ -0,0 +1,248 @@
+# Copyright 2023 Salesforce.com, inc.
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional, Tuple, Union
+
+import paddle
+from paddle import nn
+from paddlenlp.transformers.model_outputs import BaseModelOutputWithPooling
+
+from ppdiffusers.transformers import CLIPPretrainedModel
+from ppdiffusers.transformers.clip.configuration import CLIPTextConfig
+from ppdiffusers.transformers.clip.modeling import CLIPEncoder
+
+
+def _expand_mask(mask: paddle.Tensor, dtype, tgt_len: Optional[int] = None):
+ """
+ Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
+ """
+ bsz, src_len = mask.shape
+ tgt_len = tgt_len if tgt_len is not None else src_len
+
+ expanded_mask = mask[:, None, None, :].expand([bsz, 1, tgt_len, src_len]).cast(dtype)
+
+ inverted_mask = 1.0 - expanded_mask
+
+ return paddle.masked_fill(inverted_mask, inverted_mask.cast(paddle.bool), paddle.finfo(dtype).min)
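+
+
+# Worked example (illustrative): a padding mask [[1, 0]] expands to shape [1, 1, 2, 2]; the column
+# of the masked token is filled with `paddle.finfo(dtype).min` and all other entries are 0.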
+
+
+# This is a modified version of the CLIPTextModel from transformers.models.clip.modeling_clip
+# Which allows for an extra input of "context embeddings", which are the query embeddings used in Qformer
+# They pass through the clip model, along with the text embeddings, and interact with them using self attention
+class ContextCLIPTextModel(CLIPPretrainedModel):
+ config_class = CLIPTextConfig
+
+ _no_split_modules = ["CLIPEncoderLayer"]
+
+ def __init__(self, config: CLIPTextConfig):
+ super().__init__(config)
+ self.text_model = ContextCLIPTextTransformer(config)
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def forward(
+ self,
+ ctx_embeddings: paddle.Tensor = None,
+ ctx_begin_pos: list = None,
+ input_ids: Optional[paddle.Tensor] = None,
+ attention_mask: Optional[paddle.Tensor] = None,
+ position_ids: Optional[paddle.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPooling]:
+ return self.text_model(
+ ctx_embeddings=ctx_embeddings,
+ ctx_begin_pos=ctx_begin_pos,
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+
+class ContextCLIPTextTransformer(nn.Layer):
+ def __init__(self, config: CLIPTextConfig):
+ super().__init__()
+ self.config = config
+ embed_dim = config.hidden_size
+ self.embeddings = ContextCLIPTextEmbeddings(config)
+ self.encoder = CLIPEncoder(config)
+ self.final_layer_norm = nn.LayerNorm(embed_dim)
+ self.eos_token_id = config.eos_token_id
+
+ def forward(
+ self,
+ ctx_embeddings: paddle.Tensor,
+ ctx_begin_pos: list,
+ input_ids: Optional[paddle.Tensor] = None,
+ attention_mask: Optional[paddle.Tensor] = None,
+ position_ids: Optional[paddle.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPooling]:
+ r"""
+ Returns:
+
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ if input_ids is None:
+ raise ValueError("You have to specify either input_ids")
+
+ input_shape = input_ids.shape
+ input_ids = input_ids.reshape([-1, input_shape[-1]])
+
+ hidden_states = self.embeddings(
+ input_ids=input_ids,
+ position_ids=position_ids,
+ ctx_embeddings=ctx_embeddings,
+ ctx_begin_pos=ctx_begin_pos,
+ )
+
+ bsz, seq_len = input_shape
+ if ctx_embeddings is not None:
+ seq_len += ctx_embeddings.shape[1]
+ # CLIP's text model uses causal mask, prepare it here.
+ # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
+ causal_attention_mask = self._build_causal_attention_mask(
+ bsz,
+ seq_len,
+ hidden_states.dtype,
+ )
+ # expand attention_mask
+ if attention_mask is not None:
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ attention_mask = _expand_mask(attention_mask, hidden_states.dtype)
+
+ encoder_outputs = self.encoder(
+ inputs_embeds=hidden_states,
+ attention_mask=attention_mask,
+ causal_attention_mask=causal_attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+ last_hidden_state = encoder_outputs[0]
+ last_hidden_state = self.final_layer_norm(last_hidden_state)
+
+ if self.eos_token_id == 2:
+            # The `eos_token_id` was incorrect before PR #24773: let's keep what has been done here.
+ # A CLIP model with such `eos_token_id` in the config can't work correctly with extra new tokens added
+ # ------------------------------------------------------------
+ # text_embeds.shape = [batch_size, sequence_length, transformer.width]
+ # take features from the eot embedding (eot_token is the highest number in each sequence)
+ # casting to paddle.int32 for onnx compatibility: argmax doesn't support int64 inputs with opset 14
+ pooled_output = last_hidden_state.gather_nd(
+ paddle.stack(
+ [paddle.arange(last_hidden_state.shape[0], dtype="int32"), input_ids.argmax(-1, dtype="int32")],
+ axis=-1,
+ )
+ )
+ else:
+ # The config gets updated `eos_token_id` from PR #24773 (so the use of extra new tokens is possible)
+ # We need to get the first position of `eos_token_id` value (`pad_token_ids` might equal to `eos_token_id`)
+ pooled_output = last_hidden_state.gather_nd(
+ paddle.stack(
+ [
+ paddle.arange(last_hidden_state.shape[0], dtype="int32"),
+ (input_ids == paddle.to_tensor([self.eos_token_id]))
+ .cast("int32")
+ .argmax(axis=-1, dtype="int32"),
+ ],
+ axis=-1,
+ )
+ )
+
+ if not return_dict:
+ return (last_hidden_state, pooled_output) + encoder_outputs[1:]
+
+ return BaseModelOutputWithPooling(
+ last_hidden_state=last_hidden_state,
+ pooler_output=pooled_output,
+ hidden_states=encoder_outputs.hidden_states,
+ attentions=encoder_outputs.attentions,
+ )
+
+    def _build_causal_attention_mask(self, bsz, seq_len, dtype):
+        # Additive causal mask: large negative values above the diagonal, zeros elsewhere.
+        mask = paddle.triu(
+            paddle.ones((bsz, 1, seq_len, seq_len), dtype=dtype) * paddle.finfo(dtype).min,
+            diagonal=1,
+        )
+        return mask
+
+
+class ContextCLIPTextEmbeddings(nn.Layer):
+ def __init__(self, config: CLIPTextConfig):
+ super().__init__()
+ embed_dim = config.hidden_size
+
+ self.token_embedding = nn.Embedding(config.vocab_size, embed_dim)
+ self.position_embedding = nn.Embedding(config.max_position_embeddings, embed_dim)
+
+ # position_ids (1, len position emb) is contiguous in memory and exported when serialized
+ self.register_buffer(
+ "position_ids", paddle.arange(config.max_position_embeddings, dtype=paddle.int64).expand((1, -1))
+ )
+
+ def forward(
+ self,
+ ctx_embeddings: paddle.Tensor,
+ ctx_begin_pos: list,
+ input_ids: Optional[paddle.Tensor] = None,
+ position_ids: Optional[paddle.Tensor] = None,
+ inputs_embeds: Optional[paddle.Tensor] = None,
+ ) -> paddle.Tensor:
+ if ctx_embeddings is None:
+ ctx_len = 0
+ else:
+ ctx_len = ctx_embeddings.shape[1]
+
+ seq_length = (input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2]) + ctx_len
+
+ if position_ids is None:
+ position_ids = self.position_ids[:, :seq_length].cast(paddle.int64)
+
+ if inputs_embeds is None:
+ inputs_embeds = self.token_embedding(input_ids)
+
+ # for each input embeddings, add the ctx embeddings at the correct position
+ input_embeds_ctx = []
+ bsz = inputs_embeds.shape[0]
+
+ if ctx_embeddings is not None:
+ for i in range(bsz):
+ cbp = ctx_begin_pos[i]
+
+ prefix = inputs_embeds[i, :cbp]
+ # remove the special token embedding
+ suffix = inputs_embeds[i, cbp:]
+
+ input_embeds_ctx.append(paddle.concat([prefix, ctx_embeddings[i], suffix], axis=0))
+
+ inputs_embeds = paddle.stack(input_embeds_ctx, axis=0)
+
+ position_embeddings = self.position_embedding(position_ids)
+ embeddings = inputs_embeds + position_embeddings
+
+ return embeddings
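+
+
+# Note (illustrative): the context embeddings are spliced into the token-embedding sequence at each
+# sample's `ctx_begin_pos`, so the returned sequence length is the input_ids length plus the number
+# of context embeddings; the prompt therefore has to be short enough for this total to stay within
+# `max_position_embeddings`.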
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/consistency_models/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/consistency_models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0de04fa38c3109ea181a0c289564b10ae9e49a92
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/consistency_models/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import PPDIFFUSERS_SLOW_IMPORT, _LazyModule
+
+_import_structure = {
+ "pipeline_consistency_models": ["ConsistencyModelPipeline"],
+}
+
+if TYPE_CHECKING or PPDIFFUSERS_SLOW_IMPORT:
+ from .pipeline_consistency_models import ConsistencyModelPipeline
+
+else:
+ import sys
+
+ sys.modules[__name__] = _LazyModule(
+ __name__,
+ globals()["__file__"],
+ _import_structure,
+ module_spec=__spec__,
+ )
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f22c95d57a1680ad45f763c5bd76591c8d56d2c
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -0,0 +1,1308 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import inspect
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import paddle
+import PIL.Image
+
+from ppdiffusers.transformers import (
+ CLIPImageProcessor,
+ CLIPTextModel,
+ CLIPTextModelWithProjection,
+ CLIPTokenizer,
+ CLIPVisionModelWithProjection,
+)
+
+from ...image_processor import PipelineImageInput, VaeImageProcessor
+from ...loaders import (
+ FromSingleFileMixin,
+ IPAdapterMixin,
+ StableDiffusionXLLoraLoaderMixin,
+ TextualInversionLoaderMixin,
+)
+from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
+from ...models.attention_processor import (
+ AttnProcessor2_5,
+ LoRAAttnProcessor2_5,
+ LoRAXFormersAttnProcessor,
+ XFormersAttnProcessor,
+)
+from ...models.lora import adjust_lora_scale_text_encoder
+from ...schedulers import KarrasDiffusionSchedulers
+from ...utils import (
+ USE_PEFT_BACKEND,
+ deprecate,
+ is_pp_invisible_watermark_available,
+ logging,
+ replace_example_docstring,
+)
+from ...utils.paddle_utils import randn_tensor
+from ..pipeline_utils import DiffusionPipeline
+from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+
+if is_pp_invisible_watermark_available():
+ from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
+
+from .multicontrolnet import MultiControlNetModel
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+
+EXAMPLE_DOC_STRING = """
+ Examples:
+ ```py
+ >>> # !pip install opencv-python paddlenlp ppdiffusers
+ >>> from ppdiffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
+ >>> from ppdiffusers.utils import load_image
+ >>> import numpy as np
+ >>> import paddle
+
+ >>> import cv2
+ >>> from PIL import Image
+
+ >>> prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
+ >>> negative_prompt = "low quality, bad quality, sketches"
+
+ >>> # download an image
+ >>> image = load_image(
+ ... "https://hf-mirror.com/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
+ ... )
+
+ >>> # initialize the models and pipeline
+ >>> controlnet_conditioning_scale = 0.5 # recommended for good generalization
+ >>> controlnet = ControlNetModel.from_pretrained(
+ ... "diffusers/controlnet-canny-sdxl-1.0", paddle_dtype=paddle.float16
+ ... )
+ >>> vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", paddle_dtype=paddle.float16)
+ >>> pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, paddle_dtype=paddle.float16
+ ... )
+
+ >>> # get canny image
+ >>> image = np.array(image)
+ >>> image = cv2.Canny(image, 100, 200)
+ >>> image = image[:, :, None]
+ >>> image = np.concatenate([image, image, image], axis=2)
+ >>> canny_image = Image.fromarray(image)
+
+ >>> # generate image
+ >>> image = pipe(
+ ... prompt, controlnet_conditioning_scale=controlnet_conditioning_scale, image=canny_image
+ ... ).images[0]
+ ```
+"""
+
+
+class StableDiffusionXLControlNetPipeline(
+ DiffusionPipeline,
+ TextualInversionLoaderMixin,
+ StableDiffusionXLLoraLoaderMixin,
+ IPAdapterMixin,
+ FromSingleFileMixin,
+):
+ r"""
+ Pipeline for text-to-image generation using Stable Diffusion XL with ControlNet guidance.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
+ implemented for all pipelines (downloading, saving, running on a particular device, etc.).
+
+ The pipeline also inherits the following loading methods:
+ - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+ - [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+ - [`loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+
+ Args:
+ vae ([`AutoencoderKL`]):
+ Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
+ text_encoder ([`~transformers.CLIPTextModel`]):
+ Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
+ text_encoder_2 ([`~transformers.CLIPTextModelWithProjection`]):
+ Second frozen text-encoder
+ ([laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)).
+ tokenizer ([`~transformers.CLIPTokenizer`]):
+ A `CLIPTokenizer` to tokenize text.
+ tokenizer_2 ([`~transformers.CLIPTokenizer`]):
+ A `CLIPTokenizer` to tokenize text.
+ unet ([`UNet2DConditionModel`]):
+ A `UNet2DConditionModel` to denoise the encoded image latents.
+ controlnet ([`ControlNetModel`] or `List[ControlNetModel]`):
+ Provides additional conditioning to the `unet` during the denoising process. If you set multiple
+ ControlNets as a list, the outputs from each ControlNet are added together to create one combined
+ additional conditioning.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ force_zeros_for_empty_prompt (`bool`, *optional*, defaults to `"True"`):
+ Whether the negative prompt embeddings should always be set to 0. Also see the config of
+ `stabilityai/stable-diffusion-xl-base-1-0`.
+ add_watermarker (`bool`, *optional*):
+ Whether to use the [pp_invisible_watermark](https://github.com/junnyu/pp-invisible-watermark/) library to
+ watermark output images. If not defined, it defaults to `True` if the package is installed; otherwise no
+ watermarker is used.
+ """
+
+ # leave controlnet out on purpose because it iterates with unet
+ model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
+ _optional_components = [
+ "tokenizer",
+ "tokenizer_2",
+ "text_encoder",
+ "text_encoder_2",
+ "feature_extractor",
+ "image_encoder",
+ ]
+ _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ text_encoder_2: CLIPTextModelWithProjection,
+ tokenizer: CLIPTokenizer,
+ tokenizer_2: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ controlnet: Union[ControlNetModel, List[ControlNetModel], Tuple[ControlNetModel], MultiControlNetModel],
+ scheduler: KarrasDiffusionSchedulers,
+ force_zeros_for_empty_prompt: bool = True,
+ add_watermarker: Optional[bool] = None,
+ feature_extractor: CLIPImageProcessor = None,
+ image_encoder: CLIPVisionModelWithProjection = None,
+ ):
+ super().__init__()
+
+ if isinstance(controlnet, (list, tuple)):
+ controlnet = MultiControlNetModel(controlnet)
+
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ text_encoder_2=text_encoder_2,
+ tokenizer=tokenizer,
+ tokenizer_2=tokenizer_2,
+ unet=unet,
+ controlnet=controlnet,
+ scheduler=scheduler,
+ feature_extractor=feature_extractor,
+ image_encoder=image_encoder,
+ )
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
+ self.control_image_processor = VaeImageProcessor(
+ vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
+ )
+ add_watermarker = add_watermarker if add_watermarker is not None else is_pp_invisible_watermark_available()
+
+ if add_watermarker:
+ self.watermark = StableDiffusionXLWatermarker()
+ else:
+ self.watermark = None
+
+ self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
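+
+    # Note (illustrative): for the SDXL VAE, `block_out_channels` has 4 entries, so
+    # `vae_scale_factor` is 2 ** 3 = 8 and a 1024x1024 image corresponds to a 128x128 latent.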
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[paddle.Tensor] = None,
+ negative_prompt_embeds: Optional[paddle.Tensor] = None,
+ pooled_prompt_embeds: Optional[paddle.Tensor] = None,
+ negative_pooled_prompt_embeds: Optional[paddle.Tensor] = None,
+ lora_scale: Optional[float] = None,
+ clip_skip: Optional[int] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ clip_skip (`int`, *optional*):
+ Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
+ the output of the pre-final layer will be used for computing the prompt embeddings.
+ """
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ # dynamically adjust the LoRA scale
+ if self.text_encoder is not None:
+ if not USE_PEFT_BACKEND:
+ adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
+
+ if self.text_encoder_2 is not None:
+ if not USE_PEFT_BACKEND:
+ adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
+
+ prompt = [prompt] if isinstance(prompt, str) else prompt
+
+ if prompt is not None:
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ prompt_2 = prompt_2 or prompt
+ prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
+
+ # textual inversion: process multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ prompt = self.maybe_convert_prompt(prompt, tokenizer)
+
+ text_inputs = tokenizer(
+ prompt,
+ padding="max_length",
+ max_length=tokenizer.model_max_length,
+ truncation=True,
+ return_tensors="pd",
+ )
+
+ text_input_ids = text_inputs.input_ids
+ untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pd").input_ids
+
+ if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not paddle.equal_all(
+ text_input_ids, untruncated_ids
+ ):
+ removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
+ logger.warning(
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
+ f" {tokenizer.model_max_length} tokens: {removed_text}"
+ )
+
+ prompt_embeds = text_encoder(text_input_ids, output_hidden_states=True)
+
+ # We are ALWAYS interested only in the pooled output of the final text encoder
+ pooled_prompt_embeds = prompt_embeds[0]
+ if clip_skip is None:
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+ else:
+ # "2" because SDXL always indexes from the penultimate layer.
+ prompt_embeds = prompt_embeds.hidden_states[-(clip_skip + 2)]
+
+ prompt_embeds_list.append(prompt_embeds)
+
+ prompt_embeds = paddle.concat(prompt_embeds_list, axis=-1)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = paddle.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = paddle.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
+
+ # normalize str to list
+ negative_prompt = batch_size * [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
+ negative_prompt_2 = (
+ batch_size * [negative_prompt_2] if isinstance(negative_prompt_2, str) else negative_prompt_2
+ )
+
+ uncond_tokens: List[str]
+ if prompt is not None and type(prompt) is not type(negative_prompt):
+ raise TypeError(
+ f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
+ f" {type(prompt)}."
+ )
+ elif batch_size != len(negative_prompt):
+ raise ValueError(
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+ " the batch size of `prompt`."
+ )
+ else:
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+
+ negative_prompt_embeds_list = []
+ for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
+
+ max_length = prompt_embeds.shape[1]
+ uncond_input = tokenizer(
+ negative_prompt,
+ padding="max_length",
+ max_length=max_length,
+ truncation=True,
+ return_tensors="pd",
+ )
+
+ negative_prompt_embeds = text_encoder(
+ uncond_input.input_ids,
+ output_hidden_states=True,
+ )
+ # We are ALWAYS interested only in the pooled output of the final text encoder
+ negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+ negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
+
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
+
+ negative_prompt_embeds = paddle.concat(negative_prompt_embeds_list, axis=-1)
+
+ if self.text_encoder_2 is not None:
+ prompt_embeds = prompt_embeds.cast(dtype=self.text_encoder_2.dtype)
+ else:
+ prompt_embeds = prompt_embeds.cast(dtype=self.unet.dtype)
+
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.tile([1, num_images_per_prompt, 1])
+ prompt_embeds = prompt_embeds.reshape([bs_embed * num_images_per_prompt, seq_len, -1])
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+
+ if self.text_encoder_2 is not None:
+ negative_prompt_embeds = negative_prompt_embeds.cast(dtype=self.text_encoder_2.dtype)
+ else:
+ negative_prompt_embeds = negative_prompt_embeds.cast(dtype=self.unet.dtype)
+
+ negative_prompt_embeds = negative_prompt_embeds.tile([1, num_images_per_prompt, 1])
+ negative_prompt_embeds = negative_prompt_embeds.reshape([batch_size * num_images_per_prompt, seq_len, -1])
+
+ pooled_prompt_embeds = pooled_prompt_embeds.tile([1, num_images_per_prompt]).reshape(
+ [bs_embed * num_images_per_prompt, -1]
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.tile([1, num_images_per_prompt]).reshape(
+ [bs_embed * num_images_per_prompt, -1]
+ )
+
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
+ def encode_image(self, image, num_images_per_prompt):
+ dtype = next(self.image_encoder.named_parameters())[1].dtype
+
+ if not isinstance(image, paddle.Tensor):
+ image = self.feature_extractor(image, return_tensors="pd").pixel_values
+
+ image = image.cast(dtype=dtype)
+ image_embeds = self.image_encoder(image).image_embeds
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, axis=0)
+
+ uncond_image_embeds = paddle.zeros_like(image_embeds)
+ return image_embeds, uncond_image_embeds
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
+ def prepare_extra_step_kwargs(self, generator, eta):
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # and should be between [0, 1]
+
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ extra_step_kwargs = {}
+ if accepts_eta:
+ extra_step_kwargs["eta"] = eta
+
+ # check if the scheduler accepts generator
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ if accepts_generator:
+ extra_step_kwargs["generator"] = generator
+ return extra_step_kwargs
+
+ def check_inputs(
+ self,
+ prompt,
+ prompt_2,
+ image,
+ callback_steps,
+ negative_prompt=None,
+ negative_prompt_2=None,
+ prompt_embeds=None,
+ negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
+ controlnet_conditioning_scale=1.0,
+ control_guidance_start=0.0,
+ control_guidance_end=1.0,
+ callback_on_step_end_tensor_inputs=None,
+ ):
+ if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ )
+
+ if callback_on_step_end_tensor_inputs is not None and not all(
+ k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
+ ):
+ raise ValueError(
+ f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
+ )
+
+ if prompt is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt_2 is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt is None and prompt_embeds is None:
+ raise ValueError(
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
+ )
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
+
+ if negative_prompt is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
+ raise ValueError(
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+ f" {negative_prompt_embeds.shape}."
+ )
+
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # `prompt` needs more sophisticated handling when there are multiple
+ # conditionings.
+ if isinstance(self.controlnet, MultiControlNetModel):
+ if isinstance(prompt, list):
+ logger.warning(
+ f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
+ " prompts. The conditionings will be fixed across the prompts."
+ )
+
+ # Check `image`
+ if isinstance(self.controlnet, ControlNetModel):
+ self.check_image(image, prompt, prompt_embeds)
+ elif isinstance(self.controlnet, MultiControlNetModel):
+ if not isinstance(image, list):
+ raise TypeError("For multiple controlnets: `image` must be type `list`")
+
+ # When `image` is a nested list:
+ # (e.g. [[canny_image_1, pose_image_1], [canny_image_2, pose_image_2]])
+ elif any(isinstance(i, list) for i in image):
+ raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+ elif len(image) != len(self.controlnet.nets):
+ raise ValueError(
+ f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
+ )
+
+ for image_ in image:
+ self.check_image(image_, prompt, prompt_embeds)
+ else:
+ assert False
+
+ # Check `controlnet_conditioning_scale`
+ if isinstance(self.controlnet, ControlNetModel):
+ if not isinstance(controlnet_conditioning_scale, float):
+ raise TypeError("For single controlnet: `controlnet_conditioning_scale` must be type `float`.")
+ elif isinstance(self.controlnet, MultiControlNetModel):
+ if isinstance(controlnet_conditioning_scale, list):
+ if any(isinstance(i, list) for i in controlnet_conditioning_scale):
+ raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+ elif isinstance(controlnet_conditioning_scale, list) and len(controlnet_conditioning_scale) != len(
+ self.controlnet.nets
+ ):
+ raise ValueError(
+ "For multiple controlnets: When `controlnet_conditioning_scale` is specified as `list`, it must have"
+ " the same length as the number of controlnets"
+ )
+ else:
+ assert False
+
+ if not isinstance(control_guidance_start, (tuple, list)):
+ control_guidance_start = [control_guidance_start]
+
+ if not isinstance(control_guidance_end, (tuple, list)):
+ control_guidance_end = [control_guidance_end]
+
+ if len(control_guidance_start) != len(control_guidance_end):
+ raise ValueError(
+ f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+ )
+
+ if isinstance(self.controlnet, MultiControlNetModel):
+ if len(control_guidance_start) != len(self.controlnet.nets):
+ raise ValueError(
+ f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+ )
+
+ for start, end in zip(control_guidance_start, control_guidance_end):
+ if start >= end:
+ raise ValueError(
+ f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+ )
+ if start < 0.0:
+ raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+ if end > 1.0:
+ raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
+ # Copied from ppdiffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
+ def check_image(self, image, prompt, prompt_embeds):
+ image_is_pil = isinstance(image, PIL.Image.Image)
+ image_is_tensor = isinstance(image, paddle.Tensor)
+ image_is_np = isinstance(image, np.ndarray)
+ image_is_pil_list = isinstance(image, list) and isinstance(image[0], PIL.Image.Image)
+ image_is_tensor_list = isinstance(image, list) and isinstance(image[0], paddle.Tensor)
+ image_is_np_list = isinstance(image, list) and isinstance(image[0], np.ndarray)
+
+ if (
+ not image_is_pil
+ and not image_is_tensor
+ and not image_is_np
+ and not image_is_pil_list
+ and not image_is_tensor_list
+ and not image_is_np_list
+ ):
+ raise TypeError(
+ f"image must be passed and be one of PIL image, numpy array, paddle tensor, list of PIL images, list of numpy arrays or list of paddle tensors, but is {type(image)}"
+ )
+
+ if image_is_pil:
+ image_batch_size = 1
+ else:
+ image_batch_size = len(image)
+
+ if prompt is not None and isinstance(prompt, str):
+ prompt_batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ prompt_batch_size = len(prompt)
+ elif prompt_embeds is not None:
+ prompt_batch_size = prompt_embeds.shape[0]
+
+ if image_batch_size != 1 and image_batch_size != prompt_batch_size:
+ raise ValueError(
+ f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
+ )
+
+ # Copied from ppdiffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.prepare_image
+ def prepare_image(
+ self,
+ image,
+ width,
+ height,
+ batch_size,
+ num_images_per_prompt,
+ dtype,
+ do_classifier_free_guidance=False,
+ guess_mode=False,
+ ):
+ image = self.control_image_processor.preprocess(image, height=height, width=width).cast(dtype=paddle.float32)
+ image_batch_size = image.shape[0]
+
+ if image_batch_size == 1:
+ repeat_by = batch_size
+ else:
+ # image batch size is the same as prompt batch size
+ repeat_by = num_images_per_prompt
+
+ image = image.repeat_interleave(repeat_by, axis=0)
+
+ image = image.cast(dtype=dtype)
+
+ if do_classifier_free_guidance and not guess_mode:
+ image = paddle.concat([image] * 2)
+
+ return image
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, generator, latents=None):
+ shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+ if isinstance(generator, list) and len(generator) != batch_size:
+ raise ValueError(
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+ )
+
+ if latents is None:
+ latents = randn_tensor(shape, generator=generator, dtype=dtype)
+ else:
+ latents = latents.cast(dtype)
+
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma
+ return latents
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._get_add_time_ids
+ def _get_add_time_ids(
+ self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
+ ):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+
+ passed_add_embed_dim = (
+ self.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
+ )
+ expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
+
+ if expected_add_embed_dim != passed_add_embed_dim:
+ raise ValueError(
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+ )
+
+ add_time_ids = paddle.to_tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
+
+ # Copied from ppdiffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
+ def upcast_vae(self):
+ dtype = self.vae.dtype
+ self.vae.to(dtype=paddle.float32)
+ use_paddle_2_5_or_ppxformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_5,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_5,
+ ),
+ )
+ # if xformers or torch_2_0 is used attention block does not need
+ # to be in float32 which can save lots of memory
+ if use_paddle_2_5_or_ppxformers:
+ self.vae.post_quant_conv.to(dtype=dtype)
+ self.vae.decoder.conv_in.to(dtype=dtype)
+ self.vae.decoder.mid_block.to(dtype=dtype)
+
+ # Copied from ppdiffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
+ def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=paddle.float32):
+ """
+ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
+
+ Args:
+ w (`paddle.Tensor`):
+ guidance scale values at which to generate the embedding vectors
+ embedding_dim (`int`, *optional*, defaults to 512):
+ dimension of the embeddings to generate
+ dtype:
+ data type of the generated embeddings
+
+ Returns:
+ `paddle.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`
+ """
+ assert len(w.shape) == 1
+ w = w * 1000.0
+
+ half_dim = embedding_dim // 2
+ emb = paddle.log(paddle.to_tensor(10000.0)) / (half_dim - 1)
+ emb = paddle.exp(paddle.arange(half_dim, dtype=dtype) * -emb)
+ emb = w.cast(dtype=dtype)[:, None] * emb[None, :]
+ emb = paddle.concat([paddle.sin(emb), paddle.cos(emb)], axis=1)
+ if embedding_dim % 2 == 1:
+ # zero-pad the last column so the embedding reaches the requested (odd) dimension
+ emb = paddle.concat([emb, paddle.zeros([emb.shape[0], 1], dtype=emb.dtype)], axis=-1)
+ assert emb.shape == [w.shape[0], embedding_dim]
+ return emb
+
+ @property
+ def guidance_scale(self):
+ return self._guidance_scale
+
+ @property
+ def clip_skip(self):
+ return self._clip_skip
+
+ # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # corresponds to doing no classifier free guidance.
+ @property
+ def do_classifier_free_guidance(self):
+ return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None
+
+ @property
+ def cross_attention_kwargs(self):
+ return self._cross_attention_kwargs
+
+ @property
+ def num_timesteps(self):
+ return self._num_timesteps
+
+ @paddle.no_grad()
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ image: PipelineImageInput = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[paddle.Generator, List[paddle.Generator]]] = None,
+ latents: Optional[paddle.Tensor] = None,
+ prompt_embeds: Optional[paddle.Tensor] = None,
+ negative_prompt_embeds: Optional[paddle.Tensor] = None,
+ pooled_prompt_embeds: Optional[paddle.Tensor] = None,
+ negative_pooled_prompt_embeds: Optional[paddle.Tensor] = None,
+ ip_adapter_image: Optional[PipelineImageInput] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ controlnet_conditioning_scale: Union[float, List[float]] = 1.0,
+ guess_mode: bool = False,
+ control_guidance_start: Union[float, List[float]] = 0.0,
+ control_guidance_end: Union[float, List[float]] = 1.0,
+ original_size: Tuple[int, int] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Tuple[int, int] = None,
+ negative_original_size: Optional[Tuple[int, int]] = None,
+ negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
+ negative_target_size: Optional[Tuple[int, int]] = None,
+ clip_skip: Optional[int] = None,
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+ **kwargs,
+ ):
+ r"""
+ The call function to the pipeline for generation.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders.
+ image (`paddle.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[paddle.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,
+ `List[List[paddle.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+ The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
+ specified as `paddle.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
+ accepted as an image. The dimensions of the output image default to `image`'s dimensions. If height
+ and/or width are passed, `image` is resized accordingly. If multiple ControlNets are specified in
+ `init`, images must be passed as a list such that each element of the list can be correctly batched for
+ input to a single ControlNet.
+ height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
+ The height in pixels of the generated image. Anything below 512 pixels won't work well for
+ [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+ and checkpoints that are not specifically fine-tuned on low resolutions.
+ width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
+ The width in pixels of the generated image. Anything below 512 pixels won't work well for
+ [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+ and checkpoints that are not specifically fine-tuned on low resolutions.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ A higher guidance scale value encourages the model to generate images closely linked to the text
+ `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts to guide what to not include in image generation. If not defined, you need to
+ pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to guide what to not include in image generation. This is sent to `tokenizer_2`
+ and `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders.
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
+ to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+ generator (`paddle.Generator` or `List[paddle.Generator]`, *optional*):
+ A [`paddle.Generator`] to make generation deterministic.
+
+ latents (`paddle.Tensor`, *optional*):
+ Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+ tensor is generated by sampling using the supplied random `generator`.
+ prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
+ provided, text embeddings are generated from the `prompt` input argument.
+ negative_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
+ not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
+ pooled_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
+ not provided, pooled text embeddings are generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`paddle.Tensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs (prompt
+ weighting). If not provided, pooled `negative_prompt_embeds` are generated from `negative_prompt` input
+ argument.
+ ip_adapter_image (`PipelineImageInput`, *optional*):
+ Optional image input to work with IP Adapters.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+ The output format of the generated image. Choose between `PIL.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
+ plain tuple.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
+ [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+ controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
+ The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
+ to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
+ the corresponding scale as a list.
+ guess_mode (`bool`, *optional*, defaults to `False`):
+ The ControlNet encoder tries to recognize the content of the input image even if you remove all
+ prompts. A `guidance_scale` value between 3.0 and 5.0 is recommended.
+ control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+ The percentage of total steps at which the ControlNet starts applying.
+ control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+ The percentage of total steps at which the ControlNet stops applying.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+ `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+ not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ To negatively condition the generation process based on a specific image resolution. Part of SDXL's
+ micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ To negatively condition the generation process based on specific crop coordinates. Part of SDXL's
+ micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ To negatively condition the generation process based on a target image resolution. It should be the same
+ as the `target_size` in most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ clip_skip (`int`, *optional*):
+ Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
+ the output of the pre-final layer will be used for computing the prompt embeddings.
+ callback_on_step_end (`Callable`, *optional*):
+ A function that is called at the end of each denoising step during inference. The function is called
+ with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
+ callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
+ `callback_on_step_end_tensor_inputs`.
+ callback_on_step_end_tensor_inputs (`List`, *optional*):
+ The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
+ will be passed as the `callback_kwargs` argument. You will only be able to include variables listed in the
+ `._callback_tensor_inputs` attribute of your pipeline class.
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
+ If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
+ otherwise a `tuple` is returned containing the output images.
+ """
+
+ callback = kwargs.pop("callback", None)
+ callback_steps = kwargs.pop("callback_steps", None)
+
+ if callback is not None:
+ deprecate(
+ "callback",
+ "1.0.0",
+ "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
+ )
+ if callback_steps is not None:
+ deprecate(
+ "callback_steps",
+ "1.0.0",
+ "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
+ )
+
+ controlnet = self.controlnet
+
+ # align format for control guidance
+ if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+ control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+ elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+ control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+ elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+ mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+ control_guidance_start, control_guidance_end = (
+ mult * [control_guidance_start],
+ mult * [control_guidance_end],
+ )
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ image,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ controlnet_conditioning_scale,
+ control_guidance_start,
+ control_guidance_end,
+ callback_on_step_end_tensor_inputs,
+ )
+
+ self._guidance_scale = guidance_scale
+ self._clip_skip = clip_skip
+ self._cross_attention_kwargs = cross_attention_kwargs
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
+ controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
+
+ global_pool_conditions = (
+ controlnet.config.global_pool_conditions
+ if isinstance(controlnet, ControlNetModel)
+ else controlnet.nets[0].config.global_pool_conditions
+ )
+ guess_mode = guess_mode or global_pool_conditions
+
+ # 3.1 Encode input prompt
+ text_encoder_lora_scale = (
+ self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
+ )
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt,
+ prompt_2,
+ num_images_per_prompt,
+ self.do_classifier_free_guidance,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=text_encoder_lora_scale,
+ clip_skip=self.clip_skip,
+ )
+
+ # 3.2 Encode ip_adapter_image
+ if ip_adapter_image is not None:
+ image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, num_images_per_prompt)
+ if self.do_classifier_free_guidance:
+ image_embeds = paddle.concat([negative_image_embeds, image_embeds])
+
+ # 4. Prepare image
+ if isinstance(controlnet, ControlNetModel):
+ image = self.prepare_image(
+ image=image,
+ width=width,
+ height=height,
+ batch_size=batch_size * num_images_per_prompt,
+ num_images_per_prompt=num_images_per_prompt,
+ dtype=controlnet.dtype,
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
+ guess_mode=guess_mode,
+ )
+ height, width = image.shape[-2:]
+ elif isinstance(controlnet, MultiControlNetModel):
+ images = []
+
+ for image_ in image:
+ image_ = self.prepare_image(
+ image=image_,
+ width=width,
+ height=height,
+ batch_size=batch_size * num_images_per_prompt,
+ num_images_per_prompt=num_images_per_prompt,
+ dtype=controlnet.dtype,
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
+ guess_mode=guess_mode,
+ )
+
+ images.append(image_)
+
+ image = images
+ height, width = image[0].shape[-2:]
+ else:
+ assert False
+
+ # 5. Prepare timesteps
+ self.scheduler.set_timesteps(num_inference_steps)
+ timesteps = self.scheduler.timesteps
+ self._num_timesteps = len(timesteps)
+
+ # 6. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ generator,
+ latents,
+ )
+
+ # 6.5 Optionally get Guidance Scale Embedding
+ timestep_cond = None
+ if self.unet.config.time_cond_proj_dim is not None:
+ guidance_scale_tensor = paddle.to_tensor([self.guidance_scale - 1]).tile(
+ [
+ batch_size * num_images_per_prompt,
+ ]
+ )
+ timestep_cond = self.get_guidance_scale_embedding(
+ guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
+ ).cast(dtype=latents.dtype)
+
+ # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7.1 Create tensor stating which controlnets to keep
+ controlnet_keep = []
+ for i in range(len(timesteps)):
+ keeps = [
+ 1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+ for s, e in zip(control_guidance_start, control_guidance_end)
+ ]
+ controlnet_keep.append(keeps[0] if isinstance(controlnet, ControlNetModel) else keeps)
+
+ # 7.2 Prepare added time ids & embeddings
+ if isinstance(image, list):
+ original_size = original_size or tuple(image[0].shape[-2:])
+ else:
+ original_size = original_size or tuple(image.shape[-2:])
+ target_size = target_size or (height, width)
+
+ add_text_embeds = pooled_prompt_embeds
+ if self.text_encoder_2 is None:
+ text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+ else:
+ text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
+
+ add_time_ids = self._get_add_time_ids(
+ original_size,
+ crops_coords_top_left,
+ target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+
+ if negative_original_size is not None and negative_target_size is not None:
+ negative_add_time_ids = self._get_add_time_ids(
+ negative_original_size,
+ negative_crops_coords_top_left,
+ negative_target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ else:
+ negative_add_time_ids = add_time_ids
+
+ if self.do_classifier_free_guidance:
+ prompt_embeds = paddle.concat([negative_prompt_embeds, prompt_embeds], axis=0)
+ add_text_embeds = paddle.concat([negative_pooled_prompt_embeds, add_text_embeds], axis=0)
+ add_time_ids = paddle.concat([negative_add_time_ids, add_time_ids], axis=0)
+
+ add_time_ids = add_time_ids.tile([batch_size * num_images_per_prompt, 1])
+
+ # 8. Denoising loop
+ num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
+
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = paddle.concat([latents] * 2) if self.do_classifier_free_guidance else latents
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+
+ # controlnet(s) inference
+ if guess_mode and self.do_classifier_free_guidance:
+ # Infer ControlNet only for the conditional batch.
+ control_model_input = latents
+ control_model_input = self.scheduler.scale_model_input(control_model_input, t)
+ controlnet_prompt_embeds = prompt_embeds.chunk(2)[1]
+ controlnet_added_cond_kwargs = {
+ "text_embeds": add_text_embeds.chunk(2)[1],
+ "time_ids": add_time_ids.chunk(2)[1],
+ }
+ else:
+ control_model_input = latent_model_input
+ controlnet_prompt_embeds = prompt_embeds
+ controlnet_added_cond_kwargs = added_cond_kwargs
+
+ if isinstance(controlnet_keep[i], list):
+ cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+ else:
+ controlnet_cond_scale = controlnet_conditioning_scale
+ if isinstance(controlnet_cond_scale, list):
+ controlnet_cond_scale = controlnet_cond_scale[0]
+ cond_scale = controlnet_cond_scale * controlnet_keep[i]
+
+ down_block_res_samples, mid_block_res_sample = self.controlnet(
+ control_model_input,
+ t,
+ encoder_hidden_states=controlnet_prompt_embeds,
+ controlnet_cond=image,
+ conditioning_scale=cond_scale,
+ guess_mode=guess_mode,
+ added_cond_kwargs=controlnet_added_cond_kwargs,
+ return_dict=False,
+ )
+
+ if guess_mode and self.do_classifier_free_guidance:
+ # ControlNet was inferred only for the conditional batch.
+ # To apply the output of ControlNet to both the unconditional and conditional batches,
+ # add 0 to the unconditional batch to keep it unchanged.
+ down_block_res_samples = [paddle.concat([paddle.zeros_like(d), d]) for d in down_block_res_samples]
+ mid_block_res_sample = paddle.concat(
+ [paddle.zeros_like(mid_block_res_sample), mid_block_res_sample]
+ )
+
+ if ip_adapter_image is not None:
+ added_cond_kwargs["image_embeds"] = image_embeds
+
+ # predict the noise residual
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ timestep_cond=timestep_cond,
+ cross_attention_kwargs=self.cross_attention_kwargs,
+ down_block_additional_residuals=down_block_res_samples,
+ mid_block_additional_residual=mid_block_res_sample,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if self.do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ if callback_on_step_end is not None:
+ callback_kwargs = {}
+ for k in callback_on_step_end_tensor_inputs:
+ callback_kwargs[k] = locals()[k]
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+ latents = callback_outputs.pop("latents", latents)
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ step_idx = i // getattr(self.scheduler, "order", 1)
+ callback(step_idx, t, latents)
+
+ # manually for max memory savings
+ # if self.vae.dtype in [paddle.float16, "float16"] and self.vae.config.force_upcast:
+ # self.upcast_vae()
+ # latents = latents.cast(dtype=next(iter(self.vae.post_quant_conv.named_parameters()))[1].dtype)
+
+ if not output_type == "latent":
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ needs_upcasting = self.vae.dtype in [paddle.float16, "float16"] and self.vae.config.force_upcast
+
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.cast(dtype=next(iter(self.vae.post_quant_conv.named_parameters()))[1].dtype)
+
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+
+ # cast back to fp16 if needed
+ if needs_upcasting:
+ self.vae.to(dtype=paddle.float16)
+ else:
+ image = latents
+
+ if not output_type == "latent":
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ if not return_dict:
+ return (image,)
+
+ return StableDiffusionXLPipelineOutput(images=image)
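For orientation, here is a minimal usage sketch of the SDXL ControlNet pipeline whose `__call__` is shown above. The pipeline class name, the model ids, and the conditioning-image URL are assumptions based on the publicly available SDXL ControlNet checkpoints, not something stated in this diff.

```python
import paddle

from ppdiffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
from ppdiffusers.utils import load_image

# Assumed model ids; any SDXL base model paired with an SDXL ControlNet should behave the same way.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", paddle_dtype=paddle.float16
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, paddle_dtype=paddle.float16
)

# A pre-computed Canny edge map is used as the ControlNet conditioning image (assumed URL).
canny_image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
)

prompt = "a colorful bird standing on a branch, highly detailed, 8k"
image = pipe(
    prompt,
    image=canny_image,
    num_inference_steps=30,
    controlnet_conditioning_scale=0.5,
).images[0]
image.save("sdxl_controlnet_canny_bird.png")
```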
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/dit/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/dit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..62eae6ad873171dc0f578593d9dabb88271c519f
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/pipelines/dit/__init__.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import PPDIFFUSERS_SLOW_IMPORT, _LazyModule
+
+_import_structure = {"pipeline_dit": ["DiTPipeline"]}
+
+if TYPE_CHECKING or PPDIFFUSERS_SLOW_IMPORT:
+ from .pipeline_dit import DiTPipeline
+
+else:
+ import sys
+
+ sys.modules[__name__] = _LazyModule(
+ __name__,
+ globals()["__file__"],
+ _import_structure,
+ module_spec=__spec__,
+ )
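For context, here is a minimal sketch of how the lazily exported `DiTPipeline` is typically driven for class-conditional ImageNet generation. The checkpoint id and the `get_label_ids` helper follow the upstream diffusers API and are assumptions as far as this diff is concerned.

```python
import paddle

from ppdiffusers import DiTPipeline, DPMSolverMultistepScheduler

pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", paddle_dtype=paddle.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# DiT is class-conditional: map ImageNet class names to label ids first.
class_ids = pipe.get_label_ids(["golden retriever"])
image = pipe(class_labels=class_ids, num_inference_steps=25).images[0]
image.save("dit_golden_retriever.png")
```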
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ab67b337fc3f0f4ab5030f5ea3f81110734f191
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/__init__.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import (
+ PPDIFFUSERS_SLOW_IMPORT,
+ OptionalDependencyNotAvailable,
+ _LazyModule,
+ get_objects_from_module,
+ is_paddle_available,
+ is_paddlenlp_available,
+)
+
+_dummy_objects = {}
+_import_structure = {}
+
+try:
+ if not (is_paddlenlp_available() and is_paddle_available()):
+ raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+ from ...utils import dummy_paddle_objects # noqa F403
+
+ _dummy_objects.update(get_objects_from_module(dummy_paddle_objects))
+else:
+ _import_structure["scheduling_karras_ve"] = ["KarrasVeScheduler"]
+ _import_structure["scheduling_sde_vp"] = ["ScoreSdeVpScheduler"]
+
+if TYPE_CHECKING or PPDIFFUSERS_SLOW_IMPORT:
+ try:
+ if not is_paddle_available():
+ raise OptionalDependencyNotAvailable()
+
+ except OptionalDependencyNotAvailable:
+ from ...utils.dummy_paddle_objects import * # noqa F403
+ else:
+ from .scheduling_karras_ve import KarrasVeScheduler
+ from .scheduling_sde_vp import ScoreSdeVpScheduler
+
+
+else:
+ import sys
+
+ sys.modules[__name__] = _LazyModule(
+ __name__,
+ globals()["__file__"],
+ _import_structure,
+ module_spec=__spec__,
+ )
+
+ for name, value in _dummy_objects.items():
+ setattr(sys.modules[__name__], name, value)
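A short sketch of what the lazy module above enables, assuming paddle and paddlenlp are installed: the deprecated schedulers remain importable from this submodule and behave like ordinary scheduler classes.

```python
from ppdiffusers.schedulers.deprecated import KarrasVeScheduler, ScoreSdeVpScheduler

# The first attribute access resolves the lazy import configured above.
karras = KarrasVeScheduler()
karras.set_timesteps(10)
print(karras.schedule)  # the sigma(t_i) noise schedule as a paddle tensor

sde_vp = ScoreSdeVpScheduler()
sde_vp.set_timesteps(10)
print(sde_vp.timesteps)  # continuous timesteps from 1 down to sampling_eps
```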
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_karras_ve.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_karras_ve.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d50991869b7224e45d57950da5f44dc369adeac
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_karras_ve.py
@@ -0,0 +1,243 @@
+# Copyright 2023 NVIDIA and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from dataclasses import dataclass
+from typing import Optional, Tuple, Union
+
+import numpy as np
+import paddle
+
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils import BaseOutput
+from ...utils.paddle_utils import randn_tensor
+from ..scheduling_utils import SchedulerMixin
+
+
+@dataclass
+class KarrasVeOutput(BaseOutput):
+ """
+ Output class for the scheduler's step function output.
+
+ Args:
+ prev_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+ denoising loop.
+ derivative (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Derivative of predicted original image sample (x_0).
+ pred_original_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+ `pred_original_sample` can be used to preview progress or for guidance.
+ """
+
+ prev_sample: paddle.Tensor
+ derivative: paddle.Tensor
+ pred_original_sample: Optional[paddle.Tensor] = None
+
+
+class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
+ """
+ A stochastic scheduler tailored to variance-expanding models.
+
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+ methods the library implements for all schedulers such as loading and saving.
+
+
+
+ For more details on the parameters, see [Appendix E](https://arxiv.org/abs/2206.00364). The grid search values used
+ to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of the paper.
+
+
+
+ Args:
+ sigma_min (`float`, defaults to 0.02):
+ The minimum noise magnitude.
+ sigma_max (`float`, defaults to 100):
+ The maximum noise magnitude.
+ s_noise (`float`, defaults to 1.007):
+ The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000,
+ 1.011].
+ s_churn (`float`, defaults to 80):
+ The parameter controlling the overall amount of stochasticity. A reasonable range is [0, 100].
+ s_min (`float`, defaults to 0.05):
+ The start value of the sigma range to add noise (enable stochasticity). A reasonable range is [0, 10].
+ s_max (`float`, defaults to 50):
+ The end value of the sigma range to add noise. A reasonable range is [0.2, 80].
+ """
+
+ order = 2
+
+ @register_to_config
+ def __init__(
+ self,
+ sigma_min: float = 0.02,
+ sigma_max: float = 100,
+ s_noise: float = 1.007,
+ s_churn: float = 80,
+ s_min: float = 0.05,
+ s_max: float = 50,
+ ):
+ # standard deviation of the initial noise distribution
+ self.init_noise_sigma = sigma_max
+
+ # setable values
+ self.num_inference_steps: int = None
+ self.timesteps: paddle.Tensor = None
+ self.schedule: paddle.Tensor = None # sigma(t_i)
+
+ def scale_model_input(self, sample: paddle.Tensor, timestep: Optional[int] = None) -> paddle.Tensor:
+ """
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+ current timestep.
+
+ Args:
+ sample (`paddle.Tensor`):
+ The input sample.
+ timestep (`int`, *optional*):
+ The current timestep in the diffusion chain.
+
+ Returns:
+ `paddle.Tensor`:
+ A scaled input sample.
+ """
+ return sample
+
+ def set_timesteps(self, num_inference_steps: int):
+ """
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+
+ Args:
+ num_inference_steps (`int`):
+ The number of diffusion steps used when generating samples with a pre-trained model.
+ """
+ self.num_inference_steps = num_inference_steps
+ timesteps = np.arange(0, self.num_inference_steps)[::-1].copy()
+ self.timesteps = paddle.to_tensor(timesteps)
+ schedule = [
+ (
+ self.config.sigma_max**2
+ * (self.config.sigma_min**2 / self.config.sigma_max**2) ** (i / (num_inference_steps - 1))
+ )
+ for i in self.timesteps
+ ]
+ self.schedule = paddle.to_tensor(schedule, dtype=paddle.float32)
+
+ def add_noise_to_input(
+ self, sample: paddle.Tensor, sigma: float, generator: Optional[paddle.Generator] = None
+ ) -> Tuple[paddle.Tensor, float]:
+ """
+ Explicit Langevin-like "churn" step of adding noise to the sample according to a `gamma_i ≥ 0` to reach a
+ higher noise level `sigma_hat = sigma_i + gamma_i*sigma_i`.
+
+ Args:
+ sample (`paddle.Tensor`):
+ The input sample.
+ sigma (`float`):
+ generator (`paddle.Generator`, *optional*):
+ A random number generator.
+ """
+ if self.config.s_min <= sigma <= self.config.s_max:
+ gamma = min(self.config.s_churn / self.num_inference_steps, 2**0.5 - 1)
+ else:
+ gamma = 0
+
+ # sample eps ~ N(0, S_noise^2 * I)
+ eps = self.config.s_noise * randn_tensor(sample.shape, generator=generator)
+ sigma_hat = sigma + gamma * sigma
+ sample_hat = sample + ((sigma_hat**2 - sigma**2) ** 0.5 * eps)
+
+ return sample_hat, sigma_hat
+
+ def step(
+ self,
+ model_output: paddle.Tensor,
+ sigma_hat: float,
+ sigma_prev: float,
+ sample_hat: paddle.Tensor,
+ return_dict: bool = True,
+ ) -> Union[KarrasVeOutput, Tuple]:
+ """
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+ process from the learned model outputs (most often the predicted noise).
+
+ Args:
+ model_output (`paddle.Tensor`):
+ The direct output from learned diffusion model.
+ sigma_hat (`float`):
+ sigma_prev (`float`):
+ sample_hat (`paddle.Tensor`):
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] or `tuple`.
+
+ Returns:
+ [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] or `tuple`:
+ If return_dict is `True`, [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] is returned,
+ otherwise a tuple is returned where the first element is the sample tensor.
+
+ """
+
+ pred_original_sample = sample_hat + sigma_hat * model_output
+ derivative = (sample_hat - pred_original_sample) / sigma_hat
+ sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative
+
+ if not return_dict:
+ return (sample_prev, derivative)
+
+ return KarrasVeOutput(
+ prev_sample=sample_prev, derivative=derivative, pred_original_sample=pred_original_sample
+ )
+
+ def step_correct(
+ self,
+ model_output: paddle.Tensor,
+ sigma_hat: float,
+ sigma_prev: float,
+ sample_hat: paddle.Tensor,
+ sample_prev: paddle.Tensor,
+ derivative: paddle.Tensor,
+ return_dict: bool = True,
+ ) -> Union[KarrasVeOutput, Tuple]:
+ """
+ Corrects the predicted sample based on the `model_output` of the network.
+
+ Args:
+ model_output (`paddle.Tensor`):
+ The direct output from learned diffusion model.
+ sigma_hat (`float`): TODO
+ sigma_prev (`float`): TODO
+ sample_hat (`paddle.Tensor`): TODO
+ sample_prev (`paddle.Tensor`): TODO
+ derivative (`paddle.Tensor`): TODO
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
+
+ Returns:
+ prev_sample (TODO): updated sample in the diffusion chain. derivative (TODO): TODO
+
+ """
+ pred_original_sample = sample_prev + sigma_prev * model_output
+ derivative_corr = (sample_prev - pred_original_sample) / sigma_prev
+ sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr)
+
+ if not return_dict:
+ return (sample_prev, derivative)
+
+ return KarrasVeOutput(
+ prev_sample=sample_prev, derivative=derivative, pred_original_sample=pred_original_sample
+ )
+
+ def add_noise(self, original_samples, noise, timesteps):
+ raise NotImplementedError()
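To make the interplay of `add_noise_to_input`, `step`, and `step_correct` concrete, below is a minimal sketch of the stochastic sampling loop from Karras et al. (2022) that this scheduler targets. The `dummy_denoiser` is a placeholder for a trained Karras-VE model returning an object with a `.sample` attribute; it is not part of the library.

```python
from types import SimpleNamespace

import paddle

from ppdiffusers.schedulers.deprecated import KarrasVeScheduler


def dummy_denoiser(x, sigma):
    # Placeholder for a trained Karras-VE UNet; here it simply predicts zeros.
    return SimpleNamespace(sample=paddle.zeros_like(x))


scheduler = KarrasVeScheduler()
scheduler.set_timesteps(num_inference_steps=20)

# start from pure noise at the maximum noise level
sample = paddle.randn([1, 3, 64, 64]) * scheduler.init_noise_sigma

for t in scheduler.timesteps:
    t = int(t)
    sigma = scheduler.schedule[t]
    sigma_prev = scheduler.schedule[t - 1] if t > 0 else 0.0

    # 1. stochastic "churn": raise the noise level from sigma to sigma_hat
    sample_hat, sigma_hat = scheduler.add_noise_to_input(sample, sigma)

    # 2. Euler step from sigma_hat down to sigma_prev
    model_output = (sigma_hat / 2) * dummy_denoiser((sample_hat + 1) / 2, sigma_hat / 2).sample
    step_output = scheduler.step(model_output, sigma_hat, sigma_prev, sample_hat)

    # 3. second-order correction on all but the final step
    if sigma_prev != 0:
        model_output = (sigma_prev / 2) * dummy_denoiser(
            (step_output.prev_sample + 1) / 2, sigma_prev / 2
        ).sample
        step_output = scheduler.step_correct(
            model_output,
            sigma_hat,
            sigma_prev,
            sample_hat,
            step_output.prev_sample,
            step_output.derivative,
        )

    sample = step_output.prev_sample
```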
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_sde_vp.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_sde_vp.py
new file mode 100644
index 0000000000000000000000000000000000000000..c63036c9f4b894e8ca1f33701b9082e0606ff52e
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/deprecated/scheduling_sde_vp.py
@@ -0,0 +1,110 @@
+# Copyright 2023 Google Brain and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch
+
+import math
+
+import paddle
+
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils.paddle_utils import randn_tensor
+from ..scheduling_utils import SchedulerMixin
+
+
+class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
+ """
+ `ScoreSdeVpScheduler` is a variance preserving stochastic differential equation (SDE) scheduler.
+
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+ methods the library implements for all schedulers such as loading and saving.
+
+ Args:
+ num_train_timesteps (`int`, defaults to 2000):
+ The number of diffusion steps to train the model.
+ beta_min (`float`, defaults to 0.1):
+ The initial beta value of the variance-preserving SDE.
+ beta_max (`float`, defaults to 20):
+ The final beta value of the variance-preserving SDE.
+ sampling_eps (`float`, defaults to 1e-3):
+ The end value of sampling where timesteps decrease progressively from 1 to epsilon.
+ """
+
+ order = 1
+
+ @register_to_config
+ def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3):
+ self.sigmas = None
+ self.discrete_sigmas = None
+ self.timesteps = None
+
+ def set_timesteps(self, num_inference_steps):
+ """
+ Sets the continuous timesteps used for the diffusion chain (to be run before inference).
+
+ Args:
+ num_inference_steps (`int`):
+ The number of diffusion steps used when generating samples with a pre-trained model.
+ """
+ self.timesteps = paddle.linspace(1, self.config.sampling_eps, num_inference_steps)
+
+ def step_pred(self, score, x, t, generator=None):
+ """
+ Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
+ process from the learned model outputs (most often the predicted noise).
+
+ Args:
+ score (`paddle.Tensor`):
+ The score (output of the learned diffusion model).
+ x (`paddle.Tensor`):
+ The current sample in the diffusion chain.
+ t (`paddle.Tensor`):
+ The current continuous timestep in the diffusion chain.
+ generator (`paddle.Generator`, *optional*):
+ A random number generator.
+ """
+ if self.timesteps is None:
+ raise ValueError(
+ "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler"
+ )
+
+ # TODO(Patrick) better comments + non-Paddle
+ # postprocess model score
+ log_mean_coeff = (
+ -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min
+ )
+ std = paddle.sqrt(1.0 - paddle.exp(2.0 * log_mean_coeff))
+ std = std.flatten()
+ while len(std.shape) < len(score.shape):
+ std = std.unsqueeze(-1)
+ score = -score / std
+
+ # compute the reverse-time SDE update (Euler-Maruyama step)
+ dt = -1.0 / len(self.timesteps)
+
+ beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min)
+ beta_t = beta_t.flatten()
+ while len(beta_t.shape) < len(x.shape):
+ beta_t = beta_t.unsqueeze(-1)
+ drift = -0.5 * beta_t * x
+
+ diffusion = paddle.sqrt(beta_t)
+ drift = drift - diffusion**2 * score
+ x_mean = x + drift * dt
+
+ # add noise
+ noise = randn_tensor(x.shape, generator=generator, dtype=x.dtype)
+ x = x_mean + diffusion * math.sqrt(-dt) * noise
+
+ return x, x_mean
+
+ def __len__(self):
+ return self.config.num_train_timesteps
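A minimal sketch of how `ScoreSdeVpScheduler` is driven: `set_timesteps` fixes the continuous time grid and `step_pred` performs one Euler-Maruyama update of the reverse-time SDE per step. The `score_model` stand-in and the sample shape are hypothetical; only the scheduler calls come from the file above.

```python
import paddle

from ppdiffusers.schedulers.deprecated.scheduling_sde_vp import ScoreSdeVpScheduler


def score_model(x, t):
    # Hypothetical stand-in for a learned score network s_theta(x, t).
    return paddle.zeros_like(x)


scheduler = ScoreSdeVpScheduler()
scheduler.set_timesteps(num_inference_steps=100)

x = paddle.randn([4, 3, 32, 32])
for t in scheduler.timesteps:
    t_batch = paddle.full([x.shape[0]], float(t), dtype=x.dtype)  # one timestep value per sample
    score = score_model(x, t_batch)
    x, x_mean = scheduler.step_pred(score, x, t_batch)
# x_mean holds the final, noise-free estimate
```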
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecff93753b32dea4e0625006b6d457681611a8d6
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/__init__.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+from ...utils import (
+ OptionalDependencyNotAvailable,
+ is_paddle_available,
+ is_scipy_available,
+)
+
+try:
+ if not is_paddle_available():
+ raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+ from ...utils.dummy_paddle_objects import * # noqa F403
+else:
+ from .preconfig_scheduling_euler_ancestral_discrete import (
+ PreconfigEulerAncestralDiscreteScheduler,
+ )
+try:
+ if not (is_paddle_available() and is_scipy_available()):
+ raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+ from ...utils.dummy_paddle_and_scipy_objects import * # noqa F403
+else:
+ from .preconfig_scheduling_lms_discrete import PreconfigLMSDiscreteScheduler
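A short sketch of what the guarded imports above mean in practice: the Paddle-only Euler ancestral variant always imports, while the LMS variant needs SciPy and otherwise resolves to a dummy placeholder object. The availability check below is only illustrative.

```python
from ppdiffusers.schedulers.preconfig import PreconfigEulerAncestralDiscreteScheduler
from ppdiffusers.utils import is_scipy_available

euler_a = PreconfigEulerAncestralDiscreteScheduler()  # requires only paddle

if is_scipy_available():
    from ppdiffusers.schedulers.preconfig import PreconfigLMSDiscreteScheduler

    lms = PreconfigLMSDiscreteScheduler()  # scipy.integrate is used to compute the LMS coefficients
else:
    print("scipy not installed: PreconfigLMSDiscreteScheduler is only a dummy placeholder")
```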
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_euler_ancestral_discrete.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_euler_ancestral_discrete.py
new file mode 100644
index 0000000000000000000000000000000000000000..b45428de9e12c35acf25d98c53df0f773e1e5ed1
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_euler_ancestral_discrete.py
@@ -0,0 +1,313 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import paddle
+
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils import BaseOutput, logging, randn_tensor
+from ..scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+
+@dataclass
+# Copied from ppdiffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerAncestralDiscrete
+class PreconfigEulerAncestralDiscreteSchedulerOutput(BaseOutput):
+ """
+ Output class for the scheduler's step function output.
+
+ Args:
+ prev_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+ denoising loop.
+ pred_original_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+ `pred_original_sample` can be used to preview progress or for guidance.
+ """
+
+ prev_sample: paddle.Tensor
+ pred_original_sample: Optional[paddle.Tensor] = None
+
+
+# Copied from ppdiffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
+def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> paddle.Tensor:
+ """
+ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
+ (1-beta) over time from t = [0,1].
+
+ Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+ to that part of the diffusion process.
+
+
+ Args:
+ num_diffusion_timesteps (`int`): the number of betas to produce.
+ max_beta (`float`): the maximum beta to use; use values lower than 1 to
+ prevent singularities.
+
+ Returns:
+ betas (`paddle.Tensor`): the betas used by the scheduler to step the model outputs
+ """
+
+ def alpha_bar(time_step):
+ return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+
+ betas = []
+ for i in range(num_diffusion_timesteps):
+ t1 = i / num_diffusion_timesteps
+ t2 = (i + 1) / num_diffusion_timesteps
+ betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+ return paddle.to_tensor(betas, dtype=paddle.float32)
+
+
+class PreconfigEulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
+ """
+ Ancestral sampling with Euler method steps. Based on the original k-diffusion implementation by Katherine Crowson:
+ https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72
+
+ [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
+ function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
+ [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
+ [`~SchedulerMixin.from_pretrained`] functions.
+
+ Args:
+ num_train_timesteps (`int`): number of diffusion steps used to train the model.
+ beta_start (`float`): the starting `beta` value of inference.
+ beta_end (`float`): the final `beta` value.
+ beta_schedule (`str`):
+ the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+ `linear` or `scaled_linear`.
+ trained_betas (`np.ndarray`, optional):
+ option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
+ prediction_type (`str`, default `epsilon`, optional):
+ prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
+ process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4 of
+ https://imagen.research.google/video/paper.pdf)
+ preconfig (`bool`, defaults to `True`):
+ whether `set_timesteps` precomputes the per-step `sigma_up`/`sigma_down` values and the latent scales,
+ so that `scale_model_input` and `step` can look them up directly by `step_index`.
+
+ """
+
+ _compatibles = [e.name for e in KarrasDiffusionSchedulers]
+ order = 1
+
+ @register_to_config
+ def __init__(
+ self,
+ num_train_timesteps: int = 1000,
+ beta_start: float = 0.0001,
+ beta_end: float = 0.02,
+ beta_schedule: str = "linear",
+ trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+ prediction_type: str = "epsilon",
+ preconfig: bool = True,
+ ):
+ if trained_betas is not None:
+ self.betas = paddle.to_tensor(trained_betas, dtype=paddle.float32)
+ elif beta_schedule == "linear":
+ self.betas = paddle.linspace(beta_start, beta_end, num_train_timesteps, dtype=paddle.float32)
+ elif beta_schedule == "scaled_linear":
+ # this schedule is very specific to the latent diffusion model.
+ self.betas = (
+ paddle.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=paddle.float32) ** 2
+ )
+ elif beta_schedule == "squaredcos_cap_v2":
+ # Glide cosine schedule
+ self.betas = betas_for_alpha_bar(num_train_timesteps)
+ else:
+ raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+
+ self.alphas = 1.0 - self.betas
+ self.alphas_cumprod = paddle.cumprod(self.alphas, 0)
+
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+ sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
+ self.sigmas = paddle.to_tensor(sigmas)
+
+ # standard deviation of the initial noise distribution
+ self.init_noise_sigma = self.sigmas.max()
+
+ # setable values
+ self.num_inference_steps = None
+ timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
+ self.timesteps = paddle.to_tensor(timesteps, dtype=paddle.float32)
+ self.is_scale_input_called = False
+ self.preconfig = preconfig
+ self.step_index_offset = 0
+
+ def scale_model_input(
+ self, sample: paddle.Tensor, timestep: Union[float, paddle.Tensor], **kwargs
+ ) -> paddle.Tensor:
+ """
+ Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm.
+
+ Args:
+ sample (`paddle.Tensor`): input sample
+ timestep (`float` or `paddle.Tensor`): the current timestep in the diffusion chain
+
+ Returns:
+ `paddle.Tensor`: scaled input sample
+ """
+ self.is_scale_input_called = True
+ if kwargs.get("step_index") is not None:
+ step_index = kwargs["step_index"] + self.step_index_offset
+ else:
+ step_index = (self.timesteps == timestep).nonzero().item()
+
+ if not self.preconfig:
+ sigma = self.sigmas[step_index]
+ sample = sample / ((sigma**2 + 1) ** 0.5)
+ return sample
+ else:
+ if step_index > (len(self.latent_scales) - 1):
+ step_index = -1
+ return sample * self.latent_scales[step_index]
+
+ def set_timesteps(self, num_inference_steps: int):
+ """
+ Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+ Args:
+ num_inference_steps (`int`):
+ the number of diffusion steps used when generating samples with a pre-trained model.
+ """
+ self.num_inference_steps = num_inference_steps
+ self.step_index_offset = 0
+
+ timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+ sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
+ sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
+ self.sigmas = paddle.to_tensor(sigmas)
+ self.timesteps = paddle.to_tensor(timesteps, dtype=paddle.float32)
+ if self.preconfig:
+ self.sigma_up = []
+ self.sigma_down = []
+ for step_index_i in range(len(self.timesteps)):
+ sigma_from = self.sigmas[step_index_i]
+ sigma_to = self.sigmas[step_index_i + 1]
+ sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+ sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+ self.sigma_up.append(sigma_up)
+ self.sigma_down.append(sigma_down)
+ self.latent_scales = 1 / ((self.sigmas**2 + 1) ** 0.5)
+
+ def step(
+ self,
+ model_output: paddle.Tensor,
+ timestep: Union[float, paddle.Tensor],
+ sample: paddle.Tensor,
+ generator: Optional[Union[paddle.Generator, List[paddle.Generator]]] = None,
+ return_dict: bool = True,
+ **kwargs
+ ) -> Union[PreconfigEulerAncestralDiscreteSchedulerOutput, Tuple]:
+ """
+ Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+ process from the learned model outputs (most often the predicted noise).
+
+ Args:
+ model_output (`paddle.Tensor`): direct output from learned diffusion model.
+ timestep (`float`): current timestep in the diffusion chain.
+ sample (`paddle.Tensor`):
+ current instance of sample being created by diffusion process.
+ generator (`paddle.Generator`, optional): Random number generator.
+ return_dict (`bool`): option for returning tuple rather than PreconfigEulerAncestralDiscreteSchedulerOutput class
+ step_index (`int`, *optional*, passed via kwargs): index of the current step in the denoising loop; when given, the timestep lookup is skipped.
+ return_pred_original_sample (`bool`, *optional*, passed via kwargs): whether to compute and return `pred_original_sample`.
+
+ Returns:
+ [`~schedulers.scheduling_utils.PreconfigEulerAncestralDiscreteSchedulerOutput`] or `tuple`:
+ [`~schedulers.scheduling_utils.PreconfigEulerAncestralDiscreteSchedulerOutput`] if `return_dict` is True, otherwise
+ a `tuple`. When returning a tuple, the first element is the sample tensor.
+
+ """
+ if not self.is_scale_input_called:
+ logger.warning(
+ "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
+ "See `StableDiffusionPipeline` for a usage example."
+ )
+ if kwargs.get("return_pred_original_sample") is not None:
+ return_pred_original_sample = kwargs["return_pred_original_sample"]
+ else:
+ return_pred_original_sample = True
+ if kwargs.get("step_index") is not None:
+ step_index = kwargs["step_index"] + self.step_index_offset
+ else:
+ step_index = (self.timesteps == timestep).nonzero().item()
+ sigma = self.sigmas[step_index]
+ if self.config.prediction_type == "epsilon" and not return_pred_original_sample:
+ derivative = model_output
+ pred_original_sample = None
+ else:
+ # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+ if self.config.prediction_type == "epsilon":
+ pred_original_sample = sample - sigma * model_output
+ elif self.config.prediction_type == "v_prediction":
+ # * c_out + input * c_skip
+ pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+ elif self.config.prediction_type == "sample":
+ raise NotImplementedError("prediction_type not implemented yet: sample")
+ else:
+ raise ValueError(
+ f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`"
+ )
+ derivative = (sample - pred_original_sample) / sigma
+ if not self.preconfig:
+ sigma_from = self.sigmas[step_index]
+ sigma_to = self.sigmas[step_index + 1]
+ sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5
+ sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
+ else:
+ sigma_up = self.sigma_up[step_index]
+ sigma_down = self.sigma_down[step_index]
+ # 2. Convert to an ODE derivative
+ dt = sigma_down - sigma
+ prev_sample = sample + derivative * dt
+ noise = randn_tensor(model_output.shape, dtype=model_output.dtype, generator=generator)
+ prev_sample = prev_sample + noise * sigma_up
+ if not return_dict:
+ if not return_pred_original_sample:
+ return (prev_sample,)
+ else:
+ return (prev_sample, pred_original_sample)
+
+ return PreconfigEulerAncestralDiscreteSchedulerOutput(
+ prev_sample=prev_sample, pred_original_sample=pred_original_sample
+ )
+
+ def add_noise(
+ self,
+ original_samples: paddle.Tensor,
+ noise: paddle.Tensor,
+ timesteps: paddle.Tensor,
+ ) -> paddle.Tensor:
+ # Fix 0D tensor
+ if paddle.is_tensor(timesteps) and timesteps.ndim == 0:
+ timesteps = timesteps.unsqueeze(0)
+ # Make sure sigmas and timesteps have the same dtype as original_samples
+ self.sigmas = self.sigmas.cast(original_samples.dtype)
+
+ schedule_timesteps = self.timesteps
+ step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+
+ sigma = self.sigmas[step_indices].flatten()
+ while len(sigma.shape) < len(original_samples.shape):
+ sigma = sigma.unsqueeze(-1)
+
+ noisy_samples = original_samples + noise * sigma
+ return noisy_samples
+
+ def __len__(self):
+ return self.config.num_train_timesteps
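A minimal usage sketch of the preconfig fast path defined above: `set_timesteps` precomputes `sigma_up`/`sigma_down` and the latent scales once, and the loop passes an explicit `step_index` so that per-step timestep lookups are skipped. The `denoiser` stand-in and the latent shape are assumptions for illustration.

```python
import paddle

from ppdiffusers.schedulers.preconfig import PreconfigEulerAncestralDiscreteScheduler


def denoiser(x, t):
    # Hypothetical stand-in for a UNet noise prediction.
    return paddle.zeros_like(x)


scheduler = PreconfigEulerAncestralDiscreteScheduler(preconfig=True)
scheduler.set_timesteps(num_inference_steps=20)  # caches sigma_up, sigma_down and latent_scales

latents = paddle.randn([1, 4, 64, 64]) * scheduler.init_noise_sigma
for i, t in enumerate(scheduler.timesteps):
    # passing step_index avoids the `(self.timesteps == t).nonzero()` lookup on every call
    model_input = scheduler.scale_model_input(latents, t, step_index=i)
    noise_pred = denoiser(model_input, t)
    latents = scheduler.step(
        noise_pred, t, latents, step_index=i, return_pred_original_sample=False
    ).prev_sample
```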
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_lms_discrete.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_lms_discrete.py
new file mode 100644
index 0000000000000000000000000000000000000000..450dcb635843e07edd7737d2230b9c6ab7502cd3
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/schedulers/preconfig/preconfig_scheduling_lms_discrete.py
@@ -0,0 +1,340 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import warnings
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import paddle
+from scipy import integrate
+
+from ...configuration_utils import ConfigMixin, register_to_config
+from ...utils import BaseOutput
+from ..scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+@dataclass
+# Copied from ppdiffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->LMSDiscrete
+class PreconfigLMSDiscreteSchedulerOutput(BaseOutput):
+ """
+ Output class for the scheduler's step function output.
+
+ Args:
+ prev_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+ denoising loop.
+ pred_original_sample (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+ `pred_original_sample` can be used to preview progress or for guidance.
+ """
+
+ prev_sample: paddle.Tensor
+ pred_original_sample: Optional[paddle.Tensor] = None
+
+
+# Copied from ppdiffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
+def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
+ """
+ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
+ (1-beta) over time from t = [0,1].
+
+ Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+ to that part of the diffusion process.
+
+
+ Args:
+ num_diffusion_timesteps (`int`): the number of betas to produce.
+ max_beta (`float`): the maximum beta to use; use values lower than 1 to
+ prevent singularities.
+
+ Returns:
+ betas (`paddle.Tensor`): the betas used by the scheduler to step the model outputs
+ """
+
+ def alpha_bar(time_step):
+ return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
+
+ betas = []
+ for i in range(num_diffusion_timesteps):
+ t1 = i / num_diffusion_timesteps
+ t2 = (i + 1) / num_diffusion_timesteps
+ betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
+ return paddle.to_tensor(betas, dtype=paddle.float32)
+
+
+class PreconfigLMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
+ """
+ Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by
+ Katherine Crowson:
+ https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181
+
+ [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
+ function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
+ [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
+ [`~SchedulerMixin.from_pretrained`] functions.
+
+ Args:
+ num_train_timesteps (`int`): number of diffusion steps used to train the model.
+ beta_start (`float`): the starting `beta` value of inference.
+ beta_end (`float`): the final `beta` value.
+ beta_schedule (`str`):
+ the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+ `linear` or `scaled_linear`.
+ trained_betas (`np.ndarray`, optional):
+ option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
+ prediction_type (`str`, default `epsilon`, optional):
+ prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
+ process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4 of
+ https://imagen.research.google/video/paper.pdf)
+ preconfig (`bool`, defaults to `True`):
+ whether `set_timesteps` precomputes the linear multistep coefficients and the latent scales, so that
+ `step` only has to sum the cached derivatives.
+ """
+
+ _compatibles = [e.name for e in KarrasDiffusionSchedulers]
+ order = 1
+
+ @register_to_config
+ def __init__(
+ self,
+ num_train_timesteps: int = 1000,
+ beta_start: float = 0.0001,
+ beta_end: float = 0.02,
+ beta_schedule: str = "linear",
+ trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+ prediction_type: str = "epsilon",
+ preconfig=True,
+ ):
+ if trained_betas is not None:
+ self.betas = paddle.to_tensor(trained_betas, dtype=paddle.float32)
+ elif beta_schedule == "linear":
+ self.betas = paddle.linspace(beta_start, beta_end, num_train_timesteps, dtype=paddle.float32)
+ elif beta_schedule == "scaled_linear":
+ # this schedule is very specific to the latent diffusion model.
+ self.betas = (
+ paddle.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=paddle.float32) ** 2
+ )
+ elif beta_schedule == "squaredcos_cap_v2":
+ # Glide cosine schedule
+ self.betas = betas_for_alpha_bar(num_train_timesteps)
+ else:
+ raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+
+ self.alphas = 1.0 - self.betas
+ self.alphas_cumprod = paddle.cumprod(self.alphas, 0)
+
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+ sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32)
+ self.sigmas = paddle.to_tensor(sigmas)
+
+ # standard deviation of the initial noise distribution
+ self.init_noise_sigma = self.sigmas.max()
+
+ # setable values
+ self.num_inference_steps = None
+ timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy()
+ self.timesteps = paddle.to_tensor(timesteps, dtype=paddle.float32)
+ self.derivatives = []
+ self.is_scale_input_called = False
+ self.preconfig = preconfig
+
+ def scale_model_input(
+ self, sample: paddle.Tensor, timestep: Union[float, paddle.Tensor], **kwargs
+ ) -> paddle.Tensor:
+ """
+ Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the K-LMS algorithm.
+
+ Args:
+ sample (`paddle.Tensor`): input sample
+ timestep (`float` or `paddle.Tensor`): the current timestep in the diffusion chain
+
+ Returns:
+ `paddle.Tensor`: scaled input sample
+ """
+ if kwargs.get("step_index") is not None:
+ step_index = kwargs["step_index"]
+ else:
+ step_index = (self.timesteps == timestep).nonzero().item()
+ self.is_scale_input_called = True
+ if not self.preconfig:
+ sigma = self.sigmas[step_index]
+ sample = sample / ((sigma**2 + 1) ** 0.5)
+ return sample
+ else:
+ return sample * self.latent_scales[step_index]
+
+ def get_lms_coefficient(self, order, t, current_order):
+ """
+ Compute a linear multistep coefficient.
+
+ Args:
+ order (`int`): the order of the linear multistep method.
+ t (`int`): the index of the current timestep in the sigma schedule.
+ current_order (`int`): the order of the derivative the coefficient is computed for.
+ """
+
+ def lms_derivative(tau):
+ prod = 1.0
+ for k in range(order):
+ if current_order == k:
+ continue
+ prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k])
+ return prod
+
+ integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0]
+
+ return integrated_coeff
+
+ def set_timesteps(self, num_inference_steps: int, preconfig_order: int = 4):
+ """
+ Sets the timesteps used for the diffusion chain. Supporting function to be run before inference.
+
+ Args:
+ num_inference_steps (`int`):
+ the number of diffusion steps used when generating samples with a pre-trained model.
+ preconfig_order (`int`, defaults to 4):
+ the order of the linear multistep method whose coefficients are precomputed when `preconfig` is enabled.
+ """
+ self.num_inference_steps = num_inference_steps
+
+ timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy()
+ sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)
+ sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
+ sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
+ self.sigmas = paddle.to_tensor(sigmas)
+ self.timesteps = paddle.to_tensor(timesteps, dtype=paddle.float32)
+
+ self.derivatives = []
+ if self.preconfig:
+ self.order = preconfig_order
+ self.lms_coeffs = []
+ self.latent_scales = [1.0 / ((sigma**2 + 1) ** 0.5) for sigma in self.sigmas]
+ for step_index in range(self.num_inference_steps):
+ order = min(step_index + 1, preconfig_order)
+ self.lms_coeffs.append(
+ [self.get_lms_coefficient(order, step_index, curr_order) for curr_order in range(order)]
+ )
+
+ def step(
+ self,
+ model_output: paddle.Tensor,
+ timestep: Union[float, paddle.Tensor],
+ sample: paddle.Tensor,
+ order: int = 4,
+ return_dict: bool = True,
+ **kwargs
+ ) -> Union[PreconfigLMSDiscreteSchedulerOutput, Tuple]:
+ """
+ Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
+ process from the learned model outputs (most often the predicted noise).
+
+ Args:
+ model_output (`paddle.Tensor`): direct output from learned diffusion model.
+ timestep (`float`): current timestep in the diffusion chain.
+ sample (`paddle.Tensor`):
+ current instance of sample being created by diffusion process.
+ order (`int`): the order of the linear multistep method used for this step.
+ return_dict (`bool`): option for returning tuple rather than PreconfigLMSDiscreteSchedulerOutput class
+ Args in kwargs:
+ step_index (`int`): index of the current step in the denoising loop; when given, the timestep lookup is skipped.
+ return_pred_original_sample (`bool`): whether to compute and return `pred_original_sample`.
+
+ Returns:
+ [`~schedulers.scheduling_utils.PreconfigLMSDiscreteSchedulerOutput`] or `tuple`:
+ [`~schedulers.scheduling_utils.PreconfigLMSDiscreteSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`.
+ When returning a tuple, the first element is the sample tensor.
+
+ """
+ if not self.is_scale_input_called:
+ warnings.warn(
+ "The `scale_model_input` function should be called before `step` to ensure correct denoising. "
+ "See `StableDiffusionPipeline` for a usage example."
+ )
+ if kwargs.get("return_pred_original_sample") is not None:
+ return_pred_original_sample = kwargs["return_pred_original_sample"]
+ else:
+ return_pred_original_sample = True
+ if kwargs.get("step_index") is not None:
+ step_index = kwargs["step_index"]
+ else:
+ step_index = (self.timesteps == timestep).nonzero().item()
+ if self.config.prediction_type == "epsilon" and not return_pred_original_sample:
+ # pred_original_sample is not needed, so skip computing it
+ self.derivatives.append(model_output)
+ pred_original_sample = None
+ else:
+ sigma = self.sigmas[step_index]
+ # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
+ if self.config.prediction_type == "epsilon":
+ pred_original_sample = sample - sigma * model_output
+ elif self.config.prediction_type == "v_prediction":
+ # * c_out + input * c_skip
+ pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
+ elif self.config.prediction_type == "sample":
+ pred_original_sample = model_output
+ else:
+ raise ValueError(
+ f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`"
+ )
+ # 2. Convert to an ODE derivative
+ derivative = (sample - pred_original_sample) / sigma
+ self.derivatives.append(derivative)
+
+ if len(self.derivatives) > order:
+ self.derivatives.pop(0)
+
+ if not self.preconfig:
+ # 3. If not preconfigured, compute the linear multistep coefficients on the fly.
+ order = min(step_index + 1, order)
+ lms_coeffs = [self.get_lms_coefficient(order, step_index, curr_order) for curr_order in range(order)]
+ # 4. Compute previous sample based on the derivatives path
+ prev_sample = sample + sum(
+ coeff * derivative for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives))
+ )
+ else:
+ # 3. If preconfigured, directly compute the previous sample from the cached coefficients and the derivatives path
+ prev_sample = sample + sum(
+ coeff * derivative
+ for coeff, derivative in zip(self.lms_coeffs[step_index], reversed(self.derivatives))
+ )
+
+ if not return_dict:
+ if not return_pred_original_sample:
+ return (prev_sample,)
+ else:
+ return (prev_sample, pred_original_sample)
+
+ return PreconfigLMSDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+ def add_noise(
+ self,
+ original_samples: paddle.Tensor,
+ noise: paddle.Tensor,
+ timesteps: paddle.Tensor,
+ ) -> paddle.Tensor:
+ # Fix 0D tensor
+ if paddle.is_tensor(timesteps) and timesteps.ndim == 0:
+ timesteps = timesteps.unsqueeze(0)
+ # Make sure sigmas and timesteps have the same dtype as original_samples
+ sigmas = self.sigmas.cast(original_samples.dtype)
+ schedule_timesteps = self.timesteps
+
+ step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
+
+ sigma = sigmas[step_indices].flatten()
+ while len(sigma.shape) < len(original_samples.shape):
+ sigma = sigma.unsqueeze(-1)
+
+ noisy_samples = original_samples + noise * sigma
+ return noisy_samples
+
+ def __len__(self):
+ return self.config.num_train_timesteps
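The same pattern applies to the preconfigured LMS scheduler above: `set_timesteps` caches the linear multistep coefficients and latent scales, so `step` only sums the stored derivatives. A minimal sketch, with a hypothetical `denoiser` stand-in:

```python
import paddle

from ppdiffusers.schedulers.preconfig import PreconfigLMSDiscreteScheduler


def denoiser(x, t):
    # Hypothetical stand-in for a UNet noise prediction.
    return paddle.zeros_like(x)


scheduler = PreconfigLMSDiscreteScheduler(preconfig=True)
scheduler.set_timesteps(num_inference_steps=20, preconfig_order=4)  # caches LMS coefficients and latent scales

latents = paddle.randn([1, 4, 64, 64]) * scheduler.init_noise_sigma
for i, t in enumerate(scheduler.timesteps):
    model_input = scheduler.scale_model_input(latents, t, step_index=i)
    noise_pred = denoiser(model_input, t)
    latents = scheduler.step(
        noise_pred, t, latents, step_index=i, return_pred_original_sample=False
    ).prev_sample
```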
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/__init__.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f82ac7ab81aa87600e3dfab5ecd9550fee617c4f
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/__init__.py
@@ -0,0 +1,218 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from pathlib import Path
+from typing import Dict, Literal, Optional, Union
+
+from huggingface_hub.utils import (
+ EntryNotFoundError,
+ LocalEntryNotFoundError,
+ RepositoryNotFoundError,
+ RevisionNotFoundError,
+)
+from requests import HTTPError
+
+from .aistudio_hub_download import (
+ aistudio_hub_download,
+ aistudio_hub_file_exists,
+ aistudio_hub_try_to_load_from_cache,
+)
+from .bos_download import bos_download, bos_file_exists, bos_try_to_load_from_cache
+from .hf_hub_download import (
+ hf_hub_download,
+ hf_hub_file_exists,
+ hf_hub_try_to_load_from_cache,
+)
+
+
+def bos_aistudio_hf_download(
+ repo_id: str = None,
+ filename: str = None,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ cache_dir: Union[str, Path, None] = None,
+ local_dir: Union[str, Path, None] = None,
+ local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
+ user_agent: Union[Dict, str, None] = None,
+ force_download: bool = False,
+ proxies: Optional[Dict] = None,
+ etag_timeout: float = 10,
+ resume_download: bool = False,
+ token: Union[bool, str, None] = None,
+ local_files_only: bool = False,
+ endpoint: Optional[str] = None,
+ url: Optional[str] = None,
+ from_bos: bool = True,
+ from_aistudio: bool = False,
+ from_hf_hub: bool = False,
+) -> str:
+ assert repo_id is not None, "repo_id cannot be None"
+ assert filename is not None, "filename cannot be None"
+
+ download_kwargs = dict(
+ repo_id=repo_id,
+ filename=filename,
+ subfolder=subfolder if subfolder is not None else "",
+ repo_type=repo_type,
+ revision=revision,
+ library_name=library_name,
+ library_version=library_version,
+ cache_dir=cache_dir,
+ local_dir=local_dir,
+ local_dir_use_symlinks=local_dir_use_symlinks,
+ user_agent=user_agent,
+ force_download=force_download,
+ proxies=proxies,
+ etag_timeout=etag_timeout,
+ resume_download=resume_download,
+ token=token,
+ local_files_only=local_files_only,
+ endpoint=endpoint,
+ )
+ cached_file = None
+ log_endpoint = "N/A"
+ log_filename = os.path.join(download_kwargs["subfolder"], filename)
+ try:
+ if from_aistudio:
+ log_endpoint = "Aistudio Hub"
+ cached_file = aistudio_hub_download(
+ **download_kwargs,
+ )
+ elif from_hf_hub:
+ log_endpoint = "Huggingface Hub"
+ cached_file = hf_hub_download(
+ **download_kwargs,
+ )
+ else:
+ log_endpoint = "BOS"
+ download_kwargs["url"] = url
+ cached_file = bos_download(
+ **download_kwargs,
+ )
+ except LocalEntryNotFoundError:
+ raise EnvironmentError(
+ "Cannot find the requested files in the cached path and"
+ " outgoing traffic has been disabled. To enable model look-ups"
+ " and downloads online, set 'local_files_only' to False."
+ )
+ except RepositoryNotFoundError:
+ raise EnvironmentError(
+ f"{repo_id} is not a local folder and is not a valid model identifier "
+ f"listed on '{log_endpoint}'\nIf this is a private repository, make sure to pass a "
+ "token having permission to this repo."
+ )
+ except RevisionNotFoundError:
+ raise EnvironmentError(
+ f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for "
+ "this model name. Check the model page at "
+ f"'{log_endpoint}' for available revisions."
+ )
+ except EntryNotFoundError:
+ raise EnvironmentError(f"{repo_id} does not appear to have a file named {log_filename}.")
+ except HTTPError as err:
+ raise EnvironmentError(f"There was a specific connection error when trying to load {repo_id}:\n{err}")
+ except ValueError:
+ raise EnvironmentError(
+ f"We couldn't connect to '{log_endpoint}' to load this model, couldn't find it"
+ f" in the cached files and it looks like {repo_id} is not the path to a"
+ f" directory containing a file named {log_filename} or"
+ " \nCheckout your internet connection or see how to run the library in offline mode."
+ )
+ except EnvironmentError:
+ raise EnvironmentError(
+ f"Can't load the model for '{repo_id}'. If you were trying to load it from "
+ f"'{log_endpoint}', make sure you don't have a local directory with the same name. "
+ f"Otherwise, make sure '{repo_id}' is the correct path to a directory "
+ f"containing a file named {log_filename}"
+ )
+ return cached_file
+
+
+def bos_aistudio_hf_file_exist(
+ repo_id: str = None,
+ filename: str = None,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ token: Optional[str] = None,
+ endpoint: Optional[str] = None,
+ from_bos: bool = True,
+ from_aistudio: bool = False,
+ from_hf_hub: bool = False,
+):
+ assert repo_id is not None, "repo_id cannot be None"
+ assert filename is not None, "filename cannot be None"
+
+ if subfolder is None:
+ subfolder = ""
+ filename = os.path.join(subfolder, filename)
+ if from_aistudio:
+ out = aistudio_hub_file_exists(
+ repo_id=repo_id,
+ filename=filename,
+ repo_type=repo_type,
+ revision=revision,
+ token=token,
+ endpoint=endpoint,
+ )
+ elif from_hf_hub:
+ out = hf_hub_file_exists(
+ repo_id=repo_id,
+ filename=filename,
+ repo_type=repo_type,
+ revision=revision,
+ token=token,
+ )
+ else:
+ out = bos_file_exists(
+ repo_id=repo_id,
+ filename=filename,
+ repo_type=repo_type,
+ revision=revision,
+ token=token,  # a token is not required for BOS downloads
+ endpoint=endpoint,
+ )
+ return out
+
+
+def bos_aistudio_hf_try_to_load_from_cache(
+ repo_id: str,
+ filename: str,
+ cache_dir: Union[str, Path, None] = None,
+ subfolder: str = None,
+ revision: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ from_bos: bool = True,
+ from_aistudio: bool = False,
+ from_hf_hub: bool = False,
+):
+ if subfolder is None:
+ subfolder = ""
+ load_kwargs = dict(
+ repo_id=repo_id,
+ filename=os.path.join(subfolder, filename),
+ cache_dir=cache_dir,
+ revision=revision,
+ repo_type=repo_type,
+ )
+ if from_aistudio:
+ return aistudio_hub_try_to_load_from_cache(**load_kwargs)
+ elif from_hf_hub:
+ return hf_hub_try_to_load_from_cache(**load_kwargs)
+ else:
+ return bos_try_to_load_from_cache(**load_kwargs)
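A hedged sketch of calling the unified helpers above, routed to the Hugging Face Hub backend (BOS is the default; AI Studio is selected with `from_aistudio=True`). The repo id and filename below are only illustrative.

```python
from ppdiffusers.utils.downloader import (
    bos_aistudio_hf_download,
    bos_aistudio_hf_file_exist,
)

repo_id = "runwayml/stable-diffusion-v1-5"  # illustrative repo id
filename = "model_index.json"               # illustrative filename

if bos_aistudio_hf_file_exist(repo_id, filename, from_hf_hub=True):
    local_path = bos_aistudio_hf_download(repo_id=repo_id, filename=filename, from_hf_hub=True)
    print("file cached at:", local_path)
```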
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/aistudio_hub_download.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/aistudio_hub_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..de8f4bc00cb4db6efbdb36248877f452e216434f
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/aistudio_hub_download.py
@@ -0,0 +1,729 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+import logging
+import os
+import re
+import shutil
+import tempfile
+from contextlib import contextmanager
+from functools import partial
+from pathlib import Path
+from typing import Dict, Generator, Literal, Optional, Union
+from urllib.parse import quote
+
+import requests
+from filelock import FileLock
+from huggingface_hub.utils import (
+ EntryNotFoundError,
+ FileMetadataError,
+ GatedRepoError,
+ HfHubHTTPError,
+ LocalEntryNotFoundError,
+ RepositoryNotFoundError,
+ RevisionNotFoundError,
+)
+
+logger = logging.getLogger(__name__)
+
+from .common import (
+ _CACHED_NO_EXIST,
+ DEFAULT_ETAG_TIMEOUT,
+ DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD,
+ DEFAULT_REQUEST_TIMEOUT,
+ AistudioBosFileMetadata,
+ OfflineModeIsEnabled,
+ _cache_commit_hash_for_specific_revision,
+ _check_disk_space,
+ _chmod_and_replace,
+ _create_symlink,
+ _get_pointer_path,
+ _is_true,
+ _normalize_etag,
+ _request_wrapper,
+ _to_local_dir,
+ http_get,
+ raise_for_status,
+ repo_folder_name,
+)
+
+VERSION = "0.1.5"
+ENDPOINT = os.getenv("AISTUDIO_ENDPOINT", "http://git.aistudio.baidu.com")
+
+AISTUDIO_URL_TEMPLATE = ENDPOINT + "/api/v1/repos/{user_name}/{repo_name}/contents/{filename}"
+
+
+default_home = os.path.join(os.path.expanduser("~"), ".cache")
+AISTUDIO_HOME = os.path.expanduser(
+ os.getenv(
+ "AISTUDIO_HOME",
+ os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "paddle"),
+ )
+)
+default_cache_path = os.path.join(AISTUDIO_HOME, "aistudio")
+AISTUDIO_HUB_CACHE = os.getenv("AISTUDIO_HUB_CACHE", default_cache_path)
+
+
+DEFAULT_REVISION = "master"
+REPO_TYPE_MODEL = "model"
+REPO_TYPES = [None, REPO_TYPE_MODEL]
+
+
+REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
+
+
+# TOKEN
+AISTUDIO_TOKEN_PATH = os.path.join(AISTUDIO_HOME, "token")
+AISTUDIO_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("AISTUDIO_HUB_DISABLE_IMPLICIT_TOKEN"))
+
+
+class LocalTokenNotFoundError(EnvironmentError):
+ """Raised if local token is required but not found."""
+
+
+def _clean_token(token: Optional[str]) -> Optional[str]:
+ """Clean token by removing trailing and leading spaces and newlines.
+
+ If token is an empty string, return None.
+ """
+ if token is None:
+ return None
+ return token.replace("\r", "").replace("\n", "").strip() or None
+
+
+def _get_token_from_environment() -> Optional[str]:
+ return _clean_token(os.environ.get("AISTUDIO_ACCESS_TOKEN") or os.environ.get("AISTUDIO_TOKEN"))
+
+
+def _get_token_from_file() -> Optional[str]:
+ try:
+ return _clean_token(Path(AISTUDIO_TOKEN_PATH).read_text())
+ except FileNotFoundError:
+ return None
+
+
+def get_token() -> Optional[str]:
+ """
+ Get token if user is logged in.
+
+ Note: in most cases, you should use [`build_aistudio_headers`] instead. This method is only useful
+ if you want to retrieve the token for other purposes than sending an HTTP request.
+
+ Token is retrieved in priority from the `AISTUDIO_ACCESS_TOKEN` environment variable. Otherwise, we read the token file located
+ in the Aistudio home folder. Returns None if user is not logged in.
+
+ Returns:
+ `str` or `None`: The token, `None` if it doesn't exist.
+ """
+ return _get_token_from_environment() or _get_token_from_file()
+
+
+def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
+ """Select the token to send from either `token` or the cache."""
+ # Case token is explicitly provided
+ if isinstance(token, str):
+ return token
+
+ # Case token is explicitly forbidden
+ if token is False:
+ return None
+
+ # Token is not provided: we get it from local cache
+ cached_token = get_token()
+
+ # Case token is explicitly required
+ if token is True:
+ if cached_token is None:
+ raise LocalTokenNotFoundError(
+ "Token is required (`token=True`), but no token found. You"
+ " to provide a token or be logged in to Aistudio Hub . See"
+ "https://ai.baidu.com/ai-doc/AISTUDIO/slmkadt9z#2-%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8%E8%AE%BF%E9%97%AE%E4%BB%A4%E7%89%8C."
+ )
+ return cached_token
+
+ # Case implicit use of the token is forbidden by env variable
+ if AISTUDIO_HUB_DISABLE_IMPLICIT_TOKEN:
+ return None
+
+ # Otherwise: we use the cached token as the user has not explicitly forbidden it
+ return cached_token
+
+
+def _validate_token_to_send(token: Optional[str], is_write_action: bool) -> None:
+ if is_write_action:
+ if token is None:
+ raise ValueError(
+ "Token is required (write-access action) but no token found. You need"
+ " to provide a token or be logged in to Aistudio Hub . See"
+ "https://ai.baidu.com/ai-doc/AISTUDIO/slmkadt9z#2-%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8%E8%AE%BF%E9%97%AE%E4%BB%A4%E7%89%8C."
+ )
+
+
+def build_aistudio_headers(
+ *,
+ token: Optional[Union[bool, str]] = None,
+ is_write_action: bool = False,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ user_agent: Union[Dict, str, None] = None,
+) -> Dict[str, str]:
+ # Get auth token to send
+ token_to_send = get_token_to_send(token)
+ _validate_token_to_send(token_to_send, is_write_action=is_write_action)
+
+ # Combine headers
+ headers = {"Content-Type": "application/json", "SDK-Version": str(VERSION)}
+ if token_to_send is not None:
+ headers["Authorization"] = f"token {token_to_send}"
+ return headers
+
+
+def get_aistudio_file_metadata(
+ url: str,
+ token: Union[bool, str, None] = None,
+ proxies: Optional[Dict] = None,
+ timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ user_agent: Union[Dict, str, None] = None,
+):
+ """Fetch metadata of a file versioned on the Hub for a given url.
+
+ Args:
+ url (`str`):
+ File url, for example returned by [`aistudio_hub_url`].
+ token (`str` or `bool`, *optional*):
+ A token to be used for the download.
+ - If `True`, the token is read from the Aistudio config
+ folder.
+ - If `False` or `None`, no token is provided.
+ - If a string, it's used as the authentication token.
+ proxies (`dict`, *optional*):
+ Dictionary mapping protocol to the URL of the proxy passed to
+ `requests.request`.
+ timeout (`float`, *optional*, defaults to 10):
+ How many seconds to wait for the server to send metadata before giving up.
+ library_name (`str`, *optional*):
+ The name of the library to which the object corresponds.
+ library_version (`str`, *optional*):
+ The version of the library.
+ user_agent (`dict`, `str`, *optional*):
+ The user-agent info in the form of a dictionary or a string.
+
+ Returns:
+ A [`AistudioBosFileMetadata`] object containing metadata such as location, etag, size and
+ commit_hash.
+ """
+ headers = build_aistudio_headers(
+ token=token, library_name=library_name, library_version=library_version, user_agent=user_agent
+ )
+ headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file
+
+ # Retrieve metadata
+ r = _request_wrapper(
+ method="GET",
+ url=url,
+ headers=headers,
+ allow_redirects=False,
+ follow_relative_redirects=True,
+ proxies=proxies,
+ timeout=timeout,
+ )
+ raise_for_status(r)
+ res = r.json()
+
+ # Return
+ return AistudioBosFileMetadata(
+ commit_hash=res["sha"],
+ etag=_normalize_etag(res["last_commit_sha"]),
+ location=res["git_url"],
+ size=res["size"],
+ )
+
+
+def aistudio_hub_url(
+ repo_id: str,
+ filename: str,
+ *,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ endpoint: Optional[str] = None,
+) -> str:
+ if subfolder == "":
+ subfolder = None
+ if subfolder is not None:
+ filename = f"{subfolder}/{filename}"
+
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError("Invalid repo type")
+ if revision is None:
+ revision = DEFAULT_REVISION
+
+ # NEW ADD
+ if "/" not in repo_id:
+ raise ValueError("repo_id must be in the format of 'namespace/name'")
+ user_name, repo_name = repo_id.split("/")
+ user_name = user_name.strip()
+ repo_name = repo_name.strip()
+
+ url = AISTUDIO_URL_TEMPLATE.format(
+ user_name=quote(user_name, safe=""), repo_name=quote(repo_name, safe=""), filename=quote(filename)
+ )
+ # Update endpoint if provided
+ if endpoint is not None and url.startswith(ENDPOINT):
+ url = endpoint + url[len(ENDPOINT) :]
+
+ if revision != "master":
+ url += f"?ref={quote(revision, safe='')}"
+ return url
+
+
+def aistudio_hub_download(
+ repo_id: str = None,
+ filename: str = None,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ cache_dir: Union[str, Path, None] = None,
+ local_dir: Union[str, Path, None] = None,
+ local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
+ # TODO
+ user_agent: Union[Dict, str, None] = None,
+ force_download: bool = False,
+ proxies: Optional[Dict] = None,
+ etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
+ resume_download: bool = False,
+ token: Optional[str] = None,
+ local_files_only: bool = False,
+ endpoint: Optional[str] = None,
+ **kwargs,
+):
+
+ if cache_dir is None:
+ cache_dir = AISTUDIO_HUB_CACHE
+ if revision is None:
+ revision = DEFAULT_REVISION
+ if isinstance(cache_dir, Path):
+ cache_dir = str(cache_dir)
+ if isinstance(local_dir, Path):
+ local_dir = str(local_dir)
+ locks_dir = os.path.join(cache_dir, ".locks")
+
+ if subfolder == "":
+ subfolder = None
+ if subfolder is not None:
+ # This is used to create a URL, and not a local path, hence the forward slash.
+ filename = f"{subfolder}/{filename}"
+
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
+
+ storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
+ os.makedirs(storage_folder, exist_ok=True)
+
+ # cross platform transcription of filename, to be used as a local file path.
+ relative_filename = os.path.join(*filename.split("/"))
+ if os.name == "nt":
+ if relative_filename.startswith("..\\") or "\\..\\" in relative_filename:
+ raise ValueError(
+ f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository"
+ " owner to rename this file."
+ )
+
+ # if user provides a commit_hash and they already have the file on disk,
+ # shortcut everything.
+ # TODO: downloading by commit id is not supported yet, so this branch is always taken.
+ if not force_download: # REGEX_COMMIT_HASH.match(revision)
+ pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
+ if os.path.exists(pointer_path):
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ url = aistudio_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
+
+ headers = build_aistudio_headers(
+ token=token,
+ library_name=library_name,
+ library_version=library_version,
+ user_agent=user_agent,
+ )
+ url_to_download = url.replace("/contents/", "/media/")
+
+ etag = None
+ commit_hash = None
+ expected_size = None
+ head_call_error: Optional[Exception] = None
+ if not local_files_only:
+ try:
+ try:
+ metadata = get_aistudio_file_metadata(
+ url=url,
+ token=token,
+ proxies=proxies,
+ timeout=etag_timeout,
+ library_name=library_name,
+ library_version=library_version,
+ user_agent=user_agent,
+ )
+ except EntryNotFoundError as http_error: # noqa: F841
+ raise
+ # Commit hash must exist
+ # TODO: the commit hash is overridden here and forced to equal the revision.
+ commit_hash = revision # metadata.commit_hash
+ if commit_hash is None:
+ raise FileMetadataError(
+ "Distant resource does not seem to be on aistudio hub. It is possible that a configuration issue"
+ " prevents you from downloading resources from aistudio hub. Please check your firewall"
+ " and proxy settings and make sure your SSL certificates are updated."
+ )
+
+ # Etag must exist
+ etag = metadata.etag
+ # We favor a custom header indicating the etag of the linked resource, and
+ # we fallback to the regular etag header.
+ # If we don't have any of those, raise an error.
+ if etag is None:
+ raise FileMetadataError(
+ "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility."
+ )
+
+ # Expected (uncompressed) size
+ expected_size = metadata.size
+
+ except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
+ # Actually raise for those subclasses of ConnectionError
+ raise
+ except (
+ requests.exceptions.ConnectionError,
+ requests.exceptions.Timeout,
+ OfflineModeIsEnabled,
+ ) as error:
+ # Otherwise, our Internet connection is down.
+ # etag is None
+ head_call_error = error
+ pass
+ except (RevisionNotFoundError, EntryNotFoundError):
+ # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
+ raise
+ except requests.HTTPError as error:
+ # Multiple reasons for an http error:
+ # - Repository is private and invalid/missing token sent
+ # - Repository is gated and invalid/missing token sent
+ # - Hub is down (error 500 or 504)
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
+ # (if it's not the case, the error will be re-raised)
+ head_call_error = error
+ pass
+ except FileMetadataError as error:
+ # Multiple reasons for a FileMetadataError:
+ # - Wrong network configuration (proxy, firewall, SSL certificates)
+ # - Inconsistency on the Hub
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
+ # (if it's not the case, the error will be re-raised)
+ head_call_error = error
+ pass
+
+ # etag can be None for several reasons:
+ # 1. we passed local_files_only.
+ # 2. we don't have a connection
+ # 3. Hub is down (HTTP 500 or 504)
+ # 4. repo is not found -for example private or gated- and invalid/missing token sent
+ # 5. Hub is blocked by a firewall or proxy is not set correctly.
+ # => Try to get the last downloaded one from the specified revision.
+ #
+ # If the specified revision is a commit hash, look inside "snapshots".
+ # If the specified revision is a branch or tag, look inside "refs".
+ if etag is None:
+ # In those cases, we cannot force download.
+ if force_download:
+ raise ValueError(
+ "We have no connection or you passed local_files_only, so force_download is not an accepted option."
+ )
+
+ # Try to get "commit_hash" from "revision"
+ commit_hash = None
+ if REGEX_COMMIT_HASH.match(revision):
+ commit_hash = revision
+ else:
+ ref_path = os.path.join(storage_folder, "refs", revision)
+ if os.path.isfile(ref_path):
+ with open(ref_path) as f:
+ commit_hash = f.read()
+
+ # Return pointer file if exists
+ if commit_hash is not None:
+ pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+ if os.path.exists(pointer_path):
+ if local_dir is not None:
+ return _to_local_dir(
+ pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks
+ )
+ return pointer_path
+
+ # If we couldn't find an appropriate file on disk, raise an error.
+ # If files cannot be found and local_files_only=True,
+ # the models might've been found if local_files_only=False
+ # Notify the user about that
+ if local_files_only:
+ raise LocalEntryNotFoundError(
+ "Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable"
+ " aistudio hub look-ups and downloads online, set 'local_files_only' to False."
+ )
+ elif isinstance(head_call_error, RepositoryNotFoundError) or isinstance(head_call_error, GatedRepoError):
+ # Repo not found => let's raise the actual error
+ raise head_call_error
+ else:
+ # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+ raise LocalEntryNotFoundError(
+ "An error happened while trying to locate the file on the Hub and we cannot find the requested files"
+ " in the local cache. Please check your connection and try again or make sure your Internet connection"
+ " is on."
+ ) from head_call_error
+
+ # From now on, etag and commit_hash are not None.
+ assert etag is not None, "etag must have been retrieved from server"
+ assert commit_hash is not None, "commit_hash must have been retrieved from server"
+ blob_path = os.path.join(storage_folder, "blobs", etag)
+ pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+
+ os.makedirs(os.path.dirname(blob_path), exist_ok=True)
+ os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
+ # if passed revision is not identical to commit_hash
+ # then revision has to be a branch name or tag name.
+ # In that case store a ref.
+ _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
+
+ if os.path.exists(pointer_path) and not force_download:
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ if os.path.exists(blob_path) and not force_download:
+ # we have the blob already, but not the pointer
+ if local_dir is not None: # to local dir
+ return _to_local_dir(blob_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ else: # or in snapshot cache
+ _create_symlink(blob_path, pointer_path, new_blob=False)
+ return pointer_path
+
+ # Prevent parallel downloads of the same file with a lock.
+ # etag could be duplicated across repos, so the lock is scoped to this repo's folder.
+ lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
+
+ # Some Windows versions do not allow for paths longer than 255 characters.
+ # In this case, we must specify it is an extended path by using the "\\?\" prefix.
+ if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
+ lock_path = "\\\\?\\" + os.path.abspath(lock_path)
+
+ if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
+ blob_path = "\\\\?\\" + os.path.abspath(blob_path)
+
+ Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+ with FileLock(lock_path):
+ # If the download just completed while the lock was activated.
+ if os.path.exists(pointer_path) and not force_download:
+ # Even if returning early like here, the lock will be released.
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ if resume_download:
+ incomplete_path = blob_path + ".incomplete"
+
+ @contextmanager
+ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
+ with open(incomplete_path, "ab") as f:
+ yield f
+
+ temp_file_manager = _resumable_file_manager
+ if os.path.exists(incomplete_path):
+ resume_size = os.stat(incomplete_path).st_size
+ else:
+ resume_size = 0
+ else:
+ temp_file_manager = partial( # type: ignore
+ tempfile.NamedTemporaryFile, mode="wb", dir=cache_dir, delete=False
+ )
+ resume_size = 0
+
+ # Download to temporary file, then copy to cache dir once finished.
+ # Otherwise you get corrupt cache entries if the download gets interrupted.
+ with temp_file_manager() as temp_file:
+ logger.info("downloading %s to %s", url, temp_file.name)
+
+ if expected_size is not None: # might be None if HTTP header not set correctly
+ # Check tmp path
+ _check_disk_space(expected_size, os.path.dirname(temp_file.name))
+
+ # Check destination
+ _check_disk_space(expected_size, os.path.dirname(blob_path))
+ if local_dir is not None:
+ _check_disk_space(expected_size, local_dir)
+
+ http_get(
+ url_to_download,
+ temp_file,
+ proxies=proxies,
+ resume_size=resume_size,
+ headers=headers,
+ expected_size=expected_size,
+ )
+ if local_dir is None:
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ _create_symlink(blob_path, pointer_path, new_blob=True)
+ else:
+ local_dir_filepath = os.path.join(local_dir, relative_filename)
+ os.makedirs(os.path.dirname(local_dir_filepath), exist_ok=True)
+
+ # If "auto" (default) copy-paste small files to ease manual editing but symlink big files to save disk
+ # In both cases, blob file is cached.
+ is_big_file = os.stat(temp_file.name).st_size > DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD
+ if local_dir_use_symlinks is True or (local_dir_use_symlinks == "auto" and is_big_file):
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ logger.debug("Create symlink to local dir")
+ _create_symlink(blob_path, local_dir_filepath, new_blob=False)
+ elif local_dir_use_symlinks == "auto" and not is_big_file:
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ logger.debug("Duplicate in local dir (small file and use_symlink set to 'auto')")
+ shutil.copyfile(blob_path, local_dir_filepath)
+ else:
+ logger.debug(f"Storing {url} in local_dir at {local_dir_filepath} (not cached).")
+ _chmod_and_replace(temp_file.name, local_dir_filepath)
+ pointer_path = local_dir_filepath # for return value
+
+ return pointer_path
+
+
+def aistudio_hub_file_exists(
+ repo_id: str,
+ filename: str,
+ *,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ token: Optional[str] = None,
+ endpoint: Optional[str] = None,
+) -> bool:
+ """
+ Checks if a file exists in a repository on the Aistudio Hub.
+
+ Args:
+ repo_id (`str`):
+ A namespace (user or an organization) and a repo name separated
+ by a `/`.
+ filename (`str`):
+ The name of the file to check, for example:
+ `"config.json"`
+ repo_type (`str`, *optional*):
+ Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
+ `None` or `"model"` if getting repository info from a model. Default is `None`.
+ revision (`str`, *optional*):
+ The revision of the repository from which to get the information. Defaults to `"main"` branch.
+ token (`bool` or `str`, *optional*):
+ A valid authentication token (see https://huggingface.co/settings/token).
+ If `None` or `True` and machine is logged in (through `huggingface-cli login`
+ or [`~login`]), token will be retrieved from the cache.
+ If `False`, token is not sent in the request header.
+
+ Returns:
+ True if the file exists, False otherwise.
+
+
+
+ Examples:
+ ```py
+ >>> aistudio_hub_file_exists("bigcode/starcoder", "config.json")
+ True
+ >>> aistudio_hub_file_exists("bigcode/starcoder", "not-a-file")
+ False
+ >>> aistudio_hub_file_exists("bigcode/not-a-repo", "config.json")
+ False
+ ```
+
+
+ """
+ url = aistudio_hub_url(
+ repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=endpoint
+ )
+ try:
+ if token is None:
+ token = get_token()
+ get_aistudio_file_metadata(url, token=token)
+ return True
+ except GatedRepoError: # raise specifically on gated repo
+ raise
+ except (RepositoryNotFoundError, EntryNotFoundError, RevisionNotFoundError, HfHubHTTPError):
+ return False
+
+
+def aistudio_hub_try_to_load_from_cache(
+ repo_id: str,
+ filename: str,
+ cache_dir: Union[str, Path, None] = None,
+ revision: Optional[str] = None,
+ repo_type: Optional[str] = None,
+):
+ if revision is None:
+ revision = DEFAULT_REVISION
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
+ if cache_dir is None:
+ cache_dir = AISTUDIO_HUB_CACHE
+
+ object_id = repo_id.replace("/", "--")
+ repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
+ if not os.path.isdir(repo_cache):
+ # No cache for this model
+ return None
+
+ refs_dir = os.path.join(repo_cache, "refs")
+ snapshots_dir = os.path.join(repo_cache, "snapshots")
+ no_exist_dir = os.path.join(repo_cache, ".no_exist")
+
+ # Resolve refs (for instance to convert main to the associated commit sha)
+ if os.path.isdir(refs_dir):
+ revision_file = os.path.join(refs_dir, revision)
+ if os.path.isfile(revision_file):
+ with open(revision_file) as f:
+ revision = f.read()
+
+ # Check if file is cached as "no_exist"
+ if os.path.isfile(os.path.join(no_exist_dir, revision, filename)):
+ return _CACHED_NO_EXIST
+
+ # Check if revision folder exists
+ if not os.path.exists(snapshots_dir):
+ return None
+ cached_shas = os.listdir(snapshots_dir)
+ if revision not in cached_shas:
+ # No cache for this revision and we won't try to return a random revision
+ return None
+
+ # Check if file exists in cache
+ cached_file = os.path.join(snapshots_dir, revision, filename)
+ return cached_file if os.path.isfile(cached_file) else None
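+
+
+# Illustrative usage sketch (comment only, not part of the original module): check the
+# local cache before any network call; `_CACHED_NO_EXIST` marks a file that is cached
+# as known-missing. The repo id below is a placeholder.
+#
+#   cached = aistudio_hub_try_to_load_from_cache("some-org/some-model", "config.json")
+#   if cached is None:
+#       ...  # not cached yet: fall back to downloading from the AI Studio hub
+#   elif cached is _CACHED_NO_EXIST:
+#       ...  # the hub previously reported this file as missing
+#   else:
+#       ...  # `cached` is the path of the snapshot file on disk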
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/bos_download.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/bos_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..372784b9a0888898962f4a136e7efd74ef69cd40
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/bos_download.py
@@ -0,0 +1,637 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+import logging
+import os
+import re
+import shutil
+import tempfile
+from contextlib import contextmanager
+from functools import partial
+from pathlib import Path
+from typing import Dict, Generator, Literal, Optional, Union
+from urllib.parse import quote
+
+import requests
+from filelock import FileLock
+from huggingface_hub.utils import (
+ EntryNotFoundError,
+ FileMetadataError,
+ GatedRepoError,
+ HfHubHTTPError,
+ LocalEntryNotFoundError,
+ RepositoryNotFoundError,
+ RevisionNotFoundError,
+)
+
+logger = logging.getLogger(__name__)
+
+from .common import (
+ _CACHED_NO_EXIST,
+ DEFAULT_ETAG_TIMEOUT,
+ DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD,
+ DEFAULT_REQUEST_TIMEOUT,
+ REPO_ID_SEPARATOR,
+ AistudioBosFileMetadata,
+ OfflineModeIsEnabled,
+ _as_int,
+ _cache_commit_hash_for_specific_revision,
+ _check_disk_space,
+ _chmod_and_replace,
+ _create_symlink,
+ _get_pointer_path,
+ _normalize_etag,
+ _request_wrapper,
+ _to_local_dir,
+ http_get,
+ raise_for_status,
+)
+
+
+def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
+ """Return a serialized version of a aistudio repo name and type, safe for disk storage
+ as a single non-nested folder.
+
+ Example: models--julien-c--EsperBERTo-small
+ """
+ # remove all `/` occurrences to correctly convert repo to directory name
+ parts = [f"{repo_type}", *repo_id.split("/")]
+ return REPO_ID_SEPARATOR.join(parts)
+
+
+ENDPOINT = os.getenv("PPNLP_ENDPOINT", "https://bj.bcebos.com/paddlenlp")
+ENDPOINT_v2 = "https://paddlenlp.bj.bcebos.com"
+
+BOS_URL_TEMPLATE = ENDPOINT + "/{repo_type}/community/{repo_id}/{revision}/{filename}"
+BOS_URL_TEMPLATE_WITHOUT_REVISION = ENDPOINT + "/{repo_type}/community/{repo_id}/{filename}"
+
+
+default_home = os.path.join(os.path.expanduser("~"), ".cache")
+BOS_HOME = os.path.expanduser(
+ os.getenv(
+ "BOS_HOME",
+ os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "paddle"),
+ )
+)
+default_cache_path = os.path.join(BOS_HOME, "bos")
+BOS_CACHE = os.getenv("BOS_CACHE", default_cache_path)
+
+
+DEFAULT_REVISION = "main"
+REPO_TYPE_MODEL = "models"
+REPO_TYPES = [None, REPO_TYPE_MODEL]
+
+
+REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
+
+
+def get_bos_file_metadata(
+ url: str,
+ token: Union[bool, str, None] = None,
+ proxies: Optional[Dict] = None,
+ timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ user_agent: Union[Dict, str, None] = None,
+):
+ """Fetch metadata of a file versioned on the Hub for a given url.
+
+ Args:
+ url (`str`):
+ File url, for example returned by [`bos_url`].
+ token (`str` or `bool`, *optional*):
+ A token to be used for the download.
+ - If `True`, the token is read from the BOS config
+ folder.
+ - If `False` or `None`, no token is provided.
+ - If a string, it's used as the authentication token.
+ proxies (`dict`, *optional*):
+ Dictionary mapping protocol to the URL of the proxy passed to
+ `requests.request`.
+ timeout (`float`, *optional*, defaults to 10):
+ How many seconds to wait for the server to send metadata before giving up.
+ library_name (`str`, *optional*):
+ The name of the library to which the object corresponds.
+ library_version (`str`, *optional*):
+ The version of the library.
+ user_agent (`dict`, `str`, *optional*):
+ The user-agent info in the form of a dictionary or a string.
+
+ Returns:
+ A [`AistudioBosFileMetadata`] object containing metadata such as location, etag, size and
+ commit_hash.
+ """
+ headers = {}
+ headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file
+
+ # Retrieve metadata
+ r = _request_wrapper(
+ method="HEAD",
+ url=url,
+ headers=headers,
+ allow_redirects=False,
+ follow_relative_redirects=True,
+ proxies=proxies,
+ timeout=timeout,
+ )
+ raise_for_status(r)
+
+ # Return
+ return AistudioBosFileMetadata(
+ commit_hash=None,
+ etag=_normalize_etag(r.headers.get("ETag")),
+ location=url,
+ size=_as_int(r.headers.get("Content-Length")),
+ )
+
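+# Illustrative sketch (comment only, not part of the original module): issue a HEAD
+# request for a BOS file and read its ETag/size before downloading. `bos_url` is
+# defined below in this module; the repo id is a placeholder.
+#
+#   meta = get_bos_file_metadata(bos_url("CompVis/stable-diffusion-v1-4", "model_index.json"))
+#   print(meta.etag, meta.size)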
+
+def bos_url(
+ repo_id: str,
+ filename: str,
+ *,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ endpoint: Optional[str] = None,
+) -> str:
+ if subfolder == "":
+ subfolder = None
+ if subfolder is not None:
+ filename = f"{subfolder}/{filename}"
+
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError("Invalid repo type")
+ if revision is None:
+ revision = DEFAULT_REVISION
+
+ if revision == DEFAULT_REVISION:
+ url = BOS_URL_TEMPLATE_WITHOUT_REVISION.format(
+ repo_type=repo_type,
+ repo_id=repo_id,
+ filename=filename,
+ )
+ else:
+ url = BOS_URL_TEMPLATE.format(
+ repo_type=repo_type,
+ repo_id=repo_id,
+ revision=quote(revision, safe=""),
+ filename=filename,
+ )
+ # Update endpoint if provided
+ if endpoint is not None and url.startswith(ENDPOINT):
+ url = endpoint + url[len(ENDPOINT) :]
+ return url
+
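+# Illustrative sketch (comment only, not part of the original module): with the default
+# endpoint and revision, `bos_url` expands the template above roughly as follows. The
+# repo id is a placeholder.
+#
+#   bos_url("CompVis/stable-diffusion-v1-4", "model_index.json")
+#   # -> "https://bj.bcebos.com/paddlenlp/models/community/CompVis/stable-diffusion-v1-4/model_index.json"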
+
+def bos_download(
+ repo_id: str = None,
+ filename: str = None,
+ subfolder: Optional[str] = None,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ library_name: Optional[str] = None,
+ library_version: Optional[str] = None,
+ cache_dir: Union[str, Path, None] = None,
+ local_dir: Union[str, Path, None] = None,
+ local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
+ # TODO
+ user_agent: Union[Dict, str, None] = None,
+ force_download: bool = False,
+ proxies: Optional[Dict] = None,
+ etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
+ resume_download: bool = False,
+ token: Optional[str] = None,
+ local_files_only: bool = False,
+ endpoint: Optional[str] = None,
+ url: Optional[str] = None,
+ **kwargs,
+):
+ if url is not None:
+ assert url.startswith(ENDPOINT) or url.startswith(
+ ENDPOINT_v2
+ ), f"URL must start with {ENDPOINT} or {ENDPOINT_v2}"
+ if repo_id is None:
+ if url.startswith(ENDPOINT):
+ repo_id = "/".join(url[len(ENDPOINT) + 1 :].split("/")[:-1])
+ else:
+ repo_id = "/".join(url[len(ENDPOINT_v2) + 1 :].split("/")[:-1])
+ if filename is None:
+ filename = url.split("/")[-1]
+ subfolder = None
+
+ if cache_dir is None:
+ cache_dir = BOS_CACHE
+ if revision is None:
+ revision = DEFAULT_REVISION
+ if isinstance(cache_dir, Path):
+ cache_dir = str(cache_dir)
+ if isinstance(local_dir, Path):
+ local_dir = str(local_dir)
+ locks_dir = os.path.join(cache_dir, ".locks")
+
+ if subfolder == "":
+ subfolder = None
+ if subfolder is not None:
+ # This is used to create a URL, and not a local path, hence the forward slash.
+ filename = f"{subfolder}/{filename}"
+
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
+
+ storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
+ os.makedirs(storage_folder, exist_ok=True)
+
+ # cross platform transcription of filename, to be used as a local file path.
+ relative_filename = os.path.join(*filename.split("/"))
+ if os.name == "nt":
+ if relative_filename.startswith("..\\") or "\\..\\" in relative_filename:
+ raise ValueError(
+ f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository"
+ " owner to rename this file."
+ )
+
+ # if user provides a commit_hash and they already have the file on disk,
+ # shortcut everything.
+ # TODO: downloading by commit id is not supported yet, so this branch always runs.
+ if not force_download: # REGEX_COMMIT_HASH.match(revision)
+ pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
+ if os.path.exists(pointer_path):
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ if url is None:
+ url = bos_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
+ headers = None
+ url_to_download = url
+
+ etag = None
+ commit_hash = None
+ expected_size = None
+ head_call_error: Optional[Exception] = None
+ if not local_files_only:
+ try:
+ try:
+ metadata = get_bos_file_metadata(
+ url=url,
+ token=token,
+ proxies=proxies,
+ timeout=etag_timeout,
+ library_name=library_name,
+ library_version=library_version,
+ user_agent=user_agent,
+ )
+ except EntryNotFoundError as http_error: # noqa: F841
+ raise
+ # Commit hash must exist
+ # TODO: the commit hash is overridden here and forced to be the revision.
+ commit_hash = revision # metadata.commit_hash
+ if commit_hash is None:
+ raise FileMetadataError(
+ "Distant resource does not seem to be on aistudio hub. It is possible that a configuration issue"
+ " prevents you from downloading resources from aistudio hub. Please check your firewall"
+ " and proxy settings and make sure your SSL certificates are updated."
+ )
+
+ # Etag must exist
+ etag = metadata.etag
+ # We favor a custom header indicating the etag of the linked resource, and
+ # we fallback to the regular etag header.
+ # If we don't have any of those, raise an error.
+ if etag is None:
+ raise FileMetadataError(
+ "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility."
+ )
+
+ # Expected (uncompressed) size
+ expected_size = metadata.size
+
+ except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
+ # Actually raise for those subclasses of ConnectionError
+ raise
+ except (
+ requests.exceptions.ConnectionError,
+ requests.exceptions.Timeout,
+ OfflineModeIsEnabled,
+ ) as error:
+ # Otherwise, our Internet connection is down.
+ # etag is None
+ head_call_error = error
+ pass
+ except (RevisionNotFoundError, EntryNotFoundError):
+ # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
+ raise
+ except requests.HTTPError as error:
+ # Multiple reasons for an http error:
+ # - Repository is private and invalid/missing token sent
+ # - Repository is gated and invalid/missing token sent
+ # - Hub is down (error 500 or 504)
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
+ # (if it's not the case, the error will be re-raised)
+ head_call_error = error
+ pass
+ except FileMetadataError as error:
+ # Multiple reasons for a FileMetadataError:
+ # - Wrong network configuration (proxy, firewall, SSL certificates)
+ # - Inconsistency on the Hub
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
+ # (if it's not the case, the error will be re-raised)
+ head_call_error = error
+ pass
+
+ # etag can be None for several reasons:
+ # 1. we passed local_files_only.
+ # 2. we don't have a connection
+ # 3. Hub is down (HTTP 500 or 504)
+ # 4. repo is not found -for example private or gated- and invalid/missing token sent
+ # 5. Hub is blocked by a firewall or proxy is not set correctly.
+ # => Try to get the last downloaded one from the specified revision.
+ #
+ # If the specified revision is a commit hash, look inside "snapshots".
+ # If the specified revision is a branch or tag, look inside "refs".
+ if etag is None:
+ # In those cases, we cannot force download.
+ if force_download:
+ raise ValueError(
+ "We have no connection or you passed local_files_only, so force_download is not an accepted option."
+ )
+
+ # Try to get "commit_hash" from "revision"
+ commit_hash = None
+ if REGEX_COMMIT_HASH.match(revision):
+ commit_hash = revision
+ else:
+ ref_path = os.path.join(storage_folder, "refs", revision)
+ if os.path.isfile(ref_path):
+ with open(ref_path) as f:
+ commit_hash = f.read()
+
+ # Return pointer file if exists
+ if commit_hash is not None:
+ pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+ if os.path.exists(pointer_path):
+ if local_dir is not None:
+ return _to_local_dir(
+ pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks
+ )
+ return pointer_path
+
+ # If we couldn't find an appropriate file on disk, raise an error.
+ # If files cannot be found and local_files_only=True,
+ # the models might've been found if local_files_only=False
+ # Notify the user about that
+ if local_files_only:
+ raise LocalEntryNotFoundError(
+ "Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable"
+ " BOS look-ups and downloads online, set 'local_files_only' to False."
+ )
+ elif isinstance(head_call_error, RepositoryNotFoundError) or isinstance(head_call_error, GatedRepoError):
+ # Repo not found => let's raise the actual error
+ raise head_call_error
+ else:
+ # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+ raise LocalEntryNotFoundError(
+ "An error happened while trying to locate the file on the Hub and we cannot find the requested files"
+ " in the local cache. Please check your connection and try again or make sure your Internet connection"
+ " is on."
+ ) from head_call_error
+
+ # From now on, etag and commit_hash are not None.
+ assert etag is not None, "etag must have been retrieved from server"
+ assert commit_hash is not None, "commit_hash must have been retrieved from server"
+ blob_path = os.path.join(storage_folder, "blobs", etag)
+ pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+
+ os.makedirs(os.path.dirname(blob_path), exist_ok=True)
+ os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
+ # if passed revision is not identical to commit_hash
+ # then revision has to be a branch name or tag name.
+ # In that case store a ref.
+ _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
+
+ if os.path.exists(pointer_path) and not force_download:
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ if os.path.exists(blob_path) and not force_download:
+ # we have the blob already, but not the pointer
+ if local_dir is not None: # to local dir
+ return _to_local_dir(blob_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ else: # or in snapshot cache
+ _create_symlink(blob_path, pointer_path, new_blob=False)
+ return pointer_path
+
+ # Prevent parallel downloads of the same file with a lock.
+ # etag could be duplicated across repos, so the lock is scoped to this repo's folder.
+ lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
+
+ # Some Windows versions do not allow for paths longer than 255 characters.
+ # In this case, we must specify it is an extended path by using the "\\?\" prefix.
+ if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
+ lock_path = "\\\\?\\" + os.path.abspath(lock_path)
+
+ if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
+ blob_path = "\\\\?\\" + os.path.abspath(blob_path)
+
+ Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+ with FileLock(lock_path):
+ # If the download just completed while the lock was activated.
+ if os.path.exists(pointer_path) and not force_download:
+ # Even if returning early like here, the lock will be released.
+ if local_dir is not None:
+ return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
+ return pointer_path
+
+ if resume_download:
+ incomplete_path = blob_path + ".incomplete"
+
+ @contextmanager
+ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
+ with open(incomplete_path, "ab") as f:
+ yield f
+
+ temp_file_manager = _resumable_file_manager
+ if os.path.exists(incomplete_path):
+ resume_size = os.stat(incomplete_path).st_size
+ else:
+ resume_size = 0
+ else:
+ temp_file_manager = partial( # type: ignore
+ tempfile.NamedTemporaryFile, mode="wb", dir=cache_dir, delete=False
+ )
+ resume_size = 0
+
+ # Download to temporary file, then copy to cache dir once finished.
+ # Otherwise you get corrupt cache entries if the download gets interrupted.
+ with temp_file_manager() as temp_file:
+ logger.info("downloading %s to %s", url, temp_file.name)
+
+ if expected_size is not None: # might be None if HTTP header not set correctly
+ # Check tmp path
+ _check_disk_space(expected_size, os.path.dirname(temp_file.name))
+
+ # Check destination
+ _check_disk_space(expected_size, os.path.dirname(blob_path))
+ if local_dir is not None:
+ _check_disk_space(expected_size, local_dir)
+
+ http_get(
+ url_to_download,
+ temp_file,
+ proxies=proxies,
+ resume_size=resume_size,
+ headers=headers,
+ expected_size=expected_size,
+ )
+ if local_dir is None:
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ _create_symlink(blob_path, pointer_path, new_blob=True)
+ else:
+ local_dir_filepath = os.path.join(local_dir, relative_filename)
+ os.makedirs(os.path.dirname(local_dir_filepath), exist_ok=True)
+
+ # If "auto" (default) copy-paste small files to ease manual editing but symlink big files to save disk
+ # In both cases, blob file is cached.
+ is_big_file = os.stat(temp_file.name).st_size > DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD
+ if local_dir_use_symlinks is True or (local_dir_use_symlinks == "auto" and is_big_file):
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ logger.debug("Create symlink to local dir")
+ _create_symlink(blob_path, local_dir_filepath, new_blob=False)
+ elif local_dir_use_symlinks == "auto" and not is_big_file:
+ logger.debug(f"Storing {url} in cache at {blob_path}")
+ _chmod_and_replace(temp_file.name, blob_path)
+ logger.debug("Duplicate in local dir (small file and use_symlink set to 'auto')")
+ shutil.copyfile(blob_path, local_dir_filepath)
+ else:
+ logger.debug(f"Storing {url} in local_dir at {local_dir_filepath} (not cached).")
+ _chmod_and_replace(temp_file.name, local_dir_filepath)
+ pointer_path = local_dir_filepath # for return value
+
+ return pointer_path
+
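+# Illustrative usage sketch (comment only, not part of the original module): download a
+# file into the BOS cache (or `local_dir`) and get back the resolved local path. The
+# repo id/filename are placeholders and assume the file exists on the BOS mirror.
+#
+#   path = bos_download(repo_id="CompVis/stable-diffusion-v1-4", filename="model_index.json")
+#   # `path` points into the BOS_CACHE snapshot tree, or into `local_dir` when one is given.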
+
+def bos_file_exists(
+ repo_id: str,
+ filename: str,
+ *,
+ repo_type: Optional[str] = None,
+ revision: Optional[str] = None,
+ token: Optional[str] = None,
+ endpoint: Optional[str] = None,
+) -> bool:
+ """
+ Checks if a file exists in a repository on BOS.
+
+ Args:
+ repo_id (`str`):
+ A namespace (user or an organization) and a repo name separated
+ by a `/`.
+ filename (`str`):
+ The name of the file to check, for example:
+ `"config.json"`
+ repo_type (`str`, *optional*):
+ Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
+ `None` or `"model"` if getting repository info from a model. Default is `None`.
+ revision (`str`, *optional*):
+ The revision of the repository from which to get the information. Defaults to `"main"` branch.
+ token (`bool` or `str`, *optional*):
+ A valid authentication token (see https://huggingface.co/settings/token).
+ If `None` or `True` and machine is logged in (through `huggingface-cli login`
+ or [`~login`]), token will be retrieved from the cache.
+ If `False`, token is not sent in the request header.
+
+ Returns:
+ True if the file exists, False otherwise.
+
+
+
+ Examples:
+ ```py
+ >>> bos_file_exists("bigcode/starcoder", "config.json")
+ True
+ >>> bos_file_exists("bigcode/starcoder", "not-a-file")
+ False
+ >>> bos_file_exists("bigcode/not-a-repo", "config.json")
+ False
+ ```
+
+
+ """
+ url = bos_url(repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=endpoint)
+ try:
+ get_bos_file_metadata(url, token=token)
+ return True
+ except GatedRepoError: # raise specifically on gated repo
+ raise
+ except (RepositoryNotFoundError, EntryNotFoundError, RevisionNotFoundError, HfHubHTTPError):
+ return False
+
+
+def bos_try_to_load_from_cache(
+ repo_id: str,
+ filename: str,
+ cache_dir: Union[str, Path, None] = None,
+ revision: Optional[str] = None,
+ repo_type: Optional[str] = None,
+):
+ if revision is None:
+ revision = DEFAULT_REVISION
+ if repo_type is None:
+ repo_type = REPO_TYPES[-1]
+ if repo_type not in REPO_TYPES:
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
+ if cache_dir is None:
+ cache_dir = BOS_CACHE
+
+ object_id = repo_id.replace("/", "--")
+ repo_cache = os.path.join(cache_dir, f"{repo_type}--{object_id}")
+ if not os.path.isdir(repo_cache):
+ # No cache for this model
+ return None
+
+ refs_dir = os.path.join(repo_cache, "refs")
+ snapshots_dir = os.path.join(repo_cache, "snapshots")
+ no_exist_dir = os.path.join(repo_cache, ".no_exist")
+
+ # Resolve refs (for instance to convert main to the associated commit sha)
+ if os.path.isdir(refs_dir):
+ revision_file = os.path.join(refs_dir, revision)
+ if os.path.isfile(revision_file):
+ with open(revision_file) as f:
+ revision = f.read()
+
+ # Check if file is cached as "no_exist"
+ if os.path.isfile(os.path.join(no_exist_dir, revision, filename)):
+ return _CACHED_NO_EXIST
+
+ # Check if revision folder exists
+ if not os.path.exists(snapshots_dir):
+ return None
+ cached_shas = os.listdir(snapshots_dir)
+ if revision not in cached_shas:
+ # No cache for this revision and we won't try to return a random revision
+ return None
+
+ # Check if file exists in cache
+ cached_file = os.path.join(snapshots_dir, revision, filename)
+ return cached_file if os.path.isfile(cached_file) else None
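+
+
+# Illustrative usage sketch (comment only, not part of the original module): look up the
+# cache first and only hit the network on a miss; `_CACHED_NO_EXIST` marks a file cached
+# as known-missing. The repo id is a placeholder.
+#
+#   cached = bos_try_to_load_from_cache("CompVis/stable-diffusion-v1-4", "model_index.json")
+#   if cached is None:
+#       cached = bos_download(repo_id="CompVis/stable-diffusion-v1-4", filename="model_index.json")
+#   elif cached is _CACHED_NO_EXIST:
+#       ...  # previously recorded as missing; skip the download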
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/common.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..faaddf5c5ed272a807fceca665a29ff216c5ae63
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/common.py
@@ -0,0 +1,662 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import logging
+import os
+import re
+import shutil
+import stat
+import tempfile
+import threading
+import time
+import uuid
+import warnings
+from contextlib import contextmanager
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from typing import BinaryIO, Callable, Dict, Generator, Literal, Optional, Union
+from urllib.parse import urlparse
+
+import requests
+from huggingface_hub.utils import (
+ BadRequestError,
+ EntryNotFoundError,
+ HfHubHTTPError,
+ tqdm,
+)
+from requests import HTTPError, Response
+from requests.adapters import HTTPAdapter
+from requests.models import PreparedRequest
+
+logger = logging.getLogger(__name__)
+
+ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
+
+
+def _is_true(value: Optional[str]) -> bool:
+ if value is None:
+ return False
+ return value.upper() in ENV_VARS_TRUE_VALUES
+
+
+def _as_int(value: Optional[str]) -> Optional[int]:
+ if value is None:
+ return None
+ return int(value)
+
+
+DISABLE_SYMLINKS_WARNING = False
+# Regex to get filename from a "Content-Disposition" header for CDN-served files
+HEADER_FILENAME_PATTERN = re.compile(r'filename="(?P<filename>.*?)"')
+DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
+REPO_ID_SEPARATOR = "--"
+
+DEFAULT_DOWNLOAD_TIMEOUT = 10
+DEFAULT_REQUEST_TIMEOUT = 10
+DEFAULT_ETAG_TIMEOUT = 10
+DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = 5 * 1024 * 1024
+
+OFFLINE = _is_true(os.environ.get("AISTUDIO_BOS_OFFLINE"))
+_CACHED_NO_EXIST = object()
+
+
+def _cache_commit_hash_for_specific_revision(storage_folder: str, revision: str, commit_hash: str) -> None:
+ """Cache reference between a revision (tag, branch or truncated commit hash) and the corresponding commit hash.
+
+ Does nothing if `revision` is already a proper `commit_hash` or reference is already cached.
+ """
+ # if revision != commit_hash:
+ ref_path = Path(storage_folder) / "refs" / revision
+ ref_path.parent.mkdir(parents=True, exist_ok=True)
+ if not ref_path.exists() or commit_hash != ref_path.read_text():
+ # Update the ref only if it has changed. Could cause a useless error in case the
+ # repo is already cached and the user doesn't have write access to the cache folder.
+ # See https://github.com/huggingface/huggingface_hub/issues/1216.
+ ref_path.write_text(commit_hash)
+
+
+def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
+ """Check disk usage and log a warning if there is not enough disk space to download the file.
+
+ Args:
+ expected_size (`int`):
+ The expected size of the file in bytes.
+ target_dir (`str`):
+ The directory where the file will be stored after downloading.
+ """
+
+ target_dir = Path(target_dir) # format as `Path`
+ for path in [target_dir] + list(target_dir.parents): # first check target_dir, then each parents one by one
+ try:
+ target_dir_free = shutil.disk_usage(path).free
+ if target_dir_free < expected_size:
+ warnings.warn(
+ "Not enough free disk space to download the file. "
+ f"The expected file size is: {expected_size / 1e6:.2f} MB. "
+ f"The target location {target_dir} only has {target_dir_free / 1e6:.2f} MB free disk space."
+ )
+ return
+ except OSError: # file does not exist or disk space cannot be checked; try the parent directory
+ pass
+
+
+def http_get(
+ url: str,
+ temp_file: BinaryIO,
+ *,
+ proxies=None,
+ resume_size: float = 0,
+ headers: Optional[Dict[str, str]] = None,
+ expected_size: Optional[int] = None,
+ _nb_retries: int = 5,
+):
+ """
+ Download a remote file. Errors are not swallowed; they are re-raised as exceptions tailored to the Hugging Face Hub.
+
+ If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a
+ transient error (network outage?). We log a warning message and try to resume the download a few times before
+ giving up. The method gives up after 5 attempts if no new data has been received from the server.
+ """
+ initial_headers = headers
+ headers = copy.deepcopy(headers) or {}
+ if resume_size > 0:
+ headers["Range"] = "bytes=%d-" % (resume_size,)
+
+ r = _request_wrapper(
+ method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=DEFAULT_DOWNLOAD_TIMEOUT
+ )
+ raise_for_status(r)
+ content_length = r.headers.get("Content-Length")
+
+ # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
+ # If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
+ total = resume_size + int(content_length) if content_length is not None else None
+
+ displayed_name = url
+ content_disposition = r.headers.get("Content-Disposition")
+ if content_disposition is not None:
+ match = HEADER_FILENAME_PATTERN.search(content_disposition)
+ if match is not None:
+ # Means file is on CDN
+ displayed_name = match.groupdict()["filename"]
+
+ # Truncate filename if too long to display
+ if len(displayed_name) > 40:
+ displayed_name = f"(…){displayed_name[-40:]}"
+
+ consistency_error_message = (
+ f"Consistency check failed: file should be of size {expected_size} but has size"
+ f" {{actual_size}} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
+ " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
+ " know by opening an issue on https://github.com/huggingface/huggingface_hub."
+ )
+
+ # Stream file to buffer
+ with tqdm(
+ unit="B",
+ unit_scale=True,
+ total=total,
+ initial=resume_size,
+ desc=displayed_name,
+ disable=bool(logger.getEffectiveLevel() == logging.NOTSET),
+ ) as progress:
+ new_resume_size = resume_size
+ try:
+ for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
+ if chunk: # filter out keep-alive new chunks
+ progress.update(len(chunk))
+ temp_file.write(chunk)
+ new_resume_size += len(chunk)
+ # Some data has been downloaded from the server so we reset the number of retries.
+ _nb_retries = 5
+ except (requests.ConnectionError, requests.ReadTimeout) as e:
+ # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+ # a transient error (network outage?). We log a warning message and try to resume the download a few times
+ # before giving up. The retry mechanism is basic but should be enough in most cases.
+ if _nb_retries <= 0:
+ logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+ raise
+ logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+ time.sleep(1)
+ reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
+ return http_get(
+ url=url,
+ temp_file=temp_file,
+ proxies=proxies,
+ resume_size=new_resume_size,
+ headers=initial_headers,
+ expected_size=expected_size,
+ _nb_retries=_nb_retries - 1,
+ )
+
+ if expected_size is not None and expected_size != temp_file.tell():
+ raise EnvironmentError(
+ consistency_error_message.format(
+ actual_size=temp_file.tell(),
+ )
+ )
+
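+# Illustrative sketch (comment only, not part of the original module): stream a file to
+# disk with resume/retry handled by `http_get`. The URL is a placeholder.
+#
+#   with open("model_index.json", "wb") as f:
+#       http_get("https://bj.bcebos.com/paddlenlp/models/community/CompVis/stable-diffusion-v1-4/model_index.json", f)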
+
+def _chmod_and_replace(src: str, dst: str) -> None:
+ """Set correct permission before moving a blob from tmp directory to cache dir.
+
+ Do not take into account the `umask` from the process as there is no convenient way
+ to get it that is thread-safe.
+
+ See:
+ - About umask: https://docs.python.org/3/library/os.html#os.umask
+ - Thread-safety: https://stackoverflow.com/a/70343066
+ - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591
+ - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141
+ - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215
+ """
+ # Get umask by creating a temporary file in the cached repo folder.
+ tmp_file = Path(dst).parent.parent / f"tmp_{uuid.uuid4()}"
+ try:
+ tmp_file.touch()
+ cache_dir_mode = Path(tmp_file).stat().st_mode
+ os.chmod(src, stat.S_IMODE(cache_dir_mode))
+ finally:
+ tmp_file.unlink()
+
+ shutil.move(src, dst)
+
+
+def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
+ """Return a serialized version of a aistudio repo name and type, safe for disk storage
+ as a single non-nested folder.
+
+ Example: models--julien-c--EsperBERTo-small
+ """
+ # remove all `/` occurrences to correctly convert repo to directory name
+ parts = [f"{repo_type}s", *repo_id.split("/")]
+ return REPO_ID_SEPARATOR.join(parts)
+
+
+class OfflineModeIsEnabled(ConnectionError):
+ """Raised when a request is made but `AISTUDIO_HUB_OFFLINE=1` is set as environment variable."""
+
+
+class OfflineAdapter(HTTPAdapter):
+ def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
+ raise OfflineModeIsEnabled(
+ f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `AISTUDIO_HUB_OFFLINE` environment variable."
+ )
+
+
+BACKEND_FACTORY_T = Callable[[], requests.Session]
+
+
+def _default_backend_factory() -> requests.Session:
+ session = requests.Session()
+ if OFFLINE:
+ session.mount("http://", OfflineAdapter())
+ session.mount("https://", OfflineAdapter())
+
+ return session
+
+
+_GLOBAL_BACKEND_FACTORY: BACKEND_FACTORY_T = _default_backend_factory
+HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
+
+
+@lru_cache
+def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
+ """
+ Create a new session per thread using global factory. Using LRU cache (maxsize 128) to avoid memory leaks when
+ using thousands of threads. Cache is cleared when `configure_http_backend` is called.
+ """
+ return _GLOBAL_BACKEND_FACTORY()
+
+
+def reset_sessions() -> None:
+ """Reset the cache of sessions.
+
+ Mostly used internally when sessions are reconfigured or an SSLError is raised.
+ See [`configure_http_backend`] for more details.
+ """
+ _get_session_from_cache.cache_clear()
+
+
+def get_session() -> requests.Session:
+ """
+ Get a `requests.Session` object, using the session factory from the user.
+
+ Use [`get_session`] to get a configured Session. Since `requests.Session` is not guaranteed to be thread-safe,
+ `huggingface_hub` creates 1 Session instance per thread. They are all instantiated using the same `backend_factory`
+ set in [`configure_http_backend`]. A LRU cache is used to cache the created sessions (and connections) between
+ calls. Max size is 128 to avoid memory leaks if thousands of threads are spawned.
+
+ See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.
+
+ Example:
+ ```py
+ import requests
+ from huggingface_hub import configure_http_backend, get_session
+
+ # Create a factory function that returns a Session with configured proxies
+ def backend_factory() -> requests.Session:
+ session = requests.Session()
+ session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
+ return session
+
+ # Set it as the default session factory
+ configure_http_backend(backend_factory=backend_factory)
+
+ # In practice, this is mostly done internally in `huggingface_hub`
+ session = get_session()
+ ```
+ """
+ return _get_session_from_cache(process_id=os.getpid(), thread_id=threading.get_ident())
+
+
+def _request_wrapper(
+ method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
+) -> requests.Response:
+ """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
+ `allow_redirection=False`.
+
+ Args:
+ method (`str`):
+ HTTP method, such as 'GET' or 'HEAD'.
+ url (`str`):
+ The URL of the resource to fetch.
+ follow_relative_redirects (`bool`, *optional*, defaults to `False`):
+ If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
+ kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
+ following redirection to a CDN.
+ **params (`dict`, *optional*):
+ Params to pass to `requests.request`.
+ """
+ # Recursively follow relative redirects
+ if follow_relative_redirects:
+ response = _request_wrapper(
+ method=method,
+ url=url,
+ follow_relative_redirects=False,
+ **params,
+ )
+
+ # If redirection, we redirect only relative paths.
+ # This is useful in case of a renamed repository.
+ if 300 <= response.status_code <= 399:
+ parsed_target = urlparse(response.headers["Location"])
+ if parsed_target.netloc == "":
+ # This means it is a relative 'location' header, as allowed by RFC 7231.
+ # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
+ # We want to follow this relative redirect !
+ #
+ # Highly inspired by `resolve_redirects` from requests library.
+ # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
+ next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
+ return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
+ return response
+ # Perform request and return if status_code is not in the retry list.
+ response = get_session().request(method=method, url=url, **params)
+ raise_for_status(response)
+ return response
+
+
+def _get_pointer_path(storage_folder: str, revision: str, relative_filename: str) -> str:
+ # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks
+ snapshot_path = os.path.join(storage_folder, "snapshots")
+ pointer_path = os.path.join(snapshot_path, revision, relative_filename)
+ if Path(os.path.abspath(snapshot_path)) not in Path(os.path.abspath(pointer_path)).parents:
+ raise ValueError(
+ "Invalid pointer path: cannot create pointer path in snapshot folder if"
+ f" `storage_folder='{storage_folder}'`, `revision='{revision}'` and"
+ f" `relative_filename='{relative_filename}'`."
+ )
+ return pointer_path
+
+
+def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
+ """Create a symbolic link named dst pointing to src.
+
+ By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages:
+ - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will
+ not break.
+ - Relative paths seem to be better handled on Windows. Issue was reported 3 times in less than a week when
+ changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398,
+ https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228.
+ NOTE: The issue with absolute paths doesn't happen in admin mode.
+ When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created.
+ This happens when paths are not on the same volume. In that case, we use absolute paths.
+
+
+ The result layout looks something like
+ └── [ 128] snapshots
+ ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f
+ │ ├── [ 52] README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
+ │ └── [ 76] pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
+
+ If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by
+ having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file
+ (`new_blob=False`), we don't know if the blob file is already referenced elsewhere. To avoid breaking existing
+ cache, the file is duplicated on the disk.
+
+ In case symlinks are not supported, a warning message is displayed to the user once when loading `huggingface_hub`.
+ The warning message can be disabled with the `DISABLE_SYMLINKS_WARNING` environment variable.
+ """
+ try:
+ os.remove(dst)
+ except OSError:
+ pass
+
+ abs_src = os.path.abspath(os.path.expanduser(src))
+ abs_dst = os.path.abspath(os.path.expanduser(dst))
+ abs_dst_folder = os.path.dirname(abs_dst)
+
+ # Use relative_dst in priority
+ try:
+ relative_src = os.path.relpath(abs_src, abs_dst_folder)
+ except ValueError:
+ # Raised on Windows if src and dst are not on the same volume. This is the case when creating a symlink to a
+ # local_dir instead of within the cache directory.
+ # See https://docs.python.org/3/library/os.path.html#os.path.relpath
+ relative_src = None
+
+ try:
+ commonpath = os.path.commonpath([abs_src, abs_dst])
+ _support_symlinks = are_symlinks_supported(commonpath)
+ except ValueError:
+ # Raised if src and dst are not on the same volume. Symlinks will still work on Linux/Macos.
+ # See https://docs.python.org/3/library/os.path.html#os.path.commonpath
+ _support_symlinks = os.name != "nt"
+ except PermissionError:
+ # Permission error means src and dst are not in the same volume (e.g. destination path has been provided
+ # by the user via `local_dir`. Let's test symlink support there)
+ _support_symlinks = are_symlinks_supported(abs_dst_folder)
+
+ # Symlinks are supported => let's create a symlink.
+ if _support_symlinks:
+ src_rel_or_abs = relative_src or abs_src
+ logger.debug(f"Creating pointer from {src_rel_or_abs} to {abs_dst}")
+ try:
+ os.symlink(src_rel_or_abs, abs_dst)
+ return
+ except FileExistsError:
+ if os.path.islink(abs_dst) and os.path.realpath(abs_dst) == os.path.realpath(abs_src):
+ # `abs_dst` already exists and is a symlink to the `abs_src` blob. It is most likely that the file has
+ # been cached twice concurrently (exactly between `os.remove` and `os.symlink`). Do nothing.
+ return
+ else:
+ # Very unlikely to happen. Means a file `dst` has been created exactly between `os.remove` and
+ # `os.symlink` and is not a symlink to the `abs_src` blob file. Raise exception.
+ raise
+ except PermissionError:
+ # Permission error means src and dst are not in the same volume (e.g. download to local dir) and symlink
+ # is supported on both volumes but not between them. Let's just make a hard copy in that case.
+ pass
+
+ # Symlinks are not supported => let's move or copy the file.
+ if new_blob:
+ logger.info(f"Symlink not supported. Moving file from {abs_src} to {abs_dst}")
+ shutil.move(abs_src, abs_dst)
+ else:
+ logger.info(f"Symlink not supported. Copying file from {abs_src} to {abs_dst}")
+ shutil.copyfile(abs_src, abs_dst)
+
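+# Illustrative sketch (comment only, not part of the original module): link a cached blob
+# into a snapshot folder; when symlinks are unsupported the helper falls back to moving
+# (new blobs) or copying (existing blobs) the file. Paths are placeholders.
+#
+#   _create_symlink("/cache/blobs/<etag>", "/cache/snapshots/<revision>/config.json", new_blob=True)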
+
+_are_symlinks_supported_in_dir: Dict[str, bool] = {}
+
+
+def _set_write_permission_and_retry(func, path, excinfo):
+ os.chmod(path, stat.S_IWRITE)
+ func(path)
+
+
+@contextmanager
+def SoftTemporaryDirectory(
+ suffix: Optional[str] = None,
+ prefix: Optional[str] = None,
+ dir: Optional[Union[Path, str]] = None,
+ **kwargs,
+) -> Generator[str, None, None]:
+ """
+ Context manager to create a temporary directory and safely delete it.
+
+ If tmp directory cannot be deleted normally, we set the WRITE permission and retry.
+ If cleanup still fails, we give up but don't raise an exception. This is equivalent
+ to `tempfile.TemporaryDirectory(..., ignore_cleanup_errors=True)` introduced in
+ Python 3.10.
+
+ See https://www.scivision.dev/python-tempfile-permission-error-windows/.
+ """
+ tmpdir = tempfile.TemporaryDirectory(prefix=prefix, suffix=suffix, dir=dir, **kwargs)
+ yield tmpdir.name
+
+ try:
+ # First once with normal cleanup
+ shutil.rmtree(tmpdir.name)
+ except Exception:
+ # If failed, try to set write permission and retry
+ try:
+ shutil.rmtree(tmpdir.name, onerror=_set_write_permission_and_retry)
+ except Exception:
+ pass
+
+ # And finally, cleanup the tmpdir.
+ # If it fails again, give up but do not throw error
+ try:
+ tmpdir.cleanup()
+ except Exception:
+ pass
+
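+# Illustrative sketch (comment only, not part of the original module): create a scratch
+# directory that is cleaned up best-effort, even on Windows where files may stay locked.
+#
+#   with SoftTemporaryDirectory() as tmpdir:
+#       (Path(tmpdir) / "scratch.bin").write_bytes(b"...")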
+
+def _to_local_dir(
+ path: str, local_dir: str, relative_filename: str, use_symlinks: Union[bool, Literal["auto"]]
+) -> str:
+ """Place a file in a local dir (different than cache_dir).
+
+ Either symlink to blob file in cache or duplicate file depending on `use_symlinks` and file size.
+ """
+ # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks
+ local_dir_filepath = os.path.join(local_dir, relative_filename)
+ if Path(os.path.abspath(local_dir)) not in Path(os.path.abspath(local_dir_filepath)).parents:
+ raise ValueError(
+ f"Cannot copy file '{relative_filename}' to local dir '{local_dir}': file would not be in the local"
+ " directory."
+ )
+
+ os.makedirs(os.path.dirname(local_dir_filepath), exist_ok=True)
+ real_blob_path = os.path.realpath(path)
+
+ # If "auto" (default) copy-paste small files to ease manual editing but symlink big files to save disk
+ if use_symlinks == "auto":
+ use_symlinks = os.stat(real_blob_path).st_size > DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD
+
+ if use_symlinks:
+ _create_symlink(real_blob_path, local_dir_filepath, new_blob=False)
+ else:
+ shutil.copyfile(real_blob_path, local_dir_filepath)
+ return local_dir_filepath
+
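+# Illustrative sketch (comment only, not part of the original module): with the default
+# `use_symlinks="auto"`, blobs above DEFAULT_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD (5 MiB) are
+# symlinked into `local_dir` while smaller ones are copied. Paths are placeholders.
+#
+#   _to_local_dir("/cache/blobs/<etag>", "./weights", "unet/config.json", use_symlinks="auto")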
+
+def _normalize_etag(etag: Optional[str]) -> Optional[str]:
+ """Normalize ETag HTTP header, so it can be used to create nice filepaths.
+
+ The HTTP spec allows two forms of ETag:
+ ETag: W/"<etag_value>"
+ ETag: "<etag_value>"
+
+ For now, we only expect the second form from the server, but we want to be future-proof so we support both. For
+ more context, see `TestNormalizeEtag` tests and https://github.com/huggingface/huggingface_hub/pull/1428.
+
+ Args:
+ etag (`str`, *optional*): HTTP header
+
+ Returns:
+ `str` or `None`: string that can be used as a nice directory name.
+ Returns `None` if input is None.
+ """
+ if etag is None:
+ return None
+ return etag.lstrip("W/").strip('"')
+
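+# Illustrative examples (comment only, not part of the original module):
+#
+#   _normalize_etag('W/"900bd1b3a1b79e7a"')  # -> '900bd1b3a1b79e7a'
+#   _normalize_etag('"900bd1b3a1b79e7a"')    # -> '900bd1b3a1b79e7a'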
+
+@dataclass(frozen=True)
+class AistudioBosFileMetadata:
+ """Data structure containing information about a file versioned on the Aistudio Hub.
+
+ Returned by [`get_aistudio_file_metadata`] or [`get_bos_file_metadata`] based on a URL.
+
+ Args:
+ commit_hash (`str`, *optional*):
+ The commit_hash related to the file.
+ etag (`str`, *optional*):
+ Etag of the file on the server.
+ location (`str`):
+ Location where to download the file. Can be a Hub url or not (CDN).
+ size (`int`, *optional*):
+ Size of the file. In case of an LFS file, contains the size of the actual
+ LFS file, not the pointer.
+ """
+
+ commit_hash: Optional[str]
+ etag: Optional[str]
+ location: str
+ size: Optional[int]
+
+
+def raise_for_status(response: Response, endpoint_name: Optional[str] = None) -> None:
+ try:
+ response.raise_for_status()
+ except HTTPError as e:
+ if response.status_code == 404:
+ message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
+ raise EntryNotFoundError(message, None) from e
+ elif response.status_code == 400:
+ message = (
+ f"\n\nBad request for {endpoint_name} endpoint:" if endpoint_name is not None else "\n\nBad request:"
+ )
+ raise BadRequestError(message, response=None) from e
+ raise HfHubHTTPError(str(e), response=None) from e
+
+
+def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
+ """Return whether the symlinks are supported on the machine.
+
+ Since symlinks support can change depending on the mounted disk, we need to check
+ on the precise cache folder.
+
+ Args:
+ cache_dir (`str`, `Path`, *optional*):
+ Path to the folder where cached files are stored.
+
+ Returns: [bool] Whether symlinks are supported in the directory.
+ """
+ assert cache_dir is not None
+ cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique
+
+ # Check symlink compatibility only once (per cache directory) at first time use
+ if cache_dir not in _are_symlinks_supported_in_dir:
+ _are_symlinks_supported_in_dir[cache_dir] = True
+
+ os.makedirs(cache_dir, exist_ok=True)
+ with SoftTemporaryDirectory(dir=cache_dir) as tmpdir:
+ src_path = Path(tmpdir) / "dummy_file_src"
+ src_path.touch()
+ dst_path = Path(tmpdir) / "dummy_file_dst"
+
+ # Relative source path as in `_create_symlink``
+ relative_src = os.path.relpath(src_path, start=os.path.dirname(dst_path))
+ try:
+ os.symlink(relative_src, dst_path)
+ except OSError:
+ # Likely running on Windows
+ _are_symlinks_supported_in_dir[cache_dir] = False
+
+ if not DISABLE_SYMLINKS_WARNING:
+ message = (
+ "cache-system uses symlinks by default to"
+ " efficiently store duplicated files but your machine does not"
+ f" support them in {cache_dir}. Caching files will still work"
+ " but in a degraded version that might require more space on"
+ " your disk. This warning can be disabled by setting the"
+ " `DISABLE_SYMLINKS_WARNING` environment variable."
+ )
+ if os.name == "nt":
+ message += (
+ "\nTo support symlinks on Windows, you either need to"
+ " activate Developer Mode or to run Python as an"
+ " administrator. In order to see activate developer mode,"
+ " see this article:"
+ " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development"
+ )
+ warnings.warn(message)
+
+ return _are_symlinks_supported_in_dir[cache_dir]
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/hf_hub_download.py b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/hf_hub_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd030852567dd028f4703005cb837100747da80d
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/ppdiffusers/utils/downloader/hf_hub_download.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from huggingface_hub import file_exists as hf_hub_file_exists # noqa: F401
+from huggingface_hub import hf_hub_download # noqa: F401
+from huggingface_hub import ( # noqa: F401
+ try_to_load_from_cache as hf_hub_try_to_load_from_cache,
+)
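This module only re-exports `huggingface_hub` entry points under ppdiffusers-local aliases. A minimal sketch of how the aliases are typically consumed is shown below; the repo id and filename are illustrative placeholders, not values taken from this diff.

```python
from ppdiffusers.utils.downloader.hf_hub_download import (
    hf_hub_download,
    hf_hub_file_exists,
    hf_hub_try_to_load_from_cache,
)

repo_id = "runwayml/stable-diffusion-v1-5"  # illustrative repo
filename = "model_index.json"

# Reuse a cached copy when possible, otherwise check the Hub and download.
cached = hf_hub_try_to_load_from_cache(repo_id, filename)
if isinstance(cached, str):
    path = cached
elif hf_hub_file_exists(repo_id, filename):
    path = hf_hub_download(repo_id, filename=filename)
else:
    raise FileNotFoundError(f"{filename} not found in {repo_id}")
print(path)
```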
diff --git a/VLMEvalKit_old/PaddleMIX/ppdiffusers/requirements.txt b/VLMEvalKit_old/PaddleMIX/ppdiffusers/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b7fa12a3c248df7f991de3e61eb3fdc2066ac75
--- /dev/null
+++ b/VLMEvalKit_old/PaddleMIX/ppdiffusers/requirements.txt
@@ -0,0 +1,18 @@
+paddlenlp>=3.0.0b2
+safetensors>=0.3.1
+ftfy
+regex
+Pillow
+opencv-python
+av
+# for test
+parameterized
+requests_mock
+omegaconf
+note_seq
+urllib3<=2.0.0
+einops>=0.6.1
+paddlesde
+ligo-segments
+huggingface_hub==0.23.0
+hf_transfer
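Pinning `huggingface_hub==0.23.0` alongside `hf_transfer` suggests accelerated downloads are intended; note that `hf_transfer` only takes effect when the `HF_HUB_ENABLE_HF_TRANSFER` environment variable is set before `huggingface_hub` is imported. A minimal sketch, with an illustrative repo id:

```python
import os

# Opt in to hf_transfer-backed downloads; must be set before importing huggingface_hub.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import hf_hub_download

path = hf_hub_download("runwayml/stable-diffusion-v1-5", "model_index.json")
print(path)
```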
diff --git a/VLMEvalKit_old/docs/en/_static/image/logo.svg b/VLMEvalKit_old/docs/en/_static/image/logo.svg
new file mode 100644
index 0000000000000000000000000000000000000000..043530572afb48d0eac26b4b53d448aae6e9a9af
--- /dev/null
+++ b/VLMEvalKit_old/docs/en/_static/image/logo.svg
@@ -0,0 +1,24 @@
+<!-- VLMEvalKit logo (created with Fabric.js 5.3.0); SVG markup not preserved here,
+     only the text element "VLMEvalKit". -->
diff --git a/lightning-hydra-template/src/models/components/__init__.py b/lightning-hydra-template/src/models/components/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/lightning-hydra-template/tests/helpers/__init__.py b/lightning-hydra-template/tests/helpers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/lightning-hydra-template/tests/helpers/run_if.py b/lightning-hydra-template/tests/helpers/run_if.py
new file mode 100644
index 0000000000000000000000000000000000000000..9703af425129d0225d0aeed20dedc3ed35bc7548
--- /dev/null
+++ b/lightning-hydra-template/tests/helpers/run_if.py
@@ -0,0 +1,142 @@
+"""Adapted from:
+
+https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tests/helpers/runif.py
+"""
+
+import sys
+from typing import Any, Dict, Optional
+
+import pytest
+import torch
+from packaging.version import Version
+from pkg_resources import get_distribution
+from pytest import MarkDecorator
+
+from tests.helpers.package_available import (
+ _COMET_AVAILABLE,
+ _DEEPSPEED_AVAILABLE,
+ _FAIRSCALE_AVAILABLE,
+ _IS_WINDOWS,
+ _MLFLOW_AVAILABLE,
+ _NEPTUNE_AVAILABLE,
+ _SH_AVAILABLE,
+ _TPU_AVAILABLE,
+ _WANDB_AVAILABLE,
+)
+
+
+class RunIf:
+ """RunIf wrapper for conditional skipping of tests.
+
+ Fully compatible with `@pytest.mark`.
+
+ Example:
+
+ ```python
+ @RunIf(min_torch="1.8")
+ @pytest.mark.parametrize("arg1", [1.0, 2.0])
+ def test_wrapper(arg1):
+ assert arg1 > 0
+ ```
+ """
+
+ def __new__(
+ cls,
+ min_gpus: int = 0,
+ min_torch: Optional[str] = None,
+ max_torch: Optional[str] = None,
+ min_python: Optional[str] = None,
+ skip_windows: bool = False,
+ sh: bool = False,
+ tpu: bool = False,
+ fairscale: bool = False,
+ deepspeed: bool = False,
+ wandb: bool = False,
+ neptune: bool = False,
+ comet: bool = False,
+ mlflow: bool = False,
+ **kwargs: Dict[Any, Any],
+ ) -> MarkDecorator:
+ """Creates a new `@RunIf` `MarkDecorator` decorator.
+
+ :param min_gpus: Min number of GPUs required to run test.
+ :param min_torch: Minimum pytorch version to run test.
+ :param max_torch: Maximum pytorch version to run test.
+ :param min_python: Minimum python version required to run test.
+ :param skip_windows: Skip test for Windows platform.
+ :param tpu: If a TPU is required to run the test.
+ :param sh: If `sh` module is required to run the test.
+ :param fairscale: If `fairscale` module is required to run the test.
+ :param deepspeed: If `deepspeed` module is required to run the test.
+ :param wandb: If `wandb` module is required to run the test.
+ :param neptune: If `neptune` module is required to run the test.
+ :param comet: If `comet` module is required to run the test.
+ :param mlflow: If `mlflow` module is required to run the test.
+ :param kwargs: Native `pytest.mark.skipif` keyword arguments.
+ """
+ conditions = []
+ reasons = []
+
+ if min_gpus:
+ conditions.append(torch.cuda.device_count() < min_gpus)
+ reasons.append(f"GPUs>={min_gpus}")
+
+ if min_torch:
+ torch_version = get_distribution("torch").version
+ conditions.append(Version(torch_version) < Version(min_torch))
+ reasons.append(f"torch>={min_torch}")
+
+ if max_torch:
+ torch_version = get_distribution("torch").version
+ conditions.append(Version(torch_version) >= Version(max_torch))
+ reasons.append(f"torch<{max_torch}")
+
+ if min_python:
+ py_version = (
+ f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
+ )
+ conditions.append(Version(py_version) < Version(min_python))
+ reasons.append(f"python>={min_python}")
+
+ if skip_windows:
+ conditions.append(_IS_WINDOWS)
+ reasons.append("does not run on Windows")
+
+ if tpu:
+ conditions.append(not _TPU_AVAILABLE)
+ reasons.append("TPU")
+
+ if sh:
+ conditions.append(not _SH_AVAILABLE)
+ reasons.append("sh")
+
+ if fairscale:
+ conditions.append(not _FAIRSCALE_AVAILABLE)
+ reasons.append("fairscale")
+
+ if deepspeed:
+ conditions.append(not _DEEPSPEED_AVAILABLE)
+ reasons.append("deepspeed")
+
+ if wandb:
+ conditions.append(not _WANDB_AVAILABLE)
+ reasons.append("wandb")
+
+ if neptune:
+ conditions.append(not _NEPTUNE_AVAILABLE)
+ reasons.append("neptune")
+
+ if comet:
+ conditions.append(not _COMET_AVAILABLE)
+ reasons.append("comet")
+
+ if mlflow:
+ conditions.append(not _MLFLOW_AVAILABLE)
+ reasons.append("mlflow")
+
+ reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
+ return pytest.mark.skipif(
+ condition=any(conditions),
+ reason=f"Requires: [{' + '.join(reasons)}]",
+ **kwargs,
+ )
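Since `RunIf.__new__` returns a plain `pytest.mark.skipif` marker, the conditions compose freely with other pytest marks, as in the class docstring. A small hedged usage sketch (the test body is illustrative, not part of the template):

```python
import pytest
import torch

from tests.helpers.run_if import RunIf


@RunIf(min_gpus=1, skip_windows=True)
@pytest.mark.parametrize("size", [4, 8])
def test_cuda_matmul(size):
    # Skipped unless at least one GPU is visible and the platform is not Windows.
    x = torch.randn(size, size, device="cuda")
    assert (x @ x).shape == (size, size)
```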
diff --git a/lightning-hydra-template/tests/helpers/run_sh_command.py b/lightning-hydra-template/tests/helpers/run_sh_command.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdd2ed633f1185dd7936924616be6a6359a7bca7
--- /dev/null
+++ b/lightning-hydra-template/tests/helpers/run_sh_command.py
@@ -0,0 +1,22 @@
+from typing import List
+
+import pytest
+
+from tests.helpers.package_available import _SH_AVAILABLE
+
+if _SH_AVAILABLE:
+ import sh
+
+
+def run_sh_command(command: List[str]) -> None:
+ """Default method for executing shell commands with `pytest` and `sh` package.
+
+ :param command: A list of shell commands as strings.
+ """
+ msg = None
+ try:
+ sh.python(command)
+ except sh.ErrorReturnCode as e:
+ msg = e.stderr.decode()
+ if msg:
+ pytest.fail(msg=msg)
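A typical pairing is to guard the test with `@RunIf(sh=True)` so it is skipped when the `sh` package is missing, then drive a script through `run_sh_command`. The script path and overrides below are placeholders rather than values taken from this diff.

```python
from tests.helpers.run_if import RunIf
from tests.helpers.run_sh_command import run_sh_command


@RunIf(sh=True)
def test_train_fast_dev_run():
    # Runs `python src/train.py ...` via the `sh` package and fails the test
    # if the command exits with a non-zero status.
    run_sh_command(["src/train.py", "trainer=cpu", "++trainer.fast_dev_run=true"])
```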