import os

import numpy as np
import pandas as pd
import torch
from IPython.display import display
from PIL import Image
from tqdm.notebook import tqdm

import clip
# Build an index -> filename map for every entry in the image folder,
# so images can be addressed by integer position during the search loop.
data_location = "./imgs"
img_dict = dict(enumerate(os.listdir(data_location)))
img_nums = len(img_dict)
print("There are {} images.".format(img_nums))

# Warn (but do not abort) when the folder is empty; the loop below
# simply runs zero times in that case.
if img_nums == 0:
    print('no image in the folder.')
# Get a text description from the user and encode it with CLIP.
# NOTE(review): `model` and `device` are assumed to come from an earlier
# `clip.load(...)` setup not visible in this chunk — confirm.
instr = input('Please input description:')  # fixed typo: "Pleaze" -> "Please"
text_input = clip.tokenize(instr).to(device)
with torch.no_grad():  # inference only — no gradients needed
    text_f = model.encode_text(text_input)
# Per-image similarity scores, keyed by the integer index from img_dict;
# presumably filled inside the loop below (continuation not shown here).
sim = {}
pbar = tqdm(total=img_nums)  # progress bar over all images
# Encode every image in the folder with CLIP.
# NOTE(review): `preprocess`, `model`, and `device` are assumed to come from
# an earlier `clip.load(...)` call not visible in this chunk — confirm.
# NOTE(review): the rest of the loop body (similarity computation,
# pbar.update) appears to continue beyond this chunk.
for i in range(img_nums):
    image_path = f'{data_location}/{img_dict[i]}'
    img = Image.open(image_path)
    img_input = preprocess(img).unsqueeze(0).to(device)

    # image encode
    with torch.no_grad():  # inference only — no gradients needed
        img_f = model.encode_image(img_input)