import torch
import clip
from PIL import Image
from torchvision import transforms
import json
import cv2
import numpy as np
from sklearn.cluster import OPTICS, KMeans
from sklearn.manifold import TSNE
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import matplotlib
import random
matplotlib.use('TkAgg')
import os
import pdb

# Feature and image-path buffers, keyed by the source directory of each image.
all_img_text_features = {"coco/train2017/": [], "vg/VG_100K/": [], "vg/VG_100K_2/": [],
                         "gqa/images/": [], "ocr_vqa/images/": [], "textvqa/train_images/": []}
all_img_path = {"coco/train2017/": [], "vg/VG_100K/": [], "vg/VG_100K_2/": [],
                "gqa/images/": [], "ocr_vqa/images/": [], "textvqa/train_images/": []}

if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, processor = clip.load("ViT-B/32", device=device)
    tokenizer = clip.tokenize

    input_json = "llava_v1_5_mix665k.json"
    # Parse the JSON file
    merged_datas = []
    count = 0
    batch_size = 2
    img_paths = []
    all_descriptions = []
    with open(input_json, 'r', encoding='utf-8') as json_file:
        datas = json.load(json_file)
        print("all data num: ", len(datas))
        # datas = random.sample(datas, 103)
        print("all data num: ", len(datas))

        # Track the longest conversation turn (in whitespace-split words) and
        # the image path at which each new maximum was reached.
        max_num = -1
        max_num_list = []
        max_num_img_path = []
        for data in datas:
            img_text_id = data['id']
            if "image" not in data.keys():
                count += 1
                img_path = os.path.join("textvqa/train_images", img_text_id + ".jpg")
            else:
                img_path = data['image']
            print(img_path)
            # test
            # img_path = "med_1.png"
            # img = Image.open(img_path)
            conversations = data['conversations']
            descriptions = ""
            for index, conversation in enumerate(conversations):
                # print("num: ", len(conversation['value'].split(" ")))
                if len(conversation['value'].split(" ")) > max_num:
                    max_num = len(conversation['value'].split(" "))
                    max_num_list.append(max_num)
                    max_num_img_path.append(img_path)
        print("max_num: ", max_num_list)
        print("max_num_img_path: ", max_num_img_path)