import numpy as np
import torch
import torchvision
from PIL import Image
class ImagePromptGenerator:
    """Build a short text prompt for an image.

    A feature vector is extracted from the image, the rows of a word-embedding
    matrix are ranked by cosine similarity to that vector, and the image
    orientation is appended to the best-matching word.

    NOTE(review): the similarity search requires every embedding row to have
    the same dimensionality as the extractor's feature vectors; a mismatch
    raises ``ValueError``. Confirm the embeddings file was built for the
    feature extractor in use.
    """

    def __init__(self, embeddings_path="word_embeddings.npy", vocabulary=None):
        """Load the embedding matrix and create the feature extractor.

        Args:
            embeddings_path: Path of a ``.npy`` file holding a 2-D array with
                one embedding per row.
            vocabulary: Optional sequence of words, one per embedding row.
                When omitted, the row index (as a string) stands in for the
                word.

        Raises:
            ValueError: If the loaded array is not 2-D, or ``vocabulary`` does
                not have exactly one entry per embedding row.
        """
        # np.load returns a plain ndarray, which has no ``most_similar``
        # method, so the nearest-word search lives in ``_most_similar``.
        self.word_embeddings = np.load(embeddings_path)
        if self.word_embeddings.ndim != 2:
            raise ValueError(
                "word embeddings must be a 2-D array, got "
                f"{self.word_embeddings.ndim} dimension(s)"
            )
        if vocabulary is not None:
            vocabulary = list(vocabulary)
            if len(vocabulary) != self.word_embeddings.shape[0]:
                raise ValueError(
                    f"vocabulary has {len(vocabulary)} entries but there are "
                    f"{self.word_embeddings.shape[0]} embedding rows"
                )
        self.vocabulary = vocabulary
        self.feature_extractor = FeatureExtractor()

    def generate_prompt(self, image, topn=10):
        """Return a text prompt for ``image``.

        Args:
            image: A PIL-style image (``size`` is a ``(width, height)`` tuple)
                or an array whose shape starts with ``(height, width)``.
            topn: Number of nearest words collected per feature vector.

        Returns:
            The best-matching word followed by an orientation phrase.

        Raises:
            ValueError: If no candidate word could be produced, or the feature
                and embedding dimensionalities differ.
            TypeError: If the image dimensions cannot be determined.
        """
        features = self.feature_extractor.extract_features(image)
        # Torch tensors expose ``detach``; bring them over to NumPy so the
        # similarity search has a single code path.
        if hasattr(features, "detach"):
            features = features.detach().cpu().numpy()
        features = np.atleast_2d(np.asarray(features))

        candidate_prompts = [
            word
            for feature in features
            for word in self._most_similar(feature, topn=topn)
        ]
        if not candidate_prompts:
            raise ValueError("no candidate prompts could be generated for the image")

        # Candidates are ordered by similarity, so the first is the best match.
        final_prompt = candidate_prompts[0]
        width, height = self._image_width_height(image)
        if width > height:
            return final_prompt + " in a landscape orientation."
        # Square images fall through to "portrait".
        return final_prompt + " in a portrait orientation."

    def _most_similar(self, feature, topn=10):
        """Return up to ``topn`` words ranked by cosine similarity to ``feature``."""
        vector = np.asarray(feature, dtype=float).ravel()
        matrix = self.word_embeddings
        if matrix.ndim != 2 or matrix.shape[1] != vector.shape[0]:
            raise ValueError(
                f"feature has {vector.shape[0]} dimensions but the word "
                f"embeddings have shape {matrix.shape}"
            )
        dots = np.asarray(matrix @ vector, dtype=float)
        norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)
        # Zero-norm rows (or a zero feature) get similarity 0 instead of NaN.
        scores = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)
        # Stable sort keeps the original row order among equal scores.
        ranked = np.argsort(-scores, kind="stable")[:topn]
        return [self._word_for(int(index)) for index in ranked]

    def _word_for(self, index):
        """Map an embedding row index to its word (or the index as a string)."""
        if self.vocabulary is None:
            return str(index)
        return self.vocabulary[index]

    @staticmethod
    def _image_width_height(image):
        """Return ``(width, height)`` for a PIL-style image or an array."""
        size = getattr(image, "size", None)
        # PIL images report ``size`` as a (width, height) tuple, whereas
        # ndarray.size is an int element count.
        if isinstance(size, tuple) and len(size) == 2:
            return size
        shape = getattr(image, "shape", None)
        if shape is not None and len(shape) >= 2:
            return shape[1], shape[0]
        raise TypeError("cannot determine the width and height of the image")
class FeatureExtractor:
    """Extract flattened VGG16 convolutional features from an image."""

    # Spatial size the image is resized to before it is fed to the network.
    _INPUT_SIZE = (224, 224)
    # Per-channel statistics torchvision documents for its ImageNet-pretrained
    # models; inputs must be normalised with them.
    _IMAGENET_MEAN = (0.485, 0.456, 0.406)
    _IMAGENET_STD = (0.229, 0.224, 0.225)

    def __init__(self):
        """Load the pretrained VGG16 model and build the preprocessing step."""
        models = torchvision.models
        # ``pretrained=True`` is deprecated in newer torchvision releases in
        # favour of ``weights=``; fall back to it only on older versions.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            self.vgg16 = models.vgg16(weights=weights_enum.DEFAULT)
        else:
            self.vgg16 = models.vgg16(pretrained=True)
        # Inference only: switch the model out of training mode.
        self.vgg16.eval()
        self._to_normalized_tensor = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    self._IMAGENET_MEAN, self._IMAGENET_STD
                ),
            ]
        )

    def extract_features(self, image):
        """Return the flattened VGG16 feature maps for ``image``.

        Args:
            image: A PIL image, or array data accepted by ``Image.fromarray``.

        Returns:
            A tensor with a leading batch dimension of 1 and the feature maps
            flattened into the second dimension.
        """
        # Accept both PIL images and raw arrays; ``Image.fromarray`` alone
        # rejects an image that is already a PIL object.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.asarray(image))
        # VGG16's first convolution takes 3 channels, so grayscale/RGBA input
        # is converted to RGB before resizing.
        image = image.convert("RGB").resize(self._INPUT_SIZE)
        batch = self._to_normalized_tensor(image).unsqueeze(0)
        # No gradients are needed for feature extraction.
        with torch.no_grad():
            features = self.vgg16.features(batch)
        return features.flatten(1)
# Example usage: runs only when the file is executed as a script, so importing
# the module no longer loads model weights or reads files as a side effect.
if __name__ == "__main__":
    image_path = "cat_on_couch.jpg"
    # The context manager closes the underlying image file when done.
    with Image.open(image_path) as image:
        prompt_generator = ImagePromptGenerator()
        prompt = prompt_generator.generate_prompt(image)
    print(prompt)
# Post a Comment  (web-page residue from the original listing; not part of the program)