Header Ads

import numpy as np
from PIL import Image


class ImagePromptGenerator:
    """Build a short text prompt for an image from its VGG16 features.

    The image is run through ``FeatureExtractor``; each resulting feature
    vector is compared (cosine similarity) against the rows of a word
    embedding matrix, and the best-matching word is combined with the
    image orientation to form the prompt.
    """

    def __init__(self, embeddings_path="word_embeddings.npy", vocabulary=None):
        """Load the embedding matrix and create the feature extractor.

        Args:
            embeddings_path: Path of a ``.npy`` file holding a 2-D array
                with one embedding per row.
            vocabulary: Optional sequence of words, one per embedding row.
                When omitted, rows are labelled ``"word_<row index>"``
                because the ``.npy`` matrix itself carries no words.
        """
        self.word_embeddings = np.load(embeddings_path)
        self.vocabulary = vocabulary
        self.feature_extractor = FeatureExtractor()

    def generate_prompt(self, image):
        """Return a text prompt for *image*.

        Args:
            image: A ``PIL.Image.Image`` or an array-like of pixels
                (height x width [x channels]).

        Returns:
            The best-matching word followed by an orientation phrase.

        Raises:
            ValueError: If no candidate word could be produced, or if the
                embedding matrix is not compatible with the features.
        """
        # Extract features from the image.
        features = self.feature_extractor.extract_features(image)

        # Torch tensors must be detached and moved to host memory before
        # NumPy can read them; anything else is treated as array-like.
        if hasattr(features, "detach"):
            features = features.detach().cpu().numpy()
        feature_rows = np.atleast_2d(np.asarray(features))

        # Collect the closest words for every feature vector.
        candidate_prompts = [
            word
            for feature in feature_rows
            for word in self._most_similar(feature, topn=10)
        ]
        if not candidate_prompts:
            raise ValueError("no candidate words could be generated for the image")

        # The top candidate is used; only the orientation of the image
        # influences the rest of the prompt.
        final_prompt = candidate_prompts[0]
        width, height = self._image_dimensions(image)
        if width > height:
            final_prompt = final_prompt + " in a landscape orientation."
        else:
            final_prompt = final_prompt + " in a portrait orientation."
        return final_prompt

    def _most_similar(self, feature, topn=10):
        """Return up to *topn* words ranked by cosine similarity to *feature*."""
        embeddings = np.asarray(self.word_embeddings)
        query = np.asarray(feature).ravel()

        if embeddings.ndim != 2:
            raise ValueError(
                f"word embeddings must be a 2-D array, got {embeddings.ndim}-D"
            )
        if embeddings.shape[1] != query.shape[0]:
            raise ValueError(
                f"feature dimension {query.shape[0]} does not match "
                f"embedding dimension {embeddings.shape[1]}"
            )
        if self.vocabulary is not None and len(self.vocabulary) != len(embeddings):
            raise ValueError(
                f"vocabulary has {len(self.vocabulary)} entries but there are "
                f"{len(embeddings)} embedding rows"
            )

        dots = embeddings @ query
        norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query)
        # Zero-norm rows (or a zero query) get similarity 0 instead of NaN.
        scores = np.zeros(len(embeddings), dtype=np.float64)
        np.divide(dots, norms, out=scores, where=norms > 0)

        best_rows = np.argsort(-scores, kind="stable")[:topn]
        return [self._word_for(int(row)) for row in best_rows]

    def _word_for(self, row):
        """Map an embedding row index to its word (or a placeholder label)."""
        if self.vocabulary is None:
            return f"word_{row}"
        return str(self.vocabulary[row])

    @staticmethod
    def _image_dimensions(image):
        """Return ``(width, height)`` for a PIL image or a pixel array."""
        if isinstance(image, Image.Image):
            return image.size
        height, width = np.asarray(image).shape[:2]
        return width, height


class FeatureExtractor:
    """Extract flattened convolutional features with a pretrained VGG16."""

    # Channel statistics that torchvision's ImageNet-pretrained models
    # expect their inputs to be normalised with.
    _IMAGENET_MEAN = (0.485, 0.456, 0.406)
    _IMAGENET_STD = (0.229, 0.224, 0.225)

    def __init__(self):
        # Imported here so the name is actually in scope; the module never
        # imported torchvision at the top level.
        import torchvision

        self.vgg16 = torchvision.models.vgg16(pretrained=True)
        self.vgg16.eval()

    def extract_features(self, image):
        """Return the VGG16 feature map of *image*, flattened per sample.

        Args:
            image: A ``PIL.Image.Image`` or an array-like of pixels.

        Returns:
            A tensor with one row (batch size 1) holding the flattened
            output of ``vgg16.features``.
        """
        import torch
        import torchvision

        # Preprocess the image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.asarray(image))
        # VGG16 needs exactly three channels; grayscale/RGBA would fail.
        image = image.convert("RGB").resize((224, 224))
        tensor = torchvision.transforms.ToTensor()(image)
        tensor = torchvision.transforms.Normalize(
            mean=self._IMAGENET_MEAN, std=self._IMAGENET_STD
        )(tensor)

        # Extract the features; no gradients are needed for inference.
        with torch.no_grad():
            features = self.vgg16.features(tensor.unsqueeze(0))

        # Flatten everything except the batch dimension.
        return features.flatten(1)


if __name__ == "__main__":
    # Example usage:
    image_path = "cat_on_couch.jpg"
    with Image.open(image_path) as image:
        prompt_generator = ImagePromptGenerator()
        prompt = prompt_generator.generate_prompt(image)
    print(prompt)

No comments

Powered by Blogger.