From 9c3430691e4a4c8cb0a144110a8e2f81d3f69dc3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 25 Nov 2025 15:53:11 +0000 Subject: [PATCH 1/2] feat: Add resolution options to Google image generators Co-authored-by: ernestfrench --- MultiImageClient/Enums/GoogleImageSize.cs | 23 ++ .../ImageGenerators/GoogleGenerator.cs | 39 ++- .../ImageGenerators/GoogleImagen4Generator.cs | 29 +- MultiImageClient/Workflows/GeneratorGroups.cs | 19 +- ...oogleImagenResolutionImplementationPlan.md | 325 ++++++++++++++++++ docs/GoogleImagenResolutionOptions.md | 133 +++++++ 6 files changed, 549 insertions(+), 19 deletions(-) create mode 100644 MultiImageClient/Enums/GoogleImageSize.cs create mode 100644 docs/GoogleImagenResolutionImplementationPlan.md create mode 100644 docs/GoogleImagenResolutionOptions.md diff --git a/MultiImageClient/Enums/GoogleImageSize.cs b/MultiImageClient/Enums/GoogleImageSize.cs new file mode 100644 index 0000000..6342c1b --- /dev/null +++ b/MultiImageClient/Enums/GoogleImageSize.cs @@ -0,0 +1,23 @@ +namespace MultiImageClient +{ + public enum GoogleImageSize + { + Size1K, + Size2K, + Size4K // Note: 4K only supported by Gemini, not Imagen 4 + } + + public static class GoogleImageSizeExtensions + { + public static string ToApiString(this GoogleImageSize size) + { + return size switch + { + GoogleImageSize.Size1K => "1K", + GoogleImageSize.Size2K => "2K", + GoogleImageSize.Size4K => "4K", + _ => "1K" + }; + } + } +} diff --git a/MultiImageClient/ImageGenerators/GoogleGenerator.cs b/MultiImageClient/ImageGenerators/GoogleGenerator.cs index 09004ee..6124f54 100644 --- a/MultiImageClient/ImageGenerators/GoogleGenerator.cs +++ b/MultiImageClient/ImageGenerators/GoogleGenerator.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Net.Http; @@ -17,11 +17,15 @@ public class GoogleGenerator : IImageGenerator private MultiClientRunStats _stats; private string _name; private ImageGeneratorApiType _apiType; + private GoogleImageSize _imageSize; + private string _aspectRatio; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleNanoBanana; public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConcurrency, - MultiClientRunStats stats, string name = "") + MultiClientRunStats stats, string name = "", + GoogleImageSize imageSize = GoogleImageSize.Size1K, + string aspectRatio = "1:1") { _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); @@ -29,21 +33,32 @@ public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConc _name = string.IsNullOrEmpty(name) ? "" : name; _stats = stats; _apiType = apiType; - + _imageSize = imageSize; + _aspectRatio = aspectRatio; } public string GetFilenamePart(PromptDetails pd) { - return $"{_apiType}"; + var namePart = string.IsNullOrEmpty(_name) ? "" : $"-{_name}"; + return $"{_apiType}{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; } public decimal GetCost() { // Gemini 2.5 Flash Image uses token-based pricing // $30 per 1 million tokens for image output (1290 tokens per image up to 1024x1024px) + // Higher resolutions consume proportionally more tokens if (_apiType == ImageGeneratorApiType.GoogleNanoBanana) { - return (30m / 1000000m) * 1290m; + var baseTokens = 1290m; + var multiplier = _imageSize switch + { + GoogleImageSize.Size1K => 1.0m, + GoogleImageSize.Size2K => 4.0m, // 2x2 = 4x pixels + GoogleImageSize.Size4K => 16.0m, // 4x4 = 16x pixels + _ => 1.0m + }; + return (30m / 1000000m) * baseTokens * multiplier; } else if (_apiType == ImageGeneratorApiType.GoogleImagen4) { @@ -57,18 +72,19 @@ public decimal GetCost() public List GetRightParts() { - return new List { _apiType.ToString() }; + var namePart = string.IsNullOrEmpty(_name) ? "" : _name; + return new List { _apiType.ToString(), namePart, _imageSize.ToApiString(), _aspectRatio }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return $"google-{_apiType.ToString()}"; + return $"google-{_apiType.ToString()}\n{_imageSize.ToApiString()} {_aspectRatio}"; } else { - return _name; + return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio}"; } } @@ -96,7 +112,12 @@ public async Task ProcessPromptAsync(IImageGenerator generato }, generationConfig = new { - responseModalities = new[] { "TEXT", "IMAGE" } + responseModalities = new[] { "TEXT", "IMAGE" }, + imageConfig = new + { + imageSize = _imageSize.ToApiString(), + aspectRatio = _aspectRatio + } } }; diff --git a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs index 7d62c3c..4a0b43b 100644 --- a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs +++ b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs @@ -26,6 +26,7 @@ public class GoogleImagen4Generator : IImageGenerator private string _projectId; private string _googleServiceAccountKeyPath; private GoogleCredential _credential; + private GoogleImageSize _imageSize; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleImagen4; @@ -35,14 +36,22 @@ public GoogleImagen4Generator(string apiKey, int maxConcurrency, string safetyFilterLevel, string location, string projectId, - string googleServiceAccountKeyPath) + string googleServiceAccountKeyPath, + GoogleImageSize imageSize = GoogleImageSize.Size1K) { + // Imagen 4 does not support 4K resolution + if (imageSize == GoogleImageSize.Size4K) + { + throw new ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); + } + _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); _location = location; _projectId = projectId; _googleServiceAccountKeyPath = googleServiceAccountKeyPath; + _imageSize = imageSize; if (!string.IsNullOrEmpty(_googleServiceAccountKeyPath)) { @@ -68,30 +77,35 @@ public GoogleImagen4Generator(string apiKey, int maxConcurrency, public string GetFilenamePart(PromptDetails pd) { var namePart = string.IsNullOrEmpty(_name) ? "" : $"-{_name}"; - return $"google-imagen4{namePart}"; + return $"google-imagen4{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; } public decimal GetCost() { - // Imagen 4 pricing (higher than Imagen 3) - return 0.04m; + // Imagen 4 pricing - higher resolutions cost more + return _imageSize switch + { + GoogleImageSize.Size1K => 0.04m, + GoogleImageSize.Size2K => 0.08m, + _ => 0.04m + }; } public List GetRightParts() { var namePart = string.IsNullOrEmpty(_name) ? "" : _name; - return new List { "imagen4", namePart }; + return new List { "imagen4", namePart, _imageSize.ToApiString(), _aspectRatio }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return "google-imagen4"; + return $"google-imagen4\n{_imageSize.ToApiString()} {_aspectRatio}"; } else { - return _name; + return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio}"; } } @@ -115,6 +129,7 @@ public async Task ProcessPromptAsync(IImageGenerator generato { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(1) }, { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_aspectRatio) }, + { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_imageSize.ToApiString()) }, { "enhancePrompt", Google.Protobuf.WellKnownTypes.Value.ForBool(false) }, { "includeRaiReason", Google.Protobuf.WellKnownTypes.Value.ForBool(true) }, { "safetyFilterLevel", Google.Protobuf.WellKnownTypes.Value.ForString(_safetyFilterLevel) }, diff --git a/MultiImageClient/Workflows/GeneratorGroups.cs b/MultiImageClient/Workflows/GeneratorGroups.cs index 92dd2e9..84f63e2 100644 --- a/MultiImageClient/Workflows/GeneratorGroups.cs +++ b/MultiImageClient/Workflows/GeneratorGroups.cs @@ -1,4 +1,4 @@ -using IdeogramAPIClient; +using IdeogramAPIClient; using OpenAI.Images; @@ -63,8 +63,21 @@ public IEnumerable GetAll() //var myGenerators = new List() { dalle3, ideogram2, bfl1, bfl2, bfl3, recraft6, ideogram4, }; //var myGenerators = new List() { dalle3, recraft1, recraft2, recraft3, recraft4, recraft5, recraft6, ideogram1, ideogram2, bfl1, bfl2 }; - var google_banana = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats); - var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", "2:5", "BLOCK_NONE", location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath); + // Google Gemini/Nano Banana generators with various resolutions + var google_banana = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + imageSize: GoogleImageSize.Size1K, aspectRatio: "1:1"); + var google_banana_2k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + name: "banana-2k", imageSize: GoogleImageSize.Size2K, aspectRatio: "16:9"); + var google_banana_4k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, + name: "banana-4k", imageSize: GoogleImageSize.Size4K, aspectRatio: "1:1"); + + // Google Imagen 4 generators (max 2K, no 4K support) + var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", "16:9", "BLOCK_NONE", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size1K); + var googleimagen_2k = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-2k", "16:9", "BLOCK_NONE", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size2K); //recraft8, recraft9, var myGenerators = new List() { }; diff --git a/docs/GoogleImagenResolutionImplementationPlan.md b/docs/GoogleImagenResolutionImplementationPlan.md new file mode 100644 index 0000000..7d94bff --- /dev/null +++ b/docs/GoogleImagenResolutionImplementationPlan.md @@ -0,0 +1,325 @@ +# Implementation Plan: Google Imagen Resolution Support + +## Summary + +Add support for 1K, 2K, and 4K resolution options to both Google image generators in the codebase: +- `GoogleGenerator` (Gemini/Nano Banana) +- `GoogleImagen4Generator` (Vertex AI Imagen 4) + +## Existing Pattern Analysis + +Looking at how other generators handle size/resolution: + +| Generator | Size Parameter | Type | +|-----------|---------------|------| +| `RecraftGenerator` | `RecraftImageSize` | Enum with pixel dimensions | +| `IdeogramGenerator` | `IdeogramAspectRatio` | Enum with aspect ratios | +| `BFLGenerator` | `aspectRatio`, `width`, `height` | String + ints | +| `GptImageOneGenerator` | `size` | String ("1024x1024") | +| `Dalle3Generator` | `GeneratedImageSize` | OpenAI SDK enum | + +The Google APIs use simple string values ("1K", "2K", "4K"), so we should create a clean enum for type safety. + +--- + +## Implementation Steps + +### Step 1: Create GoogleImageSize Enum + +**File:** `MultiImageClient/Enums/GoogleImageSize.cs` + +```csharp +namespace MultiImageClient +{ + public enum GoogleImageSize + { + Size1K, + Size2K, + Size4K // Note: 4K only supported by Gemini, not Imagen 4 + } + + public static class GoogleImageSizeExtensions + { + public static string ToApiString(this GoogleImageSize size) + { + return size switch + { + GoogleImageSize.Size1K => "1K", + GoogleImageSize.Size2K => "2K", + GoogleImageSize.Size4K => "4K", + _ => "1K" + }; + } + } +} +``` + +### Step 2: Update GoogleGenerator (Gemini/Nano Banana) + +**File:** `MultiImageClient/ImageGenerators/GoogleGenerator.cs` + +Changes needed: +1. Add `_imageSize` and `_aspectRatio` fields +2. Update constructor to accept these parameters +3. Modify the request body to include `imageConfig` +4. Update pricing based on resolution + +```csharp +public class GoogleGenerator : IImageGenerator +{ + private SemaphoreSlim _googleSemaphore; + private HttpClient _httpClient; + private string _apiKey; + private MultiClientRunStats _stats; + private string _name; + private ImageGeneratorApiType _apiType; + private GoogleImageSize _imageSize; // NEW + private string _aspectRatio; // NEW + + public GoogleGenerator( + ImageGeneratorApiType apiType, + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name = "", + GoogleImageSize imageSize = GoogleImageSize.Size1K, // NEW + string aspectRatio = "1:1") // NEW + { + _apiKey = apiKey; + _googleSemaphore = new SemaphoreSlim(maxConcurrency); + _httpClient = new HttpClient(); + _name = string.IsNullOrEmpty(name) ? "" : name; + _stats = stats; + _apiType = apiType; + _imageSize = imageSize; // NEW + _aspectRatio = aspectRatio; // NEW + } + + // In ProcessPromptAsync, update the request body: + var requestBody = new + { + contents = new[] + { + new + { + parts = new[] + { + new { text = promptDetails.Prompt } + } + } + }, + generationConfig = new + { + responseModalities = new[] { "TEXT", "IMAGE" }, + imageConfig = new // NEW + { + imageSize = _imageSize.ToApiString(), + aspectRatio = _aspectRatio + } + } + }; + + // Update GetCost() to account for resolution + public decimal GetCost() + { + if (_apiType == ImageGeneratorApiType.GoogleNanoBanana) + { + // Higher resolution = more tokens + var baseTokens = 1290m; + var multiplier = _imageSize switch + { + GoogleImageSize.Size1K => 1.0m, + GoogleImageSize.Size2K => 4.0m, // 2x2 = 4x pixels + GoogleImageSize.Size4K => 16.0m, // 4x4 = 16x pixels + _ => 1.0m + }; + return (30m / 1000000m) * baseTokens * multiplier; + } + // ... rest + } + + // Update GetFilenamePart and GetGeneratorSpecPart to include resolution + public string GetFilenamePart(PromptDetails pd) + { + return $"{_apiType}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; + } +} +``` + +### Step 3: Update GoogleImagen4Generator + +**File:** `MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs` + +Changes needed: +1. Add `_imageSize` field (only supports 1K and 2K) +2. Update constructor +3. Modify the request to include `sampleImageSize` + +```csharp +public class GoogleImagen4Generator : IImageGenerator +{ + // ... existing fields ... + private GoogleImageSize _imageSize; // NEW + + public GoogleImagen4Generator( + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name, + string aspectRatio, + string safetyFilterLevel, + string location, + string projectId, + string googleServiceAccountKeyPath, + GoogleImageSize imageSize = GoogleImageSize.Size1K) // NEW + { + // ... existing initialization ... + + // Validate: Imagen 4 doesn't support 4K + if (imageSize == GoogleImageSize.Size4K) + { + throw new ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); + } + _imageSize = imageSize; + } + + public async Task ProcessPromptAsync(...) + { + // Update the instance to include sampleImageSize: + var instance = new Google.Protobuf.WellKnownTypes.Value + { + StructValue = new Google.Protobuf.WellKnownTypes.Struct + { + Fields = + { + { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, + { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(1) }, + { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_aspectRatio) }, + { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_imageSize.ToApiString()) }, // NEW + // ... other fields ... + } + } + }; + // ... + } + + // Update pricing based on resolution + public decimal GetCost() + { + return _imageSize switch + { + GoogleImageSize.Size1K => 0.04m, + GoogleImageSize.Size2K => 0.08m, // Estimated - higher res likely costs more + _ => 0.04m + }; + } +} +``` + +### Step 4: Update GeneratorGroups.cs + +**File:** `MultiImageClient/Workflows/GeneratorGroups.cs` + +```csharp +// Example usage with different resolutions: + +// Gemini/Nano Banana with various resolutions +var google_banana_1k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-1k", + imageSize: GoogleImageSize.Size1K, + aspectRatio: "16:9"); + +var google_banana_2k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-2k", + imageSize: GoogleImageSize.Size2K, + aspectRatio: "16:9"); + +var google_banana_4k = new GoogleGenerator( + ImageGeneratorApiType.GoogleNanoBanana, + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "banana-4k", + imageSize: GoogleImageSize.Size4K, + aspectRatio: "1:1"); + +// Imagen 4 with various resolutions (no 4K support) +var googleimagen_1k = new GoogleImagen4Generator( + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "imagen4-1k", + aspectRatio: "16:9", + safetyFilterLevel: "BLOCK_NONE", + location: _settings.GoogleCloudLocation, + projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size1K); + +var googleimagen_2k = new GoogleImagen4Generator( + _settings.GoogleGeminiApiKey, + _concurrency, + _stats, + name: "imagen4-2k", + aspectRatio: "16:9", + safetyFilterLevel: "BLOCK_NONE", + location: _settings.GoogleCloudLocation, + projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + imageSize: GoogleImageSize.Size2K); +``` + +--- + +## Files to Create/Modify + +| File | Action | Description | +|------|--------|-------------| +| `MultiImageClient/Enums/GoogleImageSize.cs` | **CREATE** | New enum for resolution options | +| `MultiImageClient/ImageGenerators/GoogleGenerator.cs` | MODIFY | Add imageSize & aspectRatio params | +| `MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs` | MODIFY | Add sampleImageSize param | +| `MultiImageClient/Workflows/GeneratorGroups.cs` | MODIFY | Update instantiation examples | + +--- + +## Testing Checklist + +- [ ] Verify 1K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 2K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 4K resolution works for GoogleGenerator (Gemini) +- [ ] Verify 1K resolution works for GoogleImagen4Generator +- [ ] Verify 2K resolution works for GoogleImagen4Generator +- [ ] Verify 4K throws appropriate error for GoogleImagen4Generator +- [ ] Verify aspect ratio combinations work with different resolutions +- [ ] Verify cost calculations are updated appropriately +- [ ] Verify file naming includes resolution info + +--- + +## Comparison with Other Generators + +This implementation follows established patterns: + +| Feature | GoogleGenerator | RecraftGenerator | IdeogramGenerator | +|---------|----------------|------------------|-------------------| +| Size enum | `GoogleImageSize` | `RecraftImageSize` | N/A (aspect only) | +| Aspect param | String | N/A (part of size) | Enum | +| Default size | 1K | 1024x1024 | N/A | +| Max size | 4K (Gemini) | 2048x1024 | 1536x1536 | + +--- + +## Notes + +1. **4K Limitation**: Only Gemini supports 4K; Imagen 4 maxes out at 2K +2. **Pricing**: Higher resolutions will increase costs - exact pricing TBD +3. **Quality vs Speed**: Larger images take longer to generate +4. **Aspect Ratio Interaction**: Resolution constrains the longer dimension while aspect ratio is maintained diff --git a/docs/GoogleImagenResolutionOptions.md b/docs/GoogleImagenResolutionOptions.md new file mode 100644 index 0000000..b9c689f --- /dev/null +++ b/docs/GoogleImagenResolutionOptions.md @@ -0,0 +1,133 @@ +# Google Imagen API Resolution Options + +## Overview + +Google provides two distinct APIs for image generation, each with different resolution options: + +1. **Google Imagen 4 (Vertex AI)** - Dedicated image generation model accessed via Vertex AI +2. **Google Gemini API (Nano Banana)** - Multimodal LLM with native image generation + +--- + +## Imagen 4 (Vertex AI) - `imagen-4.0-generate-001` + +### Supported Models +- `imagen-4.0-generate-001` +- `imagen-4.0-ultra-generate-001` + +### Resolution Parameter: `sampleImageSize` + +| Value | Description | +|-------|-------------| +| `"1K"` | ~1024px (default) | +| `"2K"` | ~2048px | + +**Note:** 4K is NOT supported for Imagen 4 according to current documentation. + +### API Request Structure (REST) + +```json +{ + "instances": [ + { + "prompt": "TEXT_PROMPT" + } + ], + "parameters": { + "sampleImageSize": "2K", + "sampleCount": 1 + } +} +``` + +### Aspect Ratio Support +Imagen 4 also supports `aspectRatio` parameter with these values: +- `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` + +--- + +## Gemini API (Native Image Generation - "Nano Banana") + +### Resolution Parameter: `imageSize` (within `imageConfig`) + +| Value | Description | +|-------|-------------| +| `"1K"` | ~1024px | +| `"2K"` | ~2048px | +| `"4K"` | ~4096px | + +### Aspect Ratio Support +Same as Imagen 4: +- `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` + +### API Request Structure (REST) + +```json +{ + "contents": [ + { + "parts": [ + { "text": "PROMPT" } + ] + } + ], + "generationConfig": { + "responseModalities": ["TEXT", "IMAGE"], + "imageConfig": { + "aspectRatio": "16:9", + "imageSize": "2K" + } + } +} +``` + +### Python SDK Example + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-2.0-flash-exp", + contents="Create an image of a dog", + config=types.GenerateContentConfig( + response_modalities=["TEXT", "IMAGE"], + image_config=types.ImageConfig( + aspect_ratio="16:9", + image_size="2K" # "1K", "2K", or "4K" + ), + ), +) +``` + +--- + +## Resolution vs Pixel Dimensions Summary + +| Setting | Approximate Resolution | +|---------|----------------------| +| 1K | 1024 × 1024 (for 1:1) | +| 2K | 2048 × 2048 (for 1:1) | +| 4K | 4096 × 4096 (for 1:1, Gemini only) | + +For non-square aspect ratios, the longer dimension is constrained to the K value while maintaining the aspect ratio. + +--- + +## Pricing Considerations + +Higher resolutions may incur higher costs: +- **Imagen 4**: ~$0.04 per image (standard resolution) +- **Gemini Flash Image**: Token-based pricing (~$30/1M output tokens, ~1290 tokens per image at 1K) + +Higher resolutions likely consume more tokens for Gemini and may have different pricing tiers for Imagen. + +--- + +## References + +- [Vertex AI Set Output Resolution](https://cloud.google.com/vertex-ai/generative-ai/docs/image/set-output-resolution) +- [Gemini API Image Generation](https://ai.google.dev/gemini-api/docs/image-generation) +- [Configure Aspect Ratio](https://cloud.google.com/vertex-ai/generative-ai/docs/image/configure-aspect-ratio) From ff6ec4f3c590e0de231cefb6f59d72d01fcad8e7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 25 Nov 2025 15:59:56 +0000 Subject: [PATCH 2/2] Refactor Google generators to use options classes Co-authored-by: ernestfrench --- .../Enums/GoogleImageAspectRatio.cs | 37 ++++ .../Enums/GoogleOutputMimeType.cs | 31 ++++ .../Enums/GooglePersonGeneration.cs | 23 +++ .../Enums/GoogleSafetyFilterLevel.cs | 25 +++ .../ImageGenerators/GoogleGenerator.cs | 24 ++- .../GoogleImageGenerationOptions.cs | 136 ++++++++++++++ .../ImageGenerators/GoogleImagen4Generator.cs | 138 +++++++------- MultiImageClient/Workflows/GeneratorGroups.cs | 52 +++++- docs/GoogleImagenResolutionOptions.md | 172 ++++++++++-------- 9 files changed, 480 insertions(+), 158 deletions(-) create mode 100644 MultiImageClient/Enums/GoogleImageAspectRatio.cs create mode 100644 MultiImageClient/Enums/GoogleOutputMimeType.cs create mode 100644 MultiImageClient/Enums/GooglePersonGeneration.cs create mode 100644 MultiImageClient/Enums/GoogleSafetyFilterLevel.cs create mode 100644 MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs diff --git a/MultiImageClient/Enums/GoogleImageAspectRatio.cs b/MultiImageClient/Enums/GoogleImageAspectRatio.cs new file mode 100644 index 0000000..329f77c --- /dev/null +++ b/MultiImageClient/Enums/GoogleImageAspectRatio.cs @@ -0,0 +1,37 @@ +namespace MultiImageClient +{ + public enum GoogleImageAspectRatio + { + Ratio1x1, + Ratio2x3, + Ratio3x2, + Ratio3x4, + Ratio4x3, + Ratio4x5, + Ratio5x4, + Ratio9x16, + Ratio16x9, + Ratio21x9 + } + + public static class GoogleImageAspectRatioExtensions + { + public static string ToApiString(this GoogleImageAspectRatio ratio) + { + return ratio switch + { + GoogleImageAspectRatio.Ratio1x1 => "1:1", + GoogleImageAspectRatio.Ratio2x3 => "2:3", + GoogleImageAspectRatio.Ratio3x2 => "3:2", + GoogleImageAspectRatio.Ratio3x4 => "3:4", + GoogleImageAspectRatio.Ratio4x3 => "4:3", + GoogleImageAspectRatio.Ratio4x5 => "4:5", + GoogleImageAspectRatio.Ratio5x4 => "5:4", + GoogleImageAspectRatio.Ratio9x16 => "9:16", + GoogleImageAspectRatio.Ratio16x9 => "16:9", + GoogleImageAspectRatio.Ratio21x9 => "21:9", + _ => "1:1" + }; + } + } +} diff --git a/MultiImageClient/Enums/GoogleOutputMimeType.cs b/MultiImageClient/Enums/GoogleOutputMimeType.cs new file mode 100644 index 0000000..10c9d1d --- /dev/null +++ b/MultiImageClient/Enums/GoogleOutputMimeType.cs @@ -0,0 +1,31 @@ +namespace MultiImageClient +{ + public enum GoogleOutputMimeType + { + Png, // Default - lossless + Jpeg // Lossy, smaller file size, supports compression quality + } + + public static class GoogleOutputMimeTypeExtensions + { + public static string ToApiString(this GoogleOutputMimeType mimeType) + { + return mimeType switch + { + GoogleOutputMimeType.Png => "image/png", + GoogleOutputMimeType.Jpeg => "image/jpeg", + _ => "image/png" + }; + } + + public static string ToFileExtension(this GoogleOutputMimeType mimeType) + { + return mimeType switch + { + GoogleOutputMimeType.Png => ".png", + GoogleOutputMimeType.Jpeg => ".jpg", + _ => ".png" + }; + } + } +} diff --git a/MultiImageClient/Enums/GooglePersonGeneration.cs b/MultiImageClient/Enums/GooglePersonGeneration.cs new file mode 100644 index 0000000..c555461 --- /dev/null +++ b/MultiImageClient/Enums/GooglePersonGeneration.cs @@ -0,0 +1,23 @@ +namespace MultiImageClient +{ + public enum GooglePersonGeneration + { + AllowAdult, // Default - allow generation of adults only (no celebrities) + DontAllow, // Disable people/faces in generated images + AllowAll // Allow all person generation (most permissive) + } + + public static class GooglePersonGenerationExtensions + { + public static string ToApiString(this GooglePersonGeneration setting) + { + return setting switch + { + GooglePersonGeneration.AllowAdult => "allow_adult", + GooglePersonGeneration.DontAllow => "dont_allow", + GooglePersonGeneration.AllowAll => "ALLOW_ALL", + _ => "allow_adult" + }; + } + } +} diff --git a/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs b/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs new file mode 100644 index 0000000..076fa38 --- /dev/null +++ b/MultiImageClient/Enums/GoogleSafetyFilterLevel.cs @@ -0,0 +1,25 @@ +namespace MultiImageClient +{ + public enum GoogleSafetyFilterLevel + { + BlockLowAndAbove, // Highest safety - most filtering + BlockMediumAndAbove, // Default - balanced filtering + BlockOnlyHigh, // Lowest safety - least filtering (may increase objectionable content) + BlockNone // Disable safety filtering entirely (if supported) + } + + public static class GoogleSafetyFilterLevelExtensions + { + public static string ToApiString(this GoogleSafetyFilterLevel level) + { + return level switch + { + GoogleSafetyFilterLevel.BlockLowAndAbove => "block_low_and_above", + GoogleSafetyFilterLevel.BlockMediumAndAbove => "block_medium_and_above", + GoogleSafetyFilterLevel.BlockOnlyHigh => "block_only_high", + GoogleSafetyFilterLevel.BlockNone => "BLOCK_NONE", + _ => "block_medium_and_above" + }; + } + } +} diff --git a/MultiImageClient/ImageGenerators/GoogleGenerator.cs b/MultiImageClient/ImageGenerators/GoogleGenerator.cs index 6124f54..5f248f0 100644 --- a/MultiImageClient/ImageGenerators/GoogleGenerator.cs +++ b/MultiImageClient/ImageGenerators/GoogleGenerator.cs @@ -18,14 +18,18 @@ public class GoogleGenerator : IImageGenerator private string _name; private ImageGeneratorApiType _apiType; private GoogleImageSize _imageSize; - private string _aspectRatio; + private GoogleImageAspectRatio _aspectRatio; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleNanoBanana; - public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConcurrency, - MultiClientRunStats stats, string name = "", + public GoogleGenerator( + ImageGeneratorApiType apiType, + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name = "", GoogleImageSize imageSize = GoogleImageSize.Size1K, - string aspectRatio = "1:1") + GoogleImageAspectRatio aspectRatio = GoogleImageAspectRatio.Ratio1x1) { _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); @@ -40,7 +44,7 @@ public GoogleGenerator(ImageGeneratorApiType apiType, string apiKey, int maxConc public string GetFilenamePart(PromptDetails pd) { var namePart = string.IsNullOrEmpty(_name) ? "" : $"-{_name}"; - return $"{_apiType}{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; + return $"{_apiType}{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.ToApiString().Replace(":", "x")}"; } public decimal GetCost() @@ -73,18 +77,18 @@ public decimal GetCost() public List GetRightParts() { var namePart = string.IsNullOrEmpty(_name) ? "" : _name; - return new List { _apiType.ToString(), namePart, _imageSize.ToApiString(), _aspectRatio }; + return new List { _apiType.ToString(), namePart, _imageSize.ToApiString(), _aspectRatio.ToApiString() }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return $"google-{_apiType.ToString()}\n{_imageSize.ToApiString()} {_aspectRatio}"; + return $"google-{_apiType.ToString()}\n{_imageSize.ToApiString()} {_aspectRatio.ToApiString()}"; } else { - return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio}"; + return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio.ToApiString()}"; } } @@ -116,7 +120,7 @@ public async Task ProcessPromptAsync(IImageGenerator generato imageConfig = new { imageSize = _imageSize.ToApiString(), - aspectRatio = _aspectRatio + aspectRatio = _aspectRatio.ToApiString() } } }; @@ -235,4 +239,4 @@ public void Dispose() } } -} \ No newline at end of file +} diff --git a/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs b/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs new file mode 100644 index 0000000..34d23a5 --- /dev/null +++ b/MultiImageClient/ImageGenerators/GoogleImageGenerationOptions.cs @@ -0,0 +1,136 @@ +namespace MultiImageClient +{ + /// + /// Configuration options for Google image generation APIs (both Gemini and Imagen 4). + /// Not all options are supported by all APIs - see individual property docs. + /// + public class GoogleImageGenerationOptions + { + /// + /// Output image resolution. Gemini supports 1K/2K/4K, Imagen 4 only supports 1K/2K. + /// Default: Size1K + /// + public GoogleImageSize ImageSize { get; set; } = GoogleImageSize.Size1K; + + /// + /// Aspect ratio of generated images. + /// Supported: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 + /// Default: Ratio1x1 + /// + public GoogleImageAspectRatio AspectRatio { get; set; } = GoogleImageAspectRatio.Ratio1x1; + + /// + /// Controls generation of people/faces in images. + /// Imagen 4 only. Default: AllowAdult + /// + public GooglePersonGeneration PersonGeneration { get; set; } = GooglePersonGeneration.AllowAdult; + + /// + /// Safety filter threshold level. + /// Imagen 4 only. Default: BlockMediumAndAbove + /// + public GoogleSafetyFilterLevel SafetyFilterLevel { get; set; } = GoogleSafetyFilterLevel.BlockMediumAndAbove; + + /// + /// Output image format. + /// Imagen 4 only. Default: Png + /// + public GoogleOutputMimeType OutputMimeType { get; set; } = GoogleOutputMimeType.Png; + + /// + /// JPEG compression quality (0-100). Only applies when OutputMimeType is Jpeg. + /// Imagen 4 only. Default: 75 + /// + public int CompressionQuality { get; set; } = 75; + + /// + /// Whether to use LLM-based prompt enhancement for higher quality images. + /// Imagen 4 only. Default: false (to preserve exact prompts) + /// + public bool EnhancePrompt { get; set; } = false; + + /// + /// Whether to add a SynthID digital watermark to generated images. + /// When true, seed parameter is ignored. + /// Imagen 4 only. Default: false + /// + public bool AddWatermark { get; set; } = false; + + /// + /// Random seed for deterministic output. Only works when AddWatermark=false and EnhancePrompt=false. + /// Set to null for random generation. + /// Imagen 4 only. Default: null + /// + public uint? Seed { get; set; } = null; + + /// + /// Number of images to generate per request (1-4). + /// Imagen 4 only. Default: 1 + /// + public int NumberOfImages { get; set; } = 1; + + /// + /// Whether to include RAI (Responsible AI) filter reason in responses. + /// Imagen 4 only. Default: true + /// + public bool IncludeRaiReason { get; set; } = true; + + /// + /// Creates a copy of this options object with potentially modified values. + /// + public GoogleImageGenerationOptions Clone() + { + return new GoogleImageGenerationOptions + { + ImageSize = this.ImageSize, + AspectRatio = this.AspectRatio, + PersonGeneration = this.PersonGeneration, + SafetyFilterLevel = this.SafetyFilterLevel, + OutputMimeType = this.OutputMimeType, + CompressionQuality = this.CompressionQuality, + EnhancePrompt = this.EnhancePrompt, + AddWatermark = this.AddWatermark, + Seed = this.Seed, + NumberOfImages = this.NumberOfImages, + IncludeRaiReason = this.IncludeRaiReason + }; + } + + /// + /// Validates options for Imagen 4 API compatibility. + /// + public void ValidateForImagen4() + { + if (ImageSize == GoogleImageSize.Size4K) + { + throw new System.ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); + } + + if (NumberOfImages < 1 || NumberOfImages > 4) + { + throw new System.ArgumentException("NumberOfImages must be between 1 and 4."); + } + + if (CompressionQuality < 0 || CompressionQuality > 100) + { + throw new System.ArgumentException("CompressionQuality must be between 0 and 100."); + } + } + + /// + /// Gets a string representation of the key options for display/logging. + /// + public string ToDisplayString() + { + return $"{ImageSize.ToApiString()} {AspectRatio.ToApiString()}"; + } + + /// + /// Gets a filename-safe string representation of key options. + /// + public string ToFilenamePart() + { + return $"{ImageSize.ToApiString()}_{AspectRatio.ToApiString().Replace(":", "x")}"; + } + } +} diff --git a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs index 4a0b43b..4f72972 100644 --- a/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs +++ b/MultiImageClient/ImageGenerators/GoogleImagen4Generator.cs @@ -20,38 +20,32 @@ public class GoogleImagen4Generator : IImageGenerator private string _apiKey; private MultiClientRunStats _stats; private string _name; - private string _aspectRatio; - private string _safetyFilterLevel; private string _location; private string _projectId; private string _googleServiceAccountKeyPath; private GoogleCredential _credential; - private GoogleImageSize _imageSize; + private GoogleImageGenerationOptions _options; public ImageGeneratorApiType ApiType => ImageGeneratorApiType.GoogleImagen4; - public GoogleImagen4Generator(string apiKey, int maxConcurrency, - MultiClientRunStats stats, string name, - string aspectRatio, - string safetyFilterLevel, + public GoogleImagen4Generator( + string apiKey, + int maxConcurrency, + MultiClientRunStats stats, + string name, string location, string projectId, string googleServiceAccountKeyPath, - GoogleImageSize imageSize = GoogleImageSize.Size1K) - + GoogleImageGenerationOptions options = null) { - // Imagen 4 does not support 4K resolution - if (imageSize == GoogleImageSize.Size4K) - { - throw new ArgumentException("Imagen 4 does not support 4K resolution. Use 1K or 2K."); - } + _options = options ?? new GoogleImageGenerationOptions(); + _options.ValidateForImagen4(); _apiKey = apiKey; _googleSemaphore = new SemaphoreSlim(maxConcurrency); _location = location; _projectId = projectId; _googleServiceAccountKeyPath = googleServiceAccountKeyPath; - _imageSize = imageSize; if (!string.IsNullOrEmpty(_googleServiceAccountKeyPath)) { @@ -70,20 +64,18 @@ public GoogleImagen4Generator(string apiKey, int maxConcurrency, _name = string.IsNullOrEmpty(name) ? "" : name; _stats = stats; - _aspectRatio = aspectRatio; - _safetyFilterLevel = safetyFilterLevel; } public string GetFilenamePart(PromptDetails pd) { var namePart = string.IsNullOrEmpty(_name) ? "" : $"-{_name}"; - return $"google-imagen4{namePart}_{_imageSize.ToApiString()}_{_aspectRatio.Replace(":", "x")}"; + return $"google-imagen4{namePart}_{_options.ToFilenamePart()}"; } public decimal GetCost() { // Imagen 4 pricing - higher resolutions cost more - return _imageSize switch + return _options.ImageSize switch { GoogleImageSize.Size1K => 0.04m, GoogleImageSize.Size2K => 0.08m, @@ -94,18 +86,18 @@ public decimal GetCost() public List GetRightParts() { var namePart = string.IsNullOrEmpty(_name) ? "" : _name; - return new List { "imagen4", namePart, _imageSize.ToApiString(), _aspectRatio }; + return new List { "imagen4", namePart, _options.ImageSize.ToApiString(), _options.AspectRatio.ToApiString() }; } public string GetGeneratorSpecPart() { if (string.IsNullOrEmpty(_name)) { - return $"google-imagen4\n{_imageSize.ToApiString()} {_aspectRatio}"; + return $"google-imagen4\n{_options.ToDisplayString()}"; } else { - return $"{_name}\n{_imageSize.ToApiString()} {_aspectRatio}"; + return $"{_name}\n{_options.ToDisplayString()}"; } } @@ -116,73 +108,95 @@ public async Task ProcessPromptAsync(IImageGenerator generato { _stats.GoogleRequestCount++; - // Google Gemini API endpoint for Imagen 4 + // Google Vertex AI endpoint for Imagen 4 var apiUrl = $"https://{_location}-aiplatform.googleapis.com/v1/projects/{_projectId}/locations/{_location}/publishers/google/models/imagen-4.0-generate-001:predict"; - // Construct the instance for the predict request - var instance = new Google.Protobuf.WellKnownTypes.Value + // Build the fields dictionary with all options + var fields = new Dictionary + { + { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, + { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.NumberOfImages) }, + { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_options.AspectRatio.ToApiString()) }, + { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_options.ImageSize.ToApiString()) }, + { "enhancePrompt", Google.Protobuf.WellKnownTypes.Value.ForBool(_options.EnhancePrompt) }, + { "includeRaiReason", Google.Protobuf.WellKnownTypes.Value.ForBool(_options.IncludeRaiReason) }, + { "safetySetting", Google.Protobuf.WellKnownTypes.Value.ForString(_options.SafetyFilterLevel.ToApiString()) }, + { "personGeneration", Google.Protobuf.WellKnownTypes.Value.ForString(_options.PersonGeneration.ToApiString()) }, + { "addWatermark", Google.Protobuf.WellKnownTypes.Value.ForBool(_options.AddWatermark) } + }; + + // Add seed if specified and watermark is disabled + if (_options.Seed.HasValue && !_options.AddWatermark && !_options.EnhancePrompt) + { + fields.Add("seed", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.Seed.Value)); + } + + // Add output options if not using defaults + if (_options.OutputMimeType != GoogleOutputMimeType.Png) { - StructValue = new Google.Protobuf.WellKnownTypes.Struct + var outputOptions = new Google.Protobuf.WellKnownTypes.Struct(); + outputOptions.Fields.Add("mimeType", Google.Protobuf.WellKnownTypes.Value.ForString(_options.OutputMimeType.ToApiString())); + + if (_options.OutputMimeType == GoogleOutputMimeType.Jpeg) { - Fields = - { - { "prompt", Google.Protobuf.WellKnownTypes.Value.ForString(promptDetails.Prompt) }, - { "numberOfImages", Google.Protobuf.WellKnownTypes.Value.ForNumber(1) }, - { "aspectRatio", Google.Protobuf.WellKnownTypes.Value.ForString(_aspectRatio) }, - { "sampleImageSize", Google.Protobuf.WellKnownTypes.Value.ForString(_imageSize.ToApiString()) }, - { "enhancePrompt", Google.Protobuf.WellKnownTypes.Value.ForBool(false) }, - { "includeRaiReason", Google.Protobuf.WellKnownTypes.Value.ForBool(true) }, - { "safetyFilterLevel", Google.Protobuf.WellKnownTypes.Value.ForString(_safetyFilterLevel) }, - { "safetySetting", Google.Protobuf.WellKnownTypes.Value.ForString("block_only_high") }, - { "personGeneration", Google.Protobuf.WellKnownTypes.Value.ForString("ALLOW_ALL") }, - { "addWatermark", Google.Protobuf.WellKnownTypes.Value.ForBool(false) } - } + outputOptions.Fields.Add("compressionQuality", Google.Protobuf.WellKnownTypes.Value.ForNumber(_options.CompressionQuality)); } + + fields.Add("outputOptions", Google.Protobuf.WellKnownTypes.Value.ForStruct(outputOptions)); + } + + // Construct the instance for the predict request + var instanceStruct = new Google.Protobuf.WellKnownTypes.Struct(); + foreach (var field in fields) + { + instanceStruct.Fields.Add(field.Key, field.Value); + } + + var instance = new Google.Protobuf.WellKnownTypes.Value + { + StructValue = instanceStruct }; var instances = new List { instance }; - - // Imagen 4 does not use a separate 'config' field in parameters. - // Parameters are directly specified in the instance. - var parameters = new Google.Protobuf.WellKnownTypes.Value(); // No parameters needed for now. + var parameters = new Google.Protobuf.WellKnownTypes.Value(); var endpoint = EndpointName.FromProjectLocationPublisherModel(_projectId, _location, "google", "imagen-4.0-generate-001"); var response = await _predictionServiceClient.PredictAsync(endpoint, instances, parameters); var base64Images = new List(); - string commonMimeType = "image/png"; // Default or first detected mime type + string commonMimeType = _options.OutputMimeType.ToApiString(); if (response?.Predictions != null && response.Predictions.Any()) { foreach (var prediction in response.Predictions) + { + if (prediction?.StructValue?.Fields != null) { - if (prediction?.StructValue?.Fields != null) + var predictionFields = prediction.StructValue.Fields; + if (predictionFields.ContainsKey("bytesBase64Encoded") && predictionFields.ContainsKey("mimeType")) { - var predictionFields = prediction.StructValue.Fields; - if (predictionFields.ContainsKey("bytesBase64Encoded") && predictionFields.ContainsKey("mimeType")) - { - var imageData = predictionFields["bytesBase64Encoded"].StringValue; - var newPrompt = predictionFields["prompt"].StringValue; - var currentMimeType = predictionFields["mimeType"].StringValue; + var imageData = predictionFields["bytesBase64Encoded"].StringValue; + var newPrompt = predictionFields.ContainsKey("prompt") ? predictionFields["prompt"].StringValue : promptDetails.Prompt; + var currentMimeType = predictionFields["mimeType"].StringValue; - if (!string.IsNullOrEmpty(imageData)) + if (!string.IsNullOrEmpty(imageData)) + { + var bd = new CreatedBase64Image { - var bd = new CreatedBase64Image - { - bytesBase64 = imageData, - newPrompt = newPrompt, - }; + bytesBase64 = imageData, + newPrompt = newPrompt, + }; - base64Images.Add(bd); - if (!string.IsNullOrEmpty(currentMimeType) && (commonMimeType == "image/png")) - { - commonMimeType = currentMimeType; // Use the first valid mime type found if default - } + base64Images.Add(bd); + if (!string.IsNullOrEmpty(currentMimeType)) + { + commonMimeType = currentMimeType; } } } } + } } if (base64Images.Count == 0) diff --git a/MultiImageClient/Workflows/GeneratorGroups.cs b/MultiImageClient/Workflows/GeneratorGroups.cs index 84f63e2..eec569a 100644 --- a/MultiImageClient/Workflows/GeneratorGroups.cs +++ b/MultiImageClient/Workflows/GeneratorGroups.cs @@ -65,19 +65,51 @@ public IEnumerable GetAll() // Google Gemini/Nano Banana generators with various resolutions var google_banana = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, - imageSize: GoogleImageSize.Size1K, aspectRatio: "1:1"); + imageSize: GoogleImageSize.Size1K, aspectRatio: GoogleImageAspectRatio.Ratio1x1); var google_banana_2k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, - name: "banana-2k", imageSize: GoogleImageSize.Size2K, aspectRatio: "16:9"); + name: "banana-2k", imageSize: GoogleImageSize.Size2K, aspectRatio: GoogleImageAspectRatio.Ratio16x9); var google_banana_4k = new GoogleGenerator(ImageGeneratorApiType.GoogleNanoBanana, _settings.GoogleGeminiApiKey, _concurrency, _stats, - name: "banana-4k", imageSize: GoogleImageSize.Size4K, aspectRatio: "1:1"); + name: "banana-4k", imageSize: GoogleImageSize.Size4K, aspectRatio: GoogleImageAspectRatio.Ratio1x1); - // Google Imagen 4 generators (max 2K, no 4K support) - var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", "16:9", "BLOCK_NONE", - location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, - imageSize: GoogleImageSize.Size1K); - var googleimagen_2k = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-2k", "16:9", "BLOCK_NONE", - location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, - imageSize: GoogleImageSize.Size2K); + // Google Imagen 4 generators (max 2K, no 4K support) - using options class + var imagen4Options1k = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size1K, + AspectRatio = GoogleImageAspectRatio.Ratio16x9, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockNone, + PersonGeneration = GooglePersonGeneration.AllowAll + }; + var googleimagen = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4Options1k); + + var imagen4Options2k = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size2K, + AspectRatio = GoogleImageAspectRatio.Ratio16x9, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockNone, + PersonGeneration = GooglePersonGeneration.AllowAll + }; + var googleimagen_2k = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-2k", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4Options2k); + + // Example with JPEG output and deterministic seed + var imagen4OptionsJpeg = new GoogleImageGenerationOptions + { + ImageSize = GoogleImageSize.Size2K, + AspectRatio = GoogleImageAspectRatio.Ratio3x2, + OutputMimeType = GoogleOutputMimeType.Jpeg, + CompressionQuality = 90, + SafetyFilterLevel = GoogleSafetyFilterLevel.BlockOnlyHigh, + Seed = 12345 // Deterministic output + }; + var googleimagen_jpeg = new GoogleImagen4Generator(_settings.GoogleGeminiApiKey, _concurrency, _stats, "imagen4-jpeg", + location: _settings.GoogleCloudLocation, projectId: _settings.GoogleCloudProjectId, + googleServiceAccountKeyPath: _settings.GoogleServiceAccountKeyPath, + options: imagen4OptionsJpeg); //recraft8, recraft9, var myGenerators = new List() { }; diff --git a/docs/GoogleImagenResolutionOptions.md b/docs/GoogleImagenResolutionOptions.md index b9c689f..7eee4fe 100644 --- a/docs/GoogleImagenResolutionOptions.md +++ b/docs/GoogleImagenResolutionOptions.md @@ -1,133 +1,153 @@ -# Google Imagen API Resolution Options +# Google Imagen API Options Reference ## Overview -Google provides two distinct APIs for image generation, each with different resolution options: +Google provides two distinct APIs for image generation, each with different options: 1. **Google Imagen 4 (Vertex AI)** - Dedicated image generation model accessed via Vertex AI 2. **Google Gemini API (Nano Banana)** - Multimodal LLM with native image generation --- -## Imagen 4 (Vertex AI) - `imagen-4.0-generate-001` +## All Available Parameters -### Supported Models -- `imagen-4.0-generate-001` -- `imagen-4.0-ultra-generate-001` +### Imagen 4 (Vertex AI) Parameters -### Resolution Parameter: `sampleImageSize` +| Parameter | Type | Values | Default | Description | +|-----------|------|--------|---------|-------------| +| `sampleImageSize` | string | `"1K"`, `"2K"` | `"1K"` | Output resolution (4K NOT supported) | +| `aspectRatio` | string | `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` | `"1:1"` | Image aspect ratio | +| `numberOfImages` | int | 1-4 | 1 | Number of images to generate | +| `enhancePrompt` | boolean | true/false | true | LLM-based prompt rewriting | +| `personGeneration` | string | `"allow_adult"`, `"dont_allow"`, `"ALLOW_ALL"` | `"allow_adult"` | Controls person/face generation | +| `safetySetting` | string | `"block_low_and_above"`, `"block_medium_and_above"`, `"block_only_high"` | `"block_medium_and_above"` | Safety filter threshold | +| `addWatermark` | boolean | true/false | false | Add SynthID digital watermark | +| `seed` | uint32 | any | random | Deterministic generation (only when addWatermark=false and enhancePrompt=false) | +| `includeRaiReason` | boolean | true/false | true | Include RAI filter reason in response | +| `outputOptions.mimeType` | string | `"image/png"`, `"image/jpeg"` | `"image/png"` | Output format | +| `outputOptions.compressionQuality` | int | 0-100 | 75 | JPEG compression (only for JPEG) | + +### Gemini API Parameters + +| Parameter | Type | Values | Default | Description | +|-----------|------|--------|---------|-------------| +| `imageConfig.imageSize` | string | `"1K"`, `"2K"`, `"4K"` | `"1K"` | Output resolution | +| `imageConfig.aspectRatio` | string | `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"9:16"`, `"16:9"`, `"21:9"` | auto | Image aspect ratio | + +--- + +## Resolution Details + +| Setting | Approximate Resolution | Imagen 4 | Gemini | +|---------|----------------------|----------|--------| +| 1K | ~1024 × 1024 (for 1:1) | ✅ | ✅ | +| 2K | ~2048 × 2048 (for 1:1) | ✅ | ✅ | +| 4K | ~4096 × 4096 (for 1:1) | ❌ | ✅ | + +For non-square aspect ratios, the longer dimension is constrained to the K value. + +--- + +## Person Generation Settings | Value | Description | |-------|-------------| -| `"1K"` | ~1024px (default) | -| `"2K"` | ~2048px | +| `allow_adult` | Default. Allow generation of adults only. Celebrity generation is blocked. | +| `dont_allow` | Disable all people/faces in generated images | +| `ALLOW_ALL` | Most permissive - allows all person generation | + +--- + +## Safety Filter Levels + +| Value | Description | +|-------|-------------| +| `block_low_and_above` | Highest safety - most filtering, fewest images pass | +| `block_medium_and_above` | Default - balanced filtering | +| `block_only_high` | Lowest safety - least filtering, may increase objectionable content | + +--- + +## Output Format Options + +| Format | Pros | Cons | +|--------|------|------| +| PNG | Lossless, supports transparency | Larger file size | +| JPEG | Smaller file size, configurable quality | Lossy compression, no transparency | -**Note:** 4K is NOT supported for Imagen 4 according to current documentation. +JPEG compression quality: 0 (smallest/worst) to 100 (largest/best), default 75. -### API Request Structure (REST) +--- + +## API Request Examples + +### Imagen 4 (Full Options) ```json { "instances": [ { - "prompt": "TEXT_PROMPT" + "prompt": "A serene mountain landscape at sunset" } ], "parameters": { "sampleImageSize": "2K", - "sampleCount": 1 + "sampleCount": 1, + "aspectRatio": "16:9", + "enhancePrompt": false, + "personGeneration": "allow_adult", + "safetySetting": "block_only_high", + "addWatermark": false, + "seed": 12345, + "includeRaiReason": true, + "outputOptions": { + "mimeType": "image/jpeg", + "compressionQuality": 90 + } } } ``` -### Aspect Ratio Support -Imagen 4 also supports `aspectRatio` parameter with these values: -- `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` - ---- - -## Gemini API (Native Image Generation - "Nano Banana") - -### Resolution Parameter: `imageSize` (within `imageConfig`) - -| Value | Description | -|-------|-------------| -| `"1K"` | ~1024px | -| `"2K"` | ~2048px | -| `"4K"` | ~4096px | - -### Aspect Ratio Support -Same as Imagen 4: -- `"1:1"`, `"2:3"`, `"3:2"`, `"3:4"`, `"4:3"`, `"4:5"`, `"5:4"`, `"9:16"`, `"16:9"`, `"21:9"` - -### API Request Structure (REST) +### Gemini API ```json { "contents": [ { "parts": [ - { "text": "PROMPT" } + { "text": "A serene mountain landscape at sunset" } ] } ], "generationConfig": { "responseModalities": ["TEXT", "IMAGE"], "imageConfig": { - "aspectRatio": "16:9", - "imageSize": "2K" + "imageSize": "4K", + "aspectRatio": "16:9" } } } ``` -### Python SDK Example - -```python -from google import genai -from google.genai import types - -client = genai.Client() - -response = client.models.generate_content( - model="gemini-2.0-flash-exp", - contents="Create an image of a dog", - config=types.GenerateContentConfig( - response_modalities=["TEXT", "IMAGE"], - image_config=types.ImageConfig( - aspect_ratio="16:9", - image_size="2K" # "1K", "2K", or "4K" - ), - ), -) -``` - ---- - -## Resolution vs Pixel Dimensions Summary - -| Setting | Approximate Resolution | -|---------|----------------------| -| 1K | 1024 × 1024 (for 1:1) | -| 2K | 2048 × 2048 (for 1:1) | -| 4K | 4096 × 4096 (for 1:1, Gemini only) | - -For non-square aspect ratios, the longer dimension is constrained to the K value while maintaining the aspect ratio. - --- ## Pricing Considerations -Higher resolutions may incur higher costs: -- **Imagen 4**: ~$0.04 per image (standard resolution) -- **Gemini Flash Image**: Token-based pricing (~$30/1M output tokens, ~1290 tokens per image at 1K) +### Imagen 4 +- Base price: ~$0.04 per image at 1K +- 2K resolution: ~$0.08 per image (estimated 2x) -Higher resolutions likely consume more tokens for Gemini and may have different pricing tiers for Imagen. +### Gemini (Token-based) +- $30 per 1M output tokens +- ~1290 tokens per 1K image +- Higher resolutions consume proportionally more tokens (4x for 2K, 16x for 4K) --- ## References -- [Vertex AI Set Output Resolution](https://cloud.google.com/vertex-ai/generative-ai/docs/image/set-output-resolution) +- [Vertex AI Image Generation](https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-images) +- [Imagen API Reference](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api) +- [Set Output Resolution](https://cloud.google.com/vertex-ai/generative-ai/docs/image/set-output-resolution) +- [Configure Safety Settings](https://cloud.google.com/vertex-ai/generative-ai/docs/image/configure-responsible-ai-safety-settings) - [Gemini API Image Generation](https://ai.google.dev/gemini-api/docs/image-generation) -- [Configure Aspect Ratio](https://cloud.google.com/vertex-ai/generative-ai/docs/image/configure-aspect-ratio)