From 536a3be97c29499f6e666e28ebde0d006f4ea233 Mon Sep 17 00:00:00 2001 From: ruslansenchukov Date: Sun, 26 Oct 2025 22:37:55 +0400 Subject: [PATCH 1/3] added support for wildcard (*) file paths for MinIO with http/https prefix --- ice/pom.xml | 5 ++ .../altinity/ice/cli/internal/cmd/Insert.java | 33 +++++++---- .../ice/cli/internal/http/MinioWildcard.java | 55 +++++++++++++++++++ 3 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java diff --git a/ice/pom.xml b/ice/pom.xml index feb357a..6275fc3 100644 --- a/ice/pom.xml +++ b/ice/pom.xml @@ -521,6 +521,11 @@ ${assertj.version} test + + org.json + json + 20231013 + diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java index 805f41b..eaff5bc 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java @@ -85,6 +85,8 @@ import software.amazon.awssdk.services.s3.model.CopyObjectRequest; import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.utils.Lazy; +import com.altinity.ice.cli.internal.http.MinioWildcard; + public final class Insert { @@ -106,22 +108,33 @@ public static void run( // Create transaction and pass it to updatePartitionAndSortOrderMetadata Transaction txn = table.newTransaction(); - try (FileIO tableIO = table.io()) { - Lazy s3ClientLazy = newS3Client(options, tableIO, table); - try { +try (FileIO tableIO = table.io()) { + Lazy s3ClientLazy = newS3Client(options, tableIO, table); + try { var filesExpanded = Arrays.stream(files) .flatMap( s -> { - if (s.startsWith("s3://") && s.contains("*")) { - var b = S3.bucketPath(s); - return S3 - .listWildcard(s3ClientLazy.getValue(), b.bucket(), b.path(), -1) - .stream(); - } - return Stream.of(s); + if (s.startsWith("s3://") && s.contains("*")) { + var b = 
S3.bucketPath(s); + return S3 + .listWildcard(s3ClientLazy.getValue(), b.bucket(), b.path(), -1) + .stream(); + } + + // HTTP(S) wildcard for Minio & etc. + if ((s.startsWith("http://") || s.startsWith("https://")) && s.contains("*")) { + try { + return MinioWildcard.listHTTPWildcard(s).stream(); + } catch (Exception e) { + throw new RuntimeException("Failed to expand HTTP wildcard for " + s, e); + } + } + + return Stream.of(s); }) .toList(); + if (filesExpanded.isEmpty()) { throw new BadRequestException("No matching files found"); } diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java b/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java new file mode 100644 index 0000000..953accf --- /dev/null +++ b/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2025 Altinity Inc and/or its affiliates. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + + +package com.altinity.ice.cli.internal.http; + +import java.net.URI; +import java.net.http.*; +import java.util.*; +import org.json.*; + +public final class MinioWildcard { + private static final HttpClient client = HttpClient.newHttpClient(); + + public static List listHTTPWildcard(String urlWithStar) throws Exception { + URI uri = URI.create(urlWithStar); + String[] parts = uri.getPath().split("/", 3); + if (parts.length < 2) + throw new IllegalArgumentException("Bad MinIO URL: " + urlWithStar); + + String bucket = parts[1]; + String prefix = parts.length == 3 ? 
parts[2].replaceAll("\\*", "") : ""; + String endpoint = uri.getScheme() + "://" + uri.getHost() + ":" + uri.getPort(); + + String listUrl = endpoint + "/" + bucket + "?list-type=2&prefix=" + prefix; + HttpRequest req = HttpRequest.newBuilder(URI.create(listUrl)).GET().build(); + HttpResponse resp = client.send(req, HttpResponse.BodyHandlers.ofString()); + if (resp.statusCode() != 200) + throw new RuntimeException("MinIO listObjects error: " + resp.statusCode()); + + JSONObject xml = org.json.XML.toJSONObject(resp.body()); + List files = new ArrayList<>(); + var result = xml.getJSONObject("ListBucketResult"); + var contents = result.optJSONArray("Contents"); + if (contents != null) { + for (int i = 0; i < contents.length(); i++) { + String key = contents.getJSONObject(i).getString("Key"); + if (key.endsWith(".parquet")) + files.add(endpoint + "/" + bucket + "/" + key); + } + } else { + var single = result.optJSONObject("Contents"); + if (single != null) + files.add(endpoint + "/" + bucket + "/" + single.getString("Key")); + } + return files; + } +} From 157b49991a5eddc260db5058a92a59c812a0d300 Mon Sep 17 00:00:00 2001 From: ruslansenchukov Date: Mon, 27 Oct 2025 11:36:14 +0400 Subject: [PATCH 2/3] fixed formatting --- .../altinity/ice/cli/internal/cmd/Insert.java | 42 ++++++------ .../ice/cli/internal/http/MinioWildcard.java | 66 +++++++++---------- 2 files changed, 52 insertions(+), 56 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java index eaff5bc..cd418e4 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Insert.java @@ -10,6 +10,7 @@ package com.altinity.ice.cli.internal.cmd; import com.altinity.ice.cli.Main; +import com.altinity.ice.cli.internal.http.MinioWildcard; import com.altinity.ice.cli.internal.iceberg.Partitioning; import 
com.altinity.ice.cli.internal.iceberg.RecordComparator; import com.altinity.ice.cli.internal.iceberg.SchemaEvolution; @@ -85,8 +86,6 @@ import software.amazon.awssdk.services.s3.model.CopyObjectRequest; import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.utils.Lazy; -import com.altinity.ice.cli.internal.http.MinioWildcard; - public final class Insert { @@ -108,33 +107,34 @@ public static void run( // Create transaction and pass it to updatePartitionAndSortOrderMetadata Transaction txn = table.newTransaction(); -try (FileIO tableIO = table.io()) { - Lazy s3ClientLazy = newS3Client(options, tableIO, table); - try { + try (FileIO tableIO = table.io()) { + Lazy s3ClientLazy = newS3Client(options, tableIO, table); + try { var filesExpanded = Arrays.stream(files) .flatMap( s -> { - if (s.startsWith("s3://") && s.contains("*")) { - var b = S3.bucketPath(s); - return S3 - .listWildcard(s3ClientLazy.getValue(), b.bucket(), b.path(), -1) - .stream(); - } - - // HTTP(S) wildcard for Minio & etc. - if ((s.startsWith("http://") || s.startsWith("https://")) && s.contains("*")) { - try { - return MinioWildcard.listHTTPWildcard(s).stream(); - } catch (Exception e) { - throw new RuntimeException("Failed to expand HTTP wildcard for " + s, e); - } + if (s.startsWith("s3://") && s.contains("*")) { + var b = S3.bucketPath(s); + return S3 + .listWildcard(s3ClientLazy.getValue(), b.bucket(), b.path(), -1) + .stream(); + } + + // HTTP(S) wildcard for Minio & etc. 
+ if ((s.startsWith("http://") || s.startsWith("https://")) + && s.contains("*")) { + try { + return MinioWildcard.listHTTPWildcard(s).stream(); + } catch (Exception e) { + throw new RuntimeException("Failed to expand HTTP wildcard for " + s, e); } + } - return Stream.of(s); + return Stream.of(s); }) .toList(); - + if (filesExpanded.isEmpty()) { throw new BadRequestException("No matching files found"); } diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java b/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java index 953accf..8d93c67 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/http/MinioWildcard.java @@ -8,7 +8,6 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ - package com.altinity.ice.cli.internal.http; import java.net.URI; @@ -17,39 +16,36 @@ import org.json.*; public final class MinioWildcard { - private static final HttpClient client = HttpClient.newHttpClient(); - - public static List listHTTPWildcard(String urlWithStar) throws Exception { - URI uri = URI.create(urlWithStar); - String[] parts = uri.getPath().split("/", 3); - if (parts.length < 2) - throw new IllegalArgumentException("Bad MinIO URL: " + urlWithStar); - - String bucket = parts[1]; - String prefix = parts.length == 3 ? 
parts[2].replaceAll("\\*", "") : ""; - String endpoint = uri.getScheme() + "://" + uri.getHost() + ":" + uri.getPort(); - - String listUrl = endpoint + "/" + bucket + "?list-type=2&prefix=" + prefix; - HttpRequest req = HttpRequest.newBuilder(URI.create(listUrl)).GET().build(); - HttpResponse resp = client.send(req, HttpResponse.BodyHandlers.ofString()); - if (resp.statusCode() != 200) - throw new RuntimeException("MinIO listObjects error: " + resp.statusCode()); - - JSONObject xml = org.json.XML.toJSONObject(resp.body()); - List files = new ArrayList<>(); - var result = xml.getJSONObject("ListBucketResult"); - var contents = result.optJSONArray("Contents"); - if (contents != null) { - for (int i = 0; i < contents.length(); i++) { - String key = contents.getJSONObject(i).getString("Key"); - if (key.endsWith(".parquet")) - files.add(endpoint + "/" + bucket + "/" + key); - } - } else { - var single = result.optJSONObject("Contents"); - if (single != null) - files.add(endpoint + "/" + bucket + "/" + single.getString("Key")); - } - return files; + private static final HttpClient client = HttpClient.newHttpClient(); + + public static List listHTTPWildcard(String urlWithStar) throws Exception { + URI uri = URI.create(urlWithStar); + String[] parts = uri.getPath().split("/", 3); + if (parts.length < 2) throw new IllegalArgumentException("Bad MinIO URL: " + urlWithStar); + + String bucket = parts[1]; + String prefix = parts.length == 3 ? 
parts[2].replaceAll("\\*", "") : ""; + String endpoint = uri.getScheme() + "://" + uri.getHost() + ":" + uri.getPort(); + + String listUrl = endpoint + "/" + bucket + "?list-type=2&prefix=" + prefix; + HttpRequest req = HttpRequest.newBuilder(URI.create(listUrl)).GET().build(); + HttpResponse resp = client.send(req, HttpResponse.BodyHandlers.ofString()); + if (resp.statusCode() != 200) + throw new RuntimeException("MinIO listObjects error: " + resp.statusCode()); + + JSONObject xml = org.json.XML.toJSONObject(resp.body()); + List files = new ArrayList<>(); + var result = xml.getJSONObject("ListBucketResult"); + var contents = result.optJSONArray("Contents"); + if (contents != null) { + for (int i = 0; i < contents.length(); i++) { + String key = contents.getJSONObject(i).getString("Key"); + if (key.endsWith(".parquet")) files.add(endpoint + "/" + bucket + "/" + key); + } + } else { + var single = result.optJSONObject("Contents"); + if (single != null) files.add(endpoint + "/" + bucket + "/" + single.getString("Key")); } + return files; + } } From 98afc496549caf5d52c5531e90509f5448f7388d Mon Sep 17 00:00:00 2001 From: ruslansenchukov Date: Mon, 27 Oct 2025 19:52:18 +0400 Subject: [PATCH 3/3] Added support for HTTP/MinIO wildcard (*) paths when creating tables --- .../altinity/ice/cli/internal/cmd/CreateTable.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/CreateTable.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/CreateTable.java index 422ced3..499933f 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/CreateTable.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/CreateTable.java @@ -81,6 +81,19 @@ public static void run( } schemaFile = files.getFirst(); } + if ((schemaFile.startsWith("http://") || schemaFile.startsWith("https://")) + && schemaFile.contains("*")) { + try { + var files = 
com.altinity.ice.cli.internal.http.MinioWildcard.listHTTPWildcard(schemaFile); + if (files.isEmpty()) { + throw new BadRequestException( + String.format("No files matching \"%s\" found", schemaFile)); + } + schemaFile = files.get(0); + } catch (Exception e) { + throw new RuntimeException("Failed to expand HTTP wildcard: " + schemaFile, e); + } + } try (var inputIO = Input.newIO(schemaFile, null, s3ClientLazy)) { InputFile inputFile = Input.newFile(schemaFile, catalog, inputIO);