From 8c6a944dac3d37bf90ce54a43ad1cbea9e4d913e Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Wed, 3 Jun 2026 02:23:44 +0530 Subject: [PATCH 1/6] Fix PPL CalciteException for non-ASCII string literals (e.g. Chinese) visitLiteral() built VARCHAR/CHAR types using typeFactory.createSqlType(SqlTypeName.VARCHAR) without specifying a charset. Calcite defaults to ISO-8859-1, which cannot encode non-Latin characters, causing a CalciteException at query time. Fix: explicitly create the type with UTF-8 charset and IMPLICIT collation via typeFactory.createTypeWithCharsetAndCollation() for both the CHAR(1) and VARCHAR branches of the STRING literal case. This is a regression introduced in 3.6.0 when the PPL/Calcite integration was added. SQL queries were unaffected because the SQL path uses a different literal-building flow. Fixes opensearch-project/OpenSearch#21880 Signed-off-by: Radhakrishnan Pachyappan --- .../sql/calcite/CalciteRexNodeVisitor.java | 17 +++++++- .../calcite/CalciteRexNodeVisitorTest.java | 42 +++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 830b40d2551..0a9da0ed55c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -31,6 +32,7 @@ import org.apache.calcite.rex.RexLambdaRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlOperator; 
import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -131,17 +133,28 @@ public RexNode visitLiteral(Literal node, CalcitePlanContext context) { case NULL: return rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL)); case STRING: + // Use UTF-8 explicitly so non-ASCII characters (e.g. Chinese, Arabic) are not rejected + // by Calcite's NlsString, which otherwise defaults to ISO-8859-1. if (value.toString().length() == 1) { // To align Spark/PostgreSQL, Char(1) is useful, such as cast('1' to boolean) should // return true return rexBuilder.makeLiteral( - value.toString(), typeFactory.createSqlType(SqlTypeName.CHAR)); + value.toString(), + typeFactory.createTypeWithCharsetAndCollation( + typeFactory.createSqlType(SqlTypeName.CHAR), + StandardCharsets.UTF_8, + SqlCollation.IMPLICIT)); } else { // Specific the type to VARCHAR and allowCast to true, or the STRING will be optimized to // CHAR(n) // which leads to incorrect return type in deriveReturnType of some functions/operators return rexBuilder.makeLiteral( - value.toString(), typeFactory.createSqlType(SqlTypeName.VARCHAR), true); + value.toString(), + typeFactory.createTypeWithCharsetAndCollation( + typeFactory.createSqlType(SqlTypeName.VARCHAR), + StandardCharsets.UTF_8, + SqlCollation.IMPLICIT), + true); } case INTEGER: return rexBuilder.makeExactLiteral(new BigDecimal((Integer) value)); diff --git a/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java index 9be542f2086..fa77d962e42 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java @@ -13,9 +13,13 @@ import java.sql.Connection; import java.util.List; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; import 
org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Literal; import org.apache.calcite.tools.FrameworkConfig; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -76,6 +80,44 @@ public void tearDown() { mockedStatic.close(); } + @Test + public void testVisitLiteralNonAsciiStringDoesNotThrow() { + // Regression test for https://github.com/opensearch-project/OpenSearch/issues/21880 + // Chinese (and other non-Latin) string literals must not throw CalciteException when + // visitLiteral builds them via RexBuilder.makeLiteral. The fix is to use UTF-8 explicitly. + CalciteRexNodeVisitor realVisitor = new CalciteRexNodeVisitor(relNodeVisitor); + RexBuilder realRexBuilder = new ExtendedRexBuilder(new RexBuilder(TYPE_FACTORY)); + CalcitePlanContext realContext = + CalcitePlanContext.create(frameworkConfig, SysLimit.DEFAULT, QueryType.PPL); + + // Simulate visitLiteral with the real RexBuilder by calling it directly + Literal chineseLiteral = new Literal("未处置", DataType.STRING); + Literal arabicLiteral = new Literal("مرحبا", DataType.STRING); + Literal singleCharLiteral = new Literal("中", DataType.STRING); + + // None of these should throw CalciteException + assertDoesNotThrow( + () -> { + RexNode node = realVisitor.visitLiteral(chineseLiteral, realContext); + assertNotNull(node); + assertInstanceOf(RexLiteral.class, node); + assertEquals(SqlTypeName.VARCHAR, node.getType().getSqlTypeName()); + }); + assertDoesNotThrow( + () -> { + RexNode node = realVisitor.visitLiteral(arabicLiteral, realContext); + assertNotNull(node); + assertInstanceOf(RexLiteral.class, node); + }); + assertDoesNotThrow( + () -> { + RexNode node = realVisitor.visitLiteral(singleCharLiteral, realContext); + assertNotNull(node); + assertInstanceOf(RexLiteral.class, node); + assertEquals(SqlTypeName.CHAR, 
node.getType().getSqlTypeName()); + }); + } + @Test public void testPrepareLambdaForBasicLambda() { when(componentType.getSqlTypeName()).thenReturn(SqlTypeName.DOUBLE); From 2afb328cec5a8d31179e6614087956edf845e50d Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Wed, 3 Jun 2026 02:30:05 +0530 Subject: [PATCH 2/6] Address review feedback on non-ASCII literal regression test - Remove unused realRexBuilder variable (context.rexBuilder is already a real ExtendedRexBuilder backed by TYPE_FACTORY via the constructor) - Add charset assertions to verify resulting RelDataType carries UTF-8, so future accidental charset drops are caught - Remove unused RexBuilder import Signed-off-by: Radhakrishnan Pachyappan --- .../calcite/CalciteRexNodeVisitorTest.java | 51 ++++++++++--------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java index fa77d962e42..558c3b8d9d3 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/CalciteRexNodeVisitorTest.java @@ -13,7 +13,6 @@ import java.sql.Connection; import java.util.List; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.type.ArraySqlType; @@ -84,38 +83,40 @@ public void tearDown() { public void testVisitLiteralNonAsciiStringDoesNotThrow() { // Regression test for https://github.com/opensearch-project/OpenSearch/issues/21880 // Chinese (and other non-Latin) string literals must not throw CalciteException when - // visitLiteral builds them via RexBuilder.makeLiteral. The fix is to use UTF-8 explicitly. + // visitLiteral builds them via RexBuilder.makeLiteral. 
+ // context.rexBuilder is a real ExtendedRexBuilder backed by TYPE_FACTORY (the mock + // only supplies TYPE_FACTORY through getTypeFactory()), so this exercises the real + // Calcite NlsString / makeLiteral code path. CalciteRexNodeVisitor realVisitor = new CalciteRexNodeVisitor(relNodeVisitor); - RexBuilder realRexBuilder = new ExtendedRexBuilder(new RexBuilder(TYPE_FACTORY)); CalcitePlanContext realContext = CalcitePlanContext.create(frameworkConfig, SysLimit.DEFAULT, QueryType.PPL); - // Simulate visitLiteral with the real RexBuilder by calling it directly Literal chineseLiteral = new Literal("未处置", DataType.STRING); Literal arabicLiteral = new Literal("مرحبا", DataType.STRING); Literal singleCharLiteral = new Literal("中", DataType.STRING); - // None of these should throw CalciteException - assertDoesNotThrow( - () -> { - RexNode node = realVisitor.visitLiteral(chineseLiteral, realContext); - assertNotNull(node); - assertInstanceOf(RexLiteral.class, node); - assertEquals(SqlTypeName.VARCHAR, node.getType().getSqlTypeName()); - }); - assertDoesNotThrow( - () -> { - RexNode node = realVisitor.visitLiteral(arabicLiteral, realContext); - assertNotNull(node); - assertInstanceOf(RexLiteral.class, node); - }); - assertDoesNotThrow( - () -> { - RexNode node = realVisitor.visitLiteral(singleCharLiteral, realContext); - assertNotNull(node); - assertInstanceOf(RexLiteral.class, node); - assertEquals(SqlTypeName.CHAR, node.getType().getSqlTypeName()); - }); + // VARCHAR multi-char: must not throw and must carry UTF-8 charset + RexNode chineseNode = realVisitor.visitLiteral(chineseLiteral, realContext); + assertNotNull(chineseNode); + assertInstanceOf(RexLiteral.class, chineseNode); + assertEquals(SqlTypeName.VARCHAR, chineseNode.getType().getSqlTypeName()); + assertEquals( + java.nio.charset.StandardCharsets.UTF_8, chineseNode.getType().getCharset()); + + RexNode arabicNode = realVisitor.visitLiteral(arabicLiteral, realContext); + assertNotNull(arabicNode); + 
assertInstanceOf(RexLiteral.class, arabicNode); + assertEquals(SqlTypeName.VARCHAR, arabicNode.getType().getSqlTypeName()); + assertEquals( + java.nio.charset.StandardCharsets.UTF_8, arabicNode.getType().getCharset()); + + // CHAR(1): single non-ASCII character must also carry UTF-8 charset + RexNode singleCharNode = realVisitor.visitLiteral(singleCharLiteral, realContext); + assertNotNull(singleCharNode); + assertInstanceOf(RexLiteral.class, singleCharNode); + assertEquals(SqlTypeName.CHAR, singleCharNode.getType().getSqlTypeName()); + assertEquals( + java.nio.charset.StandardCharsets.UTF_8, singleCharNode.getType().getCharset()); } @Test From 4c7bf0385e91cd930c48cb5f61996aedf7cce932 Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Mon, 8 Jun 2026 12:11:46 +0530 Subject: [PATCH 3/6] Fix string concat incompatibility from UTF-8 charset on literals The previous fix added UTF-8 charset only to string literals in visitLiteral(), leaving column VARCHAR types with no charset. Calcite then rejected string concatenation (e.g. 'Hello ' + firstname) with: VARCHAR CHARACTER SET "UTF-8" NOT NULL is not comparable to VARCHAR Fix: move the UTF-8 + IMPLICIT collation enforcement into OpenSearchTypeFactory.createSqlType() for VARCHAR/CHAR so both column types and literal types carry the same charset consistently. visitLiteral() reverts to plain createSqlType() calls since the factory now handles encoding globally. 
Signed-off-by: Radhakrishnan Pachyappan --- .../sql/calcite/CalciteRexNodeVisitor.java | 20 +++++-------------- .../calcite/utils/OpenSearchTypeFactory.java | 16 ++++++++++++++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 0a9da0ed55c..e0c6604f6f7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -12,7 +12,6 @@ import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -32,7 +31,6 @@ import org.apache.calcite.rex.RexLambdaRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -133,28 +131,20 @@ public RexNode visitLiteral(Literal node, CalcitePlanContext context) { case NULL: return rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL)); case STRING: - // Use UTF-8 explicitly so non-ASCII characters (e.g. Chinese, Arabic) are not rejected - // by Calcite's NlsString, which otherwise defaults to ISO-8859-1. + // UTF-8 charset is applied globally by OpenSearchTypeFactory.createSqlType() for + // VARCHAR/CHAR, so non-ASCII characters (e.g. Chinese, Arabic) are handled correctly + // and literal types stay compatible with column types for string operations. 
if (value.toString().length() == 1) { // To align Spark/PostgreSQL, Char(1) is useful, such as cast('1' to boolean) should // return true return rexBuilder.makeLiteral( - value.toString(), - typeFactory.createTypeWithCharsetAndCollation( - typeFactory.createSqlType(SqlTypeName.CHAR), - StandardCharsets.UTF_8, - SqlCollation.IMPLICIT)); + value.toString(), typeFactory.createSqlType(SqlTypeName.CHAR)); } else { // Specific the type to VARCHAR and allowCast to true, or the STRING will be optimized to // CHAR(n) // which leads to incorrect return type in deriveReturnType of some functions/operators return rexBuilder.makeLiteral( - value.toString(), - typeFactory.createTypeWithCharsetAndCollation( - typeFactory.createSqlType(SqlTypeName.VARCHAR), - StandardCharsets.UTF_8, - SqlCollation.IMPLICIT), - true); + value.toString(), typeFactory.createSqlType(SqlTypeName.VARCHAR), true); } case INTEGER: return rexBuilder.makeExactLiteral(new BigDecimal((Integer) value)); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 9c15c1485c1..3447b9c077c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -32,6 +32,7 @@ import java.lang.reflect.Type; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; @@ -104,8 +105,21 @@ public RelDataType createTypeWithCharsetAndCollation( return super.createTypeWithCharsetAndCollation(type, charset, collation); } + @Override + public RelDataType createSqlType(SqlTypeName typeName) { + RelDataType type = super.createSqlType(typeName); + if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) { + return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, 
SqlCollation.IMPLICIT); + } + return type; + } + public RelDataType createSqlType(SqlTypeName typeName, boolean nullable) { - return createTypeWithNullability(super.createSqlType(typeName), nullable); + RelDataType type = createTypeWithNullability(super.createSqlType(typeName), nullable); + if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) { + return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, SqlCollation.IMPLICIT); + } + return type; } public RelDataType createStructType( From a4db157c5a13cfbe2797da56a4fb17fbe5219626 Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Mon, 8 Jun 2026 14:07:15 +0530 Subject: [PATCH 4/6] Fix charset mismatch by overriding getDefaultCharset() with UTF-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix patched createSqlType() for the no-arg and boolean variants, but Calcite has many code paths for char type creation: - createSqlType(SqlTypeName, int precision) - RexBuilder.makeLiteral(String) → getDefaultCharset() - RelBuilder.literal(String) → getDefaultCharset() All of these bypassed the per-method overrides, causing residual 'VARCHAR CHARACTER SET UTF-8 is not comparable to CHAR(1)' errors in RangeFormatter and other callers (e.g. bin command). Fix: override getDefaultCharset() in OpenSearchTypeFactory to return UTF-8. This is the single source of truth Calcite uses across all char type creation paths, making every VARCHAR/CHAR consistently UTF-8 without needing per-call patches. The per-method createSqlType overrides are removed as redundant. 
Signed-off-by: Radhakrishnan Pachyappan --- .../calcite/utils/OpenSearchTypeFactory.java | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 3447b9c077c..0b557c69a08 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -96,6 +96,11 @@ public RelDataType createTypeWithNullability(RelDataType type, boolean nullable) return super.createTypeWithNullability(type, nullable); } + @Override + public Charset getDefaultCharset() { + return StandardCharsets.UTF_8; + } + @Override public RelDataType createTypeWithCharsetAndCollation( RelDataType type, Charset charset, SqlCollation collation) { @@ -105,21 +110,8 @@ public RelDataType createTypeWithCharsetAndCollation( return super.createTypeWithCharsetAndCollation(type, charset, collation); } - @Override - public RelDataType createSqlType(SqlTypeName typeName) { - RelDataType type = super.createSqlType(typeName); - if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) { - return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, SqlCollation.IMPLICIT); - } - return type; - } - public RelDataType createSqlType(SqlTypeName typeName, boolean nullable) { - RelDataType type = createTypeWithNullability(super.createSqlType(typeName), nullable); - if (typeName == SqlTypeName.VARCHAR || typeName == SqlTypeName.CHAR) { - return createTypeWithCharsetAndCollation(type, StandardCharsets.UTF_8, SqlCollation.IMPLICIT); - } - return type; + return createTypeWithNullability(super.createSqlType(typeName), nullable); } public RelDataType createStructType( From 997cd2b49e7e3987780ea66d2ea5297669991e69 Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Tue, 9 Jun 2026 13:21:36 +0530 
Subject: [PATCH 5/6] Fix charset annotation in plan strings via saffron.properties The getDefaultCharset() override (introduced to fix non-ASCII PPL string literals) caused Calcite to annotate all VARCHAR/CHAR types and literals with a CHARACTER SET "UTF-8" annotation and a _UTF-8 prefix in plan strings, breaking dozens of unit tests that compare logical plan representations. Root cause: Calcite suppresses the charset annotation and the _charset prefix only when the charset matches CalciteSystemProperty.DEFAULT_CHARSET (which defaults to ISO-8859-1). Overriding getDefaultCharset() to UTF-8 set the charset on all types, but the suppression checks still compared against ISO-8859-1, making the annotations appear everywhere. Fix: add saffron.properties to core/src/main/resources with: calcite.default.charset=UTF-8 calcite.default.collation.name=UTF-8$en_US Calcite reads this file at CalciteSystemProperty class-load time (before any DEFAULT_CHARSET or SqlCollation.IMPLICIT static field is initialized), shifting the entire "default charset" universe to UTF-8. Both suppression checks now compare against UTF-8, so plan strings are identical to before while non-ASCII string literals continue to work correctly. The now-redundant getDefaultCharset() override is removed; the inherited SqlTypeFactoryImpl path already returns UTF-8 via Util.getDefaultCharset(), which reads from CalciteSystemProperty.DEFAULT_CHARSET = "UTF-8". 
Signed-off-by: Radhakrishnan Pachyappan --- .../org/opensearch/sql/calcite/CalciteRexNodeVisitor.java | 5 ++--- .../sql/calcite/utils/OpenSearchTypeFactory.java | 6 ------ core/src/main/resources/saffron.properties | 7 +++++++ 3 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 core/src/main/resources/saffron.properties diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index e0c6604f6f7..59a7c171dc1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -131,9 +131,8 @@ public RexNode visitLiteral(Literal node, CalcitePlanContext context) { case NULL: return rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL)); case STRING: - // UTF-8 charset is applied globally by OpenSearchTypeFactory.createSqlType() for - // VARCHAR/CHAR, so non-ASCII characters (e.g. Chinese, Arabic) are handled correctly - // and literal types stay compatible with column types for string operations. + // saffron.properties sets calcite.default.charset=UTF-8 so non-ASCII characters + // (e.g. Chinese, Arabic) are accepted and literal types stay compatible with column types. 
if (value.toString().length() == 1) { // To align Spark/PostgreSQL, Char(1) is useful, such as cast('1' to boolean) should // return true diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 0b557c69a08..9c15c1485c1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -32,7 +32,6 @@ import java.lang.reflect.Type; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; @@ -96,11 +95,6 @@ public RelDataType createTypeWithNullability(RelDataType type, boolean nullable) return super.createTypeWithNullability(type, nullable); } - @Override - public Charset getDefaultCharset() { - return StandardCharsets.UTF_8; - } - @Override public RelDataType createTypeWithCharsetAndCollation( RelDataType type, Charset charset, SqlCollation collation) { diff --git a/core/src/main/resources/saffron.properties b/core/src/main/resources/saffron.properties new file mode 100644 index 00000000000..db44a8a2246 --- /dev/null +++ b/core/src/main/resources/saffron.properties @@ -0,0 +1,7 @@ +# Shift Calcite's default charset from ISO-8859-1 to UTF-8 so that: +# 1. Non-ASCII PPL string literals (Chinese, Arabic, etc.) are accepted without error. +# 2. Plan-string representations (used in unit-test assertions) are unchanged, +# because Calcite suppresses the CHARACTER SET annotation and _charset prefix +# whenever the charset matches this "default" value. 
+calcite.default.charset=UTF-8 +calcite.default.collation.name=UTF-8$en_US From c7521f7e590288f2bacc54359214606c1ed82549 Mon Sep 17 00:00:00 2001 From: Radhakrishnan Pachyappan Date: Sat, 13 Jun 2026 13:10:38 +0530 Subject: [PATCH 6/6] fix(calcite): set UTF-8 charset system properties at plugin startup saffron.properties is correctly bundled in core-3.7.0.0.jar but CalciteSystemProperty loads it via its own classloader. In OpenSearch 3.x, Calcite is loaded by a parent/server classloader that cannot see resources inside the plugin JAR, so the file is silently ignored and the default charset remains ISO-8859-1. Set the equivalent properties programmatically in SQLPlugin's static initializer, which runs before any Calcite class is used. System properties are JVM-wide and classloader-agnostic, producing the same effect as adding -Dcalcite.default.charset=UTF-8 to jvm.options but without requiring user configuration. The existing saffron.properties is retained as a fallback for environments where the plugin classloader does load Calcite directly. Signed-off-by: Radhakrishnan Pachyappan --- .../java/org/opensearch/sql/plugin/SQLPlugin.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index 0eee03102bb..0676af35eb8 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -151,6 +151,19 @@ public class SQLPlugin extends Plugin private static final Logger LOGGER = LogManager.getLogger(SQLPlugin.class); + static { + // CalciteSystemProperty reads saffron.properties via its own classloader. In OpenSearch 3.x, + // Calcite is loaded by a parent/server classloader that cannot see resources bundled inside + // the plugin JAR, so saffron.properties is silently ignored. 
Setting the equivalent JVM + // system properties here (before any Calcite class is first used) is classloader-agnostic + // and produces the same effect as -Dcalcite.default.charset=UTF-8 in jvm.options, making + // non-ASCII PPL string literals (Chinese, Arabic, etc.) work without user configuration. + if (System.getProperty("calcite.default.charset") == null) { + System.setProperty("calcite.default.charset", "UTF-8"); + System.setProperty("calcite.default.collation.name", "UTF-8$en_US"); + } + } + private List executionEngineExtensions = List.of(); private ClusterService clusterService;