Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ public RexNode visitLiteral(Literal node, CalcitePlanContext context) {
case NULL:
return rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL));
case STRING:
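// Calcite's default charset is configured as UTF-8 (via saffron.properties; SQLPlugin also
// sets the equivalent JVM system properties when they are not already defined), so non-ASCII
// characters (e.g. Chinese, Arabic) are accepted and literal types stay compatible with
// column types.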
if (value.toString().length() == 1) {
// To align Spark/PostgreSQL, Char(1) is useful, such as cast('1' to boolean) should
// return true
Expand Down
7 changes: 7 additions & 0 deletions core/src/main/resources/saffron.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Shift Calcite's default charset from ISO-8859-1 to UTF-8 so that:
# 1. Non-ASCII PPL string literals (Chinese, Arabic, etc.) are accepted without error.
# 2. Plan-string representations (used in unit-test assertions) are unchanged,
# because Calcite suppresses the CHARACTER SET annotation and _charset prefix
# whenever the charset matches this "default" value.
calcite.default.charset=UTF-8
calcite.default.collation.name=UTF-8$en_US
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
import java.sql.Connection;
import java.util.List;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.ArraySqlType;
import org.apache.calcite.sql.type.SqlTypeName;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Literal;
import org.apache.calcite.tools.FrameworkConfig;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -76,6 +79,46 @@ public void tearDown() {
mockedStatic.close();
}

@Test
public void testVisitLiteralNonAsciiStringDoesNotThrow() {
// Regression test for https://github.com/opensearch-project/OpenSearch/issues/21880
// Chinese (and other non-Latin) string literals must not throw CalciteException when
// visitLiteral builds them via RexBuilder.makeLiteral.
// context.rexBuilder is a real ExtendedRexBuilder backed by TYPE_FACTORY (the mock
// only supplies TYPE_FACTORY through getTypeFactory()), so this exercises the real
// Calcite NlsString / makeLiteral code path.
CalciteRexNodeVisitor realVisitor = new CalciteRexNodeVisitor(relNodeVisitor);
CalcitePlanContext realContext =
CalcitePlanContext.create(frameworkConfig, SysLimit.DEFAULT, QueryType.PPL);
Comment thread
gingeekrishna marked this conversation as resolved.

Literal chineseLiteral = new Literal("未处置", DataType.STRING);
Literal arabicLiteral = new Literal("مرحبا", DataType.STRING);
Literal singleCharLiteral = new Literal("中", DataType.STRING);

// VARCHAR multi-char: must not throw and must carry UTF-8 charset
RexNode chineseNode = realVisitor.visitLiteral(chineseLiteral, realContext);
assertNotNull(chineseNode);
assertInstanceOf(RexLiteral.class, chineseNode);
assertEquals(SqlTypeName.VARCHAR, chineseNode.getType().getSqlTypeName());
assertEquals(
java.nio.charset.StandardCharsets.UTF_8, chineseNode.getType().getCharset());

RexNode arabicNode = realVisitor.visitLiteral(arabicLiteral, realContext);
assertNotNull(arabicNode);
assertInstanceOf(RexLiteral.class, arabicNode);
assertEquals(SqlTypeName.VARCHAR, arabicNode.getType().getSqlTypeName());
assertEquals(
java.nio.charset.StandardCharsets.UTF_8, arabicNode.getType().getCharset());

// CHAR(1): single non-ASCII character must also carry UTF-8 charset
RexNode singleCharNode = realVisitor.visitLiteral(singleCharLiteral, realContext);
assertNotNull(singleCharNode);
assertInstanceOf(RexLiteral.class, singleCharNode);
assertEquals(SqlTypeName.CHAR, singleCharNode.getType().getSqlTypeName());
assertEquals(
java.nio.charset.StandardCharsets.UTF_8, singleCharNode.getType().getCharset());
}

@Test
public void testPrepareLambdaForBasicLambda() {
when(componentType.getSqlTypeName()).thenReturn(SqlTypeName.DOUBLE);
Expand Down
13 changes: 13 additions & 0 deletions plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@ public class SQLPlugin extends Plugin

private static final Logger LOGGER = LogManager.getLogger(SQLPlugin.class);

static {
// CalciteSystemProperty reads saffron.properties via its own classloader. In OpenSearch 3.x,
// Calcite is loaded by a parent/server classloader that cannot see resources bundled inside
// the plugin JAR, so saffron.properties is silently ignored. Setting the equivalent JVM
// system properties here (before any Calcite class is first used) is classloader-agnostic
// and produces the same effect as -Dcalcite.default.charset=UTF-8 in jvm.options, making
// non-ASCII PPL string literals (Chinese, Arabic, etc.) work without user configuration.
if (System.getProperty("calcite.default.charset") == null) {
System.setProperty("calcite.default.charset", "UTF-8");
System.setProperty("calcite.default.collation.name", "UTF-8$en_US");
}
}

private List<ExecutionEngine> executionEngineExtensions = List.of();
private ClusterService clusterService;

Expand Down
Loading