Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions language-grammar/src/main/antlr4/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,12 @@ scalarFunctionName
| flowControlFunctionName
| systemFunctionName
| nestedFunctionName
| bucketFunctionName
;

bucketFunctionName
: HISTOGRAM
| DATE_HISTOGRAM
;

specificFunction
Expand Down
6 changes: 6 additions & 0 deletions sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,12 @@ scalarFunctionName
| flowControlFunctionName
| systemFunctionName
| nestedFunctionName
| bucketFunctionName
;

bucketFunctionName
: HISTOGRAM
| DATE_HISTOGRAM
;

specificFunction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.OrExpressionContext;
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableNameContext;
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParserBaseVisitor;
import org.opensearch.sql.sql.parser.bucket.BucketFunctionExpander;
import org.opensearch.sql.sql.parser.bucket.BucketFunctionRegistry;

/** Expression builder to parse text to expression in AST. */
public class AstExpressionBuilder extends OpenSearchSQLParserBaseVisitor<UnresolvedExpression> {
Expand Down Expand Up @@ -139,7 +141,18 @@ public UnresolvedExpression visitNestedAllFunctionCall(NestedAllFunctionCallCont

@Override
public UnresolvedExpression visitScalarFunctionCall(ScalarFunctionCallContext ctx) {
return buildFunction(ctx.scalarFunctionName().getText(), ctx.functionArgs().functionArg());
String functionName = ctx.scalarFunctionName().getText();
List<UnresolvedExpression> args =
ctx.functionArgs().functionArg().stream()
.map(this::visitFunctionArg)
.collect(Collectors.toList());

Optional<BucketFunctionExpander> bucketExpander = BucketFunctionRegistry.lookup(functionName);
if (bucketExpander.isPresent()) {
return bucketExpander.get().expand(args);
}

return new Function(functionName, args);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql.parser.bucket;

import java.util.List;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
* Parse-time expander for a bucket function call. Each implementation lowers calls to one bucket
* function (e.g. {@code histogram}) into standard SQL constructs the rest of the engine already
* understands.
*
* <p>Implementations are stateless and registered by name in {@link BucketFunctionRegistry}.
*/
public interface BucketFunctionExpander {

/** Lowers a bucket function call into its bucket-key expression. */
UnresolvedExpression expand(List<UnresolvedExpression> args);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql.parser.bucket;

import java.util.Locale;
import java.util.Map;
import java.util.Optional;

/** Lookup table mapping bucket-function names to their {@link BucketFunctionExpander}. */
public final class BucketFunctionRegistry {

private static final Map<String, BucketFunctionExpander> EXPANDERS =
Map.of(
HistogramExpander.FUNCTION_NAME, new HistogramExpander(),
DateHistogramExpander.FUNCTION_NAME, new DateHistogramExpander());

private BucketFunctionRegistry() {}

/**
* Returns the expander for {@code functionName} (case-insensitive), or empty if not a bucket
* function.
*/
public static Optional<BucketFunctionExpander> lookup(String functionName) {
if (functionName == null) {
return Optional.empty();
}
return Optional.ofNullable(EXPANDERS.get(functionName.toUpperCase(Locale.ROOT)));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql.parser.bucket;

import java.util.List;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
* Shared parameter helpers for bucket-function expanders. Operates on values pulled from a {@link
* NamedArguments} or from a positional argument list.
*/
final class BucketFunctionUtils {

private BucketFunctionUtils() {}

/**
* Named-argument form accepts string-literal field names ({@code 'field'='age'}). Coerce them to
* {@link QualifiedName} so downstream sees a column reference regardless of how the user spelled
* it.
*/
static UnresolvedExpression normalizeFieldRef(UnresolvedExpression expr) {
if (expr instanceof Literal lit && lit.getType() == DataType.STRING) {
return AstDSL.qualifiedName(lit.getValue().toString());
}
return expr;
}

/** If {@code missingOrNull} is non-null, wrap field with {@code COALESCE(field, missing)}. */
static UnresolvedExpression applyMissing(
UnresolvedExpression field, UnresolvedExpression missingOrNull) {
if (missingOrNull == null) {
return field;
}
return new Function("coalesce", List.of(field, missingOrNull));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql.parser.bucket;

import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.applyMissing;
import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.normalizeFieldRef;

import java.time.ZoneOffset;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.exception.SemanticCheckException;

/**
* Lowers {@code date_histogram(...)} calls to a {@link Span} expression with the time unit inferred
* from the interval string. Optional parameters wrap the bucket key:
*
* <ul>
* <li>{@code missing} — wraps the field with {@code COALESCE(field, missing)} before bucketing.
* <li>{@code time_zone} — shifts the field with {@code TIMESTAMPADD(SECOND, offset, field)}
* before bucketing. Validated as a {@link java.time.ZoneOffset} at parse time.
* <li>{@code format} — wraps the bucket with {@code DATE_FORMAT(span, format)}.
* </ul>
*
* <p>{@code interval}, {@code fixed_interval}, and {@code calendar_interval} are accepted as
* mutually-exclusive syntactic synonyms; this lowering does not preserve the calendar-vs-fixed
* distinction across them.
*
* <p>TODO: V1 also accepts the following parameters; they are currently rejected:
*
* <ul>
* <li>{@code min_doc_count} — would lower to {@code HAVING COUNT(*) >= N}. Needs parser-side
* plumbing to inject a HAVING clause from inside a scalar function call.
* <li>{@code order} — would lower to {@code ORDER BY}. Same plumbing requirement as above.
* <li>{@code alias} — would set the surrounding SELECT-list alias. Needs reaching outside the
* function call to mutate the parent SELECT element.
* <li>{@code offset} — would shift bucket boundaries via {@code TIMESTAMPADD(SECOND, -offset,
* field)} before bucketing and {@code TIMESTAMPADD(SECOND, offset, span)} after. Needs a
* duration-string parser ({@code '1h'}, {@code '2d'}, etc.) distinct from {@code time_zone}'s
* {@code ZoneOffset} format.
* </ul>
*/
final class DateHistogramExpander implements BucketFunctionExpander {

static final String FUNCTION_NAME = "DATE_HISTOGRAM";

@Override
public UnresolvedExpression expand(List<UnresolvedExpression> args) {
if (!NamedArguments.isNamedArguments(args)) {
throw new SemanticCheckException(
"date_histogram requires named arguments: date_histogram('field'=<column>,"
+ " 'interval'=<duration>)");
}
NamedArguments named = NamedArguments.parse(args);
UnresolvedExpression field = named.require("field", FUNCTION_NAME);
Literal intervalLiteral = extractIntervalLiteral(named);
Literal formatLiteral = named.requireStringIfPresent("format");
Literal timeZoneLiteral = named.requireStringIfPresent("time_zone");
UnresolvedExpression missing = named.remove("missing");
named.rejectRemaining(FUNCTION_NAME);
return buildBucket(field, intervalLiteral, formatLiteral, timeZoneLiteral, missing);
}

/**
* Pulls the interval from the named arguments accepting any of {@code interval}, {@code
* fixed_interval}, {@code calendar_interval}. Exactly one must be present.
*/
private static Literal extractIntervalLiteral(NamedArguments named) {
Literal interval = named.requireStringIfPresent("interval");
Literal fixedInterval = named.requireStringIfPresent("fixed_interval");
Literal calendarInterval = named.requireStringIfPresent("calendar_interval");

List<Literal> suppliedIntervals =
Stream.of(interval, fixedInterval, calendarInterval).filter(Objects::nonNull).toList();

if (suppliedIntervals.isEmpty()) {
throw new SemanticCheckException(
"date_histogram requires one of: interval, fixed_interval, calendar_interval");
}
if (suppliedIntervals.size() > 1) {
throw new SemanticCheckException(
"date_histogram accepts only one of: interval, fixed_interval, calendar_interval");
}
return suppliedIntervals.get(0);
}

private static UnresolvedExpression buildBucket(
UnresolvedExpression field,
Literal intervalLiteral,
Literal formatLiteral,
Literal timeZoneLiteral,
UnresolvedExpression missingOrNull) {
UnresolvedExpression resolvedField = applyMissing(normalizeFieldRef(field), missingOrNull);
UnresolvedExpression shiftedField =
timeZoneLiteral != null
? applyTimeZoneShift(resolvedField, timeZoneLiteral)
: resolvedField;
Span span = AstDSL.spanFromSpanLengthLiteral(shiftedField, intervalLiteral);
if (formatLiteral == null) {
return span;
}
return new Function("date_format", List.of(span, formatLiteral));
}

/**
* Wraps the field with a {@code TIMESTAMPADD(SECOND, offset, field)} shift derived from a
* timezone literal. Validates the literal at parse time as a {@link ZoneOffset} (e.g. {@code
* '+05:30'}, {@code 'Z'}); runtime arithmetic is plain second addition.
*/
private static UnresolvedExpression applyTimeZoneShift(
UnresolvedExpression field, Literal timeZoneLiteral) {
String tzString = timeZoneLiteral.getValue().toString();
int offsetSeconds;
try {
offsetSeconds = ZoneOffset.of(tzString).getTotalSeconds();
} catch (RuntimeException ex) {
throw new SemanticCheckException(
"time_zone must be a valid offset like '+05:30' or 'Z'; got '" + tzString + "'");
}
return new Function(
"timestampadd",
List.of(AstDSL.stringLiteral("SECOND"), AstDSL.intLiteral(offsetSeconds), field));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.sql.parser.bucket;

import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.applyMissing;
import static org.opensearch.sql.sql.parser.bucket.BucketFunctionUtils.normalizeFieldRef;

import java.util.List;
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.Span;
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.exception.SemanticCheckException;

/**
* Lowers {@code histogram(...)} calls to a {@link Span} expression with {@code SpanUnit.NONE}.
* Optional parameters wrap the bucket key:
*
* <ul>
* <li>{@code missing} — wraps the field with {@code COALESCE(field, missing)} before bucketing.
* <li>{@code offset} — wraps as {@code +(Span(-(field, offset), interval, NONE), offset)} to
* preserve the standard {@code [k*interval+offset, (k+1)*interval+offset)} boundaries.
* </ul>
*
* <p>TODO: V1 also accepts the following parameters; they are currently rejected:
*
* <ul>
* <li>{@code min_doc_count} — would lower to {@code HAVING COUNT(*) >= N}. Needs parser-side
* plumbing to inject a HAVING clause from inside a scalar function call.
* <li>{@code order} — would lower to {@code ORDER BY}. Same plumbing requirement as above.
* <li>{@code alias} — would set the surrounding SELECT-list alias. Needs reaching outside the
* function call to mutate the parent SELECT element.
* </ul>
*/
final class HistogramExpander implements BucketFunctionExpander {

static final String FUNCTION_NAME = "HISTOGRAM";

@Override
public UnresolvedExpression expand(List<UnresolvedExpression> args) {
if (!NamedArguments.isNamedArguments(args)) {
throw new SemanticCheckException(
"histogram requires named arguments: histogram('field'=<column>, 'interval'=<n>)");
}
NamedArguments named = NamedArguments.parse(args);
UnresolvedExpression field = named.require("field", FUNCTION_NAME);
UnresolvedExpression interval = named.require("interval", FUNCTION_NAME);
UnresolvedExpression offset = named.remove("offset");
UnresolvedExpression missing = named.remove("missing");
named.rejectRemaining(FUNCTION_NAME);
return buildBucket(field, interval, offset, missing);
}

private static UnresolvedExpression buildBucket(
UnresolvedExpression field,
UnresolvedExpression interval,
UnresolvedExpression offsetOrNull,
UnresolvedExpression missingOrNull) {
UnresolvedExpression resolvedField = applyMissing(normalizeFieldRef(field), missingOrNull);
if (offsetOrNull == null) {
return AstDSL.span(resolvedField, interval, SpanUnit.NONE);
}
UnresolvedExpression shifted = new Function("-", List.of(resolvedField, offsetOrNull));
Span bucket = (Span) AstDSL.span(shifted, interval, SpanUnit.NONE);
return new Function("+", List.of(bucket, offsetOrNull));
}
}
Loading
Loading