-
Notifications
You must be signed in to change notification settings - Fork 113
feat(schema): represent, serialize and validate v3 column default values (1/4) #746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f663e0e
511d9c8
9690529
fe3b348
d15a0fe
180a9f9
2052e4f
54d9636
925611a
0b4d452
b252b72
36a76ca
7f49ebc
04ee5c6
0163d35
26b1f1b
462d2d9
c58050f
41c2bd1
bda2d78
38bbdb0
6fe4640
2efca23
70e029b
d022953
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,19 +21,39 @@ | |
|
|
||
| #include <format> | ||
| #include <string_view> | ||
| #include <utility> | ||
|
|
||
| #include "iceberg/expression/literal.h" | ||
| #include "iceberg/type.h" | ||
| #include "iceberg/util/formatter.h" // IWYU pragma: keep | ||
| #include "iceberg/util/macros.h" | ||
|
|
||
| namespace iceberg { | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This always creates a new literal even when their types are the same, which is unlikely?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done, removed the normalization method.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| namespace { | ||
|
|
||
| // Normalizes a default value before it is stored on a field: a literal that reports | ||
| // IsNull() is treated as "no default" (matching Java) and replaced by nullptr, while | ||
| // any other input, including nullptr, is handed back unchanged. | ||
| std::shared_ptr<const Literal> DropNullDefault(std::shared_ptr<const Literal> value) { | ||
|   const bool keep = value == nullptr || !value->IsNull(); | ||
|   if (keep) { | ||
|     return value; | ||
|   } | ||
|   return nullptr; | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| // Builds a field from its id, name, type, optionality and doc string plus the two | ||
| // optional default literals. Each default is passed through DropNullDefault() first, so | ||
| // a null literal is stored as nullptr (no default) rather than as a value. | ||
| SchemaField::SchemaField(int32_t field_id, std::string_view name, | ||
|                          std::shared_ptr<Type> type, bool optional, std::string_view doc, | ||
|                          std::shared_ptr<const Literal> initial_default, | ||
|                          std::shared_ptr<const Literal> write_default) | ||
|     : field_id_(field_id), | ||
|       name_(name), | ||
|       type_(std::move(type)), | ||
|       optional_(optional), | ||
|       doc_(doc), | ||
|       initial_default_(DropNullDefault(std::move(initial_default))), | ||
|       write_default_(DropNullDefault(std::move(write_default))) {} | ||
|
|
||
| SchemaField SchemaField::MakeOptional(int32_t field_id, std::string_view name, | ||
| std::shared_ptr<Type> type, std::string_view doc) { | ||
|
|
@@ -55,13 +75,74 @@ bool SchemaField::optional() const { return optional_; } | |
|
|
||
| std::string_view SchemaField::doc() const { return doc_; } | ||
|
|
||
| // Accessor for the stored initial-default literal. A nullptr result means the field | ||
| // carries no initial default. | ||
| const std::shared_ptr<const Literal>& SchemaField::initial_default() const { | ||
|   return initial_default_; | ||
| } | ||
|
|
||
| // Accessor for the stored write-default literal. A nullptr result means the field | ||
| // carries no write default. | ||
| const std::shared_ptr<const Literal>& SchemaField::write_default() const { | ||
|   return write_default_; | ||
| } | ||
|
|
||
| namespace { | ||
|
|
||
| // Validates one present default literal against the field that carries it. `kind` is | ||
| // only used to label the error message (the callers in this file pass "initial-default" | ||
| // or "write-default"). Checks run in a fixed order and the first failure is returned as | ||
| // an InvalidSchema status; an empty status means the default is acceptable. | ||
| Status ValidateDefault(const SchemaField& field, const Literal& value, | ||
|                        std::string_view kind) { | ||
|   // Null defaults are dropped at construction and never get here, which leaves the | ||
|   // out-of-range cast sentinels as the only special literals to reject. | ||
|   const bool out_of_range = value.IsAboveMax() || value.IsBelowMin(); | ||
|   if (out_of_range) { | ||
|     return InvalidSchema("Invalid {} value for {}: value is out of range", kind, | ||
|                          field.name()); | ||
|   } | ||
|   const auto& type = field.type(); | ||
|   if (type == nullptr) { | ||
|     return InvalidSchema("Invalid {} value for {}: field has no type", kind, | ||
|                          field.name()); | ||
|   } | ||
|   // unknown/variant/geometry/geography columns must default to null per the spec, so any | ||
|   // default that survived construction (i.e. a non-null one) is invalid for them. | ||
|   const auto id = type->type_id(); | ||
|   const bool null_only = id == TypeId::kUnknown || id == TypeId::kVariant || | ||
|                          id == TypeId::kGeometry || id == TypeId::kGeography; | ||
|   if (null_only) { | ||
|     return InvalidSchema("Invalid {} value for {}: type {} cannot have a default value", | ||
|                          kind, field.name(), *type); | ||
|   } | ||
|   // Beyond that, only primitive fields may carry defaults. The spec additionally allows | ||
|   // JSON single-value defaults on struct/list/map (for instance an empty struct `{}` | ||
|   // whose sub-field defaults live in field metadata); like the current Java model this | ||
|   // is not supported yet and is left as a follow-up. | ||
|   if (!type->is_primitive()) { | ||
|     return InvalidSchema( | ||
|         "Invalid {} value for {}: default values are only supported for primitive types", | ||
|
wgtmac marked this conversation as resolved.
|
||
|         kind, field.name()); | ||
|   } | ||
|   // No implicit cast is applied to a stored default, so the literal's own type has to | ||
|   // equal the field type exactly. | ||
|   if (*value.type() != *type) { | ||
|     return InvalidSchema("{} of field {} has type {} but expected {}", kind, field.name(), | ||
|                          *value.type(), *type); | ||
|   } | ||
|   return {}; | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| // Checks this field's own invariants in order: a non-empty name, a non-null type, then | ||
| // each default that is present (initial-default before write-default). Returns an empty | ||
| // status when every check passes. | ||
| Status SchemaField::Validate() const { | ||
|   if (name_.empty()) [[unlikely]] { | ||
|     return InvalidSchema("SchemaField cannot have empty name"); | ||
|   } | ||
|   if (type_ == nullptr) [[unlikely]] { | ||
|     return InvalidSchema("SchemaField cannot have null type"); | ||
|   } | ||
|   // An absent default (nullptr) has nothing to validate. | ||
|   const auto check_default = [this](const std::shared_ptr<const Literal>& value, | ||
|                                     std::string_view kind) -> Status { | ||
|     if (value == nullptr) { | ||
|       return {}; | ||
|     } | ||
|     return ValidateDefault(*this, *value, kind); | ||
|   }; | ||
|   ICEBERG_RETURN_UNEXPECTED(check_default(initial_default_, "initial-default")); | ||
|   ICEBERG_RETURN_UNEXPECTED(check_default(write_default_, "write-default")); | ||
|   return {}; | ||
| } | ||
|
|
||
|
|
@@ -72,9 +153,23 @@ std::string SchemaField::ToString() const { | |
| return result; | ||
| } | ||
|
|
||
| namespace { | ||
|
|
||
| // Compares two optional defaults: equal when both are absent, or when both are present | ||
| // and the literals they point to compare equal. | ||
| bool DefaultEquals(const std::shared_ptr<const Literal>& lhs, | ||
|                    const std::shared_ptr<const Literal>& rhs) { | ||
|   const bool both_present = lhs != nullptr && rhs != nullptr; | ||
|   if (!both_present) { | ||
|     // With at least one side absent, pointer equality holds only when both are null. | ||
|     return lhs == rhs; | ||
|   } | ||
|   return *lhs == *rhs; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a reminder that
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or add a overload |
||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| // Field-by-field equality over id, name, type (compared by value through the pointer), | ||
| // optionality and both defaults. The doc string is not part of the comparison. | ||
| // NOTE(review): type_ and other.type_ are dereferenced without a null check; confirm | ||
| // callers never compare fields whose type is null. | ||
| bool SchemaField::Equals(const SchemaField& other) const { | ||
|   return field_id_ == other.field_id_ && name_ == other.name_ && *type_ == *other.type_ && | ||
|          optional_ == other.optional_ && | ||
|          DefaultEquals(initial_default_, other.initial_default_) && | ||
|          DefaultEquals(write_default_, other.write_default_); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
Uh oh!
There was an error while loading. Please reload this page.