diff --git a/LogicalTypes.md b/LogicalTypes.md index 644ffebc..ec32fa69 100644 --- a/LogicalTypes.md +++ b/LogicalTypes.md @@ -258,13 +258,21 @@ The sort order for `FLOAT16` is signed (with special handling of NANs and signed ### FIXED_SIZE_LIST The `FIXED_SIZE_LIST` annotation represents a fixed-size list of elements -of a primitive data type. It must annotate a `binary` primitive type. +of a primitive data type. It must annotate a `FIXED_LEN_BYTE_ARRAY` primitive type. -The `binary` data is interpreted as a sequence of elements of -the same primitive data type. +The `FIXED_LEN_BYTE_ARRAY` data is interpreted as a fixed-size sequence of +elements of the same primitive data type. The sort order used for `FIXED_SIZE_LIST` is undefined. +### VARIABLE_SIZE_LIST + +The `VARIABLE_SIZE_LIST` annotation represents a variable-size list of elements +of a primitive data type. It must annotate a `BYTE_ARRAY` primitive type. + +The `BYTE_ARRAY` data is interpreted as a variable-size sequence of elements of +the same primitive data type. 
+ ## Temporal Types ### DATE diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index ebae4526..62867eaa 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -282,14 +282,20 @@ struct Statistics { } /** Empty structs to use as logical type annotations */ -struct StringType {} // allowed for BINARY, must be encoded with UTF-8 -struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes -struct MapType {} // see LogicalTypes.md -struct ListType {} // see LogicalTypes.md -struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 -struct DateType {} // allowed for INT32 -struct Float16Type {} // allowed for FIXED[2], must encoded raw FLOAT16 bytes -struct FixedSizeListType {} // see LogicalTypes.md +struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct UUIDType {} // allowed for FIXED[16], must be encoded as raw UUID bytes +struct MapType {} // see LogicalTypes.md +struct ListType {} // see LogicalTypes.md +struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 +struct DateType {} // allowed for INT32 +struct Float16Type {} // allowed for FIXED[2], must be encoded as raw FLOAT16 bytes +struct FixedSizeListType { // allowed for FIXED_LEN_BYTE_ARRAY[num_values * width of type], + 1: required Type type; // see LogicalTypes.md + 2: required i32 num_values; +} +struct VariableSizeListType { // allowed for BYTE_ARRAY, see LogicalTypes.md + 1: required Type type; +} /** * Logical type to annotate a column that is always null. 
@@ -398,14 +404,15 @@ union LogicalType { 8: TimestampType TIMESTAMP // 9: reserved for INTERVAL - 10: IntType INTEGER // use ConvertedType INT_* or UINT_* - 11: NullType UNKNOWN // no compatible ConvertedType - 12: JsonType JSON // use ConvertedType JSON - 13: BsonType BSON // use ConvertedType BSON - 14: UUIDType UUID // no compatible ConvertedType - 15: Float16Type FLOAT16 // no compatible ConvertedType + 10: IntType INTEGER // use ConvertedType INT_* or UINT_* + 11: NullType UNKNOWN // no compatible ConvertedType + 12: JsonType JSON // use ConvertedType JSON + 13: BsonType BSON // use ConvertedType BSON + 14: UUIDType UUID // no compatible ConvertedType + 15: Float16Type FLOAT16 // no compatible ConvertedType // 16: reserved for GEOMETRY - 17: FixedSizeListType FIXED_SIZE_LIST // no compatible ConvertedType + 17: FixedSizeListType FIXED_SIZE_LIST // no compatible ConvertedType + 18: VariableSizeListType VARIABLE_SIZE_LIST // no compatible ConvertedType } /**