Skip to content

Commit

Permalink
PARQUET-2473: Clarify records can not be split across v2 pages or Pag…
Browse files Browse the repository at this point in the history
…eIndex (#244)


Co-authored-by: Ed Seidl <[email protected]>
  • Loading branch information
alamb and etseidl authored May 31, 2024
1 parent 384bedd commit 38c108c
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/main/thrift/parquet.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,13 @@ enum BoundaryOrder {

/** Data page header */
struct DataPageHeader {
/** Number of values, including NULLs, in this data page. **/
/**
* Number of values, including NULLs, in this data page.
*
* If an OffsetIndex is present, a page must begin at a record
* boundary (repetition_level = 0). Otherwise, pages may begin
* within a record (repetition_level > 0).
**/
1: required i32 num_values

/** Encoding used for this data page **/
Expand Down Expand Up @@ -625,7 +631,11 @@ struct DataPageHeaderV2 {
/** Number of NULL values in this data page.
Number of non-null values = num_values - num_nulls, which is also the number of values in the data section **/
2: required i32 num_nulls
/** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/
/**
* Number of rows in this data page. Every page must begin at a
* record boundary (repetition_level = 0): records must **not** be
* split across page boundaries when using V2 data pages.
**/
3: required i32 num_rows
/** Encoding used for data in this page **/
4: required Encoding encoding
Expand Down Expand Up @@ -995,8 +1005,9 @@ struct PageLocation {
2: required i32 compressed_page_size

/**
* Index within the RowGroup of the first row of the page; this means pages
* change on record boundaries (r = 0).
* Index within the RowGroup of the first row of the page. When an
* OffsetIndex is present, pages must begin on record boundaries
* (repetition_level = 0).
*/
3: required i64 first_row_index
}
Expand Down Expand Up @@ -1190,4 +1201,3 @@ struct FileCryptoMetaData {
* and (possibly) columns **/
2: optional binary key_metadata
}

0 comments on commit 38c108c

Please sign in to comment.