Skip to content

Commit

Permalink
support for tuple and geometric types for clickhouse (#5587)
Browse files Browse the repository at this point in the history
* support for tuple and geometric types

* support for tuple and geometric types

* point is an array of floats
  • Loading branch information
k-anshul committed Sep 3, 2024
1 parent 58abf68 commit 8db19b5
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 2 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ require (
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/mitchellh/mapstructure v1.5.0
github.com/orbcorp/orb-go v0.34.0
github.com/paulmach/orb v0.11.1
github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe
github.com/prometheus/client_golang v1.19.1
github.com/redis/go-redis/v9 v9.0.2
Expand Down Expand Up @@ -322,7 +323,6 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/paulmach/orb v0.11.1 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/perimeterx/marshmallow v1.1.5 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
Expand Down
152 changes: 151 additions & 1 deletion runtime/drivers/clickhouse/olap.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,6 @@ func rowsToSchema(r *sqlx.Rows) (*runtimev1.StructType, error) {

// databaseTypeToPB converts Clickhouse types to Rill's generic schema type.
// Refer to the list of types here: https://clickhouse.com/docs/en/sql-reference/data-types
// NOTE: Doesn't handle aggregation function types, nested data structures, tuples, geo types, special data types.
func databaseTypeToPB(dbt string, nullable bool) (*runtimev1.Type, error) {
dbt = strings.ToUpper(dbt)

Expand Down Expand Up @@ -686,6 +685,18 @@ func databaseTypeToPB(dbt string, nullable bool) (*runtimev1.Type, error) {
t.Code = runtimev1.Type_CODE_STRING
case "OTHER":
t.Code = runtimev1.Type_CODE_JSON
case "POINT":
return databaseTypeToPB("Array(Float64)", nullable)
case "RING":
return databaseTypeToPB("Array(Point)", nullable)
case "LINESTRING":
return databaseTypeToPB("Array(Point)", nullable)
case "MULTILINESTRING":
return databaseTypeToPB("Array(LineString)", nullable)
case "POLYGON":
return databaseTypeToPB("Array(Ring)", nullable)
case "MULTIPOLYGON":
return databaseTypeToPB("Array(Polygon)", nullable)
default:
match = false
}
Expand Down Expand Up @@ -749,6 +760,39 @@ func databaseTypeToPB(dbt string, nullable bool) (*runtimev1.Type, error) {
case "ENUM", "ENUM8", "ENUM16":
// Representing enums as strings
t.Code = runtimev1.Type_CODE_STRING
case "TUPLE":
t.Code = runtimev1.Type_CODE_STRUCT
t.StructType = &runtimev1.StructType{}
fields := splitCommasUnlessQuotedOrNestedInParens(args)
if len(fields) == 0 {
return nil, errUnsupportedType
}
_, _, isNamed := splitStructFieldStr(fields[0])
for i, fieldStr := range fields {
if isNamed {
name, typ, ok := splitStructFieldStr(fieldStr)
if !ok {
return nil, errUnsupportedType
}
fieldType, err := databaseTypeToPB(typ, false)
if err != nil {
return nil, err
}
t.StructType.Fields = append(t.StructType.Fields, &runtimev1.StructType_Field{
Name: name,
Type: fieldType,
})
} else {
fieldType, err := databaseTypeToPB(fieldStr, true)
if err != nil {
return nil, err
}
t.StructType.Fields = append(t.StructType.Fields, &runtimev1.StructType_Field{
Name: fmt.Sprintf("%d", i),
Type: fieldType,
})
}
}
default:
return nil, errUnsupportedType
}
Expand All @@ -772,6 +816,112 @@ func splitBaseAndArgs(s string) (string, string, bool) {
return base, rest, true
}

// splitCommasUnlessQuotedOrNestedInParens breaks s into its top-level comma-separated items.
// A comma only acts as a separator when it is outside double quotes and outside parentheses.
// Spaces directly following a separator (or at the very start of s) are dropped from the item.
// An escaped quote inside a quoted name is written as `""`; it toggles the quote state twice,
// so no dedicated handling is required.
//
// Examples:
//
//	`10,20` -> [`10`, `20`]
//	`VARCHAR, INT` -> [`VARCHAR`, `INT`]
//	`"foo "",""" INT, "bar" STRUCT("a" INT, "b" INT)` -> [`"foo "",""" INT`, `"bar" STRUCT("a" INT, "b" INT)`]
//
// The result always contains at least one element: the text after the last separator
// (which is the empty string when s itself is empty).
func splitCommasUnlessQuotedOrNestedInParens(s string) []string {
	parts := []string{}
	start := 0        // byte offset where the current item begins
	inQuotes := false // true while inside an unmatched double quote
	depth := 0        // current parenthesis nesting level

	// Case order matters: quote handling wins over nesting, which wins over separators.
	for pos, r := range s {
		switch {
		case r == '"':
			inQuotes = !inQuotes
		case inQuotes:
			// Everything inside quotes is opaque.
		case r == '(':
			depth++
		case r == ')':
			depth--
		case depth != 0:
			// Commas and spaces inside parentheses belong to the current item.
		case r == ',':
			parts = append(parts, s[start:pos])
			start = pos + 1
		case r == ' ' && start == pos:
			// Leading space of an item: move the item start past it.
			start++
		}
	}

	// Whatever remains after the last separator is the final item.
	return append(parts, s[start:])
}

// splitStructFieldStr splits a single struct name/type pair.
// It expects fieldStr to have the format `name TYPE` or `"name" TYPE`.
// If the name string is quoted and contains escaped quotes `""`, they'll be replaced by `"`.
// For example: splitStructFieldStr(`"hello "" world" VARCHAR`) -> (`hello " world`, `VARCHAR`, true).
//
// The returned bool reports whether fieldStr could be split into a name and a type:
//   - For an unquoted fieldStr without any space, it returns (fieldStr, "", false).
//   - For an unquoted fieldStr whose text before the first space contains a parenthesis
//     (e.g. `DECIMAL(10, 2)` or `TUPLE(A INT, B INT)`), the input is a bare parameterized
//     type rather than a name/type pair, so it also returns (fieldStr, "", false).
//   - For a quoted name that is never terminated, it returns ("", "", false).
func splitStructFieldStr(fieldStr string) (string, string, bool) {
	// If the string DOES NOT start with a `"`, the name ends at the first space.
	if fieldStr == "" || fieldStr[0] != '"' {
		name, typ, ok := strings.Cut(fieldStr, " ")
		// A space that occurs inside a type's argument list (e.g. after the comma in
		// `DECIMAL(10, 2)`) is not a name/type separator. Callers use the bool to decide
		// whether a tuple is named, so report such input as "not a pair".
		if ok && strings.ContainsAny(name, "()") {
			return fieldStr, "", false
		}
		return name, typ, ok
	}

	// Find the closing quote of the name, stepping over `""` (an escaped quote).
	end := -1
	for i := 1; i < len(fieldStr); i++ {
		if fieldStr[i] != '"' {
			continue
		}
		if i+1 < len(fieldStr) && fieldStr[i+1] == '"' {
			i++ // skip the second quote of the escaped pair
			continue
		}
		end = i
		break
	}

	// If no closing quote was found, the format was unexpected.
	if end < 0 {
		return "", "", false
	}

	// Remove surrounding `"` and replace escaped quotes `""` with `"`.
	nameStr := strings.ReplaceAll(fieldStr[1:end], `""`, `"`)

	// The rest of the string is the type, minus any leading spaces.
	typeStr := strings.TrimLeft(fieldStr[end+1:], " ")

	return nameStr, typeStr, true
}

var errUnsupportedType = errors.New("encountered unsupported clickhouse type")

func tempName(prefix string) string {
Expand Down
3 changes: 3 additions & 0 deletions runtime/pkg/jsonval/jsonval.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/google/uuid"
"github.com/marcboeker/go-duckdb"
"github.com/paulmach/orb"
runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
)

Expand Down Expand Up @@ -112,6 +113,8 @@ func ToValue(v any, t *runtimev1.Type) (any, error) {
return map[string]any{"months": v.Months, "days": v.Days, "micros": v.Micros}, nil
case net.IP:
return v.String(), nil
case orb.Point:
return []any{v[0], v[1]}, nil
case *net.IP:
if v != nil {
return ToValue(*v, t)
Expand Down
7 changes: 7 additions & 0 deletions runtime/pkg/pbutil/pbutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/google/uuid"
"github.com/marcboeker/go-duckdb"
"github.com/paulmach/orb"
runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
"google.golang.org/protobuf/types/known/structpb"
)
Expand Down Expand Up @@ -175,6 +176,12 @@ func ToValue(v any, t *runtimev1.Type) (*structpb.Value, error) {
return structpb.NewNumberValue(*v), nil
case *net.IP:
return structpb.NewStringValue(v.String()), nil
case orb.Point:
st, err := structpb.NewList([]any{v[0], v[1]})
if err != nil {
return nil, err
}
return structpb.NewListValue(st), nil
default:
}
if t != nil && t.ArrayElementType != nil {
Expand Down

0 comments on commit 8db19b5

Please sign in to comment.