sql

package
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 16, 2025 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func StandardFunctions

func StandardFunctions(dlct *d.Dialect) d.Fns

StandardFunctions returns the built-in functions for in-memory data to be used by Parser.

Types

type Col

type Col struct {
	*d.ColCore
	// contains filtered or unexported fields
}

Col implements Column for SQL.

func NewCol

func NewCol(dt d.DataTypes, dlct *d.Dialect, sqlx string, opts ...d.ColOpt) (*Col, error)

NewCol creates a new *Col from SQL

dt   - data type of the column
dlct - Dialect to use
sqlx - SQL to create the column
opts - Column options

func (*Col) AllRows

func (c *Col) AllRows() iter.Seq2[int, []any]

func (*Col) Copy

func (c *Col) Copy() d.Column

func (*Col) Core

func (c *Col) Core() *d.ColCore

func (*Col) Data

func (c *Col) Data() *d.Vector

Data runs the SQL to pull the data.

func (*Col) DataLimit

func (c *Col) DataLimit(limit int) *d.Vector

DataLimit pulls the first limit rows of data. Pulls all the data if limit=0.

func (*Col) Len

func (c *Col) Len() int

func (*Col) MakeQuery

func (c *Col) MakeQuery() string

MakeQuery creates a stand-alone query that will pull the data for this column

func (*Col) Rename

func (c *Col) Rename(newName string) error

func (*Col) SQL

func (c *Col) SQL() (snippet string, isFieldName bool)

SQL returns

  • the name of the column if it is not a calculated field.
  • the SQL if it is calculated. Note: this is not a complete query, just the snippet needed for the column.

func (*Col) String

func (c *Col) String() string

type DF

type DF struct {
	*d.DFcore
	// contains filtered or unexported fields
}

DF is the SQL implementation of d.DF.

func DBload

func DBload(query string, dlct *d.Dialect, opts ...d.DFopt) (*DF, error)

DBload creates a *DF from a query. Note: the data is not loaded to memory.

Example

Create a dataframe from a ClickHouse table. On the loading side, there is no difference between Postgres and ClickHouse. There is a difference when creating tables; see Dialect.Create and Dialect.Save.

Note that this code is identical to the DBload example in df/mem. The mem/df package loads the data into memory, the sql/df package does not.

const (
	dbProvider = "clickhouse"
	chTable    = "testing.d1"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

qry := "SELECT k, x FROM " + chTable

var (
	dlct *d.Dialect
	e    error
)
if dlct, e = d.NewDialect(dbProvider, db); e != nil {
	panic(e)
}

var (
	df *DF
	e1 error
)
if df, e1 = DBload(qry, dlct); e1 != nil {
	panic(e1)
}

fmt.Println("# of Rows: ", df.RowCount())
fmt.Println("Columns: ", df.ColumnNames())
Output:

# of Rows:  6
Columns:  [k x]
Example (Postgress)

Create a dataframe from a Postgres table. On the loading side, there is no difference between Postgres and ClickHouse. There is a difference when creating tables; see Dialect.Create and Dialect.Save.

const (
	dbProvider = "postgres"
	pgTable    = "d1"
)

// Postgres connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
dbName := os.Getenv("db")
db := newConnectPG(host, user, password, dbName)

qry := "SELECT k, x FROM " + pgTable

var (
	dlct *d.Dialect
	e    error
)
if dlct, e = d.NewDialect(dbProvider, db); e != nil {
	panic(e)
}

var (
	df *DF
	e1 error
)
if df, e1 = DBload(qry, dlct); e1 != nil {
	panic(e1)
}

fmt.Println("# of Rows: ", df.RowCount())
fmt.Println("Columns: ", df.ColumnNames())
Output:

# of Rows:  6
Columns:  [k x]

func NewDF

func NewDF(dlct *d.Dialect, input d.HasIter, opts ...d.DFopt) (*DF, error)

NewDF creates a *DF from input.

If input is a *DF, a copy is returned. Otherwise, NewDF saves the data to a temp table and returns a *DF based on that.

func NewDFseq

func NewDFseq(dlct *d.Dialect, n int, name string, opts ...d.DFopt) (*DF, error)

NewDFseq creates a *DF with a single column, "seq". That column is a DTint sequence from 0 to n-1.

func (*DF) AllRows

func (f *DF) AllRows() iter.Seq2[int, []any]

func (*DF) AppendColumn

func (f *DF) AppendColumn(col d.Column, replace bool) error

AppendColumn masks the DFcore version in order to check that f and col come from the same source.

func (*DF) AppendDF

func (f *DF) AppendDF(dfNew d.DF) (d.DF, error)

func (*DF) By

func (f *DF) By(groupBy string, fns ...string) (d.DF, error)

By creates a new *DF with function fns calculated within the groups defined by groupBy.

groupBy - comma-separated list of fields to group on.  If groupBy is empty, then the output will have 1 row.
fns     - functions to calculate on the By groups.
Example

Create a new table grouping on one column with two summary columns.

const n = 1000
const (
	dbProvider = "clickhouse"
	chTable    = "testing.d1"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// df starts with 1 column, "seq", ranging from 0 to n-1
var (
	df *DF
	e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
	panic(e1)
}

// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
	panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
	panic(e)
}
var (
	dfBy d.DF
	e2   error
)

_ = dlct.Save("testing.temp", "seq", true, false, df)

// produce a summary
if dfBy, e2 = df.By("x", "cy := count(y)", "my := mean(y)"); e2 != nil {
	panic(e2)
}

if e := dfBy.Sort(true, "x"); e != nil {
	panic(e)
}

// These run a query to fetch the data
fmt.Println(dfBy.Column("x").Data().AsAny())
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("my").Data().AsAny())
Output:

[0 1 2 3 4 5 6 7 8 9]
[100 100 100 100 100 100 100 100 100 100]
[495 496 497 498 499 500 501 502 503 504]
Example (Global)

Create a new table grouping on one column with two summary columns, one of which divides the group count by a global (ungrouped) count.

const n = 1000
const (
	dbProvider = "clickhouse"
	chTable    = "testing.d1"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// df starts with 1 column, "seq", ranging from 0 to n-1
var (
	df *DF
	e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
	panic(e1)
}

// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
	panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
	panic(e)
}
var (
	dfBy d.DF
	e2   error
)

_ = dlct.Save("testing.temp", "seq", true, false, df)

// produce a summary
if dfBy, e2 = df.By("x", "cy := count(y)", "prop := cy / count(global(y))"); e2 != nil {
	panic(e2)
}

if e := dfBy.Sort(true, "x"); e != nil {
	panic(e)
}

// These run a query to fetch the data
fmt.Println(dfBy.Column("x").Data().AsAny())
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("prop").Data().AsAny())
Output:

[0 1 2 3 4 5 6 7 8 9]
[100 100 100 100 100 100 100 100 100 100]
[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
Example (OneRow)

Create a new one-row table with two summary columns by passing an empty groupBy.

const n = 1000
const (
	dbProvider = "clickhouse"
	chTable    = "testing.d1"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// df starts with 1 column, "seq", ranging from 0 to n-1
var (
	df *DF
	e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
	panic(e1)
}

// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
	panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
	panic(e)
}
var (
	dfBy d.DF
	e2   error
)

_ = dlct.Save("testing.temp", "seq", true, false, df)

// produce a summary
if dfBy, e2 = df.By("", "cy := count(y)", "my := mean(y)"); e2 != nil {
	panic(e2)
}

qry := dfBy.Column("cy").(*Col).MakeQuery()
_ = qry

// These run a query to fetch the data
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("my").Data().AsAny())
Output:

[1000]
[499.5]

func (*DF) Categorical

func (f *DF) Categorical(colName string, catMap d.CategoryMap, fuzz int, defaultVal any, levels []any) (d.Column, error)

Categorical creates a categorical column

colName    - name of the source column
catMap     - optionally supply a category map of source value -> category level
fuzz       - if a source column value has counts < fuzz, then it is put in the 'other' category.
defaultVal - optional source column value for the 'other' category.
levels     - slice of source values to make categories from

func (*DF) Column

func (f *DF) Column(colName string) d.Column

func (*DF) Copy

func (f *DF) Copy() d.DF

func (*DF) DropColumns

func (f *DF) DropColumns(colNames ...string) error

func (*DF) GroupBy

func (f *DF) GroupBy() string

func (*DF) Interp

func (f *DF) Interp(points d.HasIter, xSfield, xIfield, yfield, outField string) (d.DF, error)

Interp interpolates the columns (xSfield, yfield) at the xIfield points.

points   - input iterator (e.g. Column or DF) that yields the points to interpolate at
xSfield  - column name of x values in source DF
xIfield  - column name of x values in points
yfield   - column name of y values in source DF
outField - column name of interpolated y's in return DF

The output DF is restricted to interpolated points that lie within the data. It has columns:

xIfield  - points at which to interpolate. This may be a subset of the input "points".
outField - interpolated values.
Example
const (
	n1         = 10
	dbProvider = "clickhouse"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// create first dataframe.
x := make([]float64, n1)
y := make([]float64, n1)
for ind := range n1 {
	x[ind] = float64(ind)
	y[ind] = float64(ind) * 4
}

var (
	cx1, cy1 *m.Col
	e1       error
)
if cx1, e1 = m.NewCol(x, d.ColName("x")); e1 != nil {
	panic(e1)
}
if cy1, e1 = m.NewCol(y, d.ColName("y")); e1 != nil {
	panic(e1)
}

var (
	df1 *m.DF
	e2  error
)
if df1, e2 = m.NewDFcol([]*m.Col{cx1, cy1}); e2 != nil {
	panic(e2)
}
if e := dlct.Save("temp1", "x", true, false, df1); e != nil {
	panic(e)
}

var (
	df2 d.DF
	e3  error
)
if df2, e3 = DBload("select * from temp1", dlct); e3 != nil {
	panic(e3)
}

cxi := []float64{0.5, 4.25, -1, 20, 6.8}
coli, _ := m.NewCol(cxi, d.ColName("xi"))
var (
	dfi d.DF
	e4  error
)
if dfi, e4 = NewDF(dlct, coli); e4 != nil {
	panic(e4)
}

var (
	dfOut d.DF
	e5    error
)
if dfOut, e5 = df2.Interp(dfi, "x", "xi", "y", "yInterp"); e5 != nil {
	panic(e5)
}

fmt.Println(dfOut.Column("yInterp").Data().AsAny())
Output:

[2 17 27.2]

func (*DF) Join

func (f *DF) Join(df d.HasIter, joinOn string) (d.DF, error)

Join joins f and df on the columns of joinOn. This is an inner join.

df - data to join.
joinOn - comma-separated list of fields to join on.  These fields must have the same name in both data sets.
Example
const (
	nLeft      = 10
	nRight     = 15
	dbProvider = "clickhouse"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

// initialize dialect
var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// create the dataframes to join
var (
	dfLeft, dfRight d.DF
	e1              error
)
if dfLeft, e1 = NewDFseq(dlct, nLeft, "seq"); e1 != nil {
	panic(e1)
}

if dfRight, e1 = NewDFseq(dlct, nRight, "seq"); e1 != nil {
	panic(e1)
}

// add a column
if e := d.Parse(dfLeft, "x := exp(float(seq) / 100.0)"); e != nil {
	panic(e)
}

if e := d.Parse(dfRight, "y := seq^2"); e != nil {
	panic(e)
}

// join
var (
	dfJoin d.DF
	e2     error
)
if dfJoin, e2 = dfLeft.Join(dfRight, "seq"); e2 != nil {
	panic(e2)
}

fmt.Println(dfJoin.RowCount())
fmt.Println(dfJoin.Column("seq").Data().AsAny())
fmt.Println(dfJoin.Column("y").Data().AsAny())
Output:

10
[0 1 2 3 4 5 6 7 8 9]
[0 1 4 9 16 25 36 49 64 81]
Example (TwoColumns)
const (
	nLeft      = 10
	nRight     = 15
	dbProvider = "clickhouse"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

// initialize Dialect
var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

// Create the dataframes to join
var (
	dfLeft, dfRight d.DF
	e1              error
)
if dfLeft, e1 = NewDFseq(dlct, nLeft, "seq"); e1 != nil {
	panic(e1)
}

if dfRight, e1 = NewDFseq(dlct, nRight, "seq"); e1 != nil {
	panic(e1)
}

// second column to join on
if e := d.Parse(dfLeft, "b := if(mod(seq,4) == 0, 'a', if(mod(seq,4)==1, 'b', if(mod(seq,4)==2, 'c', 'd')))"); e != nil {
	panic(e)
}

if e := d.Parse(dfRight, "b := if(mod(seq,4) == 0, 'a', 'b')"); e != nil {
	panic(e)
}

// add another column to each
if e := d.Parse(dfLeft, "x := exp(float(seq) / 100.0)"); e != nil {
	panic(e)
}

if e := d.Parse(dfRight, "y := seq^2"); e != nil {
	panic(e)
}

// join
var (
	dfJoin d.DF
	e2     error
)
if dfJoin, e2 = dfLeft.Join(dfRight, "seq,b"); e2 != nil {
	panic(e2)
}

fmt.Println(dfJoin.RowCount())
fmt.Println(dfJoin.Column("seq").Data().AsAny())
fmt.Println(dfJoin.Column("b").Data().AsAny())
fmt.Println(dfJoin.Column("y").Data().AsAny())
Output:

6
[0 1 4 5 8 9]
[a b a b a b]
[0 1 16 25 64 81]

func (*DF) MakeQuery

func (f *DF) MakeQuery(colNames ...string) string

func (*DF) RowCount

func (f *DF) RowCount() int

RowCount returns # of rows in f

func (*DF) SetParent

func (f *DF) SetParent() error

SetParent sets the parent to f for all the columns in f.

func (*DF) Sort

func (f *DF) Sort(ascending bool, sortCols string) error

Sort sorts f according to sortCols.

ascending - true = sort ascending
sortCols  - comma-separated list of columns to sort on.

func (*DF) SourceSQL

func (f *DF) SourceSQL() string

SourceSQL returns the query used to create f.

func (*DF) String

func (f *DF) String() string

String produces a summary of f.

func (*DF) Table

func (f *DF) Table(cols string) (d.DF, error)

Table produces a table based on cols. cols is a comma-separated list of fields. The metrics calculated within each group are:

n    - count of rows
rate - fraction of original row count.

func (*DF) Where

func (f *DF) Where(condition string) (d.DF, error)

Where subsets f to rows where condition is true.

Example
const (
	n          = 10
	dbProvider = "clickhouse"
)

// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)

var (
	dlct *d.Dialect
	e0   error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
	panic(e0)
}

var (
	df d.DF
	e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
	panic(e1)
}

if e := d.Parse(df, "x := 4.0 * float(seq)"); e != nil {
	panic(e)
}

// subset to where x <= 12.0 or x > 32.0
dfOut, _ := df.Where("x <= 12.0 || x > 32.0")
fmt.Println(dfOut.Column("x").Data().AsAny())
Output:

[0 4 8 12 36]

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL