Documentation
¶
Index ¶
- func StandardFunctions(dlct *d.Dialect) d.Fns
- type Col
- func (c *Col) AllRows() iter.Seq2[int, []any]
- func (c *Col) Copy() d.Column
- func (c *Col) Core() *d.ColCore
- func (c *Col) Data() *d.Vector
- func (c *Col) DataLimit(limit int) *d.Vector
- func (c *Col) Len() int
- func (c *Col) MakeQuery() string
- func (c *Col) Rename(newName string) error
- func (c *Col) SQL() (snippet string, isFieldName bool)
- func (c *Col) String() string
- type DF
- func (f *DF) AllRows() iter.Seq2[int, []any]
- func (f *DF) AppendColumn(col d.Column, replace bool) error
- func (f *DF) AppendDF(dfNew d.DF) (d.DF, error)
- func (f *DF) By(groupBy string, fns ...string) (d.DF, error)
- func (f *DF) Categorical(colName string, catMap d.CategoryMap, fuzz int, defaultVal any, levels []any) (d.Column, error)
- func (f *DF) Column(colName string) d.Column
- func (f *DF) Copy() d.DF
- func (f *DF) DropColumns(colNames ...string) error
- func (f *DF) GroupBy() string
- func (f *DF) Interp(points d.HasIter, xSfield, xIfield, yfield, outField string) (d.DF, error)
- func (f *DF) Join(df d.HasIter, joinOn string) (d.DF, error)
- func (f *DF) MakeQuery(colNames ...string) string
- func (f *DF) RowCount() int
- func (f *DF) SetParent() error
- func (f *DF) Sort(ascending bool, sortCols string) error
- func (f *DF) SourceSQL() string
- func (f *DF) String() string
- func (f *DF) Table(cols string) (d.DF, error)
- func (f *DF) Where(condition string) (d.DF, error)
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Col ¶
Col implements Column for SQL.
func NewCol ¶
NewCol creates a new *Col from SQL
dt - data type of the column; dlct - Dialect to use; sqlx - SQL to create the column; opts - Column options.
func (*Col) DataLimit ¶
DataLimit pulls the first limit rows of data. Pulls all the data if limit=0.
func (*Col) MakeQuery ¶
MakeQuery creates a stand-alone query that will pull the data for this column
type DF ¶
DF is the implementation of DF for SQL.
func DBload ¶
DBload creates a *DF from a query. Note: the data is not loaded to memory.
Example ¶
Create a dataframe from a ClickHouse table. On the loading side, there is no difference between Postgres and ClickHouse. There is when creating tables, see Dialect.Create and Dialect.Save.
Note that this code is identical to the DBload example in df/mem. The df/mem package loads the data into memory; the df/sql package does not.
const (
dbProvider = "clickhouse"
chTable = "testing.d1"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
qry := "SELECT k, x FROM " + chTable
var (
dlct *d.Dialect
e error
)
if dlct, e = d.NewDialect(dbProvider, db); e != nil {
panic(e)
}
var (
df *DF
e1 error
)
if df, e1 = DBload(qry, dlct); e1 != nil {
panic(e1)
}
fmt.Println("# of Rows: ", df.RowCount())
fmt.Println("Columns: ", df.ColumnNames())
Output: # of Rows: 6 Columns: [k x]
Example (Postgress) ¶
Create a dataframe from a Postgres table. On the loading side, there is no difference between Postgres and ClickHouse. There is when creating tables, see Dialect.Create and Dialect.Save.
const (
dbProvider = "postgres"
pgTable = "d1"
)
// Postgres connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
dbName := os.Getenv("db")
db := newConnectPG(host, user, password, dbName)
qry := "SELECT k, x FROM " + pgTable
var (
dlct *d.Dialect
e error
)
if dlct, e = d.NewDialect(dbProvider, db); e != nil {
panic(e)
}
var (
df *DF
e1 error
)
if df, e1 = DBload(qry, dlct); e1 != nil {
panic(e1)
}
fmt.Println("# of Rows: ", df.RowCount())
fmt.Println("Columns: ", df.ColumnNames())
Output: # of Rows: 6 Columns: [k x]
func NewDF ¶
NewDF creates a *DF from input.
If input is a *DF, a copy is returned. Otherwise, NewDF saves the data to a temp table and returns a *DF based on that.
func NewDFseq ¶
NewDFseq creates a *DF with a single column, "seq". That column is a DTint sequence from 0 to n-1.
func (*DF) AppendColumn ¶
AppendColumn masks the DFcore version in order to check that f and col come from the same source.
func (*DF) By ¶
By creates a new *DF with function fns calculated within the groups defined by groupBy.
groupBy - comma-separated list of fields to group on. If groupBy is empty, then the output will have 1 row; fns - functions to calculate on the By groups.
Example ¶
Create a new table grouping on one column with two summary columns.
const n = 1000
const (
dbProvider = "clickhouse"
chTable = "testing.d1"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// df starts with 1 column, "seq", ranging from 0 to n-1
var (
df *DF
e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
panic(e1)
}
// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
panic(e)
}
var (
dfBy d.DF
e2 error
)
_ = dlct.Save("testing.temp", "seq", true, false, df)
// produce a summary
if dfBy, e2 = df.By("x", "cy := count(y)", "my := mean(y)"); e2 != nil {
panic(e2)
}
if e := dfBy.Sort(true, "x"); e != nil {
panic(e)
}
// These run a query to fetch the data
fmt.Println(dfBy.Column("x").Data().AsAny())
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("my").Data().AsAny())
Output: [0 1 2 3 4 5 6 7 8 9] [100 100 100 100 100 100 100 100 100 100] [495 496 497 498 499 500 501 502 503 504]
Example (Global) ¶
Create a new table grouping on one column with two summary columns, one of which uses a global (ungrouped) count.
const n = 1000
const (
dbProvider = "clickhouse"
chTable = "testing.d1"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// df starts with 1 column, "seq", ranging from 0 to n-1
var (
df *DF
e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
panic(e1)
}
// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
panic(e)
}
var (
dfBy d.DF
e2 error
)
_ = dlct.Save("testing.temp", "seq", true, false, df)
// produce a summary
if dfBy, e2 = df.By("x", "cy := count(y)", "prop := cy / count(global(y))"); e2 != nil {
panic(e2)
}
if e := dfBy.Sort(true, "x"); e != nil {
panic(e)
}
// These run a query to fetch the data
fmt.Println(dfBy.Column("x").Data().AsAny())
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("prop").Data().AsAny())
Output: [0 1 2 3 4 5 6 7 8 9] [100 100 100 100 100 100 100 100 100 100] [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
Example (OneRow) ¶
Create a new one-row table (groupBy is empty, so there is no grouping) with two summary columns.
const n = 1000
const (
dbProvider = "clickhouse"
chTable = "testing.d1"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// df starts with 1 column, "seq", ranging from 0 to n-1
var (
df *DF
e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
panic(e1)
}
// add some columns
if e := d.Parse(df, "x := mod(seq, 10)"); e != nil {
panic(e)
}
if e := d.Parse(df, "y := float(rowNumber())"); e != nil {
panic(e)
}
var (
dfBy d.DF
e2 error
)
_ = dlct.Save("testing.temp", "seq", true, false, df)
// produce a summary
if dfBy, e2 = df.By("", "cy := count(y)", "my := mean(y)"); e2 != nil {
panic(e2)
}
qry := dfBy.Column("cy").(*Col).MakeQuery()
_ = qry
// These run a query to fetch the data
fmt.Println(dfBy.Column("cy").Data().AsAny())
fmt.Println(dfBy.Column("my").Data().AsAny())
Output: [1000] [499.5]
func (*DF) Categorical ¶
func (f *DF) Categorical(colName string, catMap d.CategoryMap, fuzz int, defaultVal any, levels []any) (d.Column, error)
Categorical creates a categorical column
colName - name of the source column; catMap - optionally supply a category map of source value -> category level; fuzz - if a source column value has counts < fuzz, then it is put in the 'other' category; defaultVal - optional source column value for the 'other' category; levels - slice of source values to make categories from.
func (*DF) DropColumns ¶
func (*DF) Interp ¶
Interp interpolates the columns (xSfield, yfield) of f at the xIfield values supplied by points.
points - input iterator (e.g. Column or DF) that yields the points to interpolate at; xSfield - column name of x values in source DF; xIfield - name of x values in points; yfield - column name of y values in source DF; outField - column name of interpolated y's in return DF.
The output DF is restricted to interpolated points that lie within the data. It has columns:
xIfield - points at which to interpolate. This may be a subset of the input "points"; outField - interpolated values.
Example ¶
const (
n1 = 10
dbProvider = "clickhouse"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// create first dataframe.
x := make([]float64, n1)
y := make([]float64, n1)
for ind := range n1 {
x[ind] = float64(ind)
y[ind] = float64(ind) * 4
}
var (
cx1, cy1 *m.Col
e1 error
)
if cx1, e1 = m.NewCol(x, d.ColName("x")); e1 != nil {
panic(e1)
}
if cy1, e1 = m.NewCol(y, d.ColName("y")); e1 != nil {
panic(e1)
}
var (
df1 *m.DF
e2 error
)
if df1, e2 = m.NewDFcol([]*m.Col{cx1, cy1}); e2 != nil {
panic(e2)
}
if e := dlct.Save("temp1", "x", true, false, df1); e != nil {
panic(e)
}
var (
df2 d.DF
e3 error
)
if df2, e3 = DBload("select * from temp1", dlct); e3 != nil {
panic(e3)
}
cxi := []float64{0.5, 4.25, -1, 20, 6.8}
coli, _ := m.NewCol(cxi, d.ColName("xi"))
var (
dfi d.DF
e4 error
)
if dfi, e4 = NewDF(dlct, coli); e4 != nil {
panic(e4)
}
var (
dfOut d.DF
e5 error
)
if dfOut, e5 = df2.Interp(dfi, "x", "xi", "y", "yInterp"); e5 != nil {
panic(e5)
}
fmt.Println(dfOut.Column("yInterp").Data().AsAny())
Output: [2 17 27.2]
func (*DF) Join ¶
Join joins f and df on the columns of joinOn. This is an inner join.
df - data to join; joinOn - comma-separated list of fields to join on. These fields must have the same name in both data sets.
Example ¶
const (
nLeft = 10
nRight = 15
dbProvider = "clickhouse"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
// initialize dialect
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// create the dataframes to join
var (
dfLeft, dfRight d.DF
e1 error
)
if dfLeft, e1 = NewDFseq(dlct, nLeft, "seq"); e1 != nil {
panic(e1)
}
if dfRight, e1 = NewDFseq(dlct, nRight, "seq"); e1 != nil {
panic(e1)
}
// add a column
if e := d.Parse(dfLeft, "x := exp(float(seq) / 100.0)"); e != nil {
panic(e)
}
if e := d.Parse(dfRight, "y := seq^2"); e != nil {
panic(e)
}
// join
var (
dfJoin d.DF
e2 error
)
if dfJoin, e2 = dfLeft.Join(dfRight, "seq"); e2 != nil {
panic(e2)
}
fmt.Println(dfJoin.RowCount())
fmt.Println(dfJoin.Column("seq").Data().AsAny())
fmt.Println(dfJoin.Column("y").Data().AsAny())
Output: 10 [0 1 2 3 4 5 6 7 8 9] [0 1 4 9 16 25 36 49 64 81]
Example (TwoColumns) ¶
const (
nLeft = 10
nRight = 15
dbProvider = "clickhouse"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
// initialize Dialect
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
// Create the dataframes to join
var (
dfLeft, dfRight d.DF
e1 error
)
if dfLeft, e1 = NewDFseq(dlct, nLeft, "seq"); e1 != nil {
panic(e1)
}
if dfRight, e1 = NewDFseq(dlct, nRight, "seq"); e1 != nil {
panic(e1)
}
// second column to join on
if e := d.Parse(dfLeft, "b := if(mod(seq,4) == 0, 'a', if(mod(seq,4)==1, 'b', if(mod(seq,4)==2, 'c', 'd')))"); e != nil {
panic(e)
}
if e := d.Parse(dfRight, "b := if(mod(seq,4) == 0, 'a', 'b')"); e != nil {
panic(e)
}
// add another column to each
if e := d.Parse(dfLeft, "x := exp(float(seq) / 100.0)"); e != nil {
panic(e)
}
if e := d.Parse(dfRight, "y := seq^2"); e != nil {
panic(e)
}
// join
var (
dfJoin d.DF
e2 error
)
if dfJoin, e2 = dfLeft.Join(dfRight, "seq,b"); e2 != nil {
panic(e2)
}
fmt.Println(dfJoin.RowCount())
fmt.Println(dfJoin.Column("seq").Data().AsAny())
fmt.Println(dfJoin.Column("b").Data().AsAny())
fmt.Println(dfJoin.Column("y").Data().AsAny())
Output: 6 [0 1 4 5 8 9] [a b a b a b] [0 1 16 25 64 81]
func (*DF) Sort ¶
Sort sorts f according to sortCols. ascending - true = sort ascending; sortCols - comma-separated list of columns to sort on.
func (*DF) Table ¶
Table produces a table based on cols. cols is a comma-separated list of fields. The metrics calculated within each group are:
n - count of rows; rate - fraction of original row count.
func (*DF) Where ¶
Where subsets f to rows where condition is true.
Example ¶
const (
n = 10
dbProvider = "clickhouse"
)
// ClickHouse connection parameters.
user := os.Getenv("user")
host := os.Getenv("host")
password := os.Getenv("password")
db := newConnectCH(host, user, password)
var (
dlct *d.Dialect
e0 error
)
if dlct, e0 = d.NewDialect(dbProvider, db); e0 != nil {
panic(e0)
}
var (
df d.DF
e1 error
)
if df, e1 = NewDFseq(dlct, n, "seq"); e1 != nil {
panic(e1)
}
if e := d.Parse(df, "x := 4.0 * float(seq)"); e != nil {
panic(e)
}
// subset to where x <= 12.0 or x > 32.0
dfOut, _ := df.Where("x <= 12.0 || x > 32.0")
fmt.Println(dfOut.Column("x").Data().AsAny())
Output: [0 4 8 12 36]