Documentation
¶
Overview ¶
Package toolapp provides endpoints to handle tool management.
Index ¶
- func Routes(app *web.App, cfg Config)
- type CatalogCapabilities
- type CatalogFile
- type CatalogFileInfoResponse
- type CatalogFiles
- type CatalogFilesListResponse
- type CatalogMetadata
- type CatalogModelResponse
- type CatalogModelsResponse
- type Config
- type GrammarContentResponse
- type GrammarFilesResponse
- type HFLookupRequest
- type HFLookupResponse
- type HFRepoFile
- type KeyResponse
- type KeysResponse
- type ListModelDetail
- type ListModelInfoResponse
- type ModelConfig
- type ModelDetail
- type ModelDetailsResponse
- type ModelInfoResponse
- type PublishCatalogRequest
- type PublishCatalogResponse
- type PullCatalogRequest
- type PullRequest
- type PullResponse
- type RateLimit
- type RepoPathResponse
- type SamplingConfig
- type SaveCatalogRequest
- type SaveCatalogResponse
- type TemplateFilesResponse
- type TokenRequest
- type TokenResponse
- type UnloadRequest
- type UnloadResponse
- type VRAM
- type VRAMInput
- type VRAMRequest
- type VRAMResponse
- type VersionResponse
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CatalogCapabilities ¶ added in v1.3.1
type CatalogCapabilities struct {
Endpoint string `json:"endpoint"`
Images bool `json:"images"`
Audio bool `json:"audio"`
Video bool `json:"video"`
Streaming bool `json:"streaming"`
Reasoning bool `json:"reasoning"`
Tooling bool `json:"tooling"`
Embedding bool `json:"embedding"`
Rerank bool `json:"rerank"`
}
CatalogCapabilities represents the capabilities of a model.
type CatalogFile ¶ added in v1.3.1
CatalogFile represents the actual file URL and size.
type CatalogFileInfoResponse ¶ added in v1.18.0
type CatalogFileInfoResponse struct {
Name string `json:"name"`
ModelCount int `json:"model_count"`
}
CatalogFileInfoResponse describes a catalog file by its name and model count.
type CatalogFiles ¶ added in v1.3.1
type CatalogFiles struct {
Models []CatalogFile `json:"model"`
Proj CatalogFile `json:"proj"`
}
CatalogFiles represents file information for a model.
type CatalogFilesListResponse ¶ added in v1.18.0
type CatalogFilesListResponse []CatalogFileInfoResponse
CatalogFilesListResponse is a list of catalog files.
type CatalogMetadata ¶ added in v1.3.1
type CatalogMetadata struct {
Created time.Time `json:"created"`
Collections string `json:"collections"`
Description string `json:"description"`
}
CatalogMetadata represents extra information about the model.
type CatalogModelResponse ¶ added in v1.3.1
type CatalogModelResponse struct {
ID string `json:"id"`
Category string `json:"category"`
OwnedBy string `json:"owned_by"`
ModelFamily string `json:"model_family"`
Architecture string `json:"architecture"`
GGUFArch string `json:"gguf_arch"`
Parameters string `json:"parameters"`
ParameterCount int64 `json:"parameter_count"`
WebPage string `json:"web_page"`
GatedModel bool `json:"gated_model"`
Template string `json:"template"`
TotalSize string `json:"total_size"`
TotalSizeBytes int64 `json:"total_size_bytes"`
Files CatalogFiles `json:"files"`
Capabilities CatalogCapabilities `json:"capabilities"`
Metadata CatalogMetadata `json:"metadata"`
ModelConfig *ModelConfig `json:"model_config,omitempty"`
BaseConfig *ModelConfig `json:"base_config,omitempty"`
ModelMetadata map[string]string `json:"model_metadata,omitempty"`
VRAM *VRAM `json:"vram,omitempty"`
Downloaded bool `json:"downloaded"`
Validated bool `json:"validated"`
CatalogFile string `json:"catalog_file,omitempty"`
}
CatalogModelResponse represents information for a model.
type CatalogModelsResponse ¶ added in v1.3.1
type CatalogModelsResponse []CatalogModelResponse
CatalogModelsResponse represents a list of catalog models.
type Config ¶
type Config struct {
Log *logger.Logger
AuthClient *authclient.Client
Cache *cache.Cache
Libs *libs.Libs
Models *models.Models
Catalog *catalog.Catalog
}
Config contains all the mandatory systems required by handlers.
type GrammarContentResponse ¶ added in v1.18.9
type GrammarContentResponse struct {
Content string `json:"content"`
}
GrammarContentResponse returns the content of a grammar file.
type GrammarFilesResponse ¶ added in v1.18.9
type GrammarFilesResponse struct {
Files []string `json:"files"`
}
GrammarFilesResponse is a list of available grammar filenames.
type HFLookupRequest ¶ added in v1.18.0
type HFLookupRequest struct {
Input string `json:"input"`
}
HFLookupRequest represents the input for a HuggingFace model lookup.
func (*HFLookupRequest) Decode ¶ added in v1.18.0
func (app *HFLookupRequest) Decode(data []byte) error
Decode implements the decoder interface.
type HFLookupResponse ¶ added in v1.18.0
type HFLookupResponse struct {
Model CatalogModelResponse `json:"model"`
RepoFiles []HFRepoFile `json:"repo_files"`
}
HFLookupResponse contains the results from a HuggingFace lookup.
type HFRepoFile ¶ added in v1.18.0
type HFRepoFile struct {
Filename string `json:"filename"`
Size int64 `json:"size"`
SizeStr string `json:"size_str"`
}
HFRepoFile represents a GGUF file in a HuggingFace repository.
type KeyResponse ¶ added in v1.5.0
KeyResponse represents a key in the system.
type KeysResponse ¶ added in v1.5.0
type KeysResponse []KeyResponse
KeysResponse is a collection of keys.
type ListModelDetail ¶
type ListModelDetail struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
ModelFamily string `json:"model_family"`
Size int64 `json:"size"`
Modified time.Time `json:"modified"`
Validated bool `json:"validated"`
Sampling *SamplingConfig `json:"sampling,omitempty"`
}
ListModelDetail provides information about a model.
type ListModelInfoResponse ¶
type ListModelInfoResponse struct {
Object string `json:"object"`
Data []ListModelDetail `json:"data"`
}
ListModelInfoResponse contains the list of models loaded in the system.
type ModelConfig ¶ added in v1.15.8
type ModelConfig struct {
Device string `json:"device"`
ContextWindow int `json:"context-window"`
NBatch int `json:"nbatch"`
NUBatch int `json:"nubatch"`
NThreads int `json:"nthreads"`
NThreadsBatch int `json:"nthreads-batch"`
CacheTypeK model.GGMLType `json:"cache-type-k"`
CacheTypeV model.GGMLType `json:"cache-type-v"`
UseDirectIO bool `json:"use-direct-io"`
FlashAttention model.FlashAttentionType `json:"flash-attention"`
IgnoreIntegrityCheck bool `json:"ignore-integrity-check"`
NSeqMax int `json:"nseq-max"`
OffloadKQV *bool `json:"offload-kqv"`
OpOffload *bool `json:"op-offload"`
NGpuLayers *int `json:"ngpu-layers"`
SplitMode model.SplitMode `json:"split-mode"`
SystemPromptCache bool `json:"system-prompt-cache"`
IncrementalCache bool `json:"incremental-cache"`
CacheMinTokens int `json:"cache-min-tokens"`
CacheSlotTimeout int `json:"cache-slot-timeout"`
Sampling SamplingConfig `json:"sampling-parameters"`
RopeScaling model.RopeScalingType `json:"rope-scaling-type"`
RopeFreqBase *float32 `json:"rope-freq-base"`
RopeFreqScale *float32 `json:"rope-freq-scale"`
YarnExtFactor *float32 `json:"yarn-ext-factor"`
YarnAttnFactor *float32 `json:"yarn-attn-factor"`
YarnBetaFast *float32 `json:"yarn-beta-fast"`
YarnBetaSlow *float32 `json:"yarn-beta-slow"`
YarnOrigCtx *int `json:"yarn-orig-ctx"`
}
ModelConfig represents the configuration a model will use by default.
type ModelDetail ¶
type ModelDetail struct {
ID string `json:"id"`
OwnedBy string `json:"owned_by"`
ModelFamily string `json:"model_family"`
Size int64 `json:"size"`
VRAMTotal int64 `json:"vram_total"`
SlotMemory int64 `json:"slot_memory"`
ExpiresAt time.Time `json:"expires_at"`
ActiveStreams int `json:"active_streams"`
}
ModelDetail provides details for the models in the cache.
type ModelDetailsResponse ¶ added in v1.3.1
type ModelDetailsResponse []ModelDetail
ModelDetailsResponse is a collection of model details.
type ModelInfoResponse ¶
type ModelInfoResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
Desc string `json:"desc"`
Size int64 `json:"size"`
HasProjection bool `json:"has_projection"`
IsGPT bool `json:"is_gpt"`
Metadata map[string]string `json:"metadata"`
VRAM *VRAM `json:"vram,omitempty"`
ModelConfig *ModelConfig `json:"model_config,omitempty"`
}
ModelInfoResponse returns information about a model.
type PublishCatalogRequest ¶ added in v1.18.0
type PublishCatalogRequest struct {
CatalogFile string `json:"catalog_file"`
}
PublishCatalogRequest represents the input for publishing a catalog file to the cloned repository.
func (*PublishCatalogRequest) Decode ¶ added in v1.18.0
func (app *PublishCatalogRequest) Decode(data []byte) error
Decode implements the decoder interface.
type PublishCatalogResponse ¶ added in v1.18.0
type PublishCatalogResponse struct {
Status string `json:"status"`
}
PublishCatalogResponse is returned after publishing a catalog file.
type PullCatalogRequest ¶ added in v1.20.1
type PullCatalogRequest struct {
DownloadServer string `json:"download_server"`
}
PullCatalogRequest represents the request to pull a catalog model.
func (*PullCatalogRequest) Decode ¶ added in v1.20.1
func (app *PullCatalogRequest) Decode(data []byte) error
Decode implements the decoder interface.
type PullRequest ¶
type PullRequest struct {
ModelURL string `json:"model_url"`
ProjURL string `json:"proj_url"`
SplitURLs []string `json:"-"`
}
PullRequest represents the input for the pull command.
func (*PullRequest) Decode ¶
func (app *PullRequest) Decode(data []byte) error
Decode implements the decoder interface.
type PullResponse ¶
type PullResponse struct {
Status string `json:"status"`
ModelFiles []string `json:"model_files,omitempty"`
ProjFile string `json:"proj_file,omitempty"`
Downloaded bool `json:"downloaded,omitempty"`
}
PullResponse returns information about a model being downloaded.
type RepoPathResponse ¶ added in v1.18.0
type RepoPathResponse struct {
RepoPath string `json:"repo_path"`
}
RepoPathResponse is returned when querying the repo path configuration.
type SamplingConfig ¶ added in v1.15.8
type SamplingConfig struct {
Temperature float32 `json:"temperature"`
TopK int32 `json:"top_k"`
TopP float32 `json:"top_p"`
MinP float32 `json:"min_p"`
MaxTokens int `json:"max_tokens"`
RepeatPenalty float32 `json:"repeat_penalty"`
RepeatLastN int32 `json:"repeat_last_n"`
DryMultiplier float32 `json:"dry_multiplier"`
DryBase float32 `json:"dry_base"`
DryAllowedLen int32 `json:"dry_allowed_length"`
DryPenaltyLast int32 `json:"dry_penalty_last_n"`
XtcProbability float32 `json:"xtc_probability"`
XtcThreshold float32 `json:"xtc_threshold"`
XtcMinKeep uint32 `json:"xtc_min_keep"`
FrequencyPenalty float32 `json:"frequency_penalty"`
PresencePenalty float32 `json:"presence_penalty"`
EnableThinking string `json:"enable_thinking"`
ReasoningEffort string `json:"reasoning_effort"`
Grammar string `json:"grammar"`
}
SamplingConfig represents sampling parameters for model inference.
type SaveCatalogRequest ¶ added in v1.18.0
type SaveCatalogRequest struct {
ID string `json:"id"`
Category string `json:"category"`
OwnedBy string `json:"owned_by"`
ModelFamily string `json:"model_family"`
Architecture string `json:"architecture"`
Parameters string `json:"parameters"`
WebPage string `json:"web_page"`
GatedModel bool `json:"gated_model"`
Template string `json:"template"`
Files CatalogFiles `json:"files"`
Capabilities CatalogCapabilities `json:"capabilities"`
Metadata CatalogMetadata `json:"metadata"`
Config *ModelConfig `json:"config,omitempty"`
CatalogFile string `json:"catalog_file"`
}
SaveCatalogRequest represents the input for saving a catalog entry.
func (*SaveCatalogRequest) Decode ¶ added in v1.18.0
func (app *SaveCatalogRequest) Decode(data []byte) error
Decode implements the decoder interface.
type SaveCatalogResponse ¶ added in v1.18.0
SaveCatalogResponse is returned after saving or deleting a catalog entry.
type TemplateFilesResponse ¶ added in v1.18.9
type TemplateFilesResponse struct {
Files []string `json:"files"`
}
TemplateFilesResponse is a list of available template filenames.
type TokenRequest ¶ added in v1.5.0
type TokenRequest struct {
Admin bool `json:"admin"`
Endpoints map[string]RateLimit `json:"endpoints"`
Duration time.Duration `json:"duration"`
}
TokenRequest represents the input for the create token command.
func (*TokenRequest) Decode ¶ added in v1.5.0
func (app *TokenRequest) Decode(data []byte) error
Decode implements the decoder interface.
type TokenResponse ¶ added in v1.5.0
type TokenResponse struct {
Token string `json:"token"`
}
TokenResponse represents the response for a successful token creation.
type UnloadRequest ¶ added in v1.19.5
type UnloadRequest struct {
ID string `json:"id"`
}
UnloadRequest represents the input for unloading a model from the cache.
func (*UnloadRequest) Decode ¶ added in v1.19.5
func (app *UnloadRequest) Decode(data []byte) error
Decode implements the decoder interface.
func (*UnloadRequest) Validate ¶ added in v1.19.5
func (app *UnloadRequest) Validate() error
Validate checks the request is valid.
type UnloadResponse ¶ added in v1.19.5
UnloadResponse represents the output for a model unload operation.
type VRAM ¶ added in v1.16.0
type VRAM struct {
Input VRAMInput `json:"input"`
KVPerTokenPerLayer int64 `json:"kv_per_token_per_layer"`
KVPerSlot int64 `json:"kv_per_slot"`
SlotMemory int64 `json:"slot_memory"`
TotalVRAM int64 `json:"total_vram"`
}
VRAM contains the calculated VRAM requirements.
type VRAMInput ¶ added in v1.16.4
type VRAMInput struct {
ModelSizeBytes int64 `json:"model_size_bytes"`
ContextWindow int64 `json:"context_window"`
BlockCount int64 `json:"block_count"`
HeadCountKV int64 `json:"head_count_kv"`
KeyLength int64 `json:"key_length"`
ValueLength int64 `json:"value_length"`
BytesPerElement int64 `json:"bytes_per_element"`
Slots int64 `json:"slots"`
}
VRAMInput contains the input parameters used for VRAM calculation.
type VRAMRequest ¶ added in v1.16.4
type VRAMRequest struct {
ModelURL string `json:"model_url"`
ContextWindow int64 `json:"context_window"`
BytesPerElement int64 `json:"bytes_per_element"`
Slots int64 `json:"slots"`
}
VRAMRequest represents the input for VRAM calculation.
func (*VRAMRequest) Decode ¶ added in v1.16.4
func (app *VRAMRequest) Decode(data []byte) error
Decode implements the decoder interface.
type VRAMResponse ¶ added in v1.16.4
type VRAMResponse struct {
Input VRAMInput `json:"input"`
KVPerTokenPerLayer int64 `json:"kv_per_token_per_layer"`
KVPerSlot int64 `json:"kv_per_slot"`
SlotMemory int64 `json:"slot_memory"`
TotalVRAM int64 `json:"total_vram"`
}
VRAMResponse represents the VRAM calculation results.
type VersionResponse ¶
type VersionResponse struct {
Status string `json:"status"`
Arch string `json:"arch,omitempty"`
OS string `json:"os,omitempty"`
Processor string `json:"processor,omitempty"`
Latest string `json:"latest,omitempty"`
Current string `json:"current,omitempty"`
AllowUpgrade bool `json:"allow_upgrade"`
}
VersionResponse returns information about the installed libraries.