Documentation
¶
Index ¶
- Constants
- func ByteToCompression(b byte) (ct compression_type.CompressionType, err error)
- func CompressionToByte(ct compression_type.CompressionType) (b byte, err error)
- func DeltaAlgorithmByteForName(name string) (byte, error)
- func GearCDCChunks(data []byte, minChunkSize, maxChunkSize, avgChunkSize int) [][]byte
- func MinHashJaccard(sigA, sigB []uint32) float64
- func MinHashSignature(features []uint32, k int) []uint32
- func RegisterBaseSelector(name string, factory func(BaseSelectorParams) BaseSelector)
- func RegisterDeltaAlgorithm(alg DeltaAlgorithm)
- func RegisterSignatureComputer(name string, factory func(SignatureComputerParams) SignatureComputer)
- func ToMap(entries []CacheEntry) map[string]CacheEntry
- func ToMapV1(entries []CacheEntryV1) map[string]CacheEntryV1
- func WriteCache(w io.Writer, hashFormatId string, entries []CacheEntry) (checksum []byte, err error)
- func WriteCacheV1(w io.Writer, hashFormatId string, entries []CacheEntryV1) (checksum []byte, err error)
- func WriteIndex(w io.Writer, hashFormatId string, entries []IndexEntry) (checksum []byte, err error)
- func WriteIndexV1(w io.Writer, hashFormatId string, entries []IndexEntryV1) (checksum []byte, err error)
- type BaseSelector
- type BaseSelectorParams
- type BlobMetadata
- type BlobSet
- type Bsdiff
- type CacheEntry
- type CacheEntryV1
- type CacheReader
- type CacheReaderV1
- type DataEntry
- type DataEntryV1
- type DataReader
- func (dr *DataReader) CompressionType() compression_type.CompressionType
- func (dr *DataReader) HashFormatId() string
- func (dr *DataReader) ReadAllEntries() (entries []DataEntry, err error)
- func (dr *DataReader) ReadEntry() (entry DataEntry, err error)
- func (dr *DataReader) ReadEntryAt(offset uint64) (entry DataEntry, err error)
- func (dr *DataReader) Validate() (err error)
- type DataReaderV1
- func (dr *DataReaderV1) CompressionType() compression_type.CompressionType
- func (dr *DataReaderV1) Flags() uint16
- func (dr *DataReaderV1) HashFormatId() string
- func (dr *DataReaderV1) ReadAllEntries() (entries []DataEntryV1, err error)
- func (dr *DataReaderV1) ReadEntry() (entry DataEntryV1, err error)
- func (dr *DataReaderV1) ReadEntryAt(offset uint64) (entry DataEntryV1, err error)
- func (dr *DataReaderV1) Validate() (err error)
- type DataWriter
- type DataWriterV1
- type DeltaAlgorithm
- type DeltaAssignments
- type GearCDCMinHashComputer
- type IndexEntry
- type IndexEntryV1
- type IndexReader
- func (ir *IndexReader) EntryCount() uint64
- func (ir *IndexReader) FanOut() [256]uint64
- func (ir *IndexReader) HashFormatId() string
- func (ir *IndexReader) LookupHash(hash []byte) (packOffset uint64, storedSize uint64, found bool, err error)
- func (ir *IndexReader) ReadAllEntries() (entries []IndexEntry, err error)
- func (ir *IndexReader) Validate() (err error)
- type IndexReaderV1
- func (ir *IndexReaderV1) EntryCount() uint64
- func (ir *IndexReaderV1) FanOut() [256]uint64
- func (ir *IndexReaderV1) HashFormatId() string
- func (ir *IndexReaderV1) LookupHash(hash []byte) (packOffset uint64, storedSize uint64, entryType byte, baseOffset uint64, ...)
- func (ir *IndexReaderV1) ReadAllEntries() (entries []IndexEntryV1, err error)
- func (ir *IndexReaderV1) Validate() (err error)
- type LSHBandingSelector
- type SignatureComputer
- type SignatureComputerParams
- type SizeBasedSelector
Constants ¶
const (
	DataFileMagic = "MIAR"
	IndexFileMagic = "MIAX"
	CacheFileMagic = "MIAC"

	DataFileVersion uint16 = 0
	IndexFileVersion uint16 = 0
	CacheFileVersion uint16 = 0

	DataFileExtension = ".inventory_archive-v0"
	IndexFileExtension = ".inventory_archive_index-v0"
	CacheFileName = "index_cache-v0"

	CompressionByteNone byte = 0
	CompressionByteGzip byte = 1
	CompressionByteZlib byte = 2
	CompressionByteZstd byte = 3

	FlagHasEncryption uint16 = 1 << 0
)
const (
	DataFileVersionV1 uint16 = 1
	IndexFileVersionV1 uint16 = 1
	CacheFileVersionV1 uint16 = 1

	DataFileExtensionV1 = ".inventory_archive-v1"
	IndexFileExtensionV1 = ".inventory_archive_index-v1"
	CacheFileNameV1 = "index_cache-v1"

	EntryTypeFull byte = 0x00
	EntryTypeDelta byte = 0x01

	FlagHasDeltas uint16 = 1 << 0
	FlagReservedCrossArch uint16 = 1 << 1
	FlagHasEncryptionV1 uint16 = 1 << 2
)
const (
DeltaAlgorithmByteBsdiff byte = 0
)
Variables ¶
This section is empty.
Functions ¶
func ByteToCompression ¶
func ByteToCompression(b byte) (ct compression_type.CompressionType, err error)
func CompressionToByte ¶
func CompressionToByte(ct compression_type.CompressionType) (b byte, err error)
func GearCDCChunks ¶
func RegisterBaseSelector ¶
func RegisterBaseSelector(name string, factory func(BaseSelectorParams) BaseSelector)
func RegisterDeltaAlgorithm ¶
func RegisterDeltaAlgorithm(alg DeltaAlgorithm)
RegisterDeltaAlgorithm adds a DeltaAlgorithm to the registry.
func RegisterSignatureComputer ¶
func RegisterSignatureComputer(name string, factory func(SignatureComputerParams) SignatureComputer)
func ToMap ¶
func ToMap(entries []CacheEntry) map[string]CacheEntry
func ToMapV1 ¶
func ToMapV1(entries []CacheEntryV1) map[string]CacheEntryV1
func WriteCache ¶
func WriteCacheV1 ¶
func WriteIndex ¶
Types ¶
type BaseSelector ¶
type BaseSelector interface {
SelectBases(blobs BlobSet, assignments DeltaAssignments)
}
BaseSelector chooses which blobs become deltas and which become bases. It reads from blobs and writes results to assignments.
func BaseSelectorForName ¶
func BaseSelectorForName(name string, params BaseSelectorParams) (BaseSelector, error)
type BaseSelectorParams ¶
type BlobMetadata ¶
type BlobMetadata struct {
Id domain_interfaces.MarklId
Size uint64
Signature []uint32
}
BlobMetadata describes a blob candidate for delta packing.
type BlobSet ¶
type BlobSet interface {
Len() int
At(index int) BlobMetadata
}
BlobSet provides indexed access to blob metadata without requiring all blobs in memory simultaneously.
type Bsdiff ¶
type Bsdiff struct{}
Bsdiff implements DeltaAlgorithm using the bsdiff4 binary delta algorithm.
func (*Bsdiff) Apply ¶
func (b *Bsdiff) Apply(base domain_interfaces.BlobReader, baseSize int64, delta io.Reader, target io.Writer) error
func (*Bsdiff) Compute ¶
func (b *Bsdiff) Compute(base domain_interfaces.BlobReader, baseSize int64, target io.Reader, delta io.Writer) error
type CacheEntry ¶
type CacheEntryV1 ¶
type CacheReader ¶
type CacheReader struct {
// contains filtered or unexported fields
}
func NewCacheReader ¶
func (*CacheReader) EntryCount ¶
func (cr *CacheReader) EntryCount() uint64
func (*CacheReader) HashFormatId ¶
func (cr *CacheReader) HashFormatId() string
func (*CacheReader) ReadAllEntries ¶
func (cr *CacheReader) ReadAllEntries() (entries []CacheEntry, err error)
func (*CacheReader) Validate ¶
func (cr *CacheReader) Validate() (err error)
type CacheReaderV1 ¶
type CacheReaderV1 struct {
// contains filtered or unexported fields
}
func NewCacheReaderV1 ¶
func (*CacheReaderV1) EntryCount ¶
func (cr *CacheReaderV1) EntryCount() uint64
func (*CacheReaderV1) HashFormatId ¶
func (cr *CacheReaderV1) HashFormatId() string
func (*CacheReaderV1) ReadAllEntries ¶
func (cr *CacheReaderV1) ReadAllEntries() (entries []CacheEntryV1, err error)
func (*CacheReaderV1) Validate ¶
func (cr *CacheReaderV1) Validate() (err error)
type DataEntry ¶
type DataEntryV1 ¶
type DataReader ¶
type DataReader struct {
// contains filtered or unexported fields
}
func NewDataReader ¶
func NewDataReader(r io.ReadSeeker, encryption interfaces.IOWrapper) (dr *DataReader, err error)
func (*DataReader) CompressionType ¶
func (dr *DataReader) CompressionType() compression_type.CompressionType
func (*DataReader) HashFormatId ¶
func (dr *DataReader) HashFormatId() string
func (*DataReader) ReadAllEntries ¶
func (dr *DataReader) ReadAllEntries() (entries []DataEntry, err error)
func (*DataReader) ReadEntry ¶
func (dr *DataReader) ReadEntry() (entry DataEntry, err error)
func (*DataReader) ReadEntryAt ¶
func (dr *DataReader) ReadEntryAt(offset uint64) (entry DataEntry, err error)
func (*DataReader) Validate ¶
func (dr *DataReader) Validate() (err error)
type DataReaderV1 ¶
type DataReaderV1 struct {
// contains filtered or unexported fields
}
func NewDataReaderV1 ¶
func NewDataReaderV1(r io.ReadSeeker, encryption interfaces.IOWrapper) (dr *DataReaderV1, err error)
func (*DataReaderV1) CompressionType ¶
func (dr *DataReaderV1) CompressionType() compression_type.CompressionType
func (*DataReaderV1) Flags ¶
func (dr *DataReaderV1) Flags() uint16
func (*DataReaderV1) HashFormatId ¶
func (dr *DataReaderV1) HashFormatId() string
func (*DataReaderV1) ReadAllEntries ¶
func (dr *DataReaderV1) ReadAllEntries() (entries []DataEntryV1, err error)
func (*DataReaderV1) ReadEntry ¶
func (dr *DataReaderV1) ReadEntry() (entry DataEntryV1, err error)
func (*DataReaderV1) ReadEntryAt ¶
func (dr *DataReaderV1) ReadEntryAt(offset uint64) (entry DataEntryV1, err error)
func (*DataReaderV1) Validate ¶
func (dr *DataReaderV1) Validate() (err error)
type DataWriter ¶
type DataWriter struct {
// contains filtered or unexported fields
}
func NewDataWriter ¶
func NewDataWriter(w io.Writer, hashFormatId string, ct compression_type.CompressionType, encryption interfaces.IOWrapper) (dw *DataWriter, err error)
func (*DataWriter) Close ¶
func (dw *DataWriter) Close() (checksum []byte, entries []DataEntry, err error)
func (*DataWriter) WriteEntry ¶
func (dw *DataWriter) WriteEntry(entryHash []byte, data []byte) (err error)
type DataWriterV1 ¶
type DataWriterV1 struct {
// contains filtered or unexported fields
}
func NewDataWriterV1 ¶
func NewDataWriterV1(w io.Writer, hashFormatId string, ct compression_type.CompressionType, flags uint16, encryption interfaces.IOWrapper) (dw *DataWriterV1, err error)
func (*DataWriterV1) Close ¶
func (dw *DataWriterV1) Close() (checksum []byte, entries []DataEntryV1, err error)
func (*DataWriterV1) WriteDeltaEntry ¶
func (*DataWriterV1) WriteFullEntry ¶
func (dw *DataWriterV1) WriteFullEntry(entryHash []byte, data []byte) (err error)
type DeltaAlgorithm ¶
type DeltaAlgorithm interface {
// Id returns the byte identifier written to data file delta entries.
Id() byte
// Compute produces a delta that transforms base into target.
// The delta is written to the delta writer. base is a BlobReader
// because current compression/encryption does not support seeking;
// when BlobReader gains full ReadAtSeeker support, delta algorithms
// can use random access for better performance.
Compute(
base domain_interfaces.BlobReader,
baseSize int64,
target io.Reader,
delta io.Writer,
) error
// Apply reconstructs the original blob from a base and a delta.
Apply(
base domain_interfaces.BlobReader,
baseSize int64,
delta io.Reader,
target io.Writer,
) error
}
DeltaAlgorithm computes and applies binary deltas between blobs.
func DeltaAlgorithmForByte ¶
func DeltaAlgorithmForByte(b byte) (DeltaAlgorithm, error)
type DeltaAssignments ¶
type DeltaAssignments interface {
// Assign records that the blob at blobIndex should be delta-encoded
// against the blob at baseIndex. Both indices refer to the BlobSet.
// Not calling Assign for a given index means store it as a full entry.
Assign(blobIndex, baseIndex int)
// AssignError reports that the strategy encountered an error for the
// blob at blobIndex. The packer decides how to handle these.
AssignError(blobIndex int, err error)
}
DeltaAssignments receives base selection results. The packer passes it to the base selection strategy (a BaseSelector, via SelectBases), which calls Assign for each blob that should be delta-encoded.
type GearCDCMinHashComputer ¶
GearCDCMinHashComputer splits blob content into variable-length chunks using Gear hash CDC, hashes each chunk with FNV-1a, and computes a MinHash signature over the chunk hash set.
func (*GearCDCMinHashComputer) ComputeSignature ¶
func (c *GearCDCMinHashComputer) ComputeSignature(content io.Reader) ([]uint32, error)
func (*GearCDCMinHashComputer) SignatureLen ¶
func (c *GearCDCMinHashComputer) SignatureLen() int
type IndexEntryV1 ¶
type IndexReader ¶
type IndexReader struct {
// contains filtered or unexported fields
}
func NewIndexReader ¶
func (*IndexReader) EntryCount ¶
func (ir *IndexReader) EntryCount() uint64
func (*IndexReader) FanOut ¶
func (ir *IndexReader) FanOut() [256]uint64
func (*IndexReader) HashFormatId ¶
func (ir *IndexReader) HashFormatId() string
func (*IndexReader) LookupHash ¶
func (*IndexReader) ReadAllEntries ¶
func (ir *IndexReader) ReadAllEntries() (entries []IndexEntry, err error)
func (*IndexReader) Validate ¶
func (ir *IndexReader) Validate() (err error)
type IndexReaderV1 ¶
type IndexReaderV1 struct {
// contains filtered or unexported fields
}
func NewIndexReaderV1 ¶
func (*IndexReaderV1) EntryCount ¶
func (ir *IndexReaderV1) EntryCount() uint64
func (*IndexReaderV1) FanOut ¶
func (ir *IndexReaderV1) FanOut() [256]uint64
func (*IndexReaderV1) HashFormatId ¶
func (ir *IndexReaderV1) HashFormatId() string
func (*IndexReaderV1) LookupHash ¶
func (*IndexReaderV1) ReadAllEntries ¶
func (ir *IndexReaderV1) ReadAllEntries() (entries []IndexEntryV1, err error)
func (*IndexReaderV1) Validate ¶
func (ir *IndexReaderV1) Validate() (err error)
type LSHBandingSelector ¶
LSHBandingSelector finds similar blobs via Locality-Sensitive Hashing over MinHash signatures stored in BlobMetadata.Signature. It divides each signature into Bands bands of RowsPerBand rows and hashes each band into a bucket. Blobs sharing any bucket are candidates. The best candidate (highest estimated Jaccard) becomes the delta base.
func (*LSHBandingSelector) SelectBases ¶
func (s *LSHBandingSelector) SelectBases(blobs BlobSet, assignments DeltaAssignments)
type SignatureComputer ¶
type SignatureComputer interface {
SignatureLen() int
ComputeSignature(content io.Reader) ([]uint32, error)
}
SignatureComputer produces a fixed-length similarity signature from blob content. Signatures from the same computer are comparable: the fraction of matching positions estimates content similarity.
func SignatureComputerForName ¶
func SignatureComputerForName(name string, params SignatureComputerParams) (SignatureComputer, error)
type SignatureComputerParams ¶
type SizeBasedSelector ¶
SizeBasedSelector groups blobs of similar size and, within each group, assigns every other blob as a delta against the largest blob, which serves as the base.
TODO: Content-type base selection strategy — madder queries dodder for blob type info (binary flag) and groups text blobs separately from binary blobs.
TODO: Object-history base selection strategy — dodder provides related-object hash chains, and the packer delta-encodes successive versions of the same object against each other.
func (*SizeBasedSelector) SelectBases ¶
func (s *SizeBasedSelector) SelectBases(blobs BlobSet, assignments DeltaAssignments)
Source Files
¶
- base_selector.go
- base_selector_lsh.go
- base_selector_size.go
- cache_reader.go
- cache_v1.go
- cache_writer.go
- data_reader.go
- data_reader_v1.go
- data_writer.go
- data_writer_v1.go
- delta_algorithm.go
- delta_bsdiff.go
- gear_hash.go
- index_reader.go
- index_v1.go
- index_writer.go
- minhash.go
- selector_registry.go
- signature_computer.go
- signature_gear_cdc_minhash.go
- signature_registry.go
- types.go