inventory_archive

package
v0.0.0-...-8970ab1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 3, 2026 License: MIT Imports: 17 Imported by: 0

Documentation

Index

Constants

View Source
const (
	DataFileMagic  = "MIAR"
	IndexFileMagic = "MIAX"
	CacheFileMagic = "MIAC"

	DataFileVersion  uint16 = 0
	IndexFileVersion uint16 = 0
	CacheFileVersion uint16 = 0

	DataFileExtension  = ".inventory_archive-v0"
	IndexFileExtension = ".inventory_archive_index-v0"
	CacheFileName      = "index_cache-v0"

	CompressionByteNone byte = 0
	CompressionByteGzip byte = 1
	CompressionByteZlib byte = 2
	CompressionByteZstd byte = 3

	FlagHasEncryption uint16 = 1 << 0
)
View Source
const (
	DataFileVersionV1  uint16 = 1
	IndexFileVersionV1 uint16 = 1
	CacheFileVersionV1 uint16 = 1

	DataFileExtensionV1  = ".inventory_archive-v1"
	IndexFileExtensionV1 = ".inventory_archive_index-v1"
	CacheFileNameV1      = "index_cache-v1"

	EntryTypeFull  byte = 0x00
	EntryTypeDelta byte = 0x01

	FlagHasDeltas         uint16 = 1 << 0
	FlagReservedCrossArch uint16 = 1 << 1
	FlagHasEncryptionV1   uint16 = 1 << 2
)
View Source
const (
	DeltaAlgorithmByteBsdiff byte = 0
)

Variables

This section is empty.

Functions

func ByteToCompression

func ByteToCompression(
	b byte,
) (ct compression_type.CompressionType, err error)

func CompressionToByte

func CompressionToByte(
	ct compression_type.CompressionType,
) (b byte, err error)

func DeltaAlgorithmByteForName

func DeltaAlgorithmByteForName(name string) (byte, error)

func GearCDCChunks

func GearCDCChunks(
	data []byte,
	minChunkSize, maxChunkSize, avgChunkSize int,
) [][]byte

func MinHashJaccard

func MinHashJaccard(sigA, sigB []uint32) float64

func MinHashSignature

func MinHashSignature(features []uint32, k int) []uint32

func RegisterBaseSelector

func RegisterBaseSelector(
	name string,
	factory func(BaseSelectorParams) BaseSelector,
)

func RegisterDeltaAlgorithm

func RegisterDeltaAlgorithm(alg DeltaAlgorithm)

RegisterDeltaAlgorithm adds a DeltaAlgorithm to the registry.

func RegisterSignatureComputer

func RegisterSignatureComputer(
	name string,
	factory func(SignatureComputerParams) SignatureComputer,
)

func ToMap

func ToMap(entries []CacheEntry) map[string]CacheEntry

func ToMapV1

func ToMapV1(entries []CacheEntryV1) map[string]CacheEntryV1

func WriteCache

func WriteCache(
	w io.Writer,
	hashFormatId string,
	entries []CacheEntry,
) (checksum []byte, err error)

func WriteCacheV1

func WriteCacheV1(
	w io.Writer,
	hashFormatId string,
	entries []CacheEntryV1,
) (checksum []byte, err error)

func WriteIndex

func WriteIndex(
	w io.Writer,
	hashFormatId string,
	entries []IndexEntry,
) (checksum []byte, err error)

func WriteIndexV1

func WriteIndexV1(
	w io.Writer,
	hashFormatId string,
	entries []IndexEntryV1,
) (checksum []byte, err error)

Types

type BaseSelector

type BaseSelector interface {
	SelectBases(blobs BlobSet, assignments DeltaAssignments)
}

BaseSelector chooses which blobs become deltas and which become bases. It reads from blobs and writes results to assignments.

func BaseSelectorForName

func BaseSelectorForName(
	name string,
	params BaseSelectorParams,
) (BaseSelector, error)

type BaseSelectorParams

type BaseSelectorParams struct {
	Bands       int
	RowsPerBand int
	MinBlobSize uint64
	MaxBlobSize uint64
}

type BlobMetadata

type BlobMetadata struct {
	Id        domain_interfaces.MarklId
	Size      uint64
	Signature []uint32
}

BlobMetadata describes a blob candidate for delta packing.

type BlobSet

type BlobSet interface {
	Len() int
	At(index int) BlobMetadata
}

BlobSet provides indexed access to blob metadata without requiring all blobs in memory simultaneously.

type Bsdiff

type Bsdiff struct{}

Bsdiff implements DeltaAlgorithm using the bsdiff4 binary delta algorithm.

func (*Bsdiff) Apply

func (b *Bsdiff) Apply(
	base domain_interfaces.BlobReader,
	baseSize int64,
	delta io.Reader,
	target io.Writer,
) error

func (*Bsdiff) Compute

func (b *Bsdiff) Compute(
	base domain_interfaces.BlobReader,
	baseSize int64,
	target io.Reader,
	delta io.Writer,
) error

func (*Bsdiff) Id

func (b *Bsdiff) Id() byte

type CacheEntry

type CacheEntry struct {
	Hash            []byte
	ArchiveChecksum []byte
	Offset          uint64
	StoredSize      uint64
}

type CacheEntryV1

type CacheEntryV1 struct {
	Hash            []byte
	ArchiveChecksum []byte
	Offset          uint64
	StoredSize      uint64
	EntryType       byte
	BaseOffset      uint64
}

type CacheReader

type CacheReader struct {
	// contains filtered or unexported fields
}

func NewCacheReader

func NewCacheReader(
	r io.ReaderAt,
	totalSize int64,
	hashFormatId string,
) (cr *CacheReader, err error)

func (*CacheReader) EntryCount

func (cr *CacheReader) EntryCount() uint64

func (*CacheReader) HashFormatId

func (cr *CacheReader) HashFormatId() string

func (*CacheReader) ReadAllEntries

func (cr *CacheReader) ReadAllEntries() (entries []CacheEntry, err error)

func (*CacheReader) Validate

func (cr *CacheReader) Validate() (err error)

type CacheReaderV1

type CacheReaderV1 struct {
	// contains filtered or unexported fields
}

func NewCacheReaderV1

func NewCacheReaderV1(
	r io.ReaderAt,
	totalSize int64,
	hashFormatId string,
) (cr *CacheReaderV1, err error)

func (*CacheReaderV1) EntryCount

func (cr *CacheReaderV1) EntryCount() uint64

func (*CacheReaderV1) HashFormatId

func (cr *CacheReaderV1) HashFormatId() string

func (*CacheReaderV1) ReadAllEntries

func (cr *CacheReaderV1) ReadAllEntries() (entries []CacheEntryV1, err error)

func (*CacheReaderV1) Validate

func (cr *CacheReaderV1) Validate() (err error)

type DataEntry

type DataEntry struct {
	Hash        []byte
	LogicalSize uint64
	StoredSize  uint64
	Data        []byte
	Offset      uint64
}

type DataEntryV1

type DataEntryV1 struct {
	Hash        []byte
	EntryType   byte
	Encoding    byte
	LogicalSize uint64
	StoredSize  uint64 // For delta entries, this is the stored delta payload size
	Data        []byte
	Offset      uint64
	// Delta-specific fields (only set when EntryType == EntryTypeDelta)
	DeltaAlgorithm byte
	BaseHash       []byte
}

type DataReader

type DataReader struct {
	// contains filtered or unexported fields
}

func NewDataReader

func NewDataReader(
	r io.ReadSeeker,
	encryption interfaces.IOWrapper,
) (dr *DataReader, err error)

func (*DataReader) CompressionType

func (dr *DataReader) CompressionType() compression_type.CompressionType

func (*DataReader) HashFormatId

func (dr *DataReader) HashFormatId() string

func (*DataReader) ReadAllEntries

func (dr *DataReader) ReadAllEntries() (entries []DataEntry, err error)

func (*DataReader) ReadEntry

func (dr *DataReader) ReadEntry() (entry DataEntry, err error)

func (*DataReader) ReadEntryAt

func (dr *DataReader) ReadEntryAt(
	offset uint64,
) (entry DataEntry, err error)

func (*DataReader) Validate

func (dr *DataReader) Validate() (err error)

type DataReaderV1

type DataReaderV1 struct {
	// contains filtered or unexported fields
}

func NewDataReaderV1

func NewDataReaderV1(
	r io.ReadSeeker,
	encryption interfaces.IOWrapper,
) (dr *DataReaderV1, err error)

func (*DataReaderV1) CompressionType

func (dr *DataReaderV1) CompressionType() compression_type.CompressionType

func (*DataReaderV1) Flags

func (dr *DataReaderV1) Flags() uint16

func (*DataReaderV1) HashFormatId

func (dr *DataReaderV1) HashFormatId() string

func (*DataReaderV1) ReadAllEntries

func (dr *DataReaderV1) ReadAllEntries() (entries []DataEntryV1, err error)

func (*DataReaderV1) ReadEntry

func (dr *DataReaderV1) ReadEntry() (entry DataEntryV1, err error)

func (*DataReaderV1) ReadEntryAt

func (dr *DataReaderV1) ReadEntryAt(
	offset uint64,
) (entry DataEntryV1, err error)

func (*DataReaderV1) Validate

func (dr *DataReaderV1) Validate() (err error)

type DataWriter

type DataWriter struct {
	// contains filtered or unexported fields
}

func NewDataWriter

func NewDataWriter(
	w io.Writer,
	hashFormatId string,
	ct compression_type.CompressionType,
	encryption interfaces.IOWrapper,
) (dw *DataWriter, err error)

func (*DataWriter) Close

func (dw *DataWriter) Close() (
	checksum []byte,
	entries []DataEntry,
	err error,
)

func (*DataWriter) WriteEntry

func (dw *DataWriter) WriteEntry(
	entryHash []byte,
	data []byte,
) (err error)

type DataWriterV1

type DataWriterV1 struct {
	// contains filtered or unexported fields
}

func NewDataWriterV1

func NewDataWriterV1(
	w io.Writer,
	hashFormatId string,
	ct compression_type.CompressionType,
	flags uint16,
	encryption interfaces.IOWrapper,
) (dw *DataWriterV1, err error)

func (*DataWriterV1) Close

func (dw *DataWriterV1) Close() (
	checksum []byte,
	entries []DataEntryV1,
	err error,
)

func (*DataWriterV1) WriteDeltaEntry

func (dw *DataWriterV1) WriteDeltaEntry(
	entryHash []byte,
	deltaAlgorithm byte,
	baseHash []byte,
	logicalSize uint64,
	deltaPayload []byte,
) (err error)

func (*DataWriterV1) WriteFullEntry

func (dw *DataWriterV1) WriteFullEntry(
	entryHash []byte,
	data []byte,
) (err error)

type DeltaAlgorithm

type DeltaAlgorithm interface {
	// Id returns the byte identifier written to data file delta entries.
	Id() byte

	// Compute produces a delta that transforms base into target.
	// The delta is written to the delta writer. base is a BlobReader
	// because current compression/encryption does not support seeking;
	// when BlobReader gains full ReadAtSeeker support, delta algorithms
	// can use random access for better performance.
	Compute(
		base domain_interfaces.BlobReader,
		baseSize int64,
		target io.Reader,
		delta io.Writer,
	) error

	// Apply reconstructs the original blob from a base and a delta.
	Apply(
		base domain_interfaces.BlobReader,
		baseSize int64,
		delta io.Reader,
		target io.Writer,
	) error
}

DeltaAlgorithm computes and applies binary deltas between blobs.

func DeltaAlgorithmForByte

func DeltaAlgorithmForByte(b byte) (DeltaAlgorithm, error)

type DeltaAssignments

type DeltaAssignments interface {
	// Assign records that the blob at blobIndex should be delta-encoded
	// against the blob at baseIndex. Both indices refer to the BlobSet.
	// Not calling Assign for a given index means store it as a full entry.
	Assign(blobIndex, baseIndex int)

	// AssignError reports that the strategy encountered an error for the
	// blob at blobIndex. The packer decides how to handle these.
	AssignError(blobIndex int, err error)
}

DeltaAssignments receives base selection results. The packer passes this to the strategy, which calls Assign for each blob that should be delta-encoded.

type GearCDCMinHashComputer

type GearCDCMinHashComputer struct {
	AvgChunkSize int
	MinChunkSize int
	MaxChunkSize int
	K            int
}

GearCDCMinHashComputer splits blob content into variable-length chunks using Gear hash CDC, hashes each chunk with FNV-1a, and computes a MinHash signature over the chunk hash set.

func (*GearCDCMinHashComputer) ComputeSignature

func (c *GearCDCMinHashComputer) ComputeSignature(
	content io.Reader,
) ([]uint32, error)

func (*GearCDCMinHashComputer) SignatureLen

func (c *GearCDCMinHashComputer) SignatureLen() int

type IndexEntry

type IndexEntry struct {
	Hash       []byte
	PackOffset uint64
	StoredSize uint64
}

type IndexEntryV1

type IndexEntryV1 struct {
	Hash       []byte
	PackOffset uint64
	StoredSize uint64
	EntryType  byte
	BaseOffset uint64
}

type IndexReader

type IndexReader struct {
	// contains filtered or unexported fields
}

func NewIndexReader

func NewIndexReader(
	r io.ReaderAt,
	totalSize int64,
	hashFormatId string,
) (ir *IndexReader, err error)

func (*IndexReader) EntryCount

func (ir *IndexReader) EntryCount() uint64

func (*IndexReader) FanOut

func (ir *IndexReader) FanOut() [256]uint64

func (*IndexReader) HashFormatId

func (ir *IndexReader) HashFormatId() string

func (*IndexReader) LookupHash

func (ir *IndexReader) LookupHash(hash []byte) (
	packOffset uint64,
	storedSize uint64,
	found bool,
	err error,
)

func (*IndexReader) ReadAllEntries

func (ir *IndexReader) ReadAllEntries() (entries []IndexEntry, err error)

func (*IndexReader) Validate

func (ir *IndexReader) Validate() (err error)

type IndexReaderV1

type IndexReaderV1 struct {
	// contains filtered or unexported fields
}

func NewIndexReaderV1

func NewIndexReaderV1(
	r io.ReaderAt,
	totalSize int64,
	hashFormatId string,
) (ir *IndexReaderV1, err error)

func (*IndexReaderV1) EntryCount

func (ir *IndexReaderV1) EntryCount() uint64

func (*IndexReaderV1) FanOut

func (ir *IndexReaderV1) FanOut() [256]uint64

func (*IndexReaderV1) HashFormatId

func (ir *IndexReaderV1) HashFormatId() string

func (*IndexReaderV1) LookupHash

func (ir *IndexReaderV1) LookupHash(hash []byte) (
	packOffset uint64,
	storedSize uint64,
	entryType byte,
	baseOffset uint64,
	found bool,
	err error,
)

func (*IndexReaderV1) ReadAllEntries

func (ir *IndexReaderV1) ReadAllEntries() (entries []IndexEntryV1, err error)

func (*IndexReaderV1) Validate

func (ir *IndexReaderV1) Validate() (err error)

type LSHBandingSelector

type LSHBandingSelector struct {
	Bands       int
	RowsPerBand int
	MinBlobSize uint64
	MaxBlobSize uint64
}

LSHBandingSelector finds similar blobs via Locality-Sensitive Hashing over MinHash signatures stored in BlobMetadata.Signature. It divides each signature into Bands bands of RowsPerBand rows and hashes each band into a bucket. Blobs sharing any bucket are candidates. The best candidate (highest estimated Jaccard) becomes the delta base.

func (*LSHBandingSelector) SelectBases

func (s *LSHBandingSelector) SelectBases(
	blobs BlobSet,
	assignments DeltaAssignments,
)

type SignatureComputer

type SignatureComputer interface {
	SignatureLen() int
	ComputeSignature(content io.Reader) ([]uint32, error)
}

SignatureComputer produces a fixed-length similarity signature from blob content. Signatures from the same computer are comparable: the fraction of matching positions estimates content similarity.

func SignatureComputerForName

func SignatureComputerForName(
	name string,
	params SignatureComputerParams,
) (SignatureComputer, error)

type SignatureComputerParams

type SignatureComputerParams struct {
	SignatureLen int
	AvgChunkSize int
	MinChunkSize int
	MaxChunkSize int
}

type SizeBasedSelector

type SizeBasedSelector struct {
	MinBlobSize uint64
	MaxBlobSize uint64
	SizeRatio   float64
}

SizeBasedSelector groups blobs by similar size and assigns deltas within each group against the largest blob as the base.

TODO: Content-type base selection strategy — madder queries dodder for blob type info (binary flag), groups text blobs separately from binary.

TODO: Object-history base selection strategy — dodder provides related-object hash chains, packer deltas successive versions of the same object against each other.

func (*SizeBasedSelector) SelectBases

func (s *SizeBasedSelector) SelectBases(
	blobs BlobSet,
	assignments DeltaAssignments,
)

Source Files

  • base_selector.go
  • base_selector_lsh.go
  • base_selector_size.go
  • cache_reader.go
  • cache_v1.go
  • cache_writer.go
  • data_reader.go
  • data_reader_v1.go
  • data_writer.go
  • data_writer_v1.go
  • delta_algorithm.go
  • delta_bsdiff.go
  • gear_hash.go
  • index_reader.go
  • index_v1.go
  • index_writer.go
  • minhash.go
  • selector_registry.go
  • signature_computer.go
  • signature_gear_cdc_minhash.go
  • signature_registry.go
  • types.go

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL