Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
9cb6bc6
Add sparse file infrastructure for partial downloads
claude Nov 7, 2025
f0816d8
Implement sparse file partial download for random I/O optimization
claude Nov 7, 2025
bb6c98d
Simplify ByteRangeMap to use chunk-based tracking
xinyangge Nov 7, 2025
bcdd6e4
Fix sparse file bug: enable continuous on-demand chunk downloads
xinyangge Nov 7, 2025
5b4d9df
Add sparse file configuration to params.yaml schema
xinyangge Nov 7, 2025
4269678
Fix sparse file compilation errors
xinyangge Nov 8, 2025
035ab99
Fix sparse file cache validation to prevent premature cache handle cl…
xinyangge Nov 8, 2025
ff4b64f
Fix LRU cache size tracking for sparse file chunk downloads
xinyangge Nov 8, 2025
2258f1c
Handle sparse file cache size limit by resetting to single chunk
xinyangge Nov 8, 2025
e45b31f
Use O_DIRECT and in-memory buffering to eliminate double page cache
xinyangge Nov 8, 2025
65c7b47
Simplify CacheHandle by removing lazy Job initialization
xinyangge Nov 8, 2025
961cdfc
Use MaxUint64 sentinel for sparse file Offset to simplify checks
xinyangge Nov 8, 2025
5416e84
Revert "Simplify CacheHandle by removing lazy Job initialization"
xinyangge Nov 8, 2025
ce4c3a2
Simplify CacheHandle by removing jobManager, bucket, and object fields
xinyangge Nov 8, 2025
ee4ecaf
Remove obsolete comment about sparse file Offset sentinel
xinyangge Nov 8, 2025
b6eaf64
Replace fileCacheConfig with sparseFileChunkSizeMb in CacheHandle
xinyangge Nov 8, 2025
f841392
Use sequential-read-size-mb for sparse file chunks instead of separat…
xinyangge Nov 8, 2025
135a27b
Remove empty resolveSparseFileConfig function
xinyangge Nov 8, 2025
90e3109
Remove sparseFileChunkSizeMb field from CacheHandle and use job's Seq…
xinyangge Nov 8, 2025
fffbd1b
Refactor CacheHandler to store isSparse bool instead of full FileCach…
xinyangge Nov 8, 2025
8c84cb5
Pass isSparse bool to NewCacheHandler instead of whole FileCacheConfig
xinyangge Nov 8, 2025
debc995
Consolidate duplicate sparse file download logic in CacheHandle.Read
xinyangge Nov 8, 2025
e9d9429
Fix comment to clarify fileDownloadJob can be nil in either case
xinyangge Nov 8, 2025
7d8562f
Replace hardcoded oDirectFlag constant with syscall.O_DIRECT
xinyangge Nov 8, 2025
adc2c10
Remove in-memory sparse chunk caching from CacheHandle.Read
xinyangge Nov 8, 2025
dd87a22
Remove duplicate range check from DownloadRange
xinyangge Nov 8, 2025
91a4905
Remove redundant sparse mode check from DownloadRange
xinyangge Nov 8, 2025
68334c8
Remove unnecessary nil check for DownloadedRanges in DownloadRange
xinyangge Nov 8, 2025
57cbc2c
Remove unused sparseChunkData fields and change DownloadRange to retu…
xinyangge Nov 8, 2025
bd3df3b
Remove LRU cache size accounting for sparse file range downloads
xinyangge Nov 8, 2025
6df727a
Simplify DownloadRange by removing redundant FileInfo lookup
xinyangge Nov 9, 2025
5c31264
Use GetJob instead of CreateJobIfNotExists in GetCacheHandle
xinyangge Nov 9, 2025
6349c2a
Simplify DownloadRange by removing O_DIRECT complexity
xinyangge Nov 9, 2025
933be33
Add UpdateSize method to LRU cache for sparse file size tracking
xinyangge Nov 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,218 changes: 1,943 additions & 1,275 deletions cfg/config.go

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions cfg/params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,13 @@ params:
usage: "Enable parallel downloads."
default: false

- config-path: "file-cache.enable-sparse-file"
flag-name: "file-cache-enable-sparse-file"
type: "bool"
usage: "Enable sparse file mode for random I/O optimization with partial downloads."
default: false
hide-flag: true

- config-path: "file-cache.exclude-regex"
flag-name: "file-cache-exclude-regex"
type: "string"
Expand Down
180 changes: 180 additions & 0 deletions internal/cache/data/byte_range.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package data

import (
"sort"
"sync"
)

// DefaultChunkSize is the granularity at which downloaded ranges are tracked.
const DefaultChunkSize = 1024 * 1024 // 1MB

// ByteRange represents a contiguous range of bytes [Start, End)
type ByteRange struct {
	Start uint64
	End   uint64 // exclusive
}

// ByteRangeMap tracks which chunk-aligned byte ranges have been downloaded in a sparse file.
// It uses fixed-size chunks (1MB by default) for simplified tracking and assumes all downloads
// are aligned to chunk boundaries.
//
// All methods are safe for concurrent use: reads take mu.RLock and writes take mu.Lock.
type ByteRangeMap struct {
	mu        sync.RWMutex
	chunkSize uint64
	chunks    map[uint64]struct{} // set of downloaded chunk IDs
}

// NewByteRangeMap creates a new empty ByteRangeMap with 1MB chunks
func NewByteRangeMap() *ByteRangeMap {
	return &ByteRangeMap{
		chunkSize: DefaultChunkSize,
		chunks:    make(map[uint64]struct{}),
	}
}

// chunkID returns the chunk ID for a given byte offset
func (brm *ByteRangeMap) chunkID(offset uint64) uint64 {
	return offset / brm.chunkSize
}

// AddRange marks all chunks in the range [start, end) as downloaded.
// Returns the total number of new bytes added (chunks * chunkSize).
// Note: a range that only partially overlaps a chunk still marks (and
// accounts for) the whole chunk, so the return value may exceed end-start.
func (brm *ByteRangeMap) AddRange(start, end uint64) uint64 {
	brm.mu.Lock()
	defer brm.mu.Unlock()

	// Empty or inverted range: nothing to do.
	if start >= end {
		return 0
	}

	startChunk := brm.chunkID(start)
	endChunk := brm.chunkID(end - 1) // inclusive end

	var bytesAdded uint64
	for id := startChunk; id <= endChunk; id++ {
		if _, ok := brm.chunks[id]; !ok {
			brm.chunks[id] = struct{}{}
			bytesAdded += brm.chunkSize
		}
	}

	return bytesAdded
}

// ContainsRange checks if all chunks covering [start, end) have been downloaded.
// An empty range (start >= end) is vacuously contained.
func (brm *ByteRangeMap) ContainsRange(start, end uint64) bool {
	brm.mu.RLock()
	defer brm.mu.RUnlock()

	if start >= end {
		return true
	}

	startChunk := brm.chunkID(start)
	endChunk := brm.chunkID(end - 1)

	for id := startChunk; id <= endChunk; id++ {
		if _, ok := brm.chunks[id]; !ok {
			return false
		}
	}
	return true
}

// GetMissingRanges returns chunk-aligned ranges that haven't been downloaded.
// Each returned range will be exactly chunkSize bytes. Returns nil when the
// query range is empty or fully downloaded.
func (brm *ByteRangeMap) GetMissingRanges(start, end uint64) []ByteRange {
	brm.mu.RLock()
	defer brm.mu.RUnlock()

	if start >= end {
		return nil
	}

	startChunk := brm.chunkID(start)
	endChunk := brm.chunkID(end - 1)

	// Pre-size to the worst case (every chunk missing); a fully-covered
	// query still returns nil because append is never reached.
	missing := make([]ByteRange, 0, endChunk-startChunk+1)
	for id := startChunk; id <= endChunk; id++ {
		if _, ok := brm.chunks[id]; !ok {
			chunkStart := id * brm.chunkSize
			missing = append(missing, ByteRange{
				Start: chunkStart,
				End:   chunkStart + brm.chunkSize,
			})
		}
	}
	if len(missing) == 0 {
		return nil
	}

	return missing
}

// TotalBytes returns the total number of bytes downloaded (number of chunks * chunk size).
// Because tracking is chunk-granular, this may over-report relative to the
// object size when the final chunk is only partially backed by object data.
func (brm *ByteRangeMap) TotalBytes() uint64 {
	brm.mu.RLock()
	defer brm.mu.RUnlock()
	return uint64(len(brm.chunks)) * brm.chunkSize
}

// Clear removes all chunk records
func (brm *ByteRangeMap) Clear() {
	brm.mu.Lock()
	defer brm.mu.Unlock()
	brm.chunks = make(map[uint64]struct{})
}

// Ranges returns all downloaded ranges as chunk-aligned ByteRanges (for debugging/testing).
// Consecutive chunks are merged into a single range; results are sorted by Start.
func (brm *ByteRangeMap) Ranges() []ByteRange {
	brm.mu.RLock()
	defer brm.mu.RUnlock()

	if len(brm.chunks) == 0 {
		return nil
	}

	// Collect and sort chunk IDs for deterministic, mergeable output.
	chunkIDs := make([]uint64, 0, len(brm.chunks))
	for id := range brm.chunks {
		chunkIDs = append(chunkIDs, id)
	}
	sort.Slice(chunkIDs, func(i, j int) bool {
		return chunkIDs[i] < chunkIDs[j]
	})

	// Build ranges by merging consecutive chunks.
	ranges := make([]ByteRange, 0, len(chunkIDs))
	start := chunkIDs[0]
	prev := start

	for i := 1; i < len(chunkIDs); i++ {
		if chunkIDs[i] != prev+1 {
			// Gap found, emit current range
			ranges = append(ranges, ByteRange{
				Start: start * brm.chunkSize,
				End:   (prev + 1) * brm.chunkSize,
			})
			start = chunkIDs[i]
		}
		prev = chunkIDs[i]
	}

	// Emit final range
	ranges = append(ranges, ByteRange{
		Start: start * brm.chunkSize,
		End:   (prev + 1) * brm.chunkSize,
	})

	return ranges
}
Loading