diff --git a/README.md b/README.md
index 6e428368..15cce28b 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Also check out our CSI S3 driver (GeeseFS-based): https://github.com/yandex-clou
 | Read after write | + | + | - | + | + |
 | Partial writes | + | + | - | + | + |
 | Truncate | + | - | - | + | + |
+| fallocate | + | - | - | - | - |
 | chmod/chown | Y | - | - | + | - |
 | fsync | + | - | - | + | + |
 | Symlinks | Y | - | - | + | + |
diff --git a/internal/file.go b/internal/file.go
index c1daef03..2ec33f68 100644
--- a/internal/file.go
+++ b/internal/file.go
@@ -205,20 +205,54 @@ func insertBuffer(buffers []*FileBuffer, pos int, add ...*FileBuffer) []*FileBuf
 }
 
 func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData bool) int64 {
-	allocated := int64(0)
-
-	start := locateBuffer(inode.buffers, offset)
 	dataLen := uint64(len(data))
 	endOffset := offset+dataLen
 
 	// Remove intersecting parts as they're being overwritten
-	// If we're inserting a clean buffer, don't remove dirty ones
+	allocated := inode.removeRange(offset, dataLen, state)
+
+	// Insert non-overlapping parts of the buffer
+	curOffset := offset
+	dataPtr := &BufferPointer{
+		mem: data,
+		refs: 0,
+	}
+	start := locateBuffer(inode.buffers, offset)
+	pos := start
+	for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
+		b := inode.buffers[pos]
+		if b.offset + b.length <= offset {
+			continue
+		}
+		if b.offset > curOffset {
+			// insert curOffset->min(b.offset,endOffset)
+			nextEnd := b.offset
+			if nextEnd > endOffset {
+				nextEnd = endOffset
+			}
+			allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : nextEnd-offset], state, copyData, dataPtr)
+		}
+		curOffset = b.offset + b.length
+	}
+	if curOffset < endOffset {
+		// Insert curOffset->endOffset
+		allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : ], state, copyData, dataPtr)
+	}
+
+	return allocated
+}
+
+// Remove buffers in range (offset..offset+size)
+func (inode *Inode) removeRange(offset, size uint64, state int16) (allocated int64) {
+	start := locateBuffer(inode.buffers, offset)
+	endOffset := offset+size
 	for pos := start; pos < len(inode.buffers); pos++ {
 		b := inode.buffers[pos]
 		if b.offset >= endOffset {
 			break
 		}
 		bufEnd := b.offset+b.length
+		// If we're inserting a clean buffer, don't remove dirty ones
 		if (state >= BUF_DIRTY || b.state < BUF_DIRTY) && bufEnd > offset && endOffset > b.offset {
 			if offset <= b.offset {
 				if endOffset >= bufEnd {
@@ -284,35 +318,35 @@ func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData
 			}
 		}
 	}
+	return
+}
 
-	// Insert non-overlapping parts of the buffer
-	curOffset := offset
-	dataPtr := &BufferPointer{
-		mem: data,
-		refs: 0,
-	}
-	pos := start
-	for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
-		b := inode.buffers[pos]
-		if b.offset + b.length <= offset {
-			continue
-		}
-		if b.offset > curOffset {
-			// insert curOffset->min(b.offset,endOffset)
-			nextEnd := b.offset
-			if nextEnd > endOffset {
-				nextEnd = endOffset
-			}
-			allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : nextEnd-offset], state, copyData, dataPtr)
-		}
-		curOffset = b.offset + b.length
-	}
-	if curOffset < endOffset {
-		// Insert curOffset->endOffset
-		allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : ], state, copyData, dataPtr)
+func (inode *Inode) zeroRange(offset, size uint64) (bool, int64) {
+	// Check if it's already zeroed
+	pos := locateBuffer(inode.buffers, offset)
+	if pos < len(inode.buffers) && inode.buffers[pos].zero &&
+		inode.buffers[pos].offset == offset && inode.buffers[pos].length == size {
+		return false, 0
 	}
 
-	return allocated
+	// Remove intersecting parts as they're being overwritten
+	allocated := inode.removeRange(offset, size, BUF_DIRTY)
+
+	// Insert a zero buffer
+	pos = locateBuffer(inode.buffers, offset)
+	inode.buffers = insertBuffer(inode.buffers, pos, &FileBuffer{
+		offset: offset,
+		dirtyID: atomic.AddUint64(&inode.fs.bufferPool.curDirtyID, 1),
+		state: BUF_DIRTY,
+		onDisk: false,
+		zero: true,
+		recency: 0,
+		length: size,
+		data: nil,
+		ptr: nil,
+	})
+
+	return true, allocated
 }
 
 func (inode *Inode) ResizeUnlocked(newSize uint64, zeroFill bool, finalizeFlushed bool) {
diff --git a/internal/goofys.go b/internal/goofys.go
index 19204768..95deb9a9 100644
--- a/internal/goofys.go
+++ b/internal/goofys.go
@@ -1805,3 +1805,85 @@ func (fs *Goofys) SyncFS(parent *Inode) (err error) {
 	}
 	return
 }
+
+const (
+	FALLOC_FL_KEEP_SIZE = uint32(0x01)
+	FALLOC_FL_PUNCH_HOLE = uint32(0x02)
+	FALLOC_FL_COLLAPSE_RANGE = uint32(0x08)
+	FALLOC_FL_ZERO_RANGE = uint32(0x10)
+	FALLOC_FL_INSERT_RANGE = uint32(0x20)
+)
+
+func (fs *Goofys) Fallocate(
+	ctx context.Context,
+	op *fuseops.FallocateOp) (err error) {
+
+	atomic.AddInt64(&fs.stats.metadataWrites, 1)
+
+	fs.mu.RLock()
+	inode := fs.getInodeOrDie(op.Inode)
+	fs.mu.RUnlock()
+
+	if atomic.LoadInt32(&inode.refreshed) == -1 {
+		// Stale inode
+		return syscall.ESTALE
+	}
+
+	if op.Length == 0 {
+		return nil
+	}
+
+	inode.mu.Lock()
+
+	modified := false
+
+	if (op.Mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) != 0 {
+		// Insert/collapse range operations are not supported.
+		// It's possible to support them, but it would require buffer remapping:
+		// if you open a file, insert/collapse a range and then read past the
+		// affected offset, you should get data from the old offset! And it's probably
+		// wise to use UploadPartCopy with the corresponding ranges to optimize copying
+		// on the server side in this case. Some day we might even be able to preserve
+		// multipart part IDs when cutting a non-finalized upload across part boundaries,
+		// but for now we can't - part offsets are always fixed.
+		inode.mu.Unlock()
+		return syscall.ENOTSUP
+	}
+
+	if op.Offset+op.Length > inode.Attributes.Size {
+		if (op.Mode & FALLOC_FL_KEEP_SIZE) == 0 {
+			// Resize
+			if op.Offset+op.Length > fs.getMaxFileSize() {
+				// File size too large
+				log.Warnf(
+					"Maximum file size exceeded when trying to extend %v to %v bytes using fallocate",
+					inode.FullName(), op.Offset+op.Length,
+				)
+				inode.mu.Unlock()
+				return syscall.EFBIG
+			}
+			inode.ResizeUnlocked(op.Offset+op.Length, true, true)
+			modified = true
+		} else {
+			if op.Offset > inode.Attributes.Size {
+				op.Offset = inode.Attributes.Size
+			}
+			op.Length = inode.Attributes.Size-op.Offset
+		}
+	}
+
+	if (op.Mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) != 0 {
+		// Zero fill
+		mod, _ := inode.zeroRange(op.Offset, op.Length)
+		modified = modified || mod
+	}
+
+	if modified && inode.CacheState == ST_CACHED {
+		inode.SetCacheState(ST_MODIFIED)
+		inode.fs.WakeupFlusher()
+	}
+
+	inode.mu.Unlock()
+
+	return
+}
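
A quick way to exercise the new handler from userspace (an illustrative sketch, not part of this change; the mount path and file name are placeholders) is to call fallocate(2) through golang.org/x/sys/unix. A plain fallocate past EOF goes through the ResizeUnlocked branch, PUNCH_HOLE|KEEP_SIZE and ZERO_RANGE go through the new zeroRange path, and COLLAPSE_RANGE/INSERT_RANGE should come back as "operation not supported":

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	// Placeholder path: assumes a GeeseFS mount at /mnt/geesefs.
	fd, err := unix.Open("/mnt/geesefs/test.bin", unix.O_RDWR|unix.O_CREAT, 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// Plain fallocate: extends the file to 1 MiB, zero-filled
	// (the ResizeUnlocked branch of the new handler).
	if err = unix.Fallocate(fd, 0, 0, 1<<20); err != nil {
		log.Fatal(err)
	}

	// Punch a 64 KiB hole at offset 128 KiB without changing the file size;
	// fallocate(2) requires KEEP_SIZE together with PUNCH_HOLE
	// (the zeroRange branch of the new handler).
	err = unix.Fallocate(fd, unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, 128<<10, 64<<10)
	if err != nil {
		log.Fatal(err)
	}

	// Collapsing a range should be rejected with ENOTSUP by the new handler.
	err = unix.Fallocate(fd, unix.FALLOC_FL_COLLAPSE_RANGE, 0, 64<<10)
	fmt.Println("collapse range:", err) // expect "operation not supported"
}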