What happens when a sparse file allocation fails?

time to read 18 min | 3531 words

Today I set out to figure out an answer to a very specific question. What happens at the OS level when you try to allocate disk space for a sparse file and there is no additional disk space?

Sparse files are a fairly advanced feature of file systems. They allow you to define a file whose size is 10GB, but that only takes 2GB of actual disk space. The rest is sparse (takes no disk space and on read will return just zeroes). The OS will automatically allocate additional disk space for you if you write to the sparse ranges.

This leads to an interesting question: what happens when you write to a sparse file if there is no additional disk space?

Let’s look at the problem on Linux first. We define a RAM disk with 32MB, like so:


sudo mkdir -p /mnt/ramdisk
sudo mount -t tmpfs -o size=32M tmpfs /mnt/ramdisk

And then we write a program that does the following (on a disk with just 32MB):

  • Create a file - write 32 MB to it
  • Punch a hole of 8 MB in the file (range is 12MB - 20MB)
  • Create another file - write 4 MB to it (there is now only 4MB available)
  • Open the original file and try to write to the range with the hole in it (requiring additional disk space allocation)


#define _GNU_SOURCE


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/falloc.h>
#include <errno.h>
#include <string.h>
#include <sys/random.h>


#define MB (1024 * 1024)


/*
 * Write exactly `count` bytes from `buf` to `fd`, looping over short writes.
 *
 * A write interrupted by a signal (EINTR) is retried. Any other write error,
 * or a write that reports zero bytes, prints a diagnostic to stderr and
 * terminates the process with EXIT_FAILURE, so this function only returns
 * once everything has been written.
 */
void write_all(int fd, const void *buf, size_t count)
{
    const char *cursor = buf;
    size_t remaining = count;

    while (remaining != 0)
    {
        ssize_t n = write(fd, cursor, remaining);

        /* Common case first: some bytes went out, advance past them. */
        if (n > 0)
        {
            cursor += n;
            remaining -= (size_t)n;
            continue;
        }

        if (n == 0)
        {
            fprintf(stderr, "Zero len write is bad: errno = %d (%s)\n", errno, strerror(errno));
            exit(EXIT_FAILURE);
        }

        /* n < 0: only an interrupted call is worth retrying. */
        if (errno != EINTR)
        {
            fprintf(stderr, "Write error: errno = %d (%s)\n", errno, strerror(errno));
            exit(EXIT_FAILURE);
        }
    }
}


/* Scratch files used by the experiment (the article mounts a 32 MB tmpfs at /mnt/ramdisk). */
static const char FULL_FILE[] = "/mnt/ramdisk/fullfile";
static const char OTHER_FILE[] = "/mnt/ramdisk/anotherfile";

/*
 * Fill `buf` with `len` random bytes.
 *
 * getrandom() is called in a loop because a single call is allowed to
 * return fewer bytes than requested; EINTR is retried, any other error
 * terminates the process.
 */
static void fill_random(char *buf, size_t len)
{
    size_t filled = 0;

    while (filled < len)
    {
        ssize_t got = getrandom(buf + filled, len - filled, 0);

        if (got < 0)
        {
            if (errno == EINTR)
                continue;

            fprintf(stderr, "getrandom: errno = %d (%s)\n", errno, strerror(errno));
            exit(EXIT_FAILURE);
        }

        filled += (size_t)got;
    }
}

/*
 * open() wrapper that terminates the process on failure.
 * `what` is the prefix of the error message; mode 0644 is passed on every
 * call and only matters when `flags` contains O_CREAT.
 */
static int open_or_die(const char *path, int flags, const char *what)
{
    int fd = open(path, flags, 0644);

    if (fd == -1)
    {
        fprintf(stderr, "%s: errno = %d (%s)\n", what, errno, strerror(errno));
        exit(EXIT_FAILURE);
    }
    return fd;
}

/* Write `megabytes` copies of the 1 MB buffer `buf` to `fd` at its current offset. */
static void write_megabytes(int fd, const char *buf, int megabytes)
{
    for (int i = 0; i < megabytes; i++)
    {
        write_all(fd, buf, MB);
    }
}

/*
 * Sparse-file out-of-space experiment:
 *   1. write 32 MB to FULL_FILE,
 *   2. punch an 8 MB hole in it at offset 12 MB,
 *   3. write 4 MB to OTHER_FILE,
 *   4. write 8 MB back into the hole of FULL_FILE.
 *
 * Every failing step prints errno and exits with EXIT_FAILURE (write
 * failures are reported by write_all). Returns 0 if all steps succeed.
 */
int main(void)
{
    /* static: keeps the 1 MB buffer off the stack. */
    static char buffer[MB];
    int fd;

    /* Best-effort cleanup of a previous run; a missing file is not an error. */
    unlink(FULL_FILE);
    unlink(OTHER_FILE);

    fill_random(buffer, sizeof buffer);

    fd = open_or_die(FULL_FILE, O_RDWR | O_CREAT | O_TRUNC, "open full file");
    write_megabytes(fd, buffer, 32);
    close(fd);

    fd = open_or_die(FULL_FILE, O_RDWR, "reopen full file");
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 12 * MB, 8 * MB) == -1)
    {
        fprintf(stderr, "fallocate failure: errno = %d (%s)\n", errno, strerror(errno));
        exit(EXIT_FAILURE);
    }
    close(fd);

    fd = open_or_die(OTHER_FILE, O_RDWR | O_CREAT | O_TRUNC, "open another file");
    write_megabytes(fd, buffer, 4);
    close(fd);

    /* Write 8 MB to the hole in the first file */
    fd = open_or_die(FULL_FILE, O_RDWR, "reopen full file 2");

    /* Seek to the start of the hole */
    if (lseek(fd, 12 * MB, SEEK_SET) == -1)
    {
        fprintf(stderr, "seek full file: errno = %d (%s)\n", errno, strerror(errno));
        exit(EXIT_FAILURE);
    }
    write_megabytes(fd, buffer, 8);
    close(fd);

    printf("Operations completed successfully.\n");
    return 0;
}

As expected, this code will fail on the 5th write into the hole (since there is no disk space left to allocate on the disk). The error would be:


Write error: errno = 28 (No space left on device)

Here is what the file system reports:


$ du -h /mnt/ramdisk/*
4.0M    /mnt/ramdisk/anotherfile
28M     /mnt/ramdisk/fullfile


$ ll -h /mnt/ramdisk/
total 33M
drwxrwxrwt 2 root   root     80 Jan  9 10:43 ./
drwxr-xr-x 6 root   root   4.0K Jan  9 10:30 ../
-rw-r--r-- 1 ayende ayende 4.0M Jan  9 10:43 anotherfile
-rw-r--r-- 1 ayende ayende  32M Jan  9 10:43 fullfile

As you can see, du reports a total of 32 MB of disk space actually in use, while ll reports file sizes that add up to more than that (because we have hole punching).

What would happen if we were to run this using memory-mapped I/O? Here is the code:


fd = open("/mnt/ramdisk/fullfile", O_RDWR);


char *mapped_memory = mmap(NULL, 32 * MB, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (mapped_memory == MAP_FAILED)
{
    fprintf(stderr, "fail mmap: errno = %d (%s)\n", errno, strerror(errno));
    exit(EXIT_FAILURE);
}


for (size_t i = (12 * MB); i < (20 * MB); i++)
{
    mapped_memory[i] = 1;
}
munmap(mapped_memory, 32 * MB);
close(fd);

This will lead to an interesting scenario. Because we are writing into the hole, the file system needs to allocate disk space to back the memory we touch, and since there is none left, this code will fail with a segmentation fault.

It will fail in the loop, by the way: as part of the page fault to bring the memory in, the file system needs to allocate the disk space. If there is no such disk space, the allocation will fail. The only thing the OS can do in this case is to fail the write, which leads to a segmentation fault.

I also tried that on Windows. I defined a virtual disk like so:


$ diskpart
create vdisk file="D:\ramdisk.vhd" maximum=32
select vdisk file="D:\ramdisk.vhd"
attach vdisk
create partition primary
format fs=NTFS quick label=RAMDISK
assign letter=R
exit

This creates a 32MB disk and assigns it the letter R. Note that we are using NTFS, which has its own metadata, so we have roughly 21MB or so of usable disk space to play with here.

Here is the Windows code that simulates the same behavior as the Linux code above:


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>


#define MB (1024 * 1024)


int main() {
    HANDLE hFile, hFile2;
    DWORD bytesWritten;
    LARGE_INTEGER fileSize, moveAmount;
    char* buffer = malloc(MB);
    
    int i;


        DeleteFileA("R:\\original_file.bin");
        DeleteFileA("R:\\another_file.bin");


    hFile = CreateFileA("R:/original_file.bin", GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        printf("Error creating file: %d\n", GetLastError());
        exit(__LINE__);
    }


    for (int i = 0; i < 20; i++) {
        if (!WriteFile(hFile, buffer, MB, &bytesWritten, NULL)) {
            fprintf(stderr, "WriteFile failed on iteration %d: %x\n", i, GetLastError());
            exit(__LINE__);
        }
        if (bytesWritten != MB) {
            fprintf(stderr, "Failed to write full buffer on iteration %d\n", i);
            exit(__LINE__);
        }
    }


    FILE_ZERO_DATA_INFORMATION zeroDataInfo;
    zeroDataInfo.FileOffset.QuadPart = 6 * MB; 
    zeroDataInfo.BeyondFinalZero.QuadPart = 18 * MB; 


    if (!DeviceIoControl(hFile, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, NULL, NULL) || 
        !DeviceIoControl(hFile, FSCTL_SET_ZERO_DATA, &zeroDataInfo, sizeof(zeroDataInfo), NULL, 0, NULL, NULL)) {
                printf("Error setting zero data: %d\n", GetLastError());
        exit(__LINE__);
        }




    // Create another file of size 4 MB
    hFile2 = CreateFileA("R:/another_file.bin", GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile2 == INVALID_HANDLE_VALUE) {
        printf("Error creating second file: %d\n", GetLastError());
        exit(__LINE__);
    }




    for (int i = 0; i < 4; i++) {
        if (!WriteFile(hFile2, buffer, MB, &bytesWritten, NULL)) {
            fprintf(stderr, "WriteFile 2 failed on iteration %d: %x\n", i, GetLastError());
            exit(__LINE__);
        }
        if (bytesWritten != MB) {
            fprintf(stderr, "Failed to write full buffer 2 on iteration %d\n", i);
            exit(__LINE__);
        }
    }


        moveAmount.QuadPart = 12 * MB;
    SetFilePointerEx(hFile, moveAmount, NULL, FILE_BEGIN);
    for (i = 0; i < 8; i++) {
        if (!WriteFile(hFile, buffer, MB, &bytesWritten, NULL)) {
            printf("Error writing to file: %d\n", GetLastError());
            exit(__LINE__);
        }
    }


    return 0;
}

And that gives us the exact same behavior as in Linux. One of these writes will fail because there is no more disk space for it. What about when we use memory-mapped I/O?


// Create a read/write mapping object over hFile. Both size arguments are 0;
// per the CreateFileMapping documentation that makes the mapping as large
// as the file currently is.
HANDLE hMapFile = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, 0, NULL);
if (hMapFile == NULL) {
    fprintf(stderr, "Could not create file mapping object: %lx\n", GetLastError());
    exit(__LINE__);
}


// Map a writable view starting at offset 0; length 0 is documented as
// mapping through to the end of the file mapping.
char* lpMapAddress = MapViewOfFile(hMapFile, FILE_MAP_WRITE, 0, 0, 0);
if (lpMapAddress == NULL) {
    fprintf(stderr, "Could not map view of file: %lx\n", GetLastError());
    exit(__LINE__);
}


// Increment every byte of the first 20 MB of the view, one byte at a time.
for (size_t offset = 0; offset < (size_t)20 * MB; offset++) {
    lpMapAddress[offset]++;
}


// Release the view and the mapping object if the loop runs to completion.
UnmapViewOfFile(lpMapAddress);
CloseHandle(hMapFile);

That results in the expected access violation:

I didn’t bother checking Mac or BSD, but I’m assuming that they behave in the same manner. I can’t conceive of anything else that they could reasonably do.

You can find my full source here.