• Masatake YAMATO's avatar
    check ADVICE of fadvise64_64 even if get_xip_page is given · b5beb1ca
    Masatake YAMATO authored
    
    I've written some test programs in ltp project.  During writing I met an
    problem which I cannot solve in user land.  So I wrote a patch for linux
    kernel.  Please, include this patch if acceptable.
    
    The test program tests the 4th parameter of fadvise64_64:
    
        long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);
    
    My test case calls fadvise64_64 with invalid advice value and checks errno is
    set to EINVAL.  About the advice parameter man page says:
    
        ...
        Permissible values for advice include:
    
    	   POSIX_FADV_NORMAL
                      ...
    	   POSIX_FADV_SEQUENTIAL
                      ...
    	   POSIX_FADV_RANDOM
    		  ...
    	   POSIX_FADV_NOREUSE
                      ...
    	   POSIX_FADV_WILLNEED
                      ...
    	   POSIX_FADV_DONTNEED
    		  ...
        ERRORS
               ...
    	   EINVAL An invalid value was specified for advice.
    
    However, I got a bug report that the system call invocations
    in my test case returned 0 unexpectedly.
    
    I've inspected the kernel code:
    
        asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
        {
    	    struct file *file = fget(fd);
    	    struct address_space *mapping;
    	    struct backing_dev_info *bdi;
    	    loff_t endbyte;			/* inclusive */
    	    pgoff_t start_index;
    	    pgoff_t end_index;
    	    unsigned long nrpages;
    	    int ret = 0;
    
    	    if (!file)
    		    return -EBADF;
    
    	    if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) {
    		    ret = -ESPIPE;
    		    goto out;
    	    }
    
    	    mapping = file->f_mapping;
    	    if (!mapping || len < 0) {
    		    ret = -EINVAL;
    		    goto out;
    	    }
    
    	    if (mapping->a_ops->get_xip_page)
    		    /* no bad return value, but ignore advice */
    		    goto out;
        ...
        out:
    	    fput(file);
    	    return ret;
        }
    
    I found the advice parameter is just ignored in the case
    mapping->a_ops->get_xip_page is given. This behavior is different from
    what is written on the man page. Is this o.k.?
    
    get_xip_page is given if CONFIG_EXT2_FS_XIP is true.
    Anyway I cannot find the easy way to detect get_xip_page
    field is given or CONFIG_EXT2_FS_XIP is true from the
    user space.
    
    I propose the following patch which checks the advice parameter
    even if get_xip_page is given.
    Signed-off-by: default avatarMasatake YAMATO <yamato@redhat.com>
    Acked-by: default avatarCarsten Otte <cotte@de.ibm.com>
    Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
    Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
    b5beb1ca
fadvise.c 2.86 KB
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	akpm@digeo.com
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */
asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
	struct file *file = fget(fd);
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;
	int ret = 0;

	if (!file)
		return -EBADF;

	if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) {
		ret = -ESPIPE;
		goto out;
	}

	mapping = file->f_mapping;
	if (!mapping || len < 0) {
		ret = -EINVAL;
		goto out;
	}

	if (mapping->a_ops->get_xip_page) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			ret = -EINVAL;
		}
		goto out;
	}

	/* Careful about overflows. Len == 0 means "as much as possible" */
	endbyte = offset + len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	bdi = mapping->backing_dev_info;

	switch (advice) {
	case POSIX_FADV_NORMAL:
		file->f_ra.ra_pages = bdi->ra_pages;
		break;
	case POSIX_FADV_RANDOM:
		file->f_ra.ra_pages = 0;
		break;
	case POSIX_FADV_SEQUENTIAL:
		file->f_ra.ra_pages = bdi->ra_pages * 2;
		break;
	case POSIX_FADV_WILLNEED:
		if (!mapping->a_ops->readpage) {
			ret = -EINVAL;
			break;
		}

		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_CACHE_SHIFT;
		end_index = endbyte >> PAGE_CACHE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;
		
		ret = force_page_cache_readahead(mapping, file,
				start_index,
				max_sane_readahead(nrpages));
		if (ret > 0)
			ret = 0;
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!bdi_write_congested(mapping->backing_dev_info))
			filemap_flush(mapping);

		/* First and last FULL page! */
		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
		end_index = (endbyte >> PAGE_CACHE_SHIFT);

		if (end_index >= start_index)
			invalidate_mapping_pages(mapping, start_index,
						end_index);
		break;
	default:
		ret = -EINVAL;
	}
out:
	fput(file);
	return ret;
}

#ifdef __ARCH_WANT_SYS_FADVISE64

asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice)
{
	return sys_fadvise64_64(fd, offset, len, advice);
}

#endif