• Mel Gorman's avatar
    mm: mark the correct zone as full when scanning zonelists · 5bead2a0
    Mel Gorman authored
    
    The iterator for_each_zone_zonelist() uses a struct zoneref *z cursor when
    scanning zonelists to keep track of where in the zonelist it is.  The
    zoneref that is returned corresponds to the the next zone that is to be
    scanned, not the current one.  It was intended to be treated as an opaque
    list.
    
    When the page allocator is scanning a zonelist, it marks elements in the
    zonelist corresponding to zones that are temporarily full.  As the
    zonelist is being updated, it uses the cursor here;
    
      if (NUMA_BUILD)
            zlc_mark_zone_full(zonelist, z);
    
    This is intended to prevent rescanning in the near future but the zoneref
    cursor does not correspond to the zone that has been found to be full.
    This is an easy misunderstanding to make so this patch corrects the
    problem by changing zoneref cursor to be the current zone being scanned
    instead of the next one.
    Signed-off-by: default avatarMel Gorman <mel@csn.ul.ie>
    Cc: Andy Whitcroft <apw@shadowen.org>
    Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
    Cc: <stable@kernel.org>		[2.6.26.x]
    Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
    Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
    5bead2a0
mmzone.c 1.47 KB
/*
 * linux/mm/mmzone.c
 *
 * management codes for pgdats and zones.
 */


#include <linux/stddef.h>
#include <linux/mmzone.h>
#include <linux/module.h>

struct pglist_data *first_online_pgdat(void)
{
	return NODE_DATA(first_online_node);
}

struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
{
	int nid = next_online_node(pgdat->node_id);

	if (nid == MAX_NUMNODES)
		return NULL;
	return NODE_DATA(nid);
}

/*
 * next_zone - helper magic for for_each_zone()
 */
struct zone *next_zone(struct zone *zone)
{
	pg_data_t *pgdat = zone->zone_pgdat;

	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
		zone++;
	else {
		pgdat = next_online_pgdat(pgdat);
		if (pgdat)
			zone = pgdat->node_zones;
		else
			zone = NULL;
	}
	return zone;
}

static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
{
#ifdef CONFIG_NUMA
	return node_isset(zonelist_node_idx(zref), *nodes);
#else
	return 1;
#endif /* CONFIG_NUMA */
}

/* Returns the next zone at or below highest_zoneidx in a zonelist */
struct zoneref *next_zones_zonelist(struct zoneref *z,
					enum zone_type highest_zoneidx,
					nodemask_t *nodes,
					struct zone **zone)
{
	/*
	 * Find the next suitable zone to use for the allocation.
	 * Only filter based on nodemask if it's set
	 */
	if (likely(nodes == NULL))
		while (zonelist_zone_idx(z) > highest_zoneidx)
			z++;
	else
		while (zonelist_zone_idx(z) > highest_zoneidx ||
				(z->zone && !zref_in_nodemask(z, nodes)))
			z++;

	*zone = zonelist_zone(z);
	return z;
}