Commit 223cdea4 authored by Linus Torvalds
Browse files

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (53 commits)
  md/raid5 revise rules for when to update metadata during reshape
  md/raid5: minor code cleanups in make_request.
  md: remove CONFIG_MD_RAID_RESHAPE config option.
  md/raid5: be more careful about write ordering when reshaping.
  md: don't display meaningless values in sysfs files resync_start and sync_speed
  md/raid5: allow layout and chunksize to be changed on active array.
  md/raid5: reshape using largest of old and new chunk size
  md/raid5: prepare for allowing reshape to change layout
  md/raid5: prepare for allowing reshape to change chunksize.
  md/raid5: clearly differentiate 'before' and 'after' stripes during reshape.
  Documentation/md.txt update
  md: allow number of drives in raid5 to be reduced
  md/raid5: change reshape-progress measurement to cope with reshaping backwards.
  md: add explicit method to signal the end of a reshape.
  md/raid5: enhance raid5_size to work correctly with negative delta_disks
  md/raid5: drop qd_idx from r6_state
  md/raid6: move raid6 data processing to raid6_pq.ko
  md: raid5 run(): Fix max_degraded for raid level 4.
  md: 'array_size' sysfs attribute
  md: centralize ->array_sectors modifications
  ...
parents 31e6e2da c8f517c4
......@@ -164,15 +164,19 @@ All md devices contain:
raid_disks
a text file with a simple number indicating the number of devices
in a fully functional array. If this is not yet known, the file
will be empty. If an array is being resized (not currently
possible) this will contain the larger of the old and new sizes.
Some raid level (RAID1) allow this value to be set while the
array is active. This will reconfigure the array. Otherwise
it can only be set while assembling an array.
will be empty. If an array is being resized this will contain
the new number of devices.
Some raid levels allow this value to be set while the array is
active. This will reconfigure the array. Otherwise it can only
be set while assembling an array.
A change to this attribute will not be permitted if it would
reduce the size of the array. To reduce the number of drives
in an e.g. raid5, the array size must first be reduced by
setting the 'array_size' attribute.
chunk_size
This is the size if bytes for 'chunks' and is only relevant to
raid levels that involve striping (1,4,5,6,10). The address space
This is the size in bytes for 'chunks' and is only relevant to
raid levels that involve striping (0,4,5,6,10). The address space
of the array is conceptually divided into chunks and consecutive
chunks are striped onto neighbouring devices.
The size should be at least PAGE_SIZE (4k) and should be a power
......@@ -183,6 +187,20 @@ All md devices contain:
simply a number that is interpreted differently by different
levels. It can be written while assembling an array.
array_size
This can be used to artificially constrain the available space in
the array to be less than is actually available on the combined
devices. Writing a number (in Kilobytes) which is less than
the available size will set the size. Any reconfiguration of the
array (e.g. adding devices) will not cause the size to change.
Writing the word 'default' will cause the effective size of the
array to be whatever size is actually available based on
'level', 'chunk_size' and 'component_size'.
This can be used to reduce the size of the array before reducing
the number of devices in a raid4/5/6, or to support external
metadata formats which mandate such clipping.
reshape_position
This is either "none" or a sector number within the devices of
the array where "reshape" is up to. If this is set, the three
......@@ -207,6 +225,11 @@ All md devices contain:
about the array. It can be 0.90 (traditional format), 1.0, 1.1,
1.2 (newer format in varying locations) or "none" indicating that
the kernel isn't managing metadata at all.
Alternately it can be "external:" followed by a string which
is set by user-space. This indicates that metadata is managed
by a user-space program. Any device failure or other event that
requires a metadata update will cause array activity to be
suspended until the event is acknowledged.
resync_start
The point at which resync should start. If no resync is needed,
......
......@@ -18,8 +18,8 @@
#define BH_TRACE 0
#include <linux/module.h>
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <asm/xor.h>
/* The xor routines to use. */
......
......@@ -121,6 +121,7 @@ config MD_RAID10
config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
---help---
......@@ -151,34 +152,8 @@ config MD_RAID456
If unsure, say Y.
config MD_RAID5_RESHAPE
bool "Support adding drives to a raid-5 array"
depends on MD_RAID456
default y
---help---
A RAID-5 set can be expanded by adding extra drives. This
requires "restriping" the array which means (almost) every
block must be written to a different place.
This option allows such restriping to be done while the array
is online.
You will need mdadm version 2.4.1 or later to use this
feature safely. During the early stage of reshape there is
a critical section where live data is being over-written. A
crash during this time needs extra care for recovery. The
newer mdadm takes a copy of the data in the critical section
and will restore it, if necessary, after a crash.
The mdadm usage is e.g.
mdadm --grow /dev/md1 --raid-disks=6
to grow '/dev/md1' to having 6 disks.
Note: The array can only be expanded, not contracted.
There should be enough spares already present to make the new
array workable.
If unsure, say Y.
config MD_RAID6_PQ
tristate
config MD_MULTIPATH
tristate "Multipath I/O support"
......
......@@ -2,20 +2,21 @@
# Makefile for the kernel software RAID and LVM drivers.
#
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-objs := dm-raid1.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
dm-mirror-y += dm-raid1.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
raid6int8.o raid6int16.o raid6int32.o \
raid6altivec1.o raid6altivec2.o raid6altivec4.o \
raid6altivec8.o \
raid6mmx.o raid6sse1.o raid6sse2.o
hostprogs-y := mktables
hostprogs-y += mktables
# Note: link order is important. All raid personalities
# must come before md.o, as they each initialise
......@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
obj-$(CONFIG_MD_RAID456) += raid456.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
......
......@@ -16,6 +16,7 @@
* wait if count gets too high, wake when it drops to half.
*/
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
......@@ -26,8 +27,8 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>
#include "md.h"
#include "bitmap.h"
/* debug macros */
......@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
unsigned char *mappage;
if (page >= bitmap->pages) {
printk(KERN_ALERT
"%s: invalid bitmap page request: %lu (> %lu)\n",
bmname(bitmap), page, bitmap->pages-1);
/* This can happen if bitmap_start_sync goes beyond
* End-of-device while looking for a whole page.
* It is harmless.
*/
return -EINVAL;
}
......@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
list_for_each_continue_rcu(pos, &mddev->disks) {
rdev = list_entry(pos, mdk_rdev_t, same_set);
if (rdev->raid_disk >= 0 &&
test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) {
/* this is a usable device */
atomic_inc(&rdev->nr_pending);
......@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+ size/512 > 0)
/* bitmap runs in to metadata */
goto bad_alignment;
if (rdev->data_offset + mddev->size*2
if (rdev->data_offset + mddev->dev_sectors
> rdev->sb_start + bitmap->offset)
/* data runs in to bitmap */
goto bad_alignment;
......@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
reason = "unrecognized superblock version";
else if (chunksize < PAGE_SIZE)
else if (chunksize < 512)
reason = "bitmap chunksize too small";
else if ((1 << ffz(~chunksize)) != chunksize)
reason = "bitmap chunksize not a power of 2";
......@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
}
if (bitmap->mddev->degraded)
/* Never clear bits or update events_cleared when degraded */
success = 0;
while (sectors) {
int blocks;
......@@ -1345,8 +1349,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
}
}
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
int degraded)
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
int degraded)
{
bitmap_counter_t *bmc;
int rv;
......@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
return rv;
}
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
		      int degraded)
{
	/* Resync is very PAGE_SIZE based, so this function must always
	 * report on a multiple of whole pages; anything smaller would
	 * confuse it.
	 * Keep calling __bitmap_start_sync, stepping 'offset' past the
	 * range each call reported, until at least PAGE_SIZE>>9 blocks
	 * have been accumulated in '*blocks'.
	 * The value returned is the bitwise 'or' of every partial result.
	 */
	int result = 0;
	int step;

	for (*blocks = 0; *blocks < (PAGE_SIZE>>9); *blocks += step) {
		result |= __bitmap_start_sync(bitmap, offset, &step, degraded);
		offset += step;
	}
	return result;
}
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
bitmap_counter_t *bmc;
......@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0);
bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
......
File moved
......@@ -62,7 +62,10 @@
#define ModeShift 5
#define MaxFault 50
#include <linux/raid/md.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include "md.h"
#include <linux/seq_file.h>
static void faulty_fail(struct bio *bio, int error)
......@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
return 0;
}
/*
 * ->size method for the 'faulty' personality.
 *
 * Returns 'sectors' unchanged when it is non-zero, and falls back to
 * mddev->dev_sectors when it is zero.  A non-zero 'raid_disks' is not
 * supported here and is reported through WARN_ONCE; it does not affect
 * the value returned.
 */
static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	WARN_ONCE(raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return sectors ? sectors : mddev->dev_sectors;
}
static int run(mddev_t *mddev)
{
mdk_rdev_t *rdev;
......@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
list_for_each_entry(rdev, &mddev->disks, same_set)
conf->rdev = rdev;
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
mddev->private = conf;
reconfig(mddev, mddev->layout, -1);
......@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
.stop = stop,
.status = status,
.reconfig = reconfig,
.size = faulty_size,
};
static int __init raid_init(void)
......
......@@ -16,7 +16,11 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/linear.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include "md.h"
#include "linear.h"
/*
* find which device holds a particular offset
......@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
return ret;
}
/*
 * ->size method for the 'linear' personality.
 *
 * Always returns the array_sectors value stored in the linear
 * configuration attached to 'mddev'.  Neither 'sectors' nor
 * 'raid_disks' influences the result; a non-zero value for either is
 * reported through WARN_ONCE.
 */
static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	linear_conf_t *lconf;

	WARN_ONCE(raid_disks || sectors,
		  "%s does not support generic reshape\n", __func__);

	lconf = mddev_to_conf(mddev);
	return lconf->array_sectors;
}
static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
linear_conf_t *conf;
......@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->num_sectors = rdev->size * 2;
conf->array_sectors += rdev->size * 2;
disk->num_sectors = rdev->sectors;
conf->array_sectors += rdev->sectors;
cnt++;
}
......@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
if (!conf)
return 1;
mddev->private = conf;
mddev->array_sectors = conf->array_sectors;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
......@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
newconf->prev = mddev_to_conf(mddev);
mddev->private = newconf;
mddev->raid_disks++;
mddev->array_sectors = newconf->array_sectors;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
return 0;
}
......@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
.stop = linear_stop,
.status = linear_status,
.hot_add_disk = linear_add,
.size = linear_size,
};
static int __init linear_init (void)
......
#ifndef _LINEAR_H
#define _LINEAR_H
#include <linux/raid/md.h>
struct dev_info {
mdk_rdev_t *rdev;
sector_t num_sectors;
......
This diff is collapsed.
......@@ -15,21 +15,8 @@
#ifndef _MD_K_H
#define _MD_K_H
/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"
#ifdef CONFIG_BLOCK
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
/* we need a value for 'no level specified' and 0
* means 'raid0', so we need something else. This is
* for internal use only
*/
#define LEVEL_NONE (-1000000)
#define MaxSector (~(sector_t)0)
typedef struct mddev_s mddev_t;
......@@ -49,9 +36,9 @@ struct mdk_rdev_s
{
struct list_head same_set; /* RAID devices within the same set */
sector_t size; /* Device size (in blocks) */
sector_t sectors; /* Device size (in 512bytes sectors) */
mddev_t *mddev; /* RAID array if running */
long last_events; /* IO event timestamp */
int last_events; /* IO event timestamp */
struct block_device *bdev; /* block device handle */
......@@ -132,6 +119,8 @@ struct mddev_s
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* superblock update in progress */
int suspended;
atomic_t active_io;
int ro;
struct gendisk *gendisk;
......@@ -155,8 +144,11 @@ struct mddev_s
char clevel[16];
int raid_disks;
int max_disks;
sector_t size; /* used size of component devices */
sector_t dev_sectors; /* used size of
* component devices */
sector_t array_sectors; /* exported array size */
int external_size; /* size managed
* externally */
__u64 events;
char uuid[16];
......@@ -172,6 +164,13 @@ struct mddev_s
struct mdk_thread_s *thread; /* management thread */
struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
* everything completes, then set curr_resync_completed to curr_resync.
* As such it may be well behind the real resync mark, but it is a value
* we are certain of.
*/
sector_t curr_resync_completed;
unsigned long resync_mark; /* a recent timestamp */
sector_t resync_mark_cnt;/* blocks written at resync_mark */
sector_t curr_mark_cnt; /* blocks scheduled now */
......@@ -315,8 +314,10 @@ struct mdk_personality
int (*spare_active) (mddev_t *mddev);
sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
int (*resize) (mddev_t *mddev, sector_t sectors);
sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (mddev_t *mddev);
int (*start_reshape) (mddev_t *mddev);
void (*finish_reshape) (mddev_t *mddev);
int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
/* quiesce moves between quiescence states
* 0 - fully active
......@@ -324,6 +325,16 @@ struct mdk_personality
* others - reserved
*/
void (*quiesce) (mddev_t *mddev, int state);
/* takeover is used to transition an array from one
* personality to another. The new personality must be able
* to handle the data in the current layout.
* e.g. 2drive raid1 -> 2drive raid5
* ndrive raid5 -> degraded n+1drive raid6 with special layout
* If the takeover succeeds, a new 'private' structure is returned.
* This needs to be installed and then ->run used to activate the
* array.
*/
void *(*takeover) (mddev_t *mddev);
};
......@@ -400,3 +411,26 @@ static inline void safe_put_page(struct page *p)
#endif /* CONFIG_BLOCK */
#endif
extern int register_md_personality(struct mdk_personality *p);
extern int unregister_md_personality(struct mdk_personality *p);
extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
mddev_t *mddev, const char *name);
extern void md_unregister_thread(mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
extern void md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct page *page, int rw);
extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
......@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
uint8_t v;
uint8_t exptbl[256], invtbl[256];
printf("#include \"raid6.h\"\n");
printf("#include <linux/raid/pq.h>\n");
/* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -76,6 +76,9 @@ int main(int argc, char *argv[])
printf("\t},\n");
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfmul);\n");
printf("#endif\n");
/* Compute power-of-2 table (exponent) */
v = 1;
......@@ -92,6 +95,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfexp);\n");
printf("#endif\n");
/* Compute inverse table x^-1 == x^254 */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -104,6 +110,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfinv);\n");
printf("#endif\n");
/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -115,6 +124,9 @@ int main(int argc, char *argv[])
(j == 7) ? '\n' : ' ');
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfexi);\n");
printf("#endif\n");
return 0;
}
......@@ -19,7 +19,11 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/multipath.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include "md.h"
#include "multipath.h"
#define MAX_WORK_PER_DISK 128
......@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
/*
 * ->size method for the 'multipath' personality.
 *
 * The result is always mddev->dev_sectors.  The 'sectors' and
 * 'raid_disks' arguments are not used in the calculation; if either is
 * non-zero the call is reported through WARN_ONCE.
 */
static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	WARN_ONCE(sectors != 0 || raid_disks != 0,
		  "%s does not support generic reshape\n", __func__);

	return mddev->dev_sectors;
}
static int multipath_run (mddev_t *mddev)
{
multipath_conf_t *conf;
......@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
......@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality =
.error_handler = multipath_error,
.hot_add_disk = multipath_add_disk,
.hot_remove_disk= multipath_remove_disk,
.size = multipath_size,
};
static int __init multipath_init (void)
......
#ifndef _MULTIPATH_H
#define _MULTIPATH_H
#include <linux/raid/md.h>
struct multipath_info {
mdk_rdev_t *rdev;
};
......
......@@ -18,7 +18,10 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/raid0.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "raid0.h"
static void raid0_unplug(struct request_queue *q)
{
......@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev)
list_for_each_entry(rdev2, &mddev->disks, same_set) {
printk(KERN_INFO "raid0: comparing %s(%llu)",
bdevname(rdev1->bdev,b),
(unsigned long long)rdev1->size);
(unsigned long long)rdev1->sectors);
printk(KERN_INFO " with %s(%llu)\n",
bdevname(rdev2->bdev,b),
(unsigned long long)rdev2->size);
(unsigned long long)rdev2->sectors);
if (rdev2 == rdev1) {
printk(KERN_INFO "raid0: END\n");
break;
}
if (rdev2->size == rdev1->size)
{
if (rdev2->sectors == rdev1->sectors) {
/*
* Not unique, don't count it as a new
* group
......@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!smallest || (rdev1->size <smallest->size))
if (!smallest || (rdev1->sectors < smallest->sectors))
smallest = rdev1;
cnt++;
}
......@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev)
goto abort;
}
zone->nb_dev = cnt;
zone->sectors = smallest->size * cnt * 2;
zone->sectors = smallest->sectors * cnt;
zone->zone_start = 0;
current_start = smallest->size * 2;
current_start = smallest->sectors;
curr_zone_start = zone->sectors;
/* now do the other zones */
......@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev)
rdev = conf->strip_zone[0].dev[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
if (rdev->size > current_start / 2) {
printk(KERN_INFO " contained as device %d\n",
c);
zone->dev[c] = rdev;
c++;
if (!smallest || (rdev->size <smallest->size)) {
smallest = rdev;
printk(KERN_INFO " (%llu) is smallest!.\n",
(unsigned long long)rdev->size);
}
} else
if (rdev->sectors <= current_start) {
printk(KERN_INFO " nope.\n");
continue;
}
printk(KERN_INFO " contained as device %d\n", c);
zone->dev[c] = rdev;
c++;
if (!smallest || rdev->sectors < smallest->sectors) {
smallest = rdev;
printk(KERN_INFO " (%llu) is smallest!.\n",
(unsigned long long)rdev->sectors);
}
}
zone->nb_dev = c;
zone->sectors = (smallest->size * 2 - current_start) * c;
zone->sectors = (smallest->sectors - current_start) * c;
printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
zone->nb_dev, (unsigned long long)zone->sectors);
zone->zone_start = curr_zone_start;
curr_zone_start += zone->sectors;
current_start = smallest->size * 2;
current_start = smallest->sectors;
printk(KERN_INFO "raid0: current zone start: %llu\n",
(unsigned long long)current_start);
}
......@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q,
return max;
}
/*
 * ->size method for the raid0 personality.
 *
 * Walks every rdev on mddev->disks and returns the sum of their
 * 'sectors' fields.  'sectors' and 'raid_disks' do not take part in the
 * sum; a non-zero value for either is reported through WARN_ONCE.
 */
static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	mdk_rdev_t *member;
	sector_t total = 0;

	WARN_ONCE(raid_disks || sectors,
		  "%s does not support generic reshape\n", __func__);

	list_for_each_entry(member, &mddev->disks, same_set)
		total += member->sectors;

	return total;
}
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
s64 sectors;
raid0_conf_t *conf;
mdk_rdev_t *rdev;
if (mddev->chunk_size == 0) {
printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
......@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev)
goto out_free_conf;
/* calculate array device size */
mddev->array_sectors = 0;
list_for_each_entry(rdev, &mddev->disks, same_set)
mddev->array_sectors += rdev->size * 2;
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors);
printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
(unsigned long long)conf->spacing);
{
sector_t s = mddev->array_sectors;
sector_t s = raid0_size(mddev, 0, 0);
sector_t space = conf->spacing;
int round;
conf->sector_shift = 0;
......@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality=
.run = raid0_run,
.stop = raid0_stop,
.status = raid0_status,
.size = raid0_size,
};
static int __init raid0_init (void)
......
#ifndef _RAID0_H
#define _RAID0_H
#include <linux/raid/md.h>
struct strip_zone
{
sector_t zone_start; /* Zone offset in md_dev (in sectors) */
......
......@@ -31,10 +31,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "dm-bio-list.h"
#include <linux/delay.h>
#include <linux/raid/raid1.h>
#include <linux/raid/bitmap.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid1.h"
#include "bitmap.h"
#define DEBUG 0
#if DEBUG
......@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
}
max_sector = mddev->size << 1;
max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
......@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return nr_sectors;
}
/*
 * ->size method for the raid1 personality.
 *
 * Returns 'sectors' when the caller supplies a non-zero value, and
 * mddev->dev_sectors otherwise.  'raid_disks' is not consulted.
 */
static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	return sectors ? sectors : mddev->dev_sectors;
}
static int run(mddev_t *mddev)
{
conf_t *conf;
......@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
......@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev)
/* need to kick something here to make sure I/O goes? */
}
raise_barrier(conf);
lower_barrier(conf);
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
......@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
mddev->array_sectors = sectors;
md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
if (mddev->array_sectors / 2 > mddev->size &&
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->size = mddev->array_sectors / 2;
mddev->dev_sectors = sectors;
mddev->resync_max_sectors = sectors;
return 0;
}
......@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality =
.spare_active = raid1_spare_active,
.sync_request = sync_request,
.resize = raid1_resize,
.size = raid1_size,
.check_reshape = raid1_reshape,
.quiesce = raid1_quiesce,
};
......
#ifndef _RAID1_H
#define _RAID1_H
#include <linux/raid/md.h>
typedef struct mirror_info mirror_info_t;
struct mirror_info {
......
......@@ -18,10 +18,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "dm-bio-list.h"
#include <linux/delay.h>
#include <linux/raid/raid10.h>
#include <linux/raid/bitmap.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid10.h"
#include "bitmap.h"
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
......@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
skipped:
max_sector = mddev->size << 1;
max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
......@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
goto skipped;
}
/*
 * ->size method for the raid10 personality.
 *
 * A zero 'raid_disks' or 'sectors' is replaced by the corresponding
 * value from 'mddev' (raid_disks / dev_sectors).  The calculation is
 * done in whole chunks: 'sectors' is shifted down by chunk_shift,
 * divided by far_copies, multiplied by the number of disks, divided by
 * near_copies, and finally shifted back up by chunk_shift.
 */
static sector_t
raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	conf_t *conf = mddev_to_conf(mddev);
	sector_t chunks;

	if (raid_disks == 0)
		raid_disks = mddev->raid_disks;
	if (sectors == 0)
		sectors = mddev->dev_sectors;

	/* Each step truncates, so the order of operations matters. */
	chunks = sectors >> conf->chunk_shift;
	sector_div(chunks, conf->far_copies);
	chunks *= raid_disks;
	sector_div(chunks, conf->near_copies);

	return chunks << conf->chunk_shift;
}
static int run(mddev_t *mddev)
{
conf_t *conf;
......@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev)
conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
size = mddev->size >> (conf->chunk_shift-1);
size = mddev->dev_sectors >> conf->chunk_shift;
sector_div(size, fc);
size = size * conf->raid_disks;
sector_div(size, nc);
......@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev)
*/
stride += conf->raid_disks - 1;
sector_div(stride, conf->raid_disks);
mddev->size = stride << (conf->chunk_shift-1);
mddev->dev_sectors = stride << conf->chunk_shift;
if (fo)
stride = 1;
......@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = size << conf->chunk_shift;
mddev->resync_max_sectors = size << conf->chunk_shift;
md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
......@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev)
{
conf_t *conf = mddev_to_conf(mddev);
raise_barrier(conf, 0);
lower_barrier(conf);
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
......@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality =
.spare_active = raid10_spare_active,
.sync_request = sync_request,
.quiesce = raid10_quiesce,
.size = raid10_size,
};
static int __init raid_init(void)
......
#ifndef _RAID10_H
#define _RAID10_H
#include <linux/raid/md.h>
typedef struct mirror_info mirror_info_t;
struct mirror_info {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment