Changes - j0ke.net Open Build Service

Changes of Revision 2

[-] [+]	Changed	mdadm.changes
@@ -1,4 +1,9 @@ ------------------------------------------------------------------- +Sat May 12 16:21:24 UTC 2012 - cs@linux-administrator.com + +- update to upstream release 3.2.4 + +------------------------------------------------------------------- Thu Oct 6 02:16:35 UTC 2011 - nfbrown@suse.com - mkinitrd-setup.sh -- add "AUTO -all" to initrd mdadm.conf 1 @@ -1,4 +1,9 @@ 2 ------------------------------------------------------------------- 3 +Sat May 12 16:21:24 UTC 2012 - cs@linux-administrator.com 4 + 5 +- update to upstream release 3.2.4 6 + 7 +------------------------------------------------------------------- 8 Thu Oct 6 02:16:35 UTC 2011 - nfbrown@suse.com 9 10 - mkinitrd-setup.sh -- add "AUTO -all" to initrd mdadm.conf 11
[-] [+]	Changed	mdadm.spec ^
@@ -19,7 +19,7 @@ Name: mdadm -Version: 3.2.2 +Version: 3.2.4 Release: 1 BuildRequires: sgmltool PreReq: %fillup_prereq %insserv_prereq @@ -37,8 +37,9 @@ Source4: boot.md Source5: mkinitrd-setup.sh Source6: mkinitrd-boot.sh -Patch0: mdadm-3.2.2_git3b1dab1bdbda0 -Patch1: auto-line.fix +# disabled with update to 3.2.4 +#Patch0: mdadm-3.2.2_git3b1dab1bdbda0 +#Patch1: auto-line.fix %description Mdadm is a program that can be used to control Linux md devices. It is @@ -53,8 +54,6 @@ %prep %setup -q -a1 -%patch0 -p1 -%patch1 -p1 %build %{suse_update_config -f} 31 1 @@ -19,7 +19,7 @@ 2 3 4 Name: mdadm 5 -Version: 3.2.2 6 +Version: 3.2.4 7 Release: 1 8 BuildRequires: sgmltool 9 PreReq: %fillup_prereq %insserv_prereq 10 @@ -37,8 +37,9 @@ 11 Source4: boot.md 12 Source5: mkinitrd-setup.sh 13 Source6: mkinitrd-boot.sh 14 -Patch0: mdadm-3.2.2_git3b1dab1bdbda0 15 -Patch1: auto-line.fix 16 +# disabled with update to 3.2.4 17 +#Patch0: mdadm-3.2.2_git3b1dab1bdbda0 18 +#Patch1: auto-line.fix 19 20 %description 21 Mdadm is a program that can be used to control Linux md devices. It is 22 @@ -53,8 +54,6 @@ 23 24 %prep 25 %setup -q -a1 26 -%patch0 -p1 27 -%patch1 -p1 28 29 %build 30 %{suse_update_config -f} 31
[-] [+]	Deleted	auto-line.fix ^
@@ -1,125 +0,0 @@ -From b451aa4846c5ccca5447a6b6d45e5623b8c8e961 Mon Sep 17 00:00:00 2001 -From: NeilBrown <neilb@suse.de> -Date: Thu, 6 Oct 2011 13:00:28 +1100 -Subject: [PATCH] Fix handling for "auto" line in mdadm.conf - -Two problems. - -1/ pol_merge was ignoring the pol_auto tag so any 'auto' information - was lost -2/ If a device had not path (e.g. loop devices) or if there were no - path-based policies, we didn't bother looking for policy at all. - So path-independant policies were ignored. - -Reported-by: Christian Boltz <suse-beta@cboltz.de> -Signed-off-by: NeilBrown <neilb@suse.de> ---- - policy.c \| 35 ++++++++++++++++++++++------------- - 1 files changed, 22 insertions(+), 13 deletions(-) - -diff --git a/policy.c b/policy.c -index 4a6ef82..ef48353 100644 ---- a/policy.c -+++ b/policy.c -@@ -195,7 +195,9 @@ static char disk_path(struct mdinfo disk) - int prefix_len; - DIR by_path; - char symlink[PATH_MAX] = "/dev/disk/by-path/"; -+ char nm[PATH_MAX]; - struct dirent ent; -+ int rv; - - by_path = opendir(symlink); - if (!by_path) -@@ -218,7 +220,17 @@ static char disk_path(struct mdinfo disk) - return strdup(ent->d_name); - } - closedir(by_path); -- return NULL; -+ /* A NULL path isn't really acceptable - use the devname.. 
/ -+ sprintf(symlink, "/sys/dev/block/%d:%d", disk->disk.major, disk->disk.minor); -+ rv = readlink(symlink, nm, sizeof(nm)); -+ if (rv > 0) { -+ char dname; -+ nm[rv] = 0; -+ dname = strrchr(nm, '/'); -+ if (dname) -+ return strdup(dname + 1); -+ } -+ return strdup("unknown"); - } - - char type_part[] = "part"; -@@ -245,13 +257,13 @@ static int pol_match(struct rule rule, char path, char type) - if (rule->name == rule_path) { - if (pathok == 0) - pathok = -1; -- if (fnmatch(rule->value, path, 0) == 0) -+ if (path && fnmatch(rule->value, path, 0) == 0) - pathok = 1; - } - if (rule->name == rule_type) { - if (typeok == 0) - typeok = -1; -- if (strcmp(rule->value, type) == 0) -+ if (type && strcmp(rule->value, type) == 0) - typeok = 1; - } - rule = rule->next; -@@ -270,7 +282,8 @@ static void pol_merge(struct dev_policy pol, struct rule rule) - - for (r = rule; r ; r = r->next) - if (r->name == pol_act \|\| -- r->name == pol_domain) -+ r->name == pol_domain \|\| -+ r->name == pol_auto) - pol_new(pol, r->name, r->value, metadata); - } - -@@ -280,7 +293,10 @@ static int path_has_part(char path, char part) - if it does, place a pointer to "-pathNN" - * in 'part'. 
- / -- int l = strlen(path); -+ int l; -+ if (!path) -+ return 0; -+ l = strlen(path); - while (l > 1 && isdigit(path[l-1])) - l--; - if (l < 5 \|\| strncmp(path+l-5, "-part", 5) != 0) -@@ -343,9 +359,6 @@ struct dev_policy path_policy(char path, char type) - struct dev_policy pol = NULL; - int i; - -- if (!type) -- return NULL; -- - rules = config_rules; - - while (rules) { -@@ -366,7 +379,7 @@ struct dev_policy path_policy(char path, char type) - /* Now add any metadata-specific internal knowledge - * about this path - / -- for (i=0; superlist[i]; i++) -+ for (i=0; path && superlist[i]; i++) - if (superlist[i]->get_disk_controller_domain) { - const char d = - superlist[i]->get_disk_controller_domain(path); -@@ -399,12 +412,8 @@ struct dev_policy disk_policy(struct mdinfo disk) - char type = disk_type(disk); - struct dev_policy pol = NULL; - -- if (!type) -- return NULL; - if (config_rules_has_path) - path = disk_path(disk); -- if (!path) -- return NULL; - - pol = path_policy(path, type); - --- -1.7.6.4 -
[-] [+]	Deleted	mdadm-3.2.2_git3b1dab1bdbda0 ^
@@ -1,1003 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index 2503bd8..7200741 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -2,6 +2,7 @@ - /.man - /-stamp - /mdadm -+/mdadm.8 - /mdadm.udeb - /mdmon - /swap_super -diff --git a/COPYING b/COPYING -index 60549be..d159169 100644 ---- a/COPYING -+++ b/COPYING -@@ -1,12 +1,12 @@ -- GNU GENERAL PUBLIC LICENSE -- Version 2, June 1991 -+ GNU GENERAL PUBLIC LICENSE -+ Version 2, June 1991 - -- Copyright (C) 1989, 1991 Free Software Foundation, Inc. -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., -+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -- Preamble -+ Preamble - - The licenses for most software are designed to take away your - freedom to share and change it. By contrast, the GNU General Public -@@ -15,7 +15,7 @@ software--to make sure the software is free for all its users. This - General Public License applies to most of the Free Software - Foundation's software and to any other program whose authors commit to - using it. (Some other Free Software Foundation software is covered by --the GNU Library General Public License instead.) You can apply it to -+the GNU Lesser General Public License instead.) You can apply it to - your programs, too. - - When we speak of free software, we are referring to freedom, not -@@ -55,8 +55,8 @@ patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and - modification follow. -- -- GNU GENERAL PUBLIC LICENSE -+ -+ GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -@@ -110,7 +110,7 @@ above, provided that you also meet all of these conditions: - License. 
(Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) -- -+ - These requirements apply to the modified work as a whole. If - identifiable sections of that work are not derived from the Program, - and can be reasonably considered independent and separate works in -@@ -168,7 +168,7 @@ access to copy from a designated place, then offering equivalent - access to copy the source code from the same place counts as - distribution of the source code, even though third parties are not - compelled to copy the source along with the object code. -- -+ - 4. You may not copy, modify, sublicense, or distribute the Program - except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense or distribute the Program is -@@ -225,7 +225,7 @@ impose that choice. - - This section is intended to make thoroughly clear what is believed to - be a consequence of the rest of this License. -- -+ - 8. If the distribution and/or use of the Program is restricted in - certain countries either by patents or by copyrighted interfaces, the - original copyright holder who places the Program under this License -@@ -255,7 +255,7 @@ make exceptions for this. Our decision will be guided by the two goals - of preserving the free status of all derivatives of our free software and - of promoting the sharing and reuse of software generally. - -- NO WARRANTY -+ NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY - FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -@@ -277,9 +277,9 @@ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER - PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE - POSSIBILITY OF SUCH DAMAGES. 
- -- END OF TERMS AND CONDITIONS -- -- How to Apply These Terms to Your New Programs -+ END OF TERMS AND CONDITIONS -+ -+ How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest - possible use to the public, the best way to achieve this is to make it -@@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least - the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> -- Copyright (C) 19yy <name of author> -+ Copyright (C) <year> <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by -@@ -303,17 +303,16 @@ the "copyright" line and a pointer to where the full notice is found. - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - -- You should have received a copy of the GNU General Public License -- along with this program; if not, write to the Free Software -- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- -+ You should have received a copy of the GNU General Public License along -+ with this program; if not, write to the Free Software Foundation, Inc., -+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - Also add information on how to contact you by electronic and paper mail. - - If the program is interactive, make it output a short notice like this - when it starts in an interactive mode: - -- Gnomovision version 69, Copyright (C) 19yy name of author -+ Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. -@@ -336,5 +335,5 @@ necessary. 
Here is a sample; alter the names: - This General Public License does not permit incorporating your program into - proprietary programs. If your program is a subroutine library, you may - consider it more useful to permit linking proprietary applications with the --library. If this is what you want to do, use the GNU Library General -+library. If this is what you want to do, use the GNU Lesser General - Public License instead of this License. -diff --git a/Create.c b/Create.c -index 48115db..8d88aa1 100644 ---- a/Create.c -+++ b/Create.c -@@ -856,15 +856,6 @@ int Create(struct supertype st, char mddev, - /* getinfo_super might have lost these ... / - inf->disk.major = major(stb.st_rdev); - inf->disk.minor = minor(stb.st_rdev); -- / FIXME the following should not be needed -- * as getinfo_super is suppose to set -- * them. However it doesn't for imsm, -- * so we have this hack for now -- / -- if (st->ss == &super_imsm) { -- inf->disk.number = dnum; -- inf->disk.raid_disk = dnum; -- } - } - break; - case 2: -diff --git a/Detail.c b/Detail.c -index 375189d..ca34abe 100644 ---- a/Detail.c -+++ b/Detail.c -@@ -372,11 +372,13 @@ int Detail(char dev, int brief, int export, int test, char homehost) - else - st = ", degraded"; - -- printf(" State : %s%s%s%s\n", -- (array.state&(1<<MD_SB_CLEAN))?"clean":"active", -- st, -- (!e \|\| e->percent < 0) ? "" : sync_action[e->resync], -- larray_size ? "": ", Not Started"); -+ printf(" State : %s%s%s%s%s%s \n", -+ (array.state&(1<<MD_SB_CLEAN))?"clean":"active", st, -+ (!e \|\| (e->percent < 0 && e->percent != PROCESS_PENDING && -+ e->percent != PROCESS_DELAYED)) ? "" : sync_action[e->resync], -+ larray_size ? "": ", Not Started", -+ e->percent == PROCESS_DELAYED ? " (DELAYED)": "", -+ e->percent == PROCESS_PENDING ? 
" (PENDING)": ""); - } - if (array.raid_disks) - printf(" Active Devices : %d\n", array.active_disks); -@@ -416,10 +418,8 @@ int Detail(char dev, int brief, int export, int test, char homehost) - } - - if (e && e->percent >= 0) { -- printf(" Re%s Status : %d%% complete\n", -- (st && st->sb && info->reshape_active)? -- "shape":"build", -- e->percent); -+ static char sync_action[] = {"Rebuild", "Resync", "Reshape", "Check"}; -+ printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent); - is_rebuilding = 1; - } - free_mdstat(ms); -@@ -430,12 +430,9 @@ This is pretty boring - printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/.gitignore ^
@@ -2,6 +2,7 @@ /.man /-stamp /mdadm +/mdadm.8 /mdadm.udeb /mdmon /swap_super
[-] [+]	Added	mdadm-3.2.4.tar.bz2/ANNOUNCE-3.2.3 ^
@@ -0,0 +1,24 @@ +Subject: ANNOUNCE: mdadm 3.2.3 - A tool for managing Soft RAID under Linux + +I am pleased to announce the availability of + mdadm version 3.2.3 + +It is available at the usual places: + countrycode=xx. + http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ +and via git at + git://neil.brown.name/mdadm + http://neil.brown.name/git/mdadm + +This release is largely a bugfix release for the 3.2 series with many +minor fixes with little or no impact. + +The largest single area of change is support for reshape of Intel +IMSM arrays (OnLine Capacity Expansion and Level Migration). +Among other fixes, this now has a better chance of surviving if a +device fails during reshape. + +Upgrading is recommended - particularly if you use mdadm for IMSM +arrays - but not essential. + +NeilBrown 23rd December 2011
[-] [+]	Added	mdadm-3.2.4.tar.bz2/ANNOUNCE-3.2.4 ^
@@ -0,0 +1,144 @@ +Subject: ANNOUNCE: mdadm 3.2.4 - A tool for managing Soft RAID under Linux + +I am pleased to announce the availability of + mdadm version 3.2.4 + +It is available at the usual places, now including github: + countrycode=xx. + http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ +and via git at + git://github.com/neilbrown/mdadm + git://neil.brown.name/mdadm + http://neil.brown.name/git/mdadm + +This release is largely a bugfix release for the 3.2 series with many +minor fixes with little or no impact. + +"--oneline" log of changes is below. Some notable ones are: + + - --offroot argument to improve interactions between mdmon and initrd + - --prefer argument to select which /dev names to display in some + circumstances. + - relax restrictions on when "--add" will be allowed + - Fix bug with adding write-intent-bitmap to active array + - Now defaults to "/run/mdadm" for storing run-time files. + +Upgrading is encouraged. + +The next mdadm release is expected to be 3.3 with a number of new +features. + +NeilBrown 9th May 2012 + +77b3ac8 monitor: make return from read_and_act more symbolic. +68226a8 monitor: ensure we retry soon when 'remove' fails. +8453f8d fix: Monitor sometimes crashes +90fa1a2 Work around gcc-4.7's strict aliasing checks +0c4304c fix: container creation with --incremental used. 
+5d1c7cd FIX: External metadata sometimes is not updated +3c20f98 FIX: mdmon check in reshape_container() can cause a problem +59ab9f5 FIX: Typo error in fprint command +9587c37 imsm: load_super_imsm_all function refactoring +ec50f7b imsm: load_imsm_super_all supports loading metadata from the device list +ca9de18 imsm: validate the number of imsm volumes per controller +30602f5 imsm: display fd in error trace when when store_imsm_mpb failes +eb155f6 mdmon: Use getopt_long() to parse command line options +08ca2ad Add --offroot argument to mdadm +da82751 Add --offroot argument to mdmon +a0963a8 Spawn mdmon with --offroot if mdadm was launched with --offroot +f878b24 imsm: fix, the second array need to have the whole available space on devices +d597705 getinfo_super1: Use MaxSector in place of sb->size +6ef8905 super1: make aread/awrite always use an aligned buffer. +de5a472 Remove avail_disks arg from 'enough'. +da8fe5a Assemble: fix --force assemble during reshape. +b10c663 config: fix handing of 'homehost' in AUTO line. +92d49ec FIX: NULL pointer to strdup() can be passed +d2bde6d imsm: FIX: No new missing disks are allowed during general migration +111e9fd FIX: Array is not run when expansion disks are added +bf5cf7c imsm: FIX: imsm_get_allowed_degradation() doesn't count degradation for raid1 +50927b1 Fix: Sometimes mdmon throws core dump during reshape +78340e2 Flush mdmon before next reshape step during container operation +e174219 imsm: FIX: Chunk size migration problem +f93346e FIX: use md position to reshape restart +6a75c8c imsm: FIX: use md position to reshape restart +51d83f5 imsm: FIX: Clear migration record when migration switches to next volume. +e1dd332 FIX: restart reshape when reshape process is stopped just between 2 reshapes +1ca90aa FIX: Do not try to (continue) reshape using inactive array +9f1b0f0 config: conf_match should ignore devname when not set. 
+d669228 Use posix_memalign() for memory used to write bitmaps +178950e FIX: Changes in '0' case for reshape position verification +9200d41 avoid double-free upon "old buggy kernel" sysfs_read failure +4011421 Print error message if failing to write super for 1.x metadata +0011874 Use MDMON_DIR for pid files created in Monitor.c +56d1885 Assemble: don't use O_EXCL until we have checked device content. +b720636 Assemble: support assembling of a RAID0 being reshaped. +c69ffac Manage: allow --re-add to failed array. +52f07f5 Reset bad flag on map update +911cead super1: support superblocks up to 4K. +ad6db3c Create: reduce the verbosity of 'default_layout'. +b2bfdfa super1.c don't keep recalculating bitmap pointer +4122675 Define and use SUPER1_SIZE for allocations +1afa930 init_super1() memset full buffer allocated for superblock +2de0b8a match_metadata_desc1(): Use calloc instead of malloc+memset +3c0bcd4 Use 4K buffer alignment for superblock allocations +308340a Use struct align_fd to cache fd's block size for aligned reads/writes +65ed615 match_metadata_desc0(): Use calloc instead of malloc+memset +de89706 Generalize ROUND_UP() macro and introduce matching ROUND_UP_PTR() +0a2f189 super1.c: use ROUND_UP/ROUND_UP_PTR +654a381 super-intel.c: Use ROUND_UP() instead of manually coding it +42d5dfd __write_init_super_ddf(): Use posix_memalign() instead of static aligned buffer +d4633e0 Examine: fix array size calculation for RAID10. +e62b778 Assemble: improve verbose logging when including old devices. +0073a6e Remove possible crash during RAID6 -> RAID5 reshape. +69fe207 Incremental: fix adding devices with --incremental +bcbb311 Manage: replace 'return 1' with 'goto abort'. +9f58469 Manage: freeze recovery while adding multiple devices. +ae6c05a Create: round off size for RAID1 arrays. +5ca3a90 Grow: print useful error when converting RAID1->RAID5 will fail. +c07d640 Fix tests/05r1-re-add-nosupper +2d762ad Fix the new ROUND_UP macro. 
+fd324b0 sysfs: fixed sysfs_freeze_array array to work properly with Manage_subdevs. +5551b11 imsm: avoid overflows for disks over 1TB +97f81ee clear hi bits if not used after loading metadata from disk +e03640b simplify calculating array_blocks +29cd082 show 2TB volumes/disks support in --detail-platform +2cc699a check volume size in validate_geometry_imsm_orom +9126b9a check that no disk over 2TB is used to create container when no support +027c374 imsm: set 2tb disk attribute for spare +3556c2f Fix typo: wan -> want +15632a9 parse_size: distinguish between 0 and error. +fbdef49 Bitmap_offset is a signed number +508a7f1 super1: leave more space in front of data by default. +40110b9 Fix two typos in fprintf messages +342460c mdadm man page: fix typo +0e7f69a imsm: display maximum volumes per controller and array +36fd8cc imsm: FIX: Update function imsm_num_data_members() for Raid1/10 +7abc987 imsm: FIX: Add volume size expand support to imsm_analyze_change() +f3871fd imsm: Add new metadata update for volume size expansion +54397ed imsm: Execute size change for external metatdata +016e00f FIX: Support metadata changes rollback +fbf3d20 imsm: FIX: Support metadata changes rollback +44f6f18 FIX: Extend size of raid0 array +7e7e9a4 FIX: Respect metadata size limitations +65a9798 FIX: Detect error and rollback metadata +13bcac9 imsm: Add function imsm_get_free_size() +b130333 imsm: Support setting max size for size change operation +c41e00b imsm: FIX: Component size alignment check +58d26a2 FIX: Size change is possible as standalone change only +4aecb54 FIX: Assembled second array is in read only state during reshape +ae2416e FIX: resolve make everything compilation error +480f356 Raid limit of 1024 when scanning for devices. +c2ecf5f Add --prefer option for --detail and --monitor +0a99975 Relax restrictions on when --add is permitted. 
+7ce0570 imsm: fix: rebuild does not continue after reboot +b51702b fix: correct extending size of raid0 array +34a1395 Fix sign extension of bitmap_offset in super1.c +012a864 Introduce sysfs_set_num_signed() and use it to set bitmap/offset +5d7b407 imsm: fix: thunderdome may drop 2tb attribute +5ffdc2d Update test for "is udev active". +96fd06e Adjust to new standard of /run +974e039 test: don't worry too much about array size. +b0a658f Grow: failing the set the per-device size is not an error. +36614e9 super-intel.c: Don't try to close negative fd +562aa10 super-intel.c: Fix resource leak from opendir() +
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Assemble.c ^
@@ -138,7 +138,7 @@ char backup_file, int invalid_backup, int readonly, int runstop, char update, char homehost, int require_homehost, - int verbose, int force) + int verbose, int force, int freeze_reshape) { / * The task of Assemble is to find a collection of @@ -220,7 +220,9 @@ int change = 0; int inargv = 0; int report_missmatch; +#ifndef MDASSEMBLE int bitmap_done; +#endif int start_partial_ok = (runstop >= 0) && (force \|\| devlist==NULL \|\| auto_assem); unsigned int num_devs; @@ -293,7 +295,7 @@ char devname = tmpdev->devname; int dfd; struct stat stb; - struct supertype tst = dup_super(st); + struct supertype tst; struct dev_policy pol = NULL; int found_container = 0; @@ -306,7 +308,9 @@ continue; } - dfd = dev_open(devname, O_RDONLY\|O_EXCL); + tst = dup_super(st); + + dfd = dev_open(devname, O_RDONLY); if (dfd < 0) { if (report_missmatch) fprintf(stderr, Name ": cannot open device %s: %s\n", @@ -404,6 +408,17 @@ /* tmpdev is a container. We need to be either * looking for a member, or auto-assembling / + / should be safe to try an exclusive open now, we + * have rejected anything that some other mdadm might + * be looking at + / + dfd = dev_open(devname, O_RDONLY \| O_EXCL); + if (dfd < 0) { + if (report_missmatch) + fprintf(stderr, Name ": %s is busy - skipping\n", devname); + goto loop; + } + close(dfd); if (ident->container) { if (ident->container[0] == '/' && @@ -439,13 +454,6 @@ content; content = content->next) { - / do not assemble arrays that might have bad blocks / - if (content->array.state & (1<<MD_SB_BBM_ERRORS)) { - fprintf(stderr, Name ": BBM log found in metadata. " - "Cannot activate array(s).\n"); - tmpdev->used = 2; - goto loop; - } if (!ident_matches(ident, content, tst, homehost, update, report_missmatch ? 
devname : NULL)) @@ -455,6 +463,11 @@ fprintf(stderr, Name ": member %s in %s is already assembled\n", content->text_version, devname); + } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) { + / do not assemble arrays with unsupported configurations / + fprintf(stderr, Name ": Cannot activate member %s in %s.\n", + content->text_version, + devname); } else break; } @@ -490,6 +503,18 @@ report_missmatch ? devname : NULL)) goto loop; + / should be safe to try an exclusive open now, we + * have rejected anything that some other mdadm might + * be looking at + / + dfd = dev_open(devname, O_RDONLY \| O_EXCL); + if (dfd < 0) { + if (report_missmatch) + fprintf(stderr, Name ": %s is busy - skipping\n", devname); + goto loop; + } + close(dfd); + if (st == NULL) st = dup_super(tst); if (st->minor_version == -1) @@ -697,14 +722,13 @@ int err; err = assemble_container_content(st, mdfd, content, runstop, chosen_name, verbose, - backup_file); + backup_file, freeze_reshape); close(mdfd); return err; } + bitmap_done = 0; #endif / Ok, no bad inconsistancy, we can try updating etc / - bitmap_done = 0; - content->update_private = NULL; devices = malloc(num_devs sizeof(devices)); devmap = calloc(num_devs content->array.raid_disks, 1); for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) { @@ -889,8 +913,6 @@ } devcnt++; } - free(content->update_private); - content->update_private = NULL; if (devcnt == 0) { fprintf(stderr, Name ": no devices found for %s\n", @@ -935,7 +957,7 @@ } continue; } - /* If this devices thinks that 'most_recent' has failed, then + /* If this device thinks that 'most_recent' has failed, then * we must reject this device. 
/ if (j != most_recent && @@ -954,7 +976,9 @@ if (i < content->array.raid_disks) { if (devices[j].i.recovery_start == MaxSector \|\| (content->reshape_active && - j >= content->array.raid_disks - content->delta_disks)) { + ((i >= content->array.raid_disks - content->delta_disks) \|\| + (i >= content->array.raid_disks - content->delta_disks - 1 + && content->array.level == 4)))) { okcnt++; avail[i]=1; } else @@ -964,9 +988,17 @@ } } free(devmap); - while (force && !enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, - avail, okcnt)) { + while (force && + (!enough(content->array.level, content->array.raid_disks, + content->array.layout, 1, + avail) + \|\| + (content->reshape_active && content->delta_disks > 0 && + !enough(content->array.level, (content->array.raid_disks + - content->delta_disks), + content->new_layout, 1, + avail) + ))) { / Choose the newest best drive which is * not up-to-date, update the superblock * and add it. @@ -1133,7 +1165,7 @@ if (force && !clean && !enough(content->array.level, content->array.raid_disks, content->array.layout, clean, - avail, okcnt)) { + avail)) { change += st->ss->update_super(st, content, "force-array", devices[chosen_drive].devname, verbose, 0, NULL); @@ -1302,9 +1334,11 @@ sparecnt--; } else if (verbose > 0) fprintf(stderr, Name ": added %s " - "to %s as %d\n", + "to %s as %d%s\n", devices[j].devname, mddev, - devices[j].i.disk.raid_disk); + devices[j].i.disk.raid_disk, + devices[j].uptodate?"": + " (possibly out of date)"); } else if (verbose > 0 && i < content->array.raid_disks) fprintf(stderr, Name ": no uptodate device for " "slot %d of %s\n", @@ -1332,7 +1366,7 @@ if (runstop == 1 \|\| (runstop <= 0 && ( enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, avail, okcnt) && + content->array.layout, clean, avail) && (okcnt + rebuilding_cnt >= req_cnt \|\| start_partial_ok) ))) { /* This array is good-to-go. 
@@ -1343,9 +1377,14 @@ int rv; #ifndef MDASSEMBLE if (content->reshape_active && - content->delta_disks <= 0) - rv = Grow_continue(mdfd, st, content, backup_file); - else + content->delta_disks <= 0) { + rv = sysfs_set_str(content, NULL, + "array_state", "readonly"); + if (rv == 0) + rv = Grow_continue(mdfd, st, content, + backup_file, + freeze_reshape); + } else #endif rv = ioctl(mdfd, RUN_ARRAY, NULL); if (rv == 0) { @@ -1372,6 +1411,7 @@ sysfs_set_num(sra, NULL, "stripe_cache_size", (4 * content->array.chunk_size / 4096) + 1); + sysfs_free(sra); } } if (okcnt < (unsigned)content->array.raid_disks) { @@ -1432,13 +1472,13 @@ mddev, strerror(errno)); if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, avail, okcnt)) + content->array.layout, 1, avail)) fprintf(stderr, Name ": Not enough devices to " "start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, content->array.layout, clean, - avail, okcnt)) + avail)) fprintf(stderr, Name ": Not enough devices to " "start the array while not clean " "- consider --force.\n"); @@ -1466,12 +1506,12 @@ if (sparecnt) fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, avail, okcnt)) + content->array.layout, 1, avail)) fprintf(stderr, " - not enough to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, content->array.layout, clean, - avail, okcnt)) + avail)) fprintf(stderr, " - not enough to start the " "array while not clean - consider " "--force.\n"); @@ -1511,22 +1551,36 @@ int assemble_container_content(struct supertype st, int mdfd, struct mdinfo content, int runstop, char chosen_name, int verbose, - char backup_file) + char backup_file, int freeze_reshape) { struct mdinfo dev, sra; int working = 0, preexist = 0; int expansion = 0; struct map_ent map = NULL; int old_raid_disks; + int start_reshape; sysfs_init(content, 
mdfd, 0); sra = sysfs_read(mdfd, 0, GET_VERSION); if (sra == NULL \|\| strcmp(sra->text_version, content->text_version) != 0) - if (sysfs_set_array(content, md_get_version(mdfd)) != 0) + if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { + if (sra) + sysfs_free(sra); return 1; + } - if (content->reshape_active) + /* There are two types of reshape: container wide or sub-array specific + * Check if metadata requests blocking container wide reshapes + / + start_reshape = (content->reshape_active && + !((content->reshape_active == CONTAINER_RESHAPE) && + (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE)))); + + / Block subarray here if it is under reshape now + * Do not allow for any changes in this array + / + if (st->ss->external && content->recovery_blocked && start_reshape) block_subarray(content); if (sra) @@ -1541,7 +1595,7 @@ working++; } else if (errno == EEXIST) preexist++; - if (working == 0) + if (working + expansion == 0) return 1;/ Nothing new, don't try to start / map_update(&map, fd2devnum(mdfd), @@ -1553,47 +1607,29 @@ content->array.working_disks) { int err; - if (content->reshape_active) { + if (start_reshape) { int spare = content->array.raid_disks + expansion; - int i; - int fdlist = malloc(sizeof(int) * - (working + expansion - + content->array.raid_disks)); - for (i=0; i<spare; i++) - fdlist[i] = -1; - for (dev = content->devs; dev; dev = dev->next) { - char buf[20]; - int fd; - sprintf(buf, "%d:%d", - dev->disk.major, - dev->disk.minor); - fd = dev_open(buf, O_RDWR); + if (restore_backup(st, content, + working, + spare, backup_file, verbose) == 1) + return 1; - if (dev->disk.raid_disk >= 0) - fdlist[dev->disk.raid_disk] = fd; - else - fdlist[spare++] = fd; - } - if (st->ss->external && st->ss->recover_backup) - err = st->ss->recover_backup(st, content); - else - err = Grow_restart(st, content, fdlist, spare, - backup_file, verbose > 0); - while (spare > 0) { - spare--; - if (fdlist[spare] >= 0) - close(fdlist[spare]); - } - if 
(err) { - fprintf(stderr, Name ": Failed to restore critical" - " section for reshape - sorry.\n"); - if (!backup_file) - fprintf(stderr, Name ": Possibly you need" - " to specify a --backup-file\n"); + err = sysfs_set_str(content, NULL, + "array_state", "readonly"); + if (err) return 1; + + if (st->ss->external) { + if (!mdmon_running(st->container_dev)) + start_mdmon(st->container_dev); + ping_monitor_by_id(st->container_dev); + if (mdmon_running(st->container_dev) && + st->update_tail == NULL) + st->update_tail = &st->updates; } - err = Grow_continue(mdfd, st, content, backup_file); + err = Grow_continue(mdfd, st, content, backup_file, + freeze_reshape); } else switch(content->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: @@ -1614,15 +1650,26 @@ } if (!err) sysfs_set_safemode(content, content->safe_mode_delay); + + /* Block subarray here if it is not reshaped now + * It has be blocked a little later to allow mdmon to switch in + * in to R/W state + / + if (st->ss->external && content->recovery_blocked && + !start_reshape) + block_subarray(content); + if (verbose >= 0) { if (err) fprintf(stderr, Name - ": array %s now has %d devices", - chosen_name, working + preexist); + ": array %s now has %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); else fprintf(stderr, Name - ": Started %s with %d devices", - chosen_name, working + preexist); + ": Started %s with %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); if (preexist) fprintf(stderr, " (%d new)", working); if (expansion) @@ -1635,11 +1682,15 @@ return err; / FIXME should have an O_EXCL and wait for read-auto */ } else { - if (verbose >= 0) + if (verbose >= 0) { fprintf(stderr, Name - ": %s assembled with %d devices but " - "not started\n", - chosen_name, working); + ": %s assembled with %d device%s", + chosen_name, preexist + working, + preexist + working == 1 ? 
"":"s"); + if (preexist) + fprintf(stderr, " (%d new)", working); + fprintf(stderr, " but not started\n"); + } return 1; } }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/COPYING ^
@@ -1,12 +1,12 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - Preamble + Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public @@ -15,7 +15,7 @@ General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to +the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not @@ -55,8 +55,8 @@ The precise terms and conditions for copying, distribution and modification follow. - - GNU GENERAL PUBLIC LICENSE + + GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains @@ -110,7 +110,7 @@ License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) - + These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in @@ -168,7 +168,7 @@ access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. - + 4. 
You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is @@ -225,7 +225,7 @@ This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. - + 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License @@ -255,7 +255,7 @@ of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. - NO WARRANTY + NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN @@ -277,9 +277,9 @@ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it @@ -291,7 +291,7 @@ the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> + Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -303,17 +303,16 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: - Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. @@ -336,5 +335,5 @@ This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General +library. If this is what you want to do, use the GNU Lesser General Public License instead of this License.
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Create.c ^
@@ -280,6 +280,13 @@ if (size == 0) { size = newsize / 2; + if (level == 1) + /* If this is ever reshaped to RAID5, we will + * need a chunksize. So round it off a bit + * now just to be safe + / + size &= ~(64ULL-1); + if (size && verbose > 0) fprintf(stderr, Name ": setting size to %lluK\n", (unsigned long long)size); @@ -332,15 +339,25 @@ char name = "default"; for(i=0; !st && superlist[i]; i++) { st = superlist[i]->match_metadata_desc(name); + if (!st) + continue; if (do_default_layout) layout = default_layout(st, level, verbose); - if (st && !st->ss->validate_geometry - (st, level, layout, raiddisks, - &chunk, size2, dname, &freesize, - verbose > 0)) { + switch (st->ss->validate_geometry( + st, level, layout, raiddisks, + &chunk, size2, dname, &freesize, + verbose > 0)) { + case -1: /* Not valid, message printed, and not + * worth checking any further / + exit(2); + break; + case 0: / Geometry not valid / free(st); st = NULL; chunk = do_default_chunk ? UnSet : chunk; + break; + case 1: / All happy / + break; } } @@ -361,7 +378,7 @@ did_default = 1; } else { if (do_default_layout) - layout = default_layout(st, level, verbose); + layout = default_layout(st, level, 0); if (!st->ss->validate_geometry(st, level, layout, raiddisks, &chunk, size2, dname, @@ -472,6 +489,12 @@ return 1; } size = minsize; + if (level == 1) + /* If this is ever reshaped to RAID5, we will + * need a chunksize. So round it off a bit + * now just to be safe + / + size &= ~(64ULL-1); if (verbose > 0) fprintf(stderr, Name ": size set to %lluK\n", size); } @@ -544,14 +567,28 @@ / We need to create the device / map_lock(&map); mdfd = create_mddev(mddev, name, autof, LOCAL, chosen_name); - if (mdfd < 0) + if (mdfd < 0) { + map_unlock(&map); + return 1; + } + / verify if chosen_name is not in use, + * it could be in conflict with already existing device + * e.g. 
container, array + / + if (strncmp(chosen_name, "/dev/md/", 8) == 0 + && map_by_name(&map, chosen_name+8) != NULL) { + fprintf(stderr, Name ": Array name %s is in use already.\n", + chosen_name); + close(mdfd); + map_unlock(&map); return 1; + } mddev = chosen_name; vers = md_get_version(mdfd); if (vers < 9000) { fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n"); - goto abort; + goto abort_locked; } else { mdu_array_info_t inf; memset(&inf, 0, sizeof(inf)); @@ -559,7 +596,7 @@ if (inf.working_disks != 0) { fprintf(stderr, Name ": another array by this name" " is already running.\n"); - goto abort; + goto abort_locked; } } @@ -655,7 +692,7 @@ } } if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid)) - goto abort; + goto abort_locked; total_slots = info.array.nr_disks; st->ss->getinfo_super(st, &info, NULL); @@ -778,6 +815,10 @@ } infos = malloc(sizeof(infos) * total_slots); + if (!infos) { + fprintf(stderr, Name ": Unable to allocate memory\n"); + goto abort; + } for (pass=1; pass <=2 ; pass++) { struct mddev_dev moved_disk = NULL; / the disk that was moved out of the insert point / @@ -856,15 +897,6 @@ / getinfo_super might have lost these ... / inf->disk.major = major(stb.st_rdev); inf->disk.minor = minor(stb.st_rdev); - / FIXME the following should not be needed - * as getinfo_super is suppose to set - * them. However it doesn't for imsm, - * so we have this hack for now - / - if (st->ss == &super_imsm) { - inf->disk.number = dnum; - inf->disk.raid_disk = dnum; - } } break; case 2: @@ -905,11 +937,8 @@ } if (st->ss->write_init_super(st)) { - fprintf(stderr, - Name ": Failed to write metadata to %s\n", - dv->devname); st->ss->free_super(st); - goto abort; + goto abort_locked; } / update parent container uuid */ @@ -992,6 +1021,7 @@ abort: map_lock(&map); + abort_locked: map_remove(&map, fd2devnum(mdfd)); map_unlock(&map);
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Detail.c ^
@@ -27,7 +27,7 @@ #include "md_u.h" #include <dirent.h> -int Detail(char dev, int brief, int export, int test, char homehost) +int Detail(char dev, int brief, int export, int test, char homehost, char prefer) { / * Print out details for an md array by using @@ -58,7 +58,7 @@ int rv = test ? 4 : 1; int avail_disks = 0; - char avail; + char avail = NULL; if (fd < 0) { fprintf(stderr, Name ": cannot open %s: %s\n", @@ -105,7 +105,7 @@ int dn = st->container_dev; member = subarray; - container = map_dev(dev2major(dn), dev2minor(dn), 1); + container = map_dev_preferred(dev2major(dn), dev2minor(dn), 1, prefer); } /* try to load a superblock / @@ -367,16 +367,18 @@ if (avail_disks == array.raid_disks) st = ""; else if (!enough(array.level, array.raid_disks, - array.layout, 1, avail, avail_disks)) + array.layout, 1, avail)) st = ", FAILED"; else st = ", degraded"; - printf(" State : %s%s%s%s\n", - (array.state&(1<<MD_SB_CLEAN))?"clean":"active", - st, - (!e \|\| e->percent < 0) ? "" : sync_action[e->resync], - larray_size ? "": ", Not Started"); + printf(" State : %s%s%s%s%s%s \n", + (array.state&(1<<MD_SB_CLEAN))?"clean":"active", st, + (!e \|\| (e->percent < 0 && e->percent != PROCESS_PENDING && + e->percent != PROCESS_DELAYED)) ? "" : sync_action[e->resync], + larray_size ? "": ", Not Started", + e->percent == PROCESS_DELAYED ? " (DELAYED)": "", + e->percent == PROCESS_PENDING ? " (PENDING)": ""); } if (array.raid_disks) printf(" Active Devices : %d\n", array.active_disks); @@ -416,10 +418,8 @@ } if (e && e->percent >= 0) { - printf(" Re%s Status : %d%% complete\n", - (st && st->sb && info->reshape_active)? 
- "shape":"build", - e->percent); + static char sync_action[] = {"Rebuild", "Resync", "Reshape", "Check"}; + printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent); is_rebuilding = 1; } free_mdstat(ms); @@ -430,12 +430,9 @@ printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9, human_size((unsigned long long)info->reshape_progress<<9)); #endif - if (info->delta_disks > 0) + if (info->delta_disks != 0) printf(" Delta Devices : %d, (%d->%d)\n", info->delta_disks, array.raid_disks - info->delta_disks, array.raid_disks); - if (info->delta_disks < 0) - printf(" Delta Devices : %d, (%d->%d)\n", - info->delta_disks, array.raid_disks, array.raid_disks + info->delta_disks); if (info->new_level != array.level) { char *c = map_num(pers, info->new_level); printf(" New Level : %s\n", c?c:"-unknown-"); @@ -494,8 +491,9 @@ vbuf[10+nlen] != '/') continue; dn = devname2devnum(de->d_name); - printf(" %s", map_dev(dev2major(dn), - dev2minor(dn), 1)); + printf(" %s", map_dev_preferred( + dev2major(dn), + dev2minor(dn), 1, prefer)); } if (dir) closedir(dir); @@ -561,7 +559,7 @@ if (test && d < array.raid_disks && !(disk.state & (1<<MD_DISK_SYNC))) rv \|= 1; - if ((dv=map_dev(disk.major, disk.minor, 0))) { + if ((dv=map_dev_preferred(disk.major, disk.minor, 0, prefer))) { if (brief) { if (devices) { devices = realloc(devices, @@ -583,13 +581,15 @@ if (brief) printf("\n"); if (test && !enough(array.level, array.raid_disks, array.layout, - 1, avail, avail_disks)) + 1, avail)) rv = 2; free(disks); out: close(fd); free(subarray); + free(avail); + sysfs_free(sra); return rv; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Grow.c ^
@@ -35,6 +35,67 @@ #define offsetof(t,f) ((size_t)&(((t)0)->f)) #endif +int restore_backup(struct supertype st, + struct mdinfo content, + int working_disks, + int next_spare, + char backup_file, + int verbose) +{ + int i; + int fdlist; + struct mdinfo dev; + int err; + int disk_count = next_spare + working_disks; + + dprintf("Called restore_backup()\n"); + fdlist = malloc(sizeof(int) * disk_count); + if (fdlist == NULL) { + fprintf(stderr, + Name ": cannot allocate memory for disk list\n"); + return 1; + } + for (i = 0; i < next_spare; i++) + fdlist[i] = -1; + for (dev = content->devs; dev; dev = dev->next) { + char buf[22]; + int fd; + sprintf(buf, "%d:%d", + dev->disk.major, + dev->disk.minor); + fd = dev_open(buf, O_RDWR); + + if (dev->disk.raid_disk >= 0) + fdlist[dev->disk.raid_disk] = fd; + else + fdlist[next_spare++] = fd; + } + + if (st->ss->external && st->ss->recover_backup) + err = st->ss->recover_backup(st, content); + else + err = Grow_restart(st, content, fdlist, next_spare, + backup_file, verbose > 0); + + while (next_spare > 0) { + next_spare--; + if (fdlist[next_spare] >= 0) + close(fdlist[next_spare]); + } + free(fdlist); + if (err) { + fprintf(stderr, Name ": Failed to restore critical" + " section for reshape - sorry.\n"); + if (!backup_file) + fprintf(stderr, Name ": Possibly you need" + " to specify a --backup-file\n"); + return 1; + } + + dprintf("restore_backup() returns status OK.\n"); + return 0; +} + int Grow_Add_device(char devname, int fd, char newdev) { /* Add a device to an active array. 
@@ -73,6 +134,7 @@ fprintf(stderr, Name ": Cannot grow linear sub-arrays yet\n"); free(subarray); free(st); + return 1; } nfd = open(newdev, O_RDWR\|O_EXCL\|O_DIRECT); @@ -112,7 +174,7 @@ return 1; } fd2 = dev_open(dv, O_RDWR); - if (!fd2) { + if (fd2 < 0) { fprintf(stderr, Name ": cannot open device file %s\n", dv); close(nfd); free(st); @@ -313,12 +375,18 @@ return 1; } if (strcmp(file, "internal") == 0) { + int rv; int d; + int offset_setable = 0; + struct mdinfo mdi; if (st->ss->add_internal_bitmap == NULL) { fprintf(stderr, Name ": Internal bitmaps not supported " "with %s metadata\n", st->ss->name); return 1; } + mdi = sysfs_read(fd, -1, GET_BITMAP_LOCATION); + if (mdi) + offset_setable = 1; for (d=0; d< st->max_devs; d++) { mdu_disk_info_t disk; char dv; @@ -339,11 +407,13 @@ if (st->ss->add_internal_bitmap( st, &chunk, delay, write_behind, - bitmapsize, 0, major) + bitmapsize, offset_setable, + major) ) st->ss->write_bitmap(st, fd2); else { - fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n"); + fprintf(stderr, Name ": failed " + "to create internal bitmap - chunksize problem.\n"); close(fd2); return 1; } @@ -351,8 +421,16 @@ close(fd2); } } - array.state \|= (1<<MD_SB_BITMAP_PRESENT); - if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) { + if (offset_setable) { + st->ss->getinfo_super(st, mdi, NULL); + sysfs_init(mdi, fd, -1); + rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location", + mdi->bitmap_offset); + } else { + array.state \|= (1<<MD_SB_BITMAP_PRESENT); + rv = ioctl(fd, SET_ARRAY_INFO, &array); + } + if (rv < 0) { if (errno == EBUSY) fprintf(stderr, Name ": Cannot add bitmap while array is" @@ -380,13 +458,14 @@ dv = map_dev(disk.major, disk.minor, 1); if (!dv) continue; fd2 = dev_open(dv, O_RDONLY); - if (fd2 >= 0 && - st->ss->load_super(st, fd2, NULL) == 0) { + if (fd2 >= 0) { + if (st->ss->load_super(st, fd2, NULL) == 0) { + close(fd2); + st->ss->uuid_from_super(st, uuid); + break; + } close(fd2); - 
st->ss->uuid_from_super(st, uuid); - break; } - close(fd2); } if (d == max_devs) { fprintf(stderr, Name ": cannot find UUID for array!\n"); @@ -417,7 +496,6 @@ return 0; } - /* * When reshaping an array we might need to backup some data. * This is written to all spares with a 'super_block' describing it. @@ -463,7 +541,7 @@ char container[40]; struct mdstat_ent ent, e; int is_idle = 1; - + fmt_devname(container, container_dev); ent = mdstat_read(0, 0); for (e = ent ; e; e = e->next) { @@ -486,7 +564,7 @@ if (!check_idle(st)) return -1; - + fmt_devname(container, container_dev); if (block_monitor(container, 1)) { @@ -502,7 +580,7 @@ int container_dev = (st->container_dev != NoMdDev ? st->container_dev : st->devnum); char container[40]; - + fmt_devname(container, container_dev); unblock_monitor(container, 1); @@ -572,7 +650,7 @@ static int reshape_super(struct supertype st, long long size, int level, int layout, int chunksize, int raid_disks, int delta_disks, char backup_file, char dev, - int verbose) + int direction, int verbose) { / nothing extra to check in the native case / if (!st->ss->external) @@ -586,7 +664,7 @@ return st->ss->reshape_super(st, size, level, layout, chunksize, raid_disks, delta_disks, backup_file, dev, - verbose); + direction, verbose); } static void sync_metadata(struct supertype st) @@ -635,15 +713,24 @@ return rc; } -int start_reshape(struct mdinfo sra, int already_running) +int start_reshape(struct mdinfo sra, int already_running, + int before_data_disks, int data_disks) { int err; + unsigned long long sync_max_to_set; + sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); - err = sysfs_set_num(sra, NULL, "suspend_hi", 0); - err = err ?: sysfs_set_num(sra, NULL, "suspend_lo", 0); + err = sysfs_set_num(sra, NULL, "suspend_hi", sra->reshape_progress); + err = err ?: sysfs_set_num(sra, NULL, "suspend_lo", + sra->reshape_progress); + if (before_data_disks <= data_disks) + sync_max_to_set = sra->reshape_progress / data_disks; + else + 
sync_max_to_set = (sra->component_size * data_disks + - sra->reshape_progress) / data_disks; if (!already_running) - sysfs_set_num(sra, NULL, "sync_min", 0); - err = err ?: sysfs_set_num(sra, NULL, "sync_max", 0); + sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set); + err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set); if (!already_running) err = err ?: sysfs_set_str(sra, NULL, "sync_action", "reshape"); @@ -936,6 +1023,10 @@ * raid5 with 2 disks, or * raid0 with 1 disk / + if (info->new_level > 1 && + (info->component_size & 7)) + return "Cannot convert RAID1 of this size - " + "reduce size to multiple of 4K first."; if (info->new_level == 0) { if (info->delta_disks != UnSet && info->delta_disks != 0) @@ -1188,7 +1279,7 @@ break; case 5: - / We get to RAID5 for RAID5 or RAID6 / + / We get to RAID5 from RAID5 or RAID6 / if (re->level != 5 && re->level != 6) return "Cannot convert to RAID5 from this level"; @@ -1210,11 +1301,27 @@ char layout[40]; char ls = map_num(r5layout, info->new_layout); int l; - strcat(strcpy(layout, ls), "-6"); - l = map_name(r6layout, layout); - if (l == UnSet) - return "Cannot find RAID6 layout" - " to convert to"; + if (ls) { + /* Current RAID6 layout has a RAID5 + * equivalent - good + / + strcat(strcpy(layout, ls), "-6"); + l = map_name(r6layout, layout); + if (l == UnSet) + return "Cannot find RAID6 layout" + " to convert to"; + } else { + / Current RAID6 has no equivalent. 
+ * If it is already a '-6' layout we + * can leave it unchanged, else we must + * fail + / + ls = map_num(r6layout, info->new_layout); + if (!ls \|\| + strcmp(ls+strlen(ls)-2, "-6") != 0) + return "Please specify new layout"; + l = info->new_layout; + } re->after.layout = l; } } @@ -1268,7 +1375,7 @@ if (re->after.data_disks < re->before.data_disks && get_linux_version() < 2006030) - return "reshape to fewer devices is not supported before 2.6.32 - sorry."; + return "reshape to fewer devices is not supported before 2.6.30 - sorry."; re->backup_blocks = compute_backup_blocks( info->new_chunk, info->array.chunk_size, @@ -1279,17 +1386,56 @@ return NULL; } +static int set_array_size(struct supertype st, struct mdinfo sra, + char text_version) +{ + struct mdinfo info; + char subarray; + int ret_val = -1; + + if ((st == NULL) \|\| (sra == NULL)) + return ret_val; + + if (text_version == NULL) + text_version = sra->text_version; + subarray = strchr(text_version+1, '/')+1; + info = st->ss->container_content(st, subarray); + if (info) { + unsigned long long current_size = 0; + unsigned long long new_size = + info->custom_array_size/2; + + if (sysfs_get_ll(sra, NULL, "array_size", &current_size) == 0 && + new_size > current_size) { + if (sysfs_set_num(sra, NULL, "array_size", new_size) + < 0) + dprintf("Error: Cannot set array size"); + else { + ret_val = 0; + dprintf("Array size changed"); + } + dprintf(" from %llu to %llu.\n", + current_size, new_size); + } + sysfs_free(info); + } else + dprintf("Error: set_array_size(): info pointer in NULL\n"); + + return ret_val; +} + static int reshape_array(char container, int fd, char devname, struct supertype st, struct mdinfo info, int force, struct mddev_dev devlist, char backup_file, int quiet, int forked, - int restart); + int restart, int freeze_reshape); static int reshape_container(char container, char devname, - struct supertype st, + int mdfd, + struct supertype st, struct mdinfo info, int force, char backup_file, - int 
quiet, int restart); + int quiet, int restart, int freeze_reshape); int Grow_reshape(char devname, int fd, int quiet, char backup_file, long long size, @@ -1401,6 +1547,36 @@ return 1; } + /* check if operation is supported for metadata handler / + if (st->ss->container_content) { + struct mdinfo cc = NULL; + struct mdinfo content = NULL; + + cc = st->ss->container_content(st, subarray); + for (content = cc; content ; content = content->next) { + int allow_reshape = 1; + + / check if reshape is allowed based on metadata + * indications stored in content.array.status + / + if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) + allow_reshape = 0; + if (content->array.state + & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE)) + allow_reshape = 0; + if (!allow_reshape) { + fprintf(stderr, Name + " cannot reshape arrays in" + " container with unsupported" + " metadata: %s(%s)\n", + devname, container_buf); + sysfs_free(cc); + free(subarray); + return 1; + } + } + sysfs_free(cc); + } if (mdmon_running(container_dev)) st->update_tail = &st->updates; } @@ -1415,15 +1591,15 @@ Name ": Need %d spare%s to avoid degraded array," " and only have %d.\n" " Use --force to over-ride this check.\n", - raid_disks - array.raid_disks, - raid_disks - array.raid_disks == 1 ? "" : "s", + raid_disks - array.raid_disks, + raid_disks - array.raid_disks == 1 ? 
"" : "s", array.spare_disks + added_disks); return 1; } sra = sysfs_read(fd, 0, GET_LEVEL \| GET_DISKS \| GET_DEVS \| GET_STATE \| GET_VERSION); - if (sra) { + if (sra) { if (st->ss->external && subarray == NULL) { array.level = LEVEL_CONTAINER; sra->array.level = LEVEL_CONTAINER; @@ -1436,35 +1612,121 @@ frozen = freeze(st); if (frozen < -1) { / freeze() already spewed the reason / + sysfs_free(sra); return 1; } else if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); + sysfs_free(sra); return 1; } / ========= set size =============== / if (size >= 0 && (size == 0 \|\| size != array.size)) { long long orig_size = get_component_size(fd)/2; + long long min_csize; struct mdinfo mdi; + int raid0_takeover = 0; if (orig_size == 0) orig_size = array.size; if (reshape_super(st, size, UnSet, UnSet, 0, 0, UnSet, NULL, - devname, !quiet)) { + devname, APPLY_METADATA_CHANGES, !quiet)) { rv = 1; goto release; } sync_metadata(st); + if (st->ss->external) { + /* metadata can have size limitation + * update size value according to metadata information + / + struct mdinfo sizeinfo = + st->ss->container_content(st, subarray); + if (sizeinfo) { + unsigned long long new_size = + sizeinfo->custom_array_size/2; + int data_disks = get_data_disks( + sizeinfo->array.level, + sizeinfo->array.layout, + sizeinfo->array.raid_disks); + new_size /= data_disks; + dprintf("Metadata size correction from %llu to " + "%llu (%llu)\n", orig_size, new_size, + new_size * data_disks); + size = new_size; + sysfs_free(sizeinfo); + } + } /* Update the size of each member device in case * they have been resized. This will never reduce * below the current used-size. The "size" attribute - * understand '0' to mean 'max'. + * understands '0' to mean 'max'. 
/ - for (mdi = sra->devs; mdi; mdi = mdi->next) - sysfs_set_num(sra, mdi, "size", size); + min_csize = 0; + rv = 0; + for (mdi = sra->devs; mdi; mdi = mdi->next) { + if (sysfs_set_num(sra, mdi, "size", size) < 0) { + / Probably kernel refusing to let us + * reduce the size - not an error. + / + break; + } + if (array.not_persistent == 0 && + array.major_version == 0 && + get_linux_version() < 3001000) { + / Dangerous to allow size to exceed 2TB / + unsigned long long csize; + if (sysfs_get_ll(sra, mdi, "size", &csize) == 0) { + if (csize >= 2ULL102410241024) + csize = 2ULL102410241024; + if ((min_csize == 0 \|\| (min_csize + > (long long)csize))) + min_csize = csize; + } + } + } + if (rv) { + fprintf(stderr, Name ": Cannot set size on " + "array members.\n"); + goto size_change_error; + } + if (min_csize && size > min_csize) { + fprintf(stderr, Name ": Cannot safely make this array " + "use more than 2TB per device on this kernel.\n"); + rv = 1; + goto size_change_error; + } + if (min_csize && size == 0) { + / Don't let the kernel choose a size - it will get + * it wrong + / + fprintf(stderr, Name ": Limited v0.90 array to " + "2TB per device\n"); + size = min_csize; + } + if (st->ss->external) { + if (sra->array.level == 0) { + rv = sysfs_set_str(sra, NULL, "level", + "raid5"); + if (!rv) { + raid0_takeover = 1; + / get array parametes after takeover + * to chane one parameter at time only + / + rv = ioctl(fd, GET_ARRAY_INFO, &array); + } + } + / make sure mdmon is + * aware of the new level / + if (!mdmon_running(st->container_dev)) + start_mdmon(st->container_dev); + ping_monitor(container); + if (mdmon_running(st->container_dev) && + st->update_tail == NULL) + st->update_tail = &st->updates; + } array.size = size; if (array.size != size) { @@ -1476,25 +1738,46 @@ "component_size", size); else rv = -1; - } else + } else { rv = ioctl(fd, SET_ARRAY_INFO, &array); + + / manage array size when it is managed externally + / + if ((rv == 0) && st->ss->external) + rv = 
set_array_size(st, sra, sra->text_version); + } + + if (raid0_takeover) { + / do not recync non-existing parity, + * we will drop it anyway + / + sysfs_set_str(sra, NULL, "sync_action", "frozen"); + / go back to raid0, drop parity disk + / + sysfs_set_str(sra, NULL, "level", "raid0"); + ioctl(fd, GET_ARRAY_INFO, &array); + } + +size_change_error: if (rv != 0) { int err = errno; / restore metadata / if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0, - UnSet, NULL, devname, !quiet) == 0) + UnSet, NULL, devname, + ROLLBACK_METADATA_CHANGES, + !quiet) == 0) sync_metadata(st); fprintf(stderr, Name ": Cannot set device size for %s: %s\n", devname, strerror(err)); - if (err == EBUSY && + if (err == EBUSY && (array.state & (1<<MD_SB_BITMAP_PRESENT))) fprintf(stderr, " Bitmap must be removed before size can be changed\n"); rv = 1; goto release; } if (assume_clean) { - / This will fail on kernels newer than 2.6.40 unless + /* This will fail on kernels newer than 3.0 unless * a backport has been arranged. 
/ if (sra == NULL \|\| @@ -1537,11 +1820,11 @@ / ========= check for Raid10/Raid1 -> Raid0 conversion =============== * current implementation assumes that following conditions must be met: * - RAID10: - * - far_copies == 1 - * - near_copies == 2 + * - far_copies == 1 + * - near_copies == 2 / if ((level == 0 && array.level == 10 && sra && - array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) \|\| + array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) \|\| (level == 0 && array.level == 1 && sra)) { int err; err = remove_disks_for_takeover(st, sra, array.layout); @@ -1552,8 +1835,12 @@ rv = 1; goto release; } - / FIXME this is added with no justification - why is it here / - ping_monitor(container); + / Make sure mdmon has seen the device removal + * and updated metadata before we continue with + * level change + / + if (container) + ping_monitor(container); } memset(&info, 0, sizeof(info)); @@ -1587,7 +1874,7 @@ goto release; } } else if (strcmp(layout_str, "normalise") == 0 \|\| - strcmp(layout_str, "normalize") == 0) { + strcmp(layout_str, "normalize") == 0) { / If we have a -6 RAID6 layout, remove the '-6'. / info.new_layout = UnSet; if (info.array.level == 6 && info.new_level == UnSet) { @@ -1668,8 +1955,8 @@ number of devices (On-Line Capacity Expansion) must be * performed at the level of the container / - rv = reshape_container(container, devname, st, &info, - force, backup_file, quiet, 0); + rv = reshape_container(container, devname, -1, st, &info, + force, backup_file, quiet, 0, 0); frozen = 0; } else { / get spare devices from external metadata @@ -1688,29 +1975,88 @@ /* Impose these changes on a single array. First * check that the metadata is OK with the change. 
/ - if (reshape_super(st, info.component_size, info.new_level, + if (reshape_super(st, -1, info.new_level, info.new_layout, info.new_chunk, info.array.raid_disks, info.delta_disks, - backup_file, devname, quiet)) { + backup_file, devname, APPLY_METADATA_CHANGES, + quiet)) { rv = 1; goto release; } sync_metadata(st); rv = reshape_array(container, fd, devname, st, &info, force, - devlist, backup_file, quiet, 0, 0); + devlist, backup_file, quiet, 0, 0, 0); frozen = 0; } release: + sysfs_free(sra); if (frozen > 0) unfreeze(st); return rv; } +/ verify_reshape_position() + * Function checks if reshape position in metadata is not farther + * than position in md. + * Return value: + * 0 : not valid sysfs entry + * it can be caused by not started reshape, it should be started + * by reshape array or raid0 array is before takeover + * -1 : error, reshape position is obviously wrong + * 1 : success, reshape progress correct or updated +/ +static int verify_reshape_position(struct mdinfo info, int level) +{ + int ret_val = 0; + char buf[40]; + int rv; + + /* read sync_max, failure can mean raid0 array / + rv = sysfs_get_str(info, NULL, "sync_max", buf, 40); + + if (rv > 0) { + char ep; + unsigned long long position = strtoull(buf, &ep, 0); + + dprintf(Name": Read sync_max sysfs entry is: %s\n", buf); + if (!(ep == buf \|\| (ep != 0 && ep != '\n' && ep != ' '))) { + position = get_data_disks(level, + info->new_layout, + info->array.raid_disks); + if (info->reshape_progress < position) { + dprintf("Corrected reshape progress (%llu) to " + "md position (%llu)\n", + info->reshape_progress, position); + info->reshape_progress = position; + ret_val = 1; + } else if (info->reshape_progress > position) { + fprintf(stderr, Name ": Fatal error: array " + "reshape was not properly frozen " + "(expected reshape position is %llu, " + "but reshape progress is %llu.\n", + position, info->reshape_progress); + ret_val = -1; + } else { + dprintf("Reshape position in md and metadata " + "are the 
same;"); + ret_val = 1; + } + } + } else if (rv == 0) { + /* for valid sysfs entry, 0-length content + * should be indicated as error + / + ret_val = -1; + } + + return ret_val; +} + static int reshape_array(char container, int fd, char devname, struct supertype st, struct mdinfo info, int force, struct mddev_dev devlist, char backup_file, int quiet, int forked, - int restart) + int restart, int freeze_reshape) { struct reshape reshape; int spares_needed; @@ -1724,8 +2070,8 @@ struct mddev_dev dv; int added_disks; - int fdlist; - unsigned long long offsets; + int fdlist = NULL; + unsigned long long offsets = NULL; int d; int nrdisks; int err; @@ -1750,10 +2096,12 @@ if (info->reshape_active) { int new_level = info->new_level; info->new_level = UnSet; - info->array.raid_disks -= info->delta_disks; + if (info->delta_disks > 0) + info->array.raid_disks -= info->delta_disks; msg = analyse_change(info, &reshape); info->new_level = new_level; - info->array.raid_disks += info->delta_disks; + if (info->delta_disks > 0) + info->array.raid_disks += info->delta_disks; if (!restart) /* Make sure the array isn't read-only / ioctl(fd, RESTART_ARRAY_RW, 0); @@ -1767,12 +2115,24 @@ (reshape.level != info->array.level \|\| reshape.before.layout != info->array.layout \|\| reshape.before.data_disks + reshape.parity - != info->array.raid_disks - info->delta_disks)) { + != info->array.raid_disks - max(0, info->delta_disks))) { fprintf(stderr, Name ": reshape info is not in native format -" " cannot continue.\n"); goto release; } + if (st->ss->external && restart && (info->reshape_progress == 0)) { + / When reshape is restarted from '0', very begin of array + * it is possible that for external metadata reshape and array + * configuration doesn't happen. + * Check if md has the same opinion, and reshape is restarted + * from 0. If so, this is regular reshape start after reshape + * switch in metadata to next array only. 
+ / + if ((verify_reshape_position(info, reshape.level) >= 0) && + (info->reshape_progress == 0)) + restart = 0; + } if (restart) { / reshape already started. just skip to monitoring the reshape / if (reshape.backup_blocks == 0) @@ -1801,7 +2161,7 @@ " and only have %d.\n" " Use --force to over-ride this check.\n", spares_needed, - spares_needed == 1 ? "" : "s", + spares_needed == 1 ? "" : "s", info->array.spare_disks + added_disks); goto release; } @@ -1815,7 +2175,7 @@ Name ": Need %d spare%s to create working array," " and only have %d.\n", spares_needed, - spares_needed == 1 ? "" : "s", + spares_needed == 1 ? "" : "s", info->array.spare_disks + added_disks); goto release; } @@ -1831,7 +2191,7 @@ err = errno; fprintf(stderr, Name ": %s: could not set level to %s\n", devname, c); - if (err == EBUSY && + if (err == EBUSY && (info->array.state & (1<<MD_SB_BITMAP_PRESENT))) fprintf(stderr, " Bitmap must be removed" " before level can be changed\n"); @@ -1839,12 +2199,15 @@ } if (!quiet) fprintf(stderr, Name ": level of %s changed to %s\n", - devname, c); + devname, c); orig_level = array.level; sysfs_freeze_array(info); if (reshape.level > 0 && st->ss->external) { / make sure mdmon is aware of the new level / + if (mdmon_running(st->container_dev)) + flush_mdmon(container); + if (!mdmon_running(st->container_dev)) start_mdmon(st->container_dev); ping_monitor(container); @@ -1893,7 +2256,7 @@ / if (devlist) Manage_subdevs(devname, fd, devlist, !quiet, - 0,NULL); + 0,NULL, 0); if (reshape.backup_blocks == 0) { /* No restriping needed, but we might need to impose @@ -1947,7 +2310,7 @@ * 1/ The array will shrink. * We need to ensure the reshape will pause before reaching * the 'critical section'. We also need to fork and wait for - * that to happen. When it does we + * that to happen. When it does we * suspend/backup/complete/unfreeze * * 2/ The array will not change size. @@ -2000,7 +2363,7 @@ * unit. 
The number we have so far is just a minimum / blocks = reshape.backup_blocks; - if (reshape.before.data_disks == + if (reshape.before.data_disks == reshape.after.data_disks) { / Make 'blocks' bigger for better throughput, but * not so big that we reject it below. @@ -2090,9 +2453,16 @@ sra->new_chunk = info->new_chunk; - if (restart) + if (restart) { + /* for external metadata checkpoint saved by mdmon can be lost + * or missed /due to e.g. crash/. Check if md is not during + * restart farther than metadata points to. + * If so, this means metadata information is obsolete. + / + if (st->ss->external) + verify_reshape_position(info, reshape.level); sra->reshape_progress = info->reshape_progress; - else { + } else { sra->reshape_progress = 0; if (reshape.after.data_disks < reshape.before.data_disks) / start from the end of the new array / @@ -2114,7 +2484,7 @@ Name ": Cannot set device shape for %s: %s\n", devname, strerror(errno)); - if (err == EBUSY && + if (err == EBUSY && (array.state & (1<<MD_SB_BITMAP_PRESENT))) fprintf(stderr, " Bitmap must be removed before" @@ -2129,18 +2499,18 @@ int err = 0; if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0) err = errno; - if (!err && sysfs_set_num(sra, NULL, "layout", - reshape.after.layout) < 0) + if (!err && sysfs_set_num(sra, NULL, "layout", + reshape.after.layout) < 0) err = errno; if (!err && subarray_set_num(container, sra, "raid_disks", - reshape.after.data_disks + - reshape.parity) < 0) + reshape.after.data_disks + + reshape.parity) < 0) err = errno; if (err) { fprintf(stderr, Name ": Cannot set device shape for %s\n", devname); - if (err == EBUSY && + if (err == EBUSY && (array.state & (1<<MD_SB_BITMAP_PRESENT))) fprintf(stderr, " Bitmap must be removed before" @@ -2149,9 +2519,10 @@ } } - err = start_reshape(sra, restart); + err = start_reshape(sra, restart, reshape.before.data_disks, + reshape.after.data_disks); if (err) { - fprintf(stderr, + fprintf(stderr, Name ": Cannot %s reshape for %s\n", 
restart ? "continue" : "start", devname); @@ -2159,6 +2530,15 @@ } if (restart) sysfs_set_str(sra, NULL, "array_state", "active"); + if (freeze_reshape) { + free(fdlist); + free(offsets); + sysfs_free(sra); + fprintf(stderr, Name ": Reshape has to be continued from" + " location %llu when root filesystem has been mounted.\n", + sra->reshape_progress); + return 1; + } / Now we just need to kick off the reshape and watch, while * handling backups of the data... @@ -2171,8 +2551,12 @@ abort_reshape(sra); goto release; default: + free(fdlist); + free(offsets); + sysfs_free(sra); return 0; case 0: + map_fork(); break; } @@ -2197,6 +2581,9 @@ d - odisks, fdlist+odisks, offsets+odisks); + free(fdlist); + free(offsets); + if (backup_file && done) unlink(backup_file); if (!done) { @@ -2212,6 +2599,7 @@ /* no need to wait for the reshape to finish as * there is nothing more to do. / + sysfs_free(sra); exit(0); } wait_reshape(sra); @@ -2220,7 +2608,7 @@ / Re-load the metadata as much could have changed / int cfd = open_dev(st->container_dev); if (cfd >= 0) { - ping_monitor(container); + flush_mdmon(container); st->ss->free_super(st); st->ss->load_container(st, cfd, container); close(cfd); @@ -2232,35 +2620,8 @@ / if (reshape.before.data_disks != reshape.after.data_disks && - info->custom_array_size) { - struct mdinfo info2; - char subarray = strchr(info->text_version+1, '/')+1; - - info2 = st->ss->container_content(st, subarray); - if (info2) { - unsigned long long current_size = 0; - unsigned long long new_size = - info2->custom_array_size/2; - - if (sysfs_get_ll(sra, - NULL, - "array_size", - &current_size) == 0 && - new_size > current_size) { - if (sysfs_set_num(sra, NULL, - "array_size", new_size) - < 0) - dprintf("Error: Cannot" - " set array size"); - else - dprintf("Array size " - "changed"); - dprintf(" from %llu to %llu.\n", - current_size, new_size); - } - sysfs_free(info2); - } - } + info->custom_array_size) + set_array_size(st, info, info->text_version); if 
(info->new_level != reshape.level) { @@ -2276,31 +2637,39 @@ st->update_tail = NULL; } out: + sysfs_free(sra); if (forked) return 0; unfreeze(st); exit(0); release: + free(fdlist); + free(offsets); if (orig_level != UnSet && sra) { c = map_num(pers, orig_level); if (c && sysfs_set_str(sra, NULL, "level", c) == 0) fprintf(stderr, Name ": aborting level change\n"); } + sysfs_free(sra); if (!forked) unfreeze(st); return 1; } +/* mdfd handle is passed to be closed in child process (after fork). + / int reshape_container(char container, char devname, - struct supertype st, + int mdfd, + struct supertype st, struct mdinfo info, int force, char backup_file, - int quiet, int restart) + int quiet, int restart, int freeze_reshape) { struct mdinfo cc = NULL; int rv = restart; + int last_devnum = -1; /* component_size is not meaningful for a container, * so pass '-1' meaning 'no change' @@ -2309,7 +2678,8 @@ reshape_super(st, -1, info->new_level, info->new_layout, info->new_chunk, info->array.raid_disks, info->delta_disks, - backup_file, devname, quiet)) { + backup_file, devname, APPLY_METADATA_CHANGES, + quiet)) { unfreeze(st); return 1; } @@ -2326,12 +2696,20 @@ unfreeze(st); return 1; default: /* parent / - printf(Name ": multi-array reshape continues in background\n"); + if (!freeze_reshape) + printf(Name ": multi-array reshape continues" + " in background\n"); return 0; case 0: / child / + map_fork(); break; } + / close unused handle in child process + / + if (mdfd > -1) + close(mdfd); + while(1) { / For each member array with reshape_active, * we need to perform the reshape. 
@@ -2363,29 +2741,71 @@ devname2devnum(container)); if (!mdstat) continue; + if (mdstat->active == 0) { + fprintf(stderr, Name ": Skipping inactive " + "array md%i.\n", mdstat->devnum); + free_mdstat(mdstat); + mdstat = NULL; + continue; + } break; } if (!content) break; - fd = open_dev(mdstat->devnum); - if (fd < 0) - break; adev = map_dev(dev2major(mdstat->devnum), dev2minor(mdstat->devnum), 0); if (!adev) adev = content->text_version; + fd = open_dev(mdstat->devnum); + if (fd < 0) { + printf(Name ": Device %s cannot be opened for reshape.", + adev); + break; + } + + if (last_devnum == mdstat->devnum) { + /* Do not allow for multiple reshape_array() calls for + * the same array. + * It can happen when reshape_array() returns without + * error, when reshape is not finished (wrong reshape + * starting/continuation conditions). Mdmon doesn't + * switch to next array in container and reentry + * conditions for the same array occur. + * This is possibly interim until the behaviour of + * reshape_array is resolved(). + / + printf(Name ": Multiple reshape execution detected for " + "device %s.", adev); + close(fd); + break; + } + last_devnum = mdstat->devnum; + sysfs_init(content, fd, mdstat->devnum); + if (mdmon_running(devname2devnum(container))) + flush_mdmon(container); + rv = reshape_array(container, fd, adev, st, content, force, NULL, - backup_file, quiet, 1, restart); + backup_file, quiet, 1, restart, + freeze_reshape); close(fd); + + if (freeze_reshape) { + sysfs_free(cc); + exit(0); + } + restart = 0; if (rv) break; + + if (mdmon_running(devname2devnum(container))) + flush_mdmon(container); } if (!rv) unfreeze(st); @@ -2414,7 +2834,7 @@ suspend/backup/allow always come together * wait/resume/discard do too. * For the same-size case we have two backups to improve flow. - * + * / int progress_reshape(struct mdinfo info, struct reshape reshape, @@ -2559,7 +2979,7 @@ this much. 
/ target = 6410242 min(reshape->before.data_disks, - reshape->after.data_disks); + reshape->after.data_disks); target /= reshape->backup_blocks; if (target < 2) target = 2; @@ -2696,7 +3116,7 @@ - completed; } reshape_completed = completed; - + close(fd); / We return the need_backup flag. Caller will decide @@ -2725,15 +3145,21 @@ int rv = -2; tv.tv_sec = 10; tv.tv_usec = 0; - while (fd >= 0 && rv < 0) { + while (fd >= 0 && rv < 0 && tv.tv_sec > 0) { fd_set rfds; FD_ZERO(&rfds); FD_SET(fd, &rfds); if (select(fd+1, NULL, NULL, &rfds, &tv) != 1) break; - if (sysfs_fd_get_ll(fd, &completed) >= 0) + switch (sysfs_fd_get_ll(fd, &completed)) { + case 0: /* all good again / rv = 1; + break; + case -2: / read error - abort / + tv.tv_sec = 0; + break; + } } if (fd >= 0) close(fd); @@ -2750,7 +3176,6 @@ } } - / FIXME return status is never checked / static int grow_backup(struct mdinfo sra, unsigned long long offset, /* per device / @@ -2820,7 +3245,7 @@ else lseek64(destfd[i], destoffsets[i], 0); - rv = save_stripes(sources, offsets, + rv = save_stripes(sources, offsets, disks, chunk, level, layout, dests, destfd, offset512odata, stripes chunk * odata, @@ -2868,11 +3293,11 @@ * every works. 
/ / FIXME return value is often ignored / -static int forget_backup( - int dests, int destfd, unsigned long long destoffsets, - int part) +static int forget_backup(int dests, int destfd, + unsigned long long destoffsets, + int part) { - / + /* * Erase backup 'part' (which is 0 or 1) / int i; @@ -2896,7 +3321,7 @@ if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) != destoffsets[i]-4096) rv = -1; - if (rv == 0 && + if (rv == 0 && write(destfd[i], &bsb, 512) != 512) rv = -1; fsync(destfd[i]); @@ -2932,7 +3357,7 @@ fail("magic is bad"); if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 && bsb2.sb_csum2 != bsb_csum((char)&bsb2, - ((char)&bsb2.sb_csum2)-((char)&bsb2))) + ((char)&bsb2.sb_csum2)-((char)&bsb2))) fail("second csum bad"); if (__le64_to_cpu(bsb2.devstart)512 != offset) @@ -2962,7 +3387,7 @@ if ((unsigned long long)read(afd, abuf, len) != len) fail("read first from array failed"); if (memcmp(bbuf, abuf, len) != 0) { - #if 0 +#if 0 int i; printf("offset=%llu len=%llu\n", (unsigned long long)__le64_to_cpu(bsb2.arraystart)512, len); @@ -2971,7 +3396,7 @@ printf("first diff byte %d\n", i); break; } - #endif +#endif fail("data1 compare failed"); } } @@ -3322,7 +3747,7 @@ nonew: if (verbose) fprintf(stderr, Name - ": backup-metadata found on %s but is not needed\n", devname); + ": backup-metadata found on %s but is not needed\n", devname); continue; /* No new data here / } } else { @@ -3357,7 +3782,7 @@ second_fail: if (verbose) fprintf(stderr, Name - ": Failed to verify secondary backup-metadata block on %s\n", + ": Failed to verify secondary backup-metadata block on %s\n", devname); continue; / Cannot seek / } @@ -3398,9 +3823,10 @@ if (verbose) fprintf(stderr, Name ": Error restoring backup from %s\n", devname); + free(offsets); return 1; } - + if (bsb.magic[15] == '2' && restore_stripes(fdlist, offsets, info->array.raid_disks, @@ -3415,9 +3841,11 @@ if (verbose) fprintf(stderr, Name ": Error restoring second backup from %s\n", devname); + 
free(offsets); return 1; } + free(offsets); / Ok, so the data is restored. Let's update those superblocks. / @@ -3514,37 +3942,201 @@ return 1; } -int Grow_continue(int mdfd, struct supertype st, struct mdinfo info, - char backup_file) +int Grow_continue_command(char devname, int fd, + char backup_file, int verbose) { + int ret_val = 0; + struct supertype st = NULL; + struct mdinfo content = NULL; + struct mdinfo array; + char subarray = NULL; + struct mdinfo cc = NULL; + struct mdstat_ent mdstat = NULL; char buf[40]; - char container = NULL; - int err; + int cfd = -1; + int fd2 = -1; - err = sysfs_set_str(info, NULL, "array_state", "readonly"); - if (err) - return err; - if (st->ss->external) { - fmt_devname(buf, st->container_dev); - container = buf; - freeze(st); - - if (!mdmon_running(st->container_dev)) - start_mdmon(st->container_dev); - ping_monitor_by_id(st->container_dev); + dprintf("Grow continue from command line called for %s\n", + devname); + st = super_by_fd(fd, &subarray); + if (!st \|\| !st->ss) { + fprintf(stderr, + Name ": Unable to determine metadata format for %s\n", + devname); + return 1; + } + dprintf("Grow continue is run for "); + if (st->ss->external == 0) { + dprintf("native array (%s)\n", devname); + if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) { + fprintf(stderr, Name ": %s is not an active md array -" + " aborting\n", devname); + ret_val = 1; + goto Grow_continue_command_exit; + } + content = &array; + sysfs_init(content, fd, st->devnum); + } else { + int container_dev; - if (info->reshape_active == 2) { - int cfd = open_dev(st->container_dev); - if (cfd < 0) - return 1; - st->ss->load_container(st, cfd, container); - close(cfd); - return reshape_container(container, NULL, - st, info, 0, backup_file, - 0, 1); + if (subarray) { + dprintf("subarray (%s)\n", subarray); + container_dev = st->container_dev; + cfd = open_dev_excl(st->container_dev); + } else { + container_dev = st->devnum; + close(fd); + cfd = open_dev_excl(st->devnum); + 
dprintf("container (%i)\n", container_dev); + fd = cfd; + } + if (cfd < 0) { + fprintf(stderr, Name ": Unable to open container " + "for %s\n", devname); + ret_val = 1; + goto Grow_continue_command_exit; + } + fmt_devname(buf, container_dev); + + /* find in container array under reshape + / + ret_val = st->ss->load_container(st, cfd, NULL); + if (ret_val) { + fprintf(stderr, + Name ": Cannot read superblock for %s\n", + devname); + ret_val = 1; + goto Grow_continue_command_exit; + } + + cc = st->ss->container_content(st, subarray); + for (content = cc; content ; content = content->next) { + char array; + int allow_reshape = 1; + + if (content->reshape_active == 0) + continue; + /* The decision about array or container wide + * reshape is taken in Grow_continue based + * content->reshape_active state, therefore we + * need to check_reshape based on + * reshape_active and subarray name + / + if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) + allow_reshape = 0; + if (content->reshape_active == CONTAINER_RESHAPE && + (content->array.state + & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))) + allow_reshape = 0; + + if (!allow_reshape) { + fprintf(stderr, Name + ": cannot continue reshape of an array" + " in container with unsupported" + " metadata: %s(%s)\n", + devname, buf); + ret_val = 1; + goto Grow_continue_command_exit; + } + + array = strchr(content->text_version+1, '/')+1; + mdstat = mdstat_by_subdev(array, container_dev); + if (!mdstat) + continue; + if (mdstat->active == 0) { + fprintf(stderr, Name ": Skipping inactive " + "array md%i.\n", mdstat->devnum); + free_mdstat(mdstat); + mdstat = NULL; + continue; + } + break; + } + if (!content) { + fprintf(stderr, + Name ": Unable to determine reshaped " + "array for %s\n", devname); + ret_val = 1; + goto Grow_continue_command_exit; } + fd2 = open_dev(mdstat->devnum); + if (fd2 < 0) { + fprintf(stderr, Name ": cannot open (md%i)\n", + mdstat->devnum); + ret_val = 1; + goto Grow_continue_command_exit; + } + + 
sysfs_init(content, fd2, mdstat->devnum); + + / start mdmon in case it is not running + / + if (!mdmon_running(container_dev)) + start_mdmon(container_dev); + ping_monitor(buf); + + if (mdmon_running(container_dev)) + st->update_tail = &st->updates; + else { + fprintf(stderr, Name ": No mdmon found. " + "Grow cannot continue.\n"); + ret_val = 1; + goto Grow_continue_command_exit; + } + } + + / verify that array under reshape is started from + * correct position + / + if (verify_reshape_position(content, + map_name(pers, mdstat->level)) < 0) { + ret_val = 1; + goto Grow_continue_command_exit; } - return reshape_array(container, mdfd, "array", st, info, 1, - NULL, backup_file, 0, 0, 1); + + / continue reshape + / + ret_val = Grow_continue(fd, st, content, backup_file, 0); + +Grow_continue_command_exit: + if (fd2 > -1) + close(fd2); + if (cfd > -1) + close(cfd); + st->ss->free_super(st); + free_mdstat(mdstat); + sysfs_free(cc); + free(subarray); + + return ret_val; +} + +int Grow_continue(int mdfd, struct supertype st, struct mdinfo info, + char backup_file, int freeze_reshape) +{ + int ret_val = 2; + + if (!info->reshape_active) + return ret_val; + + if (st->ss->external) { + char container[40]; + int cfd = open_dev(st->container_dev); + + if (cfd < 0) + return 1; + + fmt_devname(container, st->container_dev); + st->ss->load_container(st, cfd, container); + close(cfd); + ret_val = reshape_container(container, NULL, mdfd, + st, info, 0, backup_file, + 0, 1, freeze_reshape); + } else + ret_val = reshape_array(NULL, mdfd, "array", st, info, 1, + NULL, backup_file, 0, 0, 1, + freeze_reshape); + + return ret_val; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Incremental.c ^
@@ -44,16 +44,12 @@ static int Incremental_container(struct supertype st, char devname, char homehost, - int verbose, int runstop, int autof); - -static struct mddev_ident search_mdstat(struct supertype st, - struct mdinfo info, - char devname, - int verbose, int rvp); + int verbose, int runstop, int autof, + int freeze_reshape); int Incremental(char devname, int verbose, int runstop, struct supertype st, char homehost, int require_homehost, - int autof) + int autof, int freeze_reshape) { / Add this device to an array, creating the array if necessary * and starting the array if sensible or - if runstop>0 - if possible. @@ -138,9 +134,16 @@ rv = st->ss->load_container(st, dfd, NULL); close(dfd); - if (!rv && st->ss->container_content) - return Incremental_container(st, devname, homehost, - verbose, runstop, autof); + if (!rv && st->ss->container_content) { + if (map_lock(&map)) + fprintf(stderr, Name ": failed to get " + "exclusive lock on mapfile\n"); + rv = Incremental_container(st, devname, homehost, + verbose, runstop, autof, + freeze_reshape); + map_unlock(&map); + return rv; + } fprintf(stderr, Name ": %s is not part of an md array.\n", devname); @@ -205,7 +208,7 @@ st->ss->getinfo_super(st, &info, NULL); /* 3/ Check if there is a match in mdadm.conf / - match = search_mdstat(st, &info, devname, verbose, &rv); + match = conf_match(st, &info, devname, verbose, &rv); if (!match && rv == 2) goto out; @@ -256,8 +259,7 @@ name_to_use = info.name; if (name_to_use[0] == 0 && - info.array.level == LEVEL_CONTAINER && - trustworthy == LOCAL) { + info.array.level == LEVEL_CONTAINER) { name_to_use = info.text_version; trustworthy = METADATA; } @@ -290,7 +292,7 @@ name_to_use, autof, trustworthy, chosen_name); if (mdfd < 0) - goto out; + goto out_unlock; sysfs_init(&info, mdfd, 0); @@ -298,7 +300,7 @@ fprintf(stderr, Name ": failed to set array info for %s: %s\n", chosen_name, strerror(errno)); rv = 2; - goto out; + goto out_unlock; } dinfo = info; @@ -309,7 +311,7 @@ 
devname, chosen_name, strerror(errno)); ioctl(mdfd, STOP_ARRAY, 0); rv = 2; - goto out; + goto out_unlock; } sra = sysfs_read(mdfd, -1, (GET_DEVS \| GET_STATE \| GET_OFFSET \| GET_SIZE)); @@ -323,9 +325,8 @@ fprintf(stderr, Name ": You have an old buggy kernel which cannot support\n" " --incremental reliably. Aborting.\n"); - sysfs_free(sra); rv = 2; - goto out; + goto out_unlock; } info.array.working_disks = 1; / 6/ Make sure /var/run/mdadm.map contains this array. / @@ -372,17 +373,23 @@ ": not adding %s to active array (without --run) %s\n", devname, chosen_name); rv = 2; - goto out; + goto out_unlock; } } if (!sra) { rv = 2; - goto out; + goto out_unlock; } if (sra->devs) { sprintf(dn, "%d:%d", sra->devs->disk.major, sra->devs->disk.minor); dfd2 = dev_open(dn, O_RDONLY); + if (dfd2 < 0) { + fprintf(stderr, Name + ": unable to open %s\n", devname); + rv = 2; + goto out_unlock; + } st2 = dup_super(st); if (st2->ss->load_super(st2, dfd2, NULL) \|\| st->ss->compare_super(st, st2) != 0) { @@ -392,7 +399,7 @@ devname, chosen_name); close(dfd2); rv = 2; - goto out; + goto out_unlock; } close(dfd2); st2->ss->getinfo_super(st2, &info2, NULL); @@ -404,28 +411,28 @@ ": unexpected difference between %s and %s.\n", chosen_name, devname); rv = 2; - goto out; + goto out_unlock; } } - info2.disk.major = major(stb.st_rdev); - info2.disk.minor = minor(stb.st_rdev); + info.disk.major = major(stb.st_rdev); + info.disk.minor = minor(stb.st_rdev); / add disk needs to know about containers / if (st->ss->external) sra->array.level = LEVEL_CONTAINER; - err = add_disk(mdfd, st, sra, &info2); + err = add_disk(mdfd, st, sra, &info); if (err < 0 && errno == EBUSY) { / could be another device present with the same * disk.number. 
Find and reject any such / find_reject(mdfd, st, sra, info.disk.number, info.events, verbose, chosen_name); - err = add_disk(mdfd, st, sra, &info2); + err = add_disk(mdfd, st, sra, &info); } if (err < 0) { fprintf(stderr, Name ": failed to add %s to %s: %s.\n", devname, chosen_name, strerror(errno)); rv = 2; - goto out; + goto out_unlock; } info.array.working_disks = 0; for (d = sra->devs; d; d=d->next) @@ -438,19 +445,24 @@ if (info.array.level == LEVEL_CONTAINER) { int devnum = devnum; / defined and used iff ->external / / Try to assemble within the container / - map_unlock(&map); - sysfs_uevent(&info, "change"); + sysfs_uevent(sra, "change"); if (verbose >= 0) fprintf(stderr, Name - ": container %s now has %d devices\n", - chosen_name, info.array.working_disks); + ": container %s now has %d device%s\n", + chosen_name, info.array.working_disks, + info.array.working_disks==1?"":"s"); wait_for(chosen_name, mdfd); if (st->ss->external) devnum = fd2devnum(mdfd); + if (st->ss->load_container) + rv = st->ss->load_container(st, mdfd, NULL); close(mdfd); sysfs_free(sra); - rv = Incremental(chosen_name, verbose, runstop, - NULL, homehost, require_homehost, autof); + if (!rv) + rv = Incremental_container(st, chosen_name, homehost, + verbose, runstop, autof, + freeze_reshape); + map_unlock(&map); if (rv == 1) / Don't fail the whole -I if a subarray didn't * have enough devices to start yet @@ -473,14 +485,13 @@ active_disks = count_active(st, sra, mdfd, &avail, &info); if (enough(info.array.level, info.array.raid_disks, info.array.layout, info.array.state & 1, - avail, active_disks) == 0) { + avail) == 0) { if (verbose >= 0) fprintf(stderr, Name ": %s attached to %s, not enough to start (%d).\n", devname, chosen_name, active_disks); - map_unlock(&map); rv = 0; - goto out; + goto out_unlock; } /* 7b/ if yes, / @@ -494,9 +505,8 @@ fprintf(stderr, Name ": %s attached to %s which is already active.\n", devname, chosen_name); - map_unlock(&map); rv = 0; - goto out; + goto 
out_unlock; } map_unlock(&map); @@ -577,79 +587,9 @@ if (sra) sysfs_free(sra); return rv; -} - -static struct mddev_ident search_mdstat(struct supertype st, - struct mdinfo info, - char devname, - int verbose, int rvp) -{ - struct mddev_ident array_list, match; - array_list = conf_get_ident(NULL); - match = NULL; - for (; array_list; array_list = array_list->next) { - if (array_list->uuid_set && - same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid) - == 0) { - if (verbose >= 2 && array_list->devname) - fprintf(stderr, Name - ": UUID differs from %s.\n", - array_list->devname); - continue; - } - if (array_list->name[0] && - strcasecmp(array_list->name, info->name) != 0) { - if (verbose >= 2 && array_list->devname) - fprintf(stderr, Name - ": Name differs from %s.\n", - array_list->devname); - continue; - } - if (array_list->devices && - !match_oneof(array_list->devices, devname)) { - if (verbose >= 2 && array_list->devname) - fprintf(stderr, Name - ": Not a listed device for %s.\n", - array_list->devname); - continue; - } - if (array_list->super_minor != UnSet && - array_list->super_minor != info->array.md_minor) { - if (verbose >= 2 && array_list->devname) - fprintf(stderr, Name - ": Different super-minor to %s.\n", - array_list->devname); - continue; - } - if (!array_list->uuid_set && - !array_list->name[0] && - !array_list->devices && - array_list->super_minor == UnSet) { - if (verbose >= 2 && array_list->devname) - fprintf(stderr, Name - ": %s doesn't have any identifying information.\n", - array_list->devname); - continue; - } - /* FIXME, should I check raid_disks and level too?? 
/ - - if (match) { - if (verbose >= 0) { - if (match->devname && array_list->devname) - fprintf(stderr, Name - ": we match both %s and %s - cannot decide which to use.\n", - match->devname, array_list->devname); - else - fprintf(stderr, Name - ": multiple lines in mdadm.conf match\n"); - } - rvp = 2; - match = NULL; - break; - } - match = array_list; - } - return match; +out_unlock: + map_unlock(&map); + goto out; } static void find_reject(int mdfd, struct supertype st, struct mdinfo sra, @@ -944,8 +884,10 @@ * to obtain minimum spare size / struct supertype st3 = dup_super(st2); int mdfd = open_dev(mp->devnum); - if (!mdfd) + if (mdfd < 0) { + free(st3); goto next; + } if (st3->ss->load_container && !st3->ss->load_container(st3, mdfd, mp->path)) { component_size = st3->ss->min_acceptable_spare_size(st3); @@ -1035,7 +977,7 @@ close(dfd); dfdp = -1; rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist, - -1, 0, NULL); + -1, 0, NULL, 0); close(mdfd); } if (verbose > 0) { @@ -1048,6 +990,7 @@ } sysfs_free(chosen); } + map_unlock(&map); return rv; } @@ -1184,6 +1127,8 @@ close(fd); } + closedir(dir); + if (!chosen) return 1; @@ -1384,6 +1329,7 @@ strerror(errno)); rv = 1; } + sysfs_free(sra); } } return rv; @@ -1416,7 +1362,7 @@ static int Incremental_container(struct supertype st, char devname, char homehost, int verbose, - int runstop, int autof) + int runstop, int autof, int freeze_reshape) { /* Collect the contents of this container and for each * array, choose a device name and assemble the array. 
@@ -1433,6 +1379,8 @@ struct map_ent smp; int suuid[4]; int sfd; + int ra_blocked = 0; + int ra_all = 0; st->ss->getinfo_super(st, &info, NULL); @@ -1445,7 +1393,7 @@ return 0; } - match = search_mdstat(st, &info, devname, verbose, &rv); + match = conf_match(st, &info, devname, verbose, &rv); if (match == NULL && rv == 2) return rv; @@ -1460,24 +1408,23 @@ trustworthy = FOREIGN; list = st->ss->container_content(st, NULL); - if (map_lock(&map)) - fprintf(stderr, Name ": failed to get exclusive lock on " - "mapfile\n"); - / do not assemble arrays that might have bad blocks / - if (list->array.state & (1<<MD_SB_BBM_ERRORS)) { - fprintf(stderr, Name ": BBM log found in metadata. " - "Cannot activate array(s).\n"); - / free container data and exit / - sysfs_free(list); - return 2; - } - + / when nothing to activate - quit / + if (list == NULL) + return 0; for (ra = list ; ra ; ra = ra->next) { int mdfd; char chosen_name[1024]; struct map_ent mp; struct mddev_ident match = NULL; + ra_all++; + / do not activate arrays blocked by metadata handler / + if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) { + fprintf(stderr, Name ": Cannot activate array %s in %s.\n", + ra->text_version, devname); + ra_blocked++; + continue; + } mp = map_by_uuid(&map, ra->uuid); if (mp) { @@ -1551,10 +1498,16 @@ } assemble_container_content(st, mdfd, ra, runstop, - chosen_name, verbose, NULL); + chosen_name, verbose, NULL, + freeze_reshape); close(mdfd); } + / don't move spares to container with volume being activated + when all volumes are blocked / + if (ra_all == ra_blocked) + return 0; + / Now move all suitable spares from spare container */ domains = domain_from_array(list, st->ss->name); memcpy(suuid, uuid_zero, sizeof(int[4])); @@ -1600,7 +1553,6 @@ close(sfd); } domain_free(domains); - map_unlock(&map); return 0; } @@ -1666,15 +1618,15 @@ if (subfd >= 0) { Manage_subdevs(memb->dev, subfd, &devlist, verbose, 0, - NULL); + NULL, 0); close(subfd); } } free_mdstat(mdstat); } else - 
Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL); + Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0); devlist.disposition = 'r'; - rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL); + rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0); close(mdfd); free_mdstat(ent); return rv;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Kill.c ^
@@ -59,10 +59,9 @@ close(fd); return 2; } + st->ignore_hw_compat = 1; rv = st->ss->load_super(st, fd, dev); - if (force && rv >= 2) - rv = 0; /* ignore bad data in superblock */ - if (rv== 0 \|\| (force && rv >= 2)) { + if (rv == 0 \|\| (force && rv >= 2)) { st->ss->free_super(st); st->ss->init_super(st, NULL, 0, "", NULL, NULL); if (st->ss->store_super(st, fd)) {
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Makefile ^
@@ -66,13 +66,13 @@ CONFFILEFLAGS = -DCONFFILE=\"$(CONFFILE)\" -DCONFFILE2=\"$(CONFFILE2)\" # Both MAP_DIR and MDMON_DIR should be somewhere that persists across the # pivotroot from early boot to late boot. -# /dev is an odd place to put this, but it is the only directory that -# meets the requirements. -MAP_DIR=/dev/.mdadm +# /run is best, but for distros that don't support that, /dev can work. +MAP_DIR=/run/mdadm MAP_FILE = map -MDMON_DIR = /dev/.mdadm +MAP_PATH = $(MAP_DIR)/$(MAP_FILE) +MDMON_DIR = $(MAP_DIR) # place for autoreplace cookies -FAILED_SLOTS_DIR = /dev/.mdadm/failed-slots +FAILED_SLOTS_DIR = /run/mdadm/failed-slots DIRFLAGS = -DMAP_DIR=\"$(MAP_DIR)\" -DMAP_FILE=\"$(MAP_FILE)\" DIRFLAGS += -DMDMON_DIR=\"$(MDMON_DIR)\" DIRFLAGS += -DFAILED_SLOTS_DIR=\"$(FAILED_SLOTS_DIR)\" @@ -137,7 +137,7 @@ endif all : mdadm mdmon -man : mdadm.man md.man mdadm.conf.man mdmon.man +man : mdadm.man md.man mdadm.conf.man mdmon.man raid6check.man everything: all mdadm.static swap_super test_stripe \ mdassemble mdassemble.auto mdassemble.static mdassemble.man \ @@ -204,7 +204,8 @@ $(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) mdadm.8 : mdadm.8.in - sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' mdadm.8.in > mdadm.8 + sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' \ + -e 's,{MAP_PATH},$(MAP_PATH),g' mdadm.8.in > mdadm.8 mdadm.man : mdadm.8 nroff -man mdadm.8 > mdadm.man @@ -221,6 +222,9 @@ mdassemble.man : mdassemble.8 nroff -man mdassemble.8 > mdassemble.man +raid6check.man : raid6check.8 + nroff -man raid6check.8 > raid6check.man + $(OBJS) : $(INCL) mdmon.h $(MON_OBJS) : $(INCL) mdmon.h
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Manage.c ^
@@ -44,6 +44,7 @@ #ifndef MDASSEMBLE struct mdinfo mdi; #endif + int rv = 0; if (md_get_version(fd) < 9000) { fprintf(stderr, Name ": need md driver version 0.90.0 or later\n"); @@ -75,7 +76,8 @@ vers[9] = mdi->text_version[0]; sysfs_set_str(mdi, NULL, "metadata_version", vers); - return 1; + rv = 1; + goto out; } } else { char cp; @@ -84,35 +86,43 @@ sysfs_set_str(mdi, NULL, "metadata_version", vers); cp = strchr(vers+10, '/'); - if (cp) + if (cp) cp = 0; ping_monitor(vers+10); if (mdi->array.level <= 0) sysfs_set_str(mdi, NULL, "array_state", "active"); } - return 0; + goto out; } #endif if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": %s does not appear to be active.\n", devname); - return 1; + rv = 1; + goto out; } if (readonly>0) { if (ioctl(fd, STOP_ARRAY_RO, NULL)) { fprintf(stderr, Name ": failed to set readonly for %s: %s\n", devname, strerror(errno)); - return 1; + rv = 1; + goto out; } } else if (readonly < 0) { if (ioctl(fd, RESTART_ARRAY_RW, NULL)) { fprintf(stderr, Name ": failed to set writable for %s: %s\n", devname, strerror(errno)); - return 1; + rv = 1; + goto out; } } - return 0; +out: +#ifndef MDASSEMBLE + if (mdi) + sysfs_free(mdi); +#endif + return rv; } #ifndef MDASSEMBLE @@ -156,7 +166,7 @@ sprintf(pe, "%d", part); } n = readlink(path2, link, sizeof(link)); - if (n && (int)strlen(base) == n && + if (n > 0 && (int)strlen(base) == n && strncmp(link, base, n) == 0) unlink(path2); } @@ -173,6 +183,7 @@ * quiet < 0 means we will try again if it fails. 
/ mdu_param_t param; / unused / + int rv = 0; if (runstop == -1 && md_get_version(fd) < 9000) { if (ioctl(fd, STOP_MD, 0)) { @@ -251,7 +262,8 @@ fprintf(stderr, Name ": failed to stop array %s: %s\n", devname, strerror(errno)); - return 1; + rv = 1; + goto out; } / Give monitor a chance to act / @@ -263,7 +275,8 @@ ": failed to completely stop %s" ": Device is busy\n", devname); - return 1; + rv = 1; + goto out; } } else if (mdi && mdi->array.major_version == -1 && @@ -291,9 +304,8 @@ "member %s still active\n", devname, m->dev); free_mdstat(mds); - if (mdi) - sysfs_free(mdi); - return 1; + rv = 1; + goto out; } } @@ -318,9 +330,8 @@ "process, mounted filesystem " "or active volume group?\n"); } - if (mdi) - sysfs_free(mdi); - return 1; + rv = 1; + goto out; } / prior to 2.6.28, KOBJ_CHANGE was not sent when an md array * was stopped, so We'll do it here just to be sure. Drop any @@ -345,8 +356,11 @@ map_lock(&map); map_remove(&map, devnum); map_unlock(&map); + out: + if (mdi) + sysfs_free(mdi); } - return 0; + return rv; } int Manage_resize(char devname, int fd, long long size, int raid_disks) @@ -371,7 +385,7 @@ int Manage_subdevs(char devname, int fd, struct mddev_dev devlist, int verbose, int test, - char update) + char update, int force) { / do something to each dev. * devmode can be @@ -402,12 +416,15 @@ int lfd = -1; int sysfd = -1; int count = 0; /* number of actions taken / + struct mdinfo info; + int frozen = 0; if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": cannot get array info for %s\n", devname); - return 1; + goto abort; } + sysfs_init(&info, fd, 0); / array.size is only 32 bit and may be truncated. 
* So read from sysfs if possible, and record number of sectors @@ -421,7 +438,7 @@ if (!tst) { fprintf(stderr, Name ": unsupport array - version %d.%d\n", array.major_version, array.minor_version); - return 1; + goto abort; } stb.st_rdev = 0; @@ -431,7 +448,7 @@ char dnprintable = dv->devname; char add_dev = dv->devname; int err; - int re_add_failed = 0; + int array_failed; next = dv->next; jnext = 0; @@ -443,9 +460,9 @@ fprintf(stderr, Name ": %s only meaningful " "with -r, not -%c\n", dv->devname, dv->disposition); - return 1; + goto abort; } - for (; j < 1024 && remaining_disks > 0; j++) { + for (; j < MAX_DISKS && remaining_disks > 0; j++) { unsigned dev; disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) @@ -476,9 +493,9 @@ fprintf(stderr, Name ": %s only meaningful " "with -r of -f, not -%c\n", dv->devname, dv->disposition); - return 1; + goto abort; } - for (; j < 1024 && remaining_disks > 0; j++) { + for (; j < MAX_DISKS && remaining_disks > 0; j++) { int sfd; unsigned dev; disc.number = j; @@ -516,7 +533,7 @@ if (dv->disposition != 'a' \|\| dv->re_add == 0) { fprintf(stderr, Name ": 'missing' only meaningful " "with --re-add\n"); - return 1; + goto abort; } if (add_devlist == NULL) add_devlist = conf_get_devs(); @@ -540,7 +557,7 @@ fprintf(stderr, Name ": %s only meaningful " "with -r or -f, not -%c\n", dv->devname, dv->disposition); - return 1; + goto abort; } sprintf(dname, "dev-%s", dv->devname); @@ -562,7 +579,7 @@ fprintf(stderr, Name ": %s does not appear " "to be a component of %s\n", dv->devname, devname); - return 1; + goto abort; } } } else { @@ -581,7 +598,7 @@ dv->devname, strerror(errno)); if (tfd >= 0) close(tfd); - return 1; + goto abort; } close(tfd); tfd = -1; @@ -590,21 +607,21 @@ fprintf(stderr, Name ": %s is not a " "block device.\n", dv->devname); - return 1; + goto abort; } } switch(dv->disposition){ default: fprintf(stderr, Name ": internal error - devmode[%s]=%d\n", dv->devname, dv->disposition); - return 1; + goto abort; case 
'a': /* add the device / if (subarray) { fprintf(stderr, Name ": Cannot add disks to a" " \'member\' array, perform this" " operation on the parent container\n"); - return 1; + goto abort; } / Make sure it isn't in use (in 2.6 or later) / tfd = dev_open(add_dev, O_RDONLY\|O_EXCL\|O_DIRECT); @@ -613,7 +630,13 @@ if (tfd < 0) { fprintf(stderr, Name ": Cannot open %s: %s\n", dv->devname, strerror(errno)); - return 1; + goto abort; + } + if (!frozen) { + if (sysfs_freeze_array(&info) == 1) + frozen = 1; + else + frozen = -1; } st = dup_super(tst); @@ -623,19 +646,44 @@ if (add_dev == dv->devname) { if (!get_dev_size(tfd, dv->devname, &ldsize)) { + st->ss->free_super(st); close(tfd); - return 1; + goto abort; } } else if (!get_dev_size(tfd, NULL, &ldsize)) { + st->ss->free_super(st); close(tfd); tfd = -1; continue; } + if (tst->ss->validate_geometry( + tst, array.level, array.layout, + array.raid_disks, NULL, + ldsize >> 9, NULL, NULL, 0) == 0) { + if (!force) { + fprintf(stderr, Name + ": %s is larger than %s can " + "effectively use.\n" + " Add --force is you " + "really want to add this device.\n", + add_dev, devname); + st->ss->free_super(st); + close(tfd); + goto abort; + } + fprintf(stderr, Name + ": %s is larger than %s can " + "effectively use.\n" + " Adding anyway as --force " + "was given.\n", + add_dev, devname); + } if (!tst->ss->external && array.major_version == 0 && md_get_version(fd)%100 < 2) { close(tfd); + st->ss->free_super(st); tfd = -1; if (ioctl(fd, HOT_ADD_DISK, (unsigned long)stb.st_rdev)==0) { @@ -647,7 +695,7 @@ fprintf(stderr, Name ": hot add failed for %s: %s\n", add_dev, strerror(errno)); - return 1; + goto abort; } if (array.not_persistent == 0 \|\| tst->ss->external) { @@ -684,10 +732,17 @@ break; } / FIXME this is a bad test to be using / - if (!tst->sb) { + if (!tst->sb && + dv->re_add) { + / we are re-adding a device to a + * completely dead array - have to depend + * on kernel to check + / + } else if (!tst->sb) { close(tfd); + 
st->ss->free_super(st); fprintf(stderr, Name ": cannot load array metadata from %s\n", devname); - return 1; + goto abort; } / Make sure device is large enough / @@ -695,23 +750,28 @@ array_size) { close(tfd); tfd = -1; + st->ss->free_super(st); if (add_dev != dv->devname) continue; fprintf(stderr, Name ": %s not large enough to join array\n", dv->devname); - return 1; + goto abort; } / Possibly this device was recently part of the array * and was temporarily removed, and is now being re-added. * If so, we can simply re-add it. / - tst->ss->uuid_from_super(tst, duuid); if (st->sb) { struct mdinfo mdi; st->ss->getinfo_super(st, &mdi, NULL); st->ss->uuid_from_super(st, ouuid); + if (tst->sb) + tst->ss->uuid_from_super(tst, duuid); + else + / Assume uuid matches: kernel will check / + memcpy(duuid, ouuid, sizeof(ouuid)); if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) && !(mdi.disk.state & (1<<MD_DISK_FAULTY)) && memcmp(duuid, ouuid, sizeof(ouuid))==0) { @@ -727,7 +787,7 @@ disc.number = mdi.disk.number; if (ioctl(fd, GET_DISK_INFO, &disc) != 0 \|\| disc.major != 0 \|\| disc.minor != 0 - \|\| !enough_fd(fd)) + ) goto skip_re_add; disc.major = major(stb.st_rdev); disc.minor = minor(stb.st_rdev); @@ -741,11 +801,25 @@ remove_partitions(tfd); close(tfd); tfd = -1; - if (update) { + if (update \|\| dv->writemostly > 0) { int rv = -1; tfd = dev_open(dv->devname, O_RDWR); + if (tfd < 0) { + fprintf(stderr, Name ": failed to open %s for" + " superblock update during re-add\n", dv->devname); + st->ss->free_super(st); + goto abort; + } - if (tfd >= 0) + if (dv->writemostly == 1) + rv = st->ss->update_super( + st, NULL, "writemostly", + devname, verbose, 0, NULL); + if (dv->writemostly == 2) + rv = st->ss->update_super( + st, NULL, "readwrite", + devname, verbose, 0, NULL); + if (update) rv = st->ss->update_super( st, NULL, update, devname, verbose, 0, NULL); @@ -756,7 +830,8 @@ if (rv != 0) { fprintf(stderr, Name ": failed to update" " superblock during re-add\n"); - return 1; + 
st->ss->free_super(st); + goto abort; } } / don't even try if disk is marked as faulty / @@ -765,18 +840,19 @@ if (verbose >= 0) fprintf(stderr, Name ": re-added %s\n", add_dev); count++; + st->ss->free_super(st); continue; } if (errno == ENOMEM \|\| errno == EROFS) { fprintf(stderr, Name ": add new device failed for %s: %s\n", add_dev, strerror(errno)); + st->ss->free_super(st); if (add_dev != dv->devname) continue; - return 1; + goto abort; } - skip_re_add: - re_add_failed = 1; } + skip_re_add: st->ss->free_super(st); } if (add_dev != dv->devname) { @@ -796,18 +872,36 @@ fprintf(stderr, Name ": --re-add for %s to %s is not possible\n", dv->devname, devname); - return 1; + goto abort; } - if (re_add_failed) { - fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n", - dv->devname, devname); - fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n", - dv->devname); - fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n", + if (array.active_disks < array.raid_disks) { + char avail = calloc(array.raid_disks, 1); + int d; + int found = 0; + + for (d = 0; d < MAX_DISKS && found < array.active_disks; d++) { + disc.number = d; + if (ioctl(fd, GET_DISK_INFO, &disc)) + continue; + if (disc.major == 0 && disc.minor == 0) + continue; + if (!(disc.state & (1<<MD_DISK_SYNC))) + continue; + avail[disc.raid_disk] = 1; + found++; + } + array_failed = !enough(array.level, array.raid_disks, + array.layout, 1, avail); + } else + array_failed = 0; + if (array_failed) { + fprintf(stderr, Name ": %s has failed so using --add cannot work and might destroy\n", + devname); + fprintf(stderr, Name ": data on %s. You should stop the array and re-assemble it.\n", dv->devname); if (tfd >= 0) close(tfd); - return 1; + goto abort; } } else { /* non-persistent. 
Must ensure that new drive @@ -818,7 +912,7 @@ dv->devname); if (tfd >= 0) close(tfd); - return 1; + goto abort; } } /* committed to really trying this device now/ @@ -853,11 +947,11 @@ if (tst->ss->add_to_super(tst, &disc, dfd, dv->devname)) { close(dfd); - return 1; + goto abort; } if (tst->ss->write_init_super(tst)) { close(dfd); - return 1; + goto abort; } } else if (dv->re_add) { / this had better be raid1. @@ -906,7 +1000,7 @@ " could not get exclusive access to container\n", dv->devname); tst->ss->free_super(tst); - return 1; + goto abort; } dfd = dev_open(dv->devname, O_RDWR \| O_EXCL\|O_DIRECT); @@ -916,7 +1010,7 @@ dv->devname)) { close(dfd); close(container_fd); - return 1; + goto abort; } if (tst->update_tail) flush_metadata_updates(tst); @@ -929,7 +1023,7 @@ dv->devname); close(container_fd); tst->ss->free_super(tst); - return 1; + goto abort; } sra->array.level = LEVEL_CONTAINER; /* Need to set data_offset and component_size / @@ -945,7 +1039,7 @@ " failed for %s\n", dv->devname); close(container_fd); sysfs_free(sra); - return 1; + goto abort; } ping_monitor_by_id(devnum); sysfs_free(sra); @@ -955,7 +1049,7 @@ if (ioctl(fd, ADD_NEW_DISK, &disc)) { fprintf(stderr, Name ": add new device failed for %s as %d: %s\n", dv->devname, j, strerror(errno)); - return 1; + goto abort; } } if (verbose >= 0) @@ -970,7 +1064,7 @@ " operation on the parent container\n"); if (sysfd >= 0) close(sysfd); - return 1; + goto abort; } if (tst->ss->external) { / To remove a device from a container, we must @@ -990,7 +1084,7 @@ " to container - odd\n"); if (sysfd >= 0) close(sysfd); - return 1; + goto abort; } /* in the detached case it is not possible to * check if we are the unique holder, so just @@ -1007,7 +1101,7 @@ errno == EEXIST ? 
"still in use": "not a member"); close(lfd); - return 1; + goto abort; } } /* FIXME check that it is a current member / @@ -1050,7 +1144,7 @@ strerror(errno)); if (lfd >= 0) close(lfd); - return 1; + goto abort; } if (tst->ss->external) { / @@ -1063,7 +1157,7 @@ if (!name) { fprintf(stderr, Name ": unable to get container name\n"); - return 1; + goto abort; } ping_manager(name); @@ -1086,7 +1180,7 @@ dnprintable, strerror(errno)); if (sysfd >= 0) close(sysfd); - return 1; + goto abort; } if (sysfd >= 0) close(sysfd); @@ -1098,9 +1192,16 @@ break; } } + if (frozen > 0) + sysfs_set_str(&info, NULL, "sync_action","idle"); if (test && count == 0) return 2; return 0; + +abort: + if (frozen > 0) + sysfs_set_str(&info, NULL, "sync_action","idle"); + return 1; } int autodetect(void) @@ -1188,9 +1289,9 @@ sprintf(devname, "%d:%d", major(devid), minor(devid)); devlist.disposition = 'r'; - if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL) == 0) { + if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) { devlist.disposition = 'a'; - if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL) == 0) { + if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) { /* make sure manager is aware of changes */ ping_manager(to_devname); ping_manager(from_devname); @@ -1198,7 +1299,7 @@ close(fd2); return 1; } - else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL); + else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0); } close(fd1); close(fd2);
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/Monitor.c ^
@@ -69,7 +69,7 @@ static void alert(char event, char dev, char disc, struct alert_info info); static int check_array(struct state st, struct mdstat_ent mdstat, int test, struct alert_info info, - int increments); + int increments, char prefer); static int add_new_arrays(struct mdstat_ent mdstat, struct state statelist, int test, struct alert_info info); static void try_spare_migration(struct state statelist, struct alert_info info); @@ -79,7 +79,7 @@ char mailaddr, char alert_cmd, int period, int daemonise, int scan, int oneshot, int dosyslog, int test, char pidfile, int increments, - int share) + int share, char prefer) { /* * Every few seconds, scan every md device looking for changes @@ -124,6 +124,7 @@ / struct state statelist = NULL; + struct state st2; int finished = 0; struct mdstat_ent mdstat = NULL; char mailfrom = NULL; @@ -220,7 +221,8 @@ mdstat = mdstat_read(oneshot?0:1, 0); for (st=statelist; st; st=st->next) - if (check_array(st, mdstat, test, &info, increments)) + if (check_array(st, mdstat, test, &info, + increments, prefer)) anydegraded = 1; / now check if there are any new devices found in mdstat / @@ -242,6 +244,11 @@ } test = 0; } + for (st2 = statelist; st2; st2 = statelist) { + statelist = st2->next; + free(st2); + } + if (pidfile) unlink(pidfile); return 0; @@ -288,8 +295,10 @@ int pid, rv; FILE fp; char dir[20]; + char path[100]; struct stat buf; - fp = fopen("/var/run/mdadm/autorebuild.pid", "r"); + sprintf(path, "%s/autorebuild.pid", MDMON_DIR); + fp = fopen(path, "r"); if (fp) { if (fscanf(fp, "%d", &pid) != 1) pid = -1; @@ -311,12 +320,12 @@ fclose(fp); } if (scan) { - if (mkdir("/var/run/mdadm", S_IRWXU) < 0 && + if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { fprintf(stderr, Name ": Can't create " "autorebuild.pid file\n"); } else { - fp = fopen("/var/run/mdadm/autorebuild.pid", "w"); + fp = fopen(path, "w"); if (!fp) fprintf(stderr, Name ": Cannot create" " autorebuild.pid" @@ -437,7 +446,7 @@ static int check_array(struct 
state st, struct mdstat_ent mdstat, int test, struct alert_info ainfo, - int increments) + int increments, char prefer) { /* Update the state 'st' to reflect any changes shown in mdstat, * or found by directly examining the array, and return @@ -557,8 +566,10 @@ struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_MISMATCH); if (sra && sra->mismatch_cnt > 0) { - char cnt[40]; - sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt); + char cnt[80]; + snprintf(cnt, sizeof(cnt), + " mismatches found: %d (on raid level %d)", + sra->mismatch_cnt, array.level); alert("RebuildFinished", dev, cnt, ainfo); } else alert("RebuildFinished", dev, NULL, ainfo); @@ -607,7 +618,9 @@ disc.major = disc.minor = 0; } else if (info[i].major \|\| info[i].minor) { newstate = info[i].state; - dv = map_dev(info[i].major, info[i].minor, 1); + dv = map_dev_preferred( + info[i].major, info[i].minor, 1, + prefer); disc.state = newstate; disc.major = info[i].major; disc.minor = info[i].minor; @@ -619,8 +632,9 @@ disc.major = disc.minor = 0; } if (dv == NULL && st->devid[i]) - dv = map_dev(major(st->devid[i]), - minor(st->devid[i]), 1); + dv = map_dev_preferred( + major(st->devid[i]), + minor(st->devid[i]), 1, prefer); change = newstate ^ st->devstate[i]; if (st->utime && change && !st->err) { if (i < array.raid_disks &&
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/ReadMe.c ^
@@ -24,7 +24,7 @@ #include "mdadm.h" -char Version[] = Name " - v3.2.2 - 17th June 2011\n"; +char Version[] = Name " - v3.2.4 - 9th May 2012\n"; /* * File: ReadMe.c @@ -111,6 +111,7 @@ {"kill-subarray", 1, 0, KillSubarray}, {"update-subarray", 1, 0, UpdateSubarray}, {"udev-rules", 2, 0, UdevRules}, + {"offroot", 0, 0, OffRootOpt}, /* synonyms / {"monitor", 0, 0, 'F'}, @@ -153,6 +154,7 @@ {"scan", 0, 0, 's'}, {"force", 0, 0, Force}, {"update", 1, 0, 'U'}, + {"freeze-reshape", 0, 0, FreezeReshape}, / Management / {"add", 0, 0, Add}, @@ -172,6 +174,7 @@ {"export", 0, 0, 'Y'}, {"sparc2.2", 0, 0, Sparc22}, {"test", 0, 0, 't'}, + {"prefer", 1, 0, Prefer}, / For Follow/monitor / {"mail", 1, 0, EMail}, @@ -190,6 +193,7 @@ {"backup-file", 1,0, BackupFile}, {"invalid-backup",0,0,InvalidBackup}, {"array-size", 1, 0, 'Z'}, + {"continue", 0, 0, Continue}, / For Incremental / {"rebuild-map", 0, 0, RebuildMapOpt}, @@ -266,6 +270,10 @@ " --query -Q : Display general information about how a\n" " device relates to the md driver\n" " --auto-detect : Start arrays auto-detected by the kernel\n" +" --offroot : Set first character of argv[0] to @ to indicate the\n" +" application was launched from initrd/initramfs and\n" +" should not be shutdown by systemd as part of the\n" +" regular shutdown process.\n" ; / "\n"
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/bitmap.c ^
@@ -132,7 +132,7 @@ void buf; unsigned int n, skip; - if (posix_memalign(&buf, 512, 8192) != 0) { + if (posix_memalign(&buf, 4096, 8192) != 0) { fprintf(stderr, Name ": failed to allocate 8192 bytes\n"); return NULL; } @@ -147,6 +147,7 @@ fprintf(stderr, Name ": failed to allocate %zd bytes\n", sizeof(info)); #endif + free(buf); return NULL; } @@ -154,6 +155,7 @@ fprintf(stderr, Name ": failed to read superblock of bitmap " "file: %s\n", strerror(errno)); free(info); + free(buf); return NULL; } memcpy(&info->sb, buf, sizeof(info->sb)); @@ -198,6 +200,7 @@ total_bits = read_bits; } out: + free(buf); info->total_bits = total_bits; info->dirty_bits = dirty_bits; return info; @@ -331,7 +334,7 @@ goto free_info; printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", info->total_bits, info->dirty_bits, - 100.0 * info->dirty_bits / (info->total_bits + 1)); + 100.0 * info->dirty_bits / (info->total_bits?:1)); free_info: free(info); return rv;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/config.c ^
@@ -641,7 +641,7 @@ * We translate that to policy by creating 'auto=yes' when we see * a '+version' line, 'auto=no' if we see '-version' before 'homehost', * or 'auto=homehost' if we see '-version' after 'homehost'. - * When we see yes, no, +all or -all we stop an any version that hasn't + * When we see yes, no, +all or -all we stop and any version that hasn't * been seen gets an appropriate auto= entry. / @@ -707,6 +707,8 @@ for (i = 0; i < super_cnt; i++) if (!seen[i]) policy_add(rule_policy, pol_auto, dflt, pol_metadata, superlist[i]->name, NULL); + + free(seen); } int loaded = 0; @@ -919,19 +921,19 @@ else 'yes'. / struct dev_policy p; - int no=0, found_auto=0; + int no=0, found_homehost=0; load_conffile(); pol = pol_find(pol, pol_auto); pol_for_each(p, pol, version) { if (strcmp(p->value, "yes") == 0) return 1; - if (strcmp(p->value, "auto") == 0) - found_auto = 1; + if (strcmp(p->value, "homehost") == 0) + found_homehost = 1; if (strcmp(p->value, "no") == 0) no = 1; } - if (is_homehost && found_auto) + if (is_homehost && found_homehost) return 1; if (no) return 0; @@ -1017,11 +1019,12 @@ return 1; } -struct mddev_ident conf_match(struct mdinfo info, struct supertype st) +struct mddev_ident conf_match(struct supertype st, + struct mdinfo info, + char devname, + int verbose, int rvp) { struct mddev_ident array_list, match; - int verbose = 0; - char devname = NULL; array_list = conf_get_ident(NULL); match = NULL; for (; array_list; array_list = array_list->next) { @@ -1064,7 +1067,8 @@ array_list->super_minor == UnSet) { if (verbose >= 2 && array_list->devname) fprintf(stderr, Name - ": %s doesn't have any identifying information.\n", + ": %s doesn't have any identifying" + " information.\n", array_list->devname); continue; } @@ -1074,15 +1078,54 @@ if (verbose >= 0) { if (match->devname && array_list->devname) fprintf(stderr, Name - ": we match both %s and %s - cannot decide which to use.\n", - match->devname, array_list->devname); + ": we match both %s and %s 
- " + "cannot decide which to use.\n", + match->devname, + array_list->devname); else fprintf(stderr, Name - ": multiple lines in mdadm.conf match\n"); + ": multiple lines in mdadm.conf" + " match\n"); } - return NULL; + if (rvp) + rvp = 2; + match = NULL; + break; } match = array_list; } return match; } + +int conf_verify_devnames(struct mddev_ident array_list) +{ + struct mddev_ident a1, *a2; + + for (a1 = array_list; a1; a1 = a1->next) { + if (!a1->devname) + continue; + for (a2 = a1->next; a2; a2 = a2->next) { + if (!a2->devname) + continue; + if (strcmp(a1->devname, a2->devname) != 0) + continue; + + if (a1->uuid_set && a2->uuid_set) { + char nbuf[64]; + __fname_from_uuid(a1->uuid, 0, nbuf, ':'); + fprintf(stderr, + Name ": Devices %s and ", + nbuf); + __fname_from_uuid(a2->uuid, 0, nbuf, ':'); + fprintf(stderr, + "%s have the same name: %s\n", + nbuf, a1->devname); + } else + fprintf(stderr, Name ": Device %s given twice" + " in config file\n", a1->devname); + return 1; + } + } + + return 0; +}
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/inventory ^
@@ -1,4 +1,5 @@ +.gitignore ANNOUNCE-3.0 ANNOUNCE-3.0.1 ANNOUNCE-3.0.2 @@ -12,62 +13,67 @@ ANNOUNCE-3.2 ANNOUNCE-3.2.1 ANNOUNCE-3.2.2 +ANNOUNCE-3.2.3 +ANNOUNCE-3.2.4 Assemble.c -bitmap.c -bitmap.h Build.c +COPYING ChangeLog +Create.c +Detail.c +Examine.c +Grow.c +INSTALL +Incremental.c +Kill.c +Makefile +Manage.c +Monitor.c +Query.c +README.initramfs +ReadMe.c +TODO +bitmap.c +bitmap.h config.c -COPYING crc32.c crc32.h -Create.c -Detail.c dlink.c dlink.h -Examine.c external-reshape-design.txt -.gitignore -Grow.c -Incremental.c -INSTALL inventory kernel-patch-2.6.18 kernel-patch-2.6.18.6 kernel-patch-2.6.19 kernel-patch-2.6.25 kernel-patch-2.6.27 -Kill.c lib.c makedist -Makefile -Manage.c managemon.c mapfile.c maps.c md.4 md5.h +md_p.h +md_u.h mdadm.8.in mdadm.c -mdadm.conf.5 mdadm.conf-example +mdadm.conf.5 mdadm.h mdadm.spec mdassemble.8 mdassemble.c +mdmon-design.txt mdmon.8 mdmon.c -mdmon-design.txt mdmon.h mdopen.c -md_p.h mdstat.c -md_u.h misc/ misc/syslog-events mkinitramfs monitor.c -Monitor.c msg.c msg.h part.h @@ -77,21 +83,19 @@ probe_roms.c probe_roms.h pwgr.c -Query.c raid5extend.c +raid6check.8 raid6check.c -ReadMe.c -README.initramfs restripe.c sg_io.c sha1.c sha1.h -super0.c -super1.c super-ddf.c super-gpt.c super-intel.c super-mbr.c +super0.c +super1.c swap_super.c sysfs.c test @@ -162,8 +166,8 @@ tests/13imsm-r0_r5_3d-grow-r0_r5_5d tests/13imsm-r5_r0_3d-grow-r5_r0_4d tests/13imsm-r5_r0_3d-grow-r5_r0_5d -tests/14imsm-r0_3d_no_spares-migrate-r5_3d tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d +tests/14imsm-r0_3d_no_spares-migrate-r5_3d tests/14imsm-r0_r0_2d-takeover-r10_4d tests/14imsm-r10_4d-grow-r10_5d tests/14imsm-r10_r5_4d-takeover-r0_2d @@ -185,12 +189,11 @@ tests/18imsm-r0_2d-takeover-r10_4d tests/18imsm-r10_4d-takeover-r0_2d tests/18imsm-r1_2d-takeover-r0_1d +tests/ToTest tests/check tests/env-imsm-template tests/imsm-grow-template tests/testdev -tests/ToTest tests/utils -TODO udev-md-raid.rules util.c
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/lib.c ^
@@ -188,8 +188,11 @@ * If we find multiple names, choose the shortest. * If we find a name in /dev/md/, we prefer that. * This applies only to names for MD devices. + * If 'prefer' is set (normally to e.g. /by-path/) + * then we prefer a name which contains that string. / -char map_dev(int major, int minor, int create) +char map_dev_preferred(int major, int minor, int create, + char prefer) { struct devmap p; char regular = NULL, preferred=NULL; @@ -219,7 +222,8 @@ for (p=devlist; p; p=p->next) if (p->major == major && p->minor == minor) { - if (strncmp(p->name, "/dev/md/",8) == 0) { + if (strncmp(p->name, "/dev/md/",8) == 0 + \|\| (prefer && strstr(p->name, prefer))) { if (preferred == NULL \|\| strlen(p->name) < strlen(preferred)) preferred = p->name; @@ -243,6 +247,7 @@ } + / conf_word gets one word from the conf file. * if "allow_key", then accept words at the start of a line, * otherwise stop when such a word is found.
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/makedist ^
@@ -1,4 +1,6 @@ #!/bin/sh +# avoid silly sorting +export LANG=C arg=$1 target=~/public_html/source/mdadm if [ " $arg" = " test" ] @@ -42,7 +44,7 @@ fi echo version = $version -base=mdadm-$version.tgz +base=mdadm-$version.tar.gz if [ " $arg" != " diff" ] then if [ -f $target/$base ] @@ -51,7 +53,7 @@ exit 1 fi trap "rm $target/$base; exit" 1 2 3 - ( cd .. ; ln -s mdadm.v2 mdadm-$version ; tar chvf - --exclude=.git --exclude="TAGS" --exclude='*~' --exclude=.patches --exclude='*,v' --exclude='*.o' --exclude mdadm --exclude=mdadm'.[^ch0-9]' --exclude=RCS mdadm-$version ; rm mdadm-$version ) | gzip --best > $target/$base + git archive --prefix=mdadm-$version/ HEAD | gzip --best > $target/$base chmod a+r $target/$base ls -l $target/$base if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory - @@ -67,8 +69,18 @@ cp ChangeLog $target/ChangeLog if [ " $arg" != " test" ] then - scp $target/$base master.kernel.org:/pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz - scp $target/ANNOUNCE $target/ChangeLog master.kernel.org:/pub/linux/utils/raid/mdadm/ + echo -n "Confirm signing this release? " + read a + if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi + if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE + then + kup put $target/$base $target/$base.sign \ + /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz + kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE + else + echo signing failed + exit 1 + fi fi else if [ ! -f $target/$base ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/managemon.c ^
@@ -117,11 +117,16 @@ close(d->state_fd); } - close(aa->action_fd); - close(aa->info.state_fd); - close(aa->resync_start_fd); - close(aa->metadata_fd); - close(aa->sync_completed_fd); + if (aa->action_fd >= 0) + close(aa->action_fd); + if (aa->info.state_fd >= 0) + close(aa->info.state_fd); + if (aa->resync_start_fd >= 0) + close(aa->resync_start_fd); + if (aa->metadata_fd >= 0) + close(aa->metadata_fd); + if (aa->sync_completed_fd >= 0) + close(aa->sync_completed_fd); } static void free_aa(struct active_array aa) @@ -409,7 +414,13 @@ disk = clone; disk->recovery_fd = sysfs_open(aa->devnum, disk->sys_name, "recovery_start"); + if (disk->recovery_fd < 0) + return -1; disk->state_fd = sysfs_open(aa->devnum, disk->sys_name, "state"); + if (disk->state_fd < 0) { + close(disk->recovery_fd); + return -1; + } disk->prev_state = read_dev_state(disk->state_fd); disk->curr_state = disk->prev_state; disk->next = aa->info.devs; @@ -461,7 +472,7 @@ if (mdstat->level) { int level = map_name(pers, mdstat->level); if (level == 0 \|\| level == LEVEL_LINEAR) { - a->container = NULL; + a->to_remove = 1; wakeup_monitor(); return; } @@ -475,6 +486,12 @@ } } + / we are after monitor kick, + * so container field can be cleared - check it again + / + if (a->container == NULL) + return; + / We don't check the array while any update is pending, as it * might container a change (such as a spare assignment) which * could affect our decisions. @@ -498,7 +515,10 @@ newa = duplicate_aa(a); if (!newa) goto out; - /* Cool, we can add a device or several. / + / prevent the kernel from activating the disk(s) before we + * finish adding them + / + sysfs_set_str(&a->info, NULL, "sync_action", "frozen"); / Add device to array and set offset/size/slot. 
* and open files for each newdev / @@ -736,7 +756,7 @@ / Looks like a member of this container */ for (a = container->arrays; a; a = a->next) { if (mdstat->devnum == a->devnum) { - if (a->container) + if (a->container && a->to_remove == 0) manage_member(mdstat, a); break; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mapfile.c ^
@@ -38,27 +38,19 @@ * UUID - uuid of the array * path - path where device created: /dev/md/home * - * The best place for the mapfile wold be /var/run/mdadm/map. However - * it is needed during initramfs early-boot, and /var/run doesn't exist there - * and certainly doesn't persist through to normal boot. - * So we store it in /dev/.mdadm/map but allow this to be changed at - * compile time. via MAP_DIR and MAP_FILE - * + * The best place for the mapfile is /run/mdadm/map. Distros and users + * which have not switched to /run yet can choose a different location + * at compile time via MAP_DIR and MAP_FILE. / #include "mdadm.h" #include <sys/file.h> #include <ctype.h> -#ifndef MAP_DIR -#define MAP_DIR "/dev/.mdadm" -#define MAP_FILE "map" -#endif #define MAP_READ 0 #define MAP_NEW 1 #define MAP_LOCK 2 #define MAP_DIRNAME 3 -#define mapnames(dir, base) { \ char mapname[4] = { MAP_DIR "/" MAP_FILE, @@ -159,6 +151,18 @@ lf = NULL; } +void map_fork(void) +{ + /* We are forking, so must close the lock file. + * Don't risk flushing anything though. + / + if (lf) { + close(fileno(lf)); + fclose(lf); + lf = NULL; + } +} + void map_add(struct map_ent melp, int devnum, char metadata, int uuid[4], char path) { @@ -234,6 +238,7 @@ memcpy(mp->uuid, uuid, 16); free(mp->path); mp->path = path ? strdup(path) : NULL; + mp->bad = 0; break; } if (!mp) @@ -419,7 +424,9 @@ find a unique name based on metadata name. * / - struct mddev_ident match = conf_match(info, st); + struct mddev_ident *match = conf_match(st, info, + NULL, 0, + NULL); struct stat stb; if (match && match->devname && match->devname[0] == '/') { path = match->devname;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/md.4 ^
@@ -120,7 +120,7 @@ RAID1 In some configurations it might be desired to create a raid1 configuration that does not use a superblock, and to maintain the state of -the array elsewhere. While not encouraged for general us, it does +the array elsewhere. While not encouraged for general use, it does have special-purpose uses and is supported. .SS ARRAYS WITH EXTERNAL METADATA @@ -128,7 +128,7 @@ From release 2.6.28, the .I md driver supports arrays with externally managed metadata. That is, -the metadata is not managed by the kernel by rather by a user-space +the metadata is not managed by the kernel but rather by a user-space program which is external to the kernel. This allows support for a variety of metadata formats without cluttering the kernel with lots of details. @@ -136,7 +136,7 @@ .I md is able to communicate with the user-space program through various sysfs attributes so that it can make appropriate changes to the -metadata \- for example to make a device as faulty. When necessary, +metadata \- for example to mark a device as faulty. When necessary, .I md will wait for the program to acknowledge the event by writing to a sysfs attribute. @@ -215,7 +215,7 @@ threads to read from all disks. Individual devices in a RAID1 can be marked as "write-mostly". -This drives are excluded from the normal read balancing and will only +These drives are excluded from the normal read balancing and will only be read from when there is no other option. This can be useful for devices connected over a slow link. @@ -468,7 +468,7 @@ found that is a mismatch. .I md normally works in units much larger than a single sector and when it -finds a mismatch, it does not determin exactly how many actual sectors were +finds a mismatch, it does not determine exactly how many actual sectors were affected but simply adds the number of sectors in the IO unit that was used. So a value of 128 could simply mean that a single 64KB check found an error (128 x 512bytes = 64KB). 
@@ -661,7 +661,7 @@ This is the partner of .B md/sync_speed_min and overrides -.B /proc/sys/dev/raid/spool_limit_max +.B /proc/sys/dev/raid/speed_limit_max described below. .TP
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/md_p.h ^
@@ -101,7 +101,9 @@ #define MD_SB_CLEAN 0 #define MD_SB_ERRORS 1 #define MD_SB_BBM_ERRORS 2 - +#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */ +#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays + * in container can be activated */ #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ typedef struct mdp_superblock_s {
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.8.in ^
@@ -5,7 +5,7 @@ .\" the Free Software Foundation; either version 2 of the License, or .\" (at your option) any later version. .\" See file COPYING in distribution for details. -.TH MDADM 8 "" v3.2.2 +.TH MDADM 8 "" v3.2.4 .SH NAME mdadm \- manage MD devices .I aka @@ -255,6 +255,18 @@ will be silent unless there is something really important to report. .TP +.BR \-\-offroot +Set first character of argv[0] to @ to indicate mdadm was launched +from initrd/initramfs and should not be shutdown by systemd as part of +the regular shutdown process. This option is normally only used by +the system's initscripts. Please see here for more details on how +systemd handled argv[0]: +.IP +.B http://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons +.PP + + +.TP .BR \-f ", " \-\-force Be more forceful about certain operations. See the various modes for the exact meaning of this option in different contexts. @@ -336,7 +348,8 @@ recovery operation can be checkpointed and restarted. The different sub-versions store the superblock at different locations on the device, either at the end (for 1.0), at the start (for 1.1) or 4K from -the start (for 1.2). "1" is equivalent to "1.0". +the start (for 1.2). "1" is equivalent to "1.2" (the commonly +preferred 1.x format). 'if '{DEFAULT_METADATA}'1.2' "default" is equivalent to "1.2". .IP ddf Use the "Industry Standard" DDF (Disk Data Format) format defined by @@ -376,6 +389,28 @@ by a digit string). See below under .BR "Auto Assembly" . +.TP +.B \-\-prefer= +When +.I mdadm +needs to print the name for a device it normally finds the name in +.B /dev +which refers to the device and is shortest. When a path component is +given with +.B \-\-prefer +.I mdadm +will prefer a longer name if it contains that component. For example +.B \-\-prefer=by-uuid +will prefer a name in a subdirectory of +.B /dev +called +.BR by-uuid . + +This functionality is currently only provided by +.B \-\-detail +and +.BR \-\-monitor . 
+ .SH For create, build, or grow: .TP @@ -418,11 +453,22 @@ A suffix of 'M' or 'G' can be given to indicate Megabytes or Gigabytes respectively. +Sometimes a replacement drive can be a little smaller than the +original drives though this should be minimised by IDEMA standards. +Such a replacement drive will be rejected by +.IR md . +To guard against this it can be useful to set the initial size +slightly smaller than the smaller device with the aim that it will +still be larger than any replacement. + This value can be set with .B \-\-grow -for RAID level 1/4/5/6. If the array was created with a size smaller -than the currently active drives, the extra space can be accessed -using +for RAID level 1/4/5/6 though +.B CONTAINER +based arrays such as those with IMSM metadata may not be able to +support this. +If the array was created with a size smaller than the currently +active drives, the extra space can be accessed using .BR \-\-grow . The size can be given as .B max @@ -440,9 +486,10 @@ .B "\-\-grow \-\-size=" command. -This value can not be used with +This value cannot be used when creating a .B CONTAINER -metadata such as DDF and IMSM. +such as with DDF and IMSM metadata, though it perfectly valid when +creating an array inside a container. .TP .BR \-Z ", " \-\-array\-size= @@ -706,7 +753,7 @@ When an array is resized to a larger size with .B "\-\-grow \-\-size=" the new space is normally resynced in that same way that the whole -array is resynced at creation. From Linux version 2.6.40, +array is resynced at creation. From Linux version 3.0, .B \-\-assume\-clean can be used with that command to avoid the automatic resync. @@ -721,6 +768,31 @@ being reshaped. .TP +.BR \-\-continue +This option is complementary to the +.B \-\-freeze-reshape +option for assembly. It is needed when +.B \-\-grow +operation is interrupted and it is not restarted automatically due to +.B \-\-freeze-reshape +usage during array assembly. 
This option is used together with +.BR \-G +, ( +.BR \-\-grow +) command and device for a pending reshape to be continued. +All parameters required for reshape continuation will be read from array metadata. +If initial +.BR \-\-grow +command had required +.BR \-\-backup\-file= +option to be set, continuation option will require to have exactly the same +backup file given as well. +.IP +Any other parameter passed together with +.BR \-\-continue +option will be ignored. + +.TP .BR \-N ", " \-\-name= Set a .B name @@ -811,6 +883,11 @@ non-standard name. Names that are not in 'standard' format are only allowed in "/dev/md/". +This is meaningful with +.B \-\-create +or +.BR \-\-build . + .ig XX .\".TP .\".BR \-\-symlink = no @@ -835,6 +912,28 @@ .\" .XX +.TP +.BR \-a ", " "\-\-add" +This option can be used in Grow mode in two cases. + +If the target array is a Linear array, then +.B \-\-add +can be used to add one or more devices to the array. They +are simply catenated on to the end of the array. Once added, the +devices cannot be removed. + +If the +.B \-\-raid\-disks +option is being used to increase the number of devices in an array, +then +.B \-\-add +can be used to add some extra devices to be included in the array. +In most cases this is not needed as the extra devices can be added as +spares first, and then the number of raid-disks can be changed. +However for RAID0, it is not possible to add spares. So to increase +the number of devices in a RAID0, it is necessary to set the new +number of devices, and to add the new devices, in the same command. + .SH For assemble: .TP @@ -912,28 +1011,6 @@ See this option under Create and Build options. .TP -.BR \-a ", " "\-\-add" -This option can be used in Grow mode in two cases. - -If the target array is a Linear array, then -.B \-\-add -can be used to add one or more devices to the array. They -are simply catenated on to the end of the array. Once added, the -devices cannot be removed. 
- -If the -.B \-\-raid\-disks -option is being used to increase the number of devices in an array, -then -.B \-\-add -can be used to add some extra devices to be included in the array. -In most cases this is not needed as the extra devices can be added as -spares first, and then the number of raid-disks can be changed. -However for RAID0, it is not possible to add spares. So to increase -the number of devices in a RAID0, it is necessary to set the new -number of devices, and to add the new devices, in the same command. - -.TP .BR \-b ", " \-\-bitmap= Specify the bitmap file that was given when the array was created. If an array has an @@ -1078,6 +1155,18 @@ corrupt in some way so that assembling the array normally fails. It will cause any internal bitmap to be ignored. +.TP +.BR \-\-freeze\-reshape +Option is intended to be used in start-up scripts during initrd boot phase. +When array under reshape is assembled during initrd phase, this option +stops reshape after reshape critical section is being restored. This happens +before file system pivot operation and avoids loss of file system context. +Losing file system context would cause reshape to be broken. + +Reshape can be continued later using the +.B \-\-continue +option for the grow command. + .SH For Manage mode: .TP @@ -1099,7 +1188,7 @@ .BR \-a ", " \-\-add hot-add listed devices. If a device appears to have recently been part of the array -(possibly it failed or was removed) the device is re\-added as describe +(possibly it failed or was removed) the device is re\-added as described in the next point. If that fails or the device was never part of the array, the device is added as a hot-spare. @@ -1345,7 +1434,7 @@ .TP .BR \-\-rebuild\-map ", " \-r Rebuild the map file -.RB ( /var/run/mdadm/map ) +.RB ( {MAP_PATH} ) that .I mdadm uses to help track which arrays are currently being assembled. 
@@ -1491,7 +1580,7 @@ In the second usage example, all devices listed are treated as md devices and assembly is attempted. In the third (where no devices are listed) all md devices that are -listed in the configuration file are assembled. If not arrays are +listed in the configuration file are assembled. If no arrays are described by the configuration file, then any arrays that can be found on unused devices will be assembled. @@ -1552,7 +1641,7 @@ but leaves that to .IR udev . It does record information in -.B /var/run/mdadm/map +.B {MAP_PATH} which will allow .I udev to choose the correct name. @@ -1600,7 +1689,7 @@ will first attempt to assemble all the arrays listed in the config file. -In no array at listed in the config (other than those marked +If no arrays are listed in the config (other than those marked .BR <ignore> ) it will look through the available devices for possible arrays and will try to assemble anything that it finds. Arrays which are tagged @@ -2200,22 +2289,24 @@ .IP \(bu 4 increase or decrease the "raid\-devices" attribute of RAID0, RAID1, RAID4, RAID5, and RAID6. -.IP \bu 4 +.IP \(bu 4 change the chunk-size and layout of RAID0, RAID4, RAID5 and RAID6. -.IP \bu 4 +.IP \(bu 4 convert between RAID1 and RAID5, between RAID5 and RAID6, between -RAID0, RAID5, and RAID5, and between RAID0 and RAID10 (in the near-2 mode). +RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode). .IP \(bu 4 add a write-intent bitmap to any array which supports these bitmaps, or remove a write-intent bitmap from such an array. .PP -Using GROW on containers is currently only support for Intel's IMSM +Using GROW on containers is currently supported only for Intel's IMSM container format. The number of devices in a container can be increased - which affects all arrays in the container - or an array in a container can be converted between levels where those levels are supported by the container, and the conversion is on of those listed -above. +above. 
Resizing arrays in an IMSM container with +.B "--grow --size" +is not yet supported. Grow functionality (e.g. expand a number of raid devices) for Intel's IMSM container format has an experimental status. It is guarded by the @@ -2250,7 +2341,7 @@ are synchronised. Note that when an array changes size, any filesystem that may be -stored in the array will not automatically grow for shrink to use or +stored in the array will not automatically grow or shrink to use or vacate the space. The filesystem will need to be explicitly told to use the extra space after growing, or to reduce its size @@ -2259,7 +2350,7 @@ Also the size of an array cannot be changed while it has an active bitmap. If an array has a bitmap, it must be removed before the size -can be changed. Once the change it complete a new bitmap can be created. +can be changed. Once the change is complete a new bitmap can be created. .SS RAID\-DEVICES CHANGES @@ -2435,8 +2526,8 @@ line. .IP + -Does the device have a valid md superblock. If a specific metadata -version is request with +Does the device have a valid md superblock? If a specific metadata +version is requested with .B \-\-metadata or .B \-e @@ -2467,13 +2558,10 @@ current host, the device will be rejected. .. +.PP .I mdadm keeps a list of arrays that it has partially assembled in -.B /var/run/mdadm/map -(or -.B /var/run/mdadm.map -if the directory doesn't exist. Or maybe even -.BR /dev/.mdadm.map ). +.BR {MAP_PATH} . If no array exists which matches the metadata on the new device, .I mdadm @@ -2617,12 +2705,12 @@ .B /dev/md0 out of all such devices with a RAID superblock with a minor number of 0. -.B " mdadm \-\-monitor \-\-scan \-\-daemonise > /var/run/mdadm" +.B " mdadm \-\-monitor \-\-scan \-\-daemonise > /run/mdadm/mon.pid" .br If config file contains a mail address or alert program, run mdadm in the background in monitor mode monitoring all md devices. Also write pid of mdadm daemon to -.BR /var/run/mdadm . +.BR /run/mdadm/mon.pid . 
.B " mdadm \-Iq /dev/somedevice" .br @@ -2639,7 +2727,7 @@ Any devices which are components of /dev/md4 will be marked as faulty and then remove from the array. -.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4 +.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4" .br The array .B /dev/md4 @@ -2700,21 +2788,10 @@ .BR mdadm.conf (5) for more details. -.SS /var/run/mdadm/map +.SS {MAP_PATH} When .B \-\-incremental mode is used, this file gets a list of arrays currently being created. -If -.B /var/run/mdadm -does not exist as a directory, then -.B /var/run/mdadm.map -is used instead. If -.B /var/run -is not available (as may be the case during early boot), -.B /dev/.mdadm.map -is used on the basis that -.B /dev -is usually available very early in boot. .SH DEVICE NAMES @@ -2787,7 +2864,7 @@ For further information on mdadm usage, MD and the various levels of RAID, see: .IP -.B http://linux\-raid.osdl.org/ +.B http://raid.wiki.kernel.org/ .PP (based upon Jakob \(/Ostergaard's Software\-RAID.HOWTO) .\".PP
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.c ^
@@ -73,7 +73,9 @@ int test = 0; int export = 0; int assume_clean = 0; + char prefer = NULL; char symlinks = NULL; + int grow_continue = 0; /* autof indicates whether and how to create device node. * bottom 3 bits are style. Rest (when shifted) are number of parts * 0 - unset @@ -112,6 +114,8 @@ int mdfd = -1; + int freeze_reshape = 0; + srandom(time(0) ^ getpid()); ident.uuid_set=0; @@ -171,6 +175,23 @@ homehost = optarg; continue; + /* + * --offroot sets first char of argv[0] to @. This is used + * by systemd to signal that the tast was launched from + * initrd/initramfs and should be preserved during shutdown + / + case OffRootOpt: + argv[0][0] = '@'; + __offroot = 1; + continue; + + case Prefer: + if (prefer) + free(prefer); + if (asprintf(&prefer, "/%s/", optarg) <= 0) + prefer = NULL; + continue; + case ':': case '?': fputs(Usage, stderr); @@ -209,13 +230,17 @@ case 'I': newmode = INCREMENTAL; shortopt = short_bitmap_auto_options; break; case AutoDetect: - newmode = AUTODETECT; break; + newmode = AUTODETECT; + break; case MiscOpt: case 'D': case 'E': case 'X': - case 'Q': newmode = MISC; break; + case 'Q': + newmode = MISC; + break; + case 'R': case 'S': case 'o': @@ -226,17 +251,15 @@ case DetailPlatform: case KillSubarray: case UpdateSubarray: - if (opt == KillSubarray \|\| opt == UpdateSubarray) { - if (subarray) { - fprintf(stderr, Name ": subarray can only" - " be specified once\n"); - exit(2); - } - subarray = optarg; - } case UdevRules: - case 'K': if (!mode) newmode = MISC; break; - case NoSharing: newmode = MONITOR; break; + case 'K': + if (!mode) + newmode = MISC; + break; + + case NoSharing: + newmode = MONITOR; + break; } if (mode && newmode == mode) { / everybody happy ! 
/ @@ -609,10 +632,15 @@ case O(ASSEMBLE,Force): / force assembly / case O(MISC,'f'): / force zero / case O(MISC,Force): / force zero / + case O(MANAGE,Force): / add device which is too large / force=1; continue; - / now for the Assemble options / + case O(ASSEMBLE, FreezeReshape): / Freeze reshape during + * initrd phase / + case O(INCREMENTAL, FreezeReshape): + freeze_reshape = 1; + continue; case O(CREATE,'u'): / uuid of array / case O(ASSEMBLE,'u'): / uuid of array / if (ident.uuid_set) { @@ -840,6 +868,7 @@ continue; case O(MONITOR,'1'): / oneshot / oneshot = 1; + spare_sharing = 0; continue; case O(MONITOR,'t'): / test / test = 1; @@ -915,6 +944,14 @@ case O(MISC, DetailPlatform): case O(MISC, KillSubarray): case O(MISC, UpdateSubarray): + if (opt == KillSubarray \|\| opt == UpdateSubarray) { + if (subarray) { + fprintf(stderr, Name ": subarray can only" + " be specified once\n"); + exit(2); + } + subarray = optarg; + } if (devmode && devmode != opt && (devmode == 'E' \|\| (opt == 'E' && devmode != 'Q'))) { fprintf(stderr, Name ": --examine/-E cannot be given with "); @@ -988,6 +1025,11 @@ backup_file = optarg; continue; + case O(GROW, Continue): + / Continue interrupted grow + / + grow_continue = 1; + continue; case O(ASSEMBLE, InvalidBackup): / Acknowledge that the backupfile is invalid, but ask * to continue anyway @@ -1022,15 +1064,14 @@ case O(BUILD,BitmapChunk): case O(CREATE,BitmapChunk): /* bitmap chunksize / bitmap_chunk = parse_size(optarg); - if (bitmap_chunk < 0 \|\| + if (bitmap_chunk <= 0 \|\| bitmap_chunk & (bitmap_chunk - 1)) { fprintf(stderr, Name ": invalid bitmap chunksize: %s\n", optarg); exit(2); } - / convert sectors to B, chunk of 0 means 512B / - bitmap_chunk = bitmap_chunk ? 
bitmap_chunk 512 : 512; + bitmap_chunk = bitmap_chunk * 512; continue; case O(GROW, WriteBehind): @@ -1185,7 +1226,8 @@ require_homehost = 0; } - if ((mode != MISC \|\| devmode != 'E') && + if (!((mode == MISC && devmode == 'E') + \|\| (mode == MONITOR && spare_sharing == 0)) && geteuid() != 0) { fprintf(stderr, Name ": must be super-user to perform this action\n"); exit(1); @@ -1202,7 +1244,7 @@ if (!rv && devs_found>1) rv = Manage_subdevs(devlist->devname, mdfd, devlist->next, verbose-quiet, test, - update); + update, force); if (!rv && readonly < 0) rv = Manage_ro(devlist->devname, mdfd, readonly); if (!rv && runstop) @@ -1226,14 +1268,16 @@ NULL, backup_file, invalid_backup, readonly, runstop, update, homehost, require_homehost, - verbose-quiet, force); + verbose-quiet, force, + freeze_reshape); } } else if (!scan) rv = Assemble(ss, devlist->devname, &ident, devlist->next, backup_file, invalid_backup, readonly, runstop, update, homehost, require_homehost, - verbose-quiet, force); + verbose-quiet, force, + freeze_reshape); else if (devs_found>0) { if (update && devs_found > 1) { fprintf(stderr, Name ": can only update a single array at a time\n"); @@ -1257,13 +1301,22 @@ NULL, backup_file, invalid_backup, readonly, runstop, update, homehost, require_homehost, - verbose-quiet, force); + verbose-quiet, force, + freeze_reshape); } } else { struct mddev_ident a, array_list = conf_get_ident(NULL); struct mddev_dev devlist = conf_get_devs(); + struct map_ent map = NULL; int cnt = 0; int failures, successes; + + if (conf_verify_devnames(array_list)) { + fprintf(stderr, Name + ": Duplicate MD device names in " + "conf file were found.\n"); + exit(1); + } if (devlist == NULL) { fprintf(stderr, Name ": No devices listed in conf file were found.\n"); exit(1); @@ -1281,6 +1334,10 @@ if (a->autof == 0) a->autof = autof; } + if (map_lock(&map)) + fprintf(stderr, Name " %s: failed to get " + "exclusive lock on mapfile\n", + __func__); do { failures = 0; successes = 0; @@ 
-1298,7 +1355,8 @@ NULL, NULL, 0, readonly, runstop, NULL, homehost, require_homehost, - verbose-quiet, force); + verbose-quiet, force, + freeze_reshape); if (r == 0) { a->assembled = 1; successes++; @@ -1323,9 +1381,13 @@ rv2 = Assemble(ss, NULL, &ident, devlist, NULL, 0, - readonly, runstop, NULL, - homehost, require_homehost, - verbose-quiet, force); + readonly, + runstop, NULL, + homehost, + require_homehost, + verbose-quiet, + force, + freeze_reshape); if (rv2==0) { cnt++; acnt++; @@ -1342,6 +1404,7 @@ fprintf(stderr, Name ": No arrays found in config file\n"); rv = 1; } + map_unlock(&map); } break; case BUILD: @@ -1443,7 +1506,7 @@ if (devmode == 'D') rv \|= Detail(name, v, export, test, - homehost); + homehost, prefer); else rv \|= WaitClean(name, -1, v); put_md_name(name); @@ -1497,7 +1560,7 @@ case 'D': rv \|= Detail(dv->devname, brief?1+verbose:0, - export, test, homehost); + export, test, homehost, prefer); continue; case 'K': /* Zero superblock */ if (ss) @@ -1571,7 +1634,8 @@ } rv= Monitor(devlist, mailaddr, program, delay?delay:60, daemonise, scan, oneshot, - dosyslog, test, pidfile, increments, spare_sharing); + dosyslog, test, pidfile, increments, + spare_sharing, prefer); break; case GROW: @@ -1633,7 +1697,11 @@ delay = DEFAULT_BITMAP_DELAY; rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file, bitmap_chunk, delay, write_behind, force); - } else if (size >= 0 \|\| raiddisks != 0 \|\| layout_str != NULL + } else if (grow_continue) + rv = Grow_continue_command(devlist->devname, + mdfd, backup_file, + verbose); + else if (size >= 0 \|\| raiddisks != 0 \|\| layout_str != NULL \|\| chunk != 0 \|\| level != UnSet) { rv = Grow_reshape(devlist->devname, mdfd, quiet, backup_file, size, level, layout_str, chunk, raiddisks, @@ -1679,7 +1747,8 @@ else rv = Incremental(devlist->devname, verbose-quiet, runstop, ss, homehost, - require_homehost, autof); + require_homehost, autof, + freeze_reshape); break; case AUTODETECT: autodetect();
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.conf-example ^
@@ -59,7 +59,7 @@ # When used in --follow (aka --monitor) mode, mdadm needs a # mail address and/or a program. This can be given with "mailaddr" # and "program" lines to that monitoring can be started using -# mdadm --follow --scan & echo $! > /var/run/mdadm +# mdadm --follow --scan & echo $! > /run/mdadm/mon.pid # If the lines are not found, mdadm will exit quietly #MAILADDR root@mydomain.tld #PROGRAM /usr/sbin/handle-mdadm-events
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.conf.5 ^
@@ -439,7 +439,7 @@ of domains of all devices in that array. A spare can be automatically moved from one array to another if the set of the destination array's .I domains -contains all the +ppcontains all the .I domains of the new disk or if both arrays have the same .IR spare-group . @@ -463,6 +463,7 @@ .B path= file glob matching anything from .B /dev/disk/by-path +.TP .B type= either .B disk @@ -471,6 +472,8 @@ .TP .B action= include, re-add, spare, spare-same-slot, or force-spare +.B auto= +yes, no, or homehost. .P The
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.h ^
@@ -71,10 +71,10 @@ /* MAP_DIR should be somewhere that persists across the pivotroot * from early boot to late boot. - * Currently /dev seems to be the only option on most distros. + * /run seems to have emerged as the best standard. / #ifndef MAP_DIR -#define MAP_DIR "/dev/.mdadm" +#define MAP_DIR "/run/mdadm" #endif / MAP_DIR / / MAP_FILE is what we name the map file we put in MAP_DIR, in case you * want something other than the default of "map" @@ -83,7 +83,7 @@ #define MAP_FILE "map" #endif /* MAP_FILE / / MDMON_DIR is where pid and socket files used for communicating - * with mdmon normally live. It should be /var/run, but when + * with mdmon normally live. Best is /var/run/mdadm as * mdmon is needed at early boot then it needs to write there prior * to /var/run being mounted read/write, and it also then needs to * persist beyond when /var/run is mounter read-only. So, to be @@ -91,7 +91,7 @@ * boot process and stays up as long as possible during shutdown. / #ifndef MDMON_DIR -#define MDMON_DIR "/dev/.mdadm/" +#define MDMON_DIR "/run/mdadm" #endif / MDMON_DIR / / FAILED_SLOTS is where to save files storing recent removal of array @@ -99,7 +99,7 @@ * slot for array recovery / #ifndef FAILED_SLOTS_DIR -#define FAILED_SLOTS_DIR "/dev/.mdadm/failed-slots" +#define FAILED_SLOTS_DIR "/run/mdadm/failed-slots" #endif / FAILED_SLOTS / #include "md_u.h" @@ -194,24 +194,30 @@ unsigned long long custom_array_size; / size for non-default sized * arrays (in sectors) / +#define NO_RESHAPE 0 +#define VOLUME_RESHAPE 1 +#define CONTAINER_RESHAPE 2 int reshape_active; unsigned long long reshape_progress; + int recovery_blocked; / for external metadata it + * indicates that there is + * reshape in progress in + * container, + * for native metadata it is + * reshape_active field mirror + / union { unsigned long long resync_start; / per-array resync position / unsigned long long recovery_start; / per-device rebuild position / #define MaxSector (~0ULL) / resync/recovery complete 
position / }; + long bitmap_offset; / 0 == none, 1 == a file / unsigned long safe_mode_delay; / ms delay to mark clean / int new_level, delta_disks, new_layout, new_chunk; int errors; unsigned long cache_size; / size of raid456 stripe cache/ int mismatch_cnt; char text_version[50]; - void update_private; /* for passing metadata-format - * specific update data - * between successive calls to - * update_super() - / int container_member; / for assembling external-metatdata arrays * This is to be used internally by metadata @@ -313,6 +319,10 @@ RebuildMapOpt, InvalidBackup, UdevRules, + FreezeReshape, + Continue, + OffRootOpt, + Prefer, }; /* structures read from config file / @@ -427,6 +437,7 @@ int devnum, char metadata, int uuid[4], char path); extern int map_lock(struct map_ent melp); extern void map_unlock(struct map_ent melp); +extern void map_fork(void); / various details can be requested / enum sysfs_read_flags { @@ -440,11 +451,13 @@ GET_DISKS = (1 << 7), GET_DEGRADED = (1 << 8), GET_SAFEMODE = (1 << 9), - GET_DEVS = (1 << 10), / gets role, major, minor / - GET_OFFSET = (1 << 11), - GET_SIZE = (1 << 12), - GET_STATE = (1 << 13), - GET_ERROR = (1 << 14), + GET_BITMAP_LOCATION = (1 << 10), + + GET_DEVS = (1 << 20), / gets role, major, minor / + GET_OFFSET = (1 << 21), + GET_SIZE = (1 << 22), + GET_STATE = (1 << 23), + GET_ERROR = (1 << 24), }; / If fd >= 0, get the array it is open on, @@ -460,6 +473,8 @@ char name, char val); extern int sysfs_set_num(struct mdinfo sra, struct mdinfo dev, char name, unsigned long long val); +extern int sysfs_set_num_signed(struct mdinfo sra, struct mdinfo dev, + char name, long long val); extern int sysfs_uevent(struct mdinfo sra, char event); extern int sysfs_get_fd(struct mdinfo sra, struct mdinfo dev, char name); @@ -520,7 +535,12 @@ extern int map_name(mapping_t map, char name); extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[]; -extern char map_dev(int major, int minor, int create); +extern char 
map_dev_preferred(int major, int minor, int create, + char prefer); +static inline char map_dev(int major, int minor, int create) +{ + return map_dev_preferred(major, minor, create, NULL); +} struct active_array; struct metadata_update; @@ -646,6 +666,8 @@ linear-grow-new - add a new device to a linear array, but don't * change the size: so superblock still matches * linear-grow-update - now change the size of the array. + * writemostly - set the WriteMostly1 bit in the superblock devflags + * readwrite - clear the WriteMostly1 bit in the superblock devflags / int (update_super)(struct supertype st, struct mdinfo info, char update, @@ -700,6 +722,12 @@ inter-device dependencies, it should record sufficient details * so these can be validated. * Both 'size' and 'freesize' are in sectors. chunk is KiB. + Return value is: + * 1: everything is OK + * 0: not OK for some reason - if 'verbose', then error was reported. + * -1: st->sb was NULL, 'subdev' is a member of a container of this + * types, but array is not acceptable for some reason + * message was reported even if verbose is 0. 
/ int (validate_geometry)(struct supertype st, int level, int layout, int raiddisks, @@ -720,9 +748,13 @@ initialized to indicate if reshape is being performed at the * container or subarray level / +#define APPLY_METADATA_CHANGES 1 +#define ROLLBACK_METADATA_CHANGES 0 + int (reshape_super)(struct supertype st, long long size, int level, int layout, int chunksize, int raid_disks, int delta_disks, char backup, char dev, + int direction, int verbose); / optional / int (manage_reshape)( /* optional / int afd, struct mdinfo sra, struct reshape reshape, @@ -848,6 +880,7 @@ external:/md0/12 / int devcnt; + int retry_soon; struct mdinfo devs; @@ -1010,7 +1043,7 @@ extern int Manage_resize(char devname, int fd, long long size, int raid_disks); extern int Manage_subdevs(char devname, int fd, struct mddev_dev devlist, int verbose, int test, - char update); + char update, int force); extern int autodetect(void); extern int Grow_Add_device(char devname, int fd, char newdev); extern int Grow_addbitmap(char devname, int fd, char file, int chunk, int delay, int write_behind, int force); @@ -1022,7 +1055,17 @@ extern int Grow_restart(struct supertype st, struct mdinfo info, int fdlist, int cnt, char backup_file, int verbose); extern int Grow_continue(int mdfd, struct supertype st, - struct mdinfo info, char backup_file); + struct mdinfo info, char backup_file, + int freeze_reshape); + +extern int restore_backup(struct supertype st, + struct mdinfo content, + int working_disks, + int spares, + char backup_file, + int verbose); +extern int Grow_continue_command(char devname, int fd, + char backup_file, int verbose); extern int Assemble(struct supertype st, char mddev, struct mddev_ident ident, @@ -1030,7 +1073,7 @@ char backup_file, int invalid_backup, int readonly, int runstop, char update, char homehost, int require_homehost, - int verbose, int force); + int verbose, int force, int freeze_reshape); extern int Build(char mddev, int chunk, int level, int layout, int raiddisks, 
struct mddev_dev devlist, int assume_clean, @@ -1045,7 +1088,7 @@ int runstop, int verbose, int force, int assume_clean, char bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof); -extern int Detail(char dev, int brief, int export, int test, char homehost); +extern int Detail(char dev, int brief, int export, int test, char homehost, char prefer); extern int Detail_Platform(struct superswitch ss, int scan, int verbose); extern int Query(char dev); extern int Examine(struct mddev_dev devlist, int brief, int export, int scan, @@ -1054,7 +1097,7 @@ char mailaddr, char alert_cmd, int period, int daemonise, int scan, int oneshot, int dosyslog, int test, char pidfile, int increments, - int share); + int share, char prefer); extern int Kill(char dev, struct supertype st, int force, int quiet, int noexcl); extern int Kill_subarray(char dev, char subarray, int quiet); @@ -1064,7 +1107,7 @@ extern int Incremental(char devname, int verbose, int runstop, struct supertype st, char homehost, int require_homehost, - int autof); + int autof, int freeze_reshape); extern void RebuildMap(void); extern int IncrementalScan(int verbose); extern int IncrementalRemove(char devname, char path, int verbose); @@ -1114,8 +1157,12 @@ extern char conf_line(FILE file); extern char conf_word(FILE file, int allow_key); extern int conf_name_is_free(char name); +extern int conf_verify_devnames(struct mddev_ident array_list); extern int devname_matches(char name, char match); -extern struct mddev_ident conf_match(struct mdinfo info, struct supertype st); +extern struct mddev_ident conf_match(struct supertype st, + struct mdinfo info, + char devname, + int verbose, int rvp); extern int experimental(void); extern void free_line(char line); @@ -1129,13 +1176,14 @@ struct mdinfo info, char buf, char sep); extern unsigned long calc_csum(void super, int bytes); extern int enough(int level, int raid_disks, int layout, int clean, - char avail, int avail_disks); + char avail); extern int 
enough_fd(int fd); extern int ask(char mesg); extern unsigned long long get_component_size(int fd); extern void remove_partitions(int fd); extern int test_partition(int fd); extern int test_partition_from_id(dev_t id); +extern int get_data_disks(int level, int layout, int raid_disks); extern unsigned long long calc_array_size(int level, int raid_disks, int layout, int chunksize, unsigned long long devsize); extern int flush_metadata_updates(struct supertype st); @@ -1143,7 +1191,7 @@ extern int assemble_container_content(struct supertype st, int mdfd, struct mdinfo content, int runstop, char chosen_name, int verbose, - char backup_file); + char backup_file, int freeze_reshape); extern struct mdinfo container_choose_spares(struct supertype st, unsigned long long min_size, struct domainlist domlist, @@ -1216,10 +1264,10 @@ return (-1-d) << MdpMinorShift; } -static inline int ROUND_UP(int a, int base) -{ - return ((a+base-1)/base)base; -} +#define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1)) +#define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base)) +#define ROUND_UP_PTR(ptr, base) ((typeof(ptr)) \ + (ROUND_UP((unsigned long)(ptr), base))) static inline int is_subarray(char vers) { @@ -1345,3 +1393,15 @@ #define PATH_MAX 4096 #endif +#define PROCESS_DELAYED -2 +#define PROCESS_PENDING -3 + +/ When using "GET_DISK_INFO" it isn't certain how high + * we need to check. So we impose an absolute limit of + * MAX_DISKS. This needs to be much more than the largest + * number of devices any metadata can support. Currently + * v1.x can support 1920 + */ +#define MAX_DISKS 4096 + +extern int __offroot;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdadm.spec ^
@@ -1,8 +1,8 @@ Summary: mdadm is used for controlling Linux md devices (aka RAID arrays) Name: mdadm -Version: 3.2.2 +Version: 3.2.4 Release: 1 -Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tgz +Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz URL: http://neil.brown.name/blog/mdadm License: GPL Group: Utilities/System @@ -41,6 +41,8 @@ %defattr(-,root,root) %doc TODO ChangeLog mdadm.conf-example COPYING %{_sbindir}/mdadm +%{_sbindir}/mdmon +/lib/udev/rules.d/64-md-raid.rules %config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf %{_mandir}/man/md
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdassemble.8 ^
@@ -1,5 +1,5 @@ .\" -- nroff -- -.TH MDASSEMBLE 8 "" v3.2.2 +.TH MDASSEMBLE 8 "" v3.2.4 .SH NAME mdassemble \- assemble MD devices .I aka
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdassemble.c ^
@@ -85,7 +85,7 @@ rv \|= Assemble(array_list->st, array_list->devname, array_list, NULL, NULL, 0, readonly, runstop, NULL, NULL, 0, - verbose, force); + verbose, force, 1); } return rv; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdmon.8 ^
@@ -1,11 +1,11 @@ .\" See file COPYING in distribution for details. -.TH MDMON 8 "" v3.2.2 +.TH MDMON 8 "" v3.2.4 .SH NAME mdmon \- monitor MD external metadata arrays .SH SYNOPSIS -.BI mdmon " [--all] [--takeover] CONTAINER" +.BI mdmon " [--all] [--takeover] [--offroot] CONTAINER" .SH OVERVIEW The 2.6.27 kernel brings the ability to support external metadata arrays. @@ -104,7 +104,7 @@ relationship between a set of block devices and a raid array. For example to create 2 arrays at different raid levels on a single set of disks, MD metadata requires the disks be partitioned and then -each array can created be created with a subset of those partitions. The +each array can be created with a subset of those partitions. The supported external formats perform this disk carving internally. .P Container devices simply hold references to all member disks and allow @@ -165,6 +165,16 @@ containers with names longer than 5 characters, this argument can be arbitrarily extended, e.g. to .BR \-\-all-active-arrays . +.TP +.BR \-\-offroot +Set first character of argv[0] to @ to indicate mdmon was launched +from initrd/initramfs and should not be shut down by systemd as part of +the regular shutdown process. This option is normally only used by +the system's initscripts. Please see here for more details on how +systemd handles argv[0]: +.IP +.B http://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons +.PP .PP Note that @@ -172,7 +182,7 @@ is automatically started by .I mdadm when needed and so does not need to be considered when working with -RAID arrays. The only times it is run other that by +RAID arrays. The only times it is run other than by .I mdadm is when the boot scripts need to restart it after mounting the new root filesystem. @@ -208,7 +218,7 @@ .B .sock file. The particular filesystem to use is given to mdmon at compile time and defaults to -.BR /dev/.mdadm . +.BR /run/mdadm . This filesystem must persist through to shutdown time.
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdmon.c ^
@@ -189,6 +189,9 @@ kill(pid, SIGTERM); + if (sock < 0) + return; + /* Wait for monitor to exit by reading from the socket, after * clearing the non-blocking flag / fl = fcntl(sock, F_GETFL, 0); @@ -262,7 +265,18 @@ void usage(void) { - fprintf(stderr, "Usage: mdmon [--all] [--takeover] CONTAINER\n"); + fprintf(stderr, +"Usage: mdmon [options] CONTAINER\n" +"\n" +"Options are:\n" +" --help -h : This message\n" +" --all : All devices\n" +" --takeover -t : Takeover container\n" +" --offroot : Set first character of argv[0] to @ to indicate the\n" +" application was launched from initrd/initramfs and\n" +" should not be shutdown by systemd as part of the\n" +" regular shutdown process.\n" +); exit(2); } @@ -274,25 +288,51 @@ int devnum; char devname; int status = 0; - int arg; + int opt; int all = 0; int takeover = 0; - - for (arg = 1; arg < argc; arg++) { - if (strncmp(argv[arg], "--all",5) == 0 \|\| - strcmp(argv[arg], "/proc/mdstat") == 0) { - container_name = argv[arg]; + static struct option options[] = { + {"all", 0, NULL, 'a'}, + {"takeover", 0, NULL, 't'}, + {"help", 0, NULL, 'h'}, + {"offroot", 0, NULL, OffRootOpt}, + {NULL, 0, NULL, 0} + }; + + while ((opt = getopt_long(argc, argv, "th", options, NULL)) != -1) { + switch (opt) { + case 'a': + container_name = argv[optind-1]; all = 1; - } else if (strcmp(argv[arg], "--takeover") == 0) + break; + case 't': + container_name = optarg; takeover = 1; - else if (container_name == NULL) - container_name = argv[arg]; - else + break; + case OffRootOpt: + argv[0][0] = '@'; + break; + case 'h': + default: usage(); + break; + } } + + if (all == 0 && container_name == NULL) { + if (argv[optind]) + container_name = argv[optind]; + } + if (container_name == NULL) usage(); + if (argc - optind > 1) + usage(); + + if (strcmp(container_name, "/proc/mdstat") == 0) + all = 1; + if (all) { struct mdstat_ent mdstat, e; int container_len = strlen(container_name); @@ -468,6 +508,7 @@ exit(3); } close(victim_sock); + victim_sock = -1; 
} if (container->ss->load_container(container, mdfd, devname)) { fprintf(stderr, "mdmon: Cannot load metadata for %s\n", @@ -501,7 +542,8 @@ if (victim > 0) { try_kill_monitor(victim, container->devname, victim_sock); - close(victim_sock); + if (victim_sock >= 0) + close(victim_sock); } setsid();
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdmon.h ^
@@ -28,6 +28,7 @@ struct mdinfo info; struct supertype container; struct active_array next, *replaces; + int to_remove; int action_fd; int resync_start_fd;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdopen.c ^
@@ -38,9 +38,9 @@ * else that of dev / struct stat stb; - int major_num = major_num; / quiet gcc -Os unitialized warning / - int minor_num = minor_num; / quiet gcc -Os unitialized warning / - int odig = odig; / quiet gcc -Os unitialized warning / + int major_num; + int minor_num; + int odig; int i; int nlen = strlen(dev) + 20; char name; @@ -53,23 +53,26 @@ if (lstat(dev, &stb)!= 0) return; - if (S_ISLNK(stb.st_mode)) { + if (S_ISBLK(stb.st_mode)) { + major_num = major(stb.st_rdev); + minor_num = minor(stb.st_rdev); + odig = -1; + } else if (S_ISLNK(stb.st_mode)) { int len = readlink(dev, orig, sizeof(orig)); if (len < 0 \|\| len > 1000) return; orig[len] = 0; odig = isdigit(orig[len-1]); - } else if (S_ISBLK(stb.st_mode)) { - major_num = major(stb.st_rdev); - minor_num = minor(stb.st_rdev); + major_num = -1; + minor_num = -1; } else - return; + return; name = malloc(nlen); for (i=1; i <= cnt ; i++) { struct stat stb2; snprintf(name, nlen, "%s%s%d", dev, dig?"p":"", i); if (stat(name, &stb2)==0) { - if (!S_ISBLK(stb2.st_mode)) + if (!S_ISBLK(stb2.st_mode) \|\| !S_ISBLK(stb.st_mode)) continue; if (stb2.st_rdev == makedev(major_num, minor_num+i)) continue; @@ -318,7 +321,7 @@ * If we cannot detect udev, we need to make * devices and links ourselves. / - if (stat("/dev/.udev", &stb) != 0 \|\| + if ((stat("/dev/.udev", &stb) != 0 && stat("/run/udev", &stb) != 0) \|\| check_env("MDADM_NO_UDEV")) { / Make sure 'devname' exists and 'chosen' is a symlink to it */ if (lstat(devname, &stb) == 0) { @@ -360,8 +363,12 @@ if (lstat(chosen, &stb) == 0) { char buf[300]; + ssize_t link_len = readlink(chosen, buf, sizeof(buf)-1); + if (link_len >= 0) + buf[link_len] = '\0'; + if ((stb.st_mode & S_IFMT) != S_IFLNK \|\| - readlink(chosen, buf, 300) <0 \|\| + link_len < 0 \|\| strcmp(buf, devname) != 0) { fprintf(stderr, Name ": %s exists - ignoring\n", chosen);
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/mdstat.c ^
@@ -131,10 +131,15 @@ FILE f; struct mdstat_ent all, rv, end, insert_here; char line; + int fd; if (hold && mdstat_fd != -1) { lseek(mdstat_fd, 0L, 0); - f = fdopen(dup(mdstat_fd), "r"); + fd = dup(mdstat_fd); + if (fd >= 0) + f = fdopen(fd, "r"); + else + return NULL; } else f = fopen("/proc/mdstat", "r"); if (f == NULL) @@ -257,10 +262,10 @@ if (strncmp(w, "check", 5)==0) ent->resync = 3; - if (l > 8 && strcmp(w+l-8, "=DELAYED")) - ent->percent = 0; - if (l > 8 && strcmp(w+l-8, "=PENDING")) - ent->percent = 0; + if (l > 8 && strcmp(w+l-8, "=DELAYED") == 0) + ent->percent = PROCESS_DELAYED; + if (l > 8 && strcmp(w+l-8, "=PENDING") == 0) + ent->percent = PROCESS_PENDING; } else if (ent->percent == -1 && w[0] >= '0' && w[0] <= '9' &&
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/monitor.c ^
@@ -211,6 +211,8 @@ * / +#define ARRAY_DIRTY 1 +#define ARRAY_BUSY 2 static int read_and_act(struct active_array a) { unsigned long long sync_completed; @@ -218,7 +220,7 @@ int check_reshape = 0; int deactivate = 0; struct mdinfo mdi; - int dirty = 0; + int ret = 0; int count = 0; a->next_state = bad_word; @@ -254,14 +256,14 @@ if (a->curr_state == write_pending) { a->container->ss->set_array_state(a, 0); a->next_state = active; - dirty = 1; + ret \|= ARRAY_DIRTY; } if (a->curr_state == active_idle) { / Set array to 'clean' FIRST, then mark clean * in the metadata / a->next_state = clean; - dirty = 1; + ret \|= ARRAY_DIRTY; } if (a->curr_state == clean) { a->container->ss->set_array_state(a, 1); @@ -269,7 +271,7 @@ if (a->curr_state == active \|\| a->curr_state == suspended \|\| a->curr_state == bad_word) - dirty = 1; + ret \|= ARRAY_DIRTY; if (a->curr_state == readonly) { / Well, I'm ready to handle things. If readonly * wasn't requested, transition to read-auto. @@ -284,7 +286,7 @@ a->next_state = read_auto; /* array is clean / else { a->next_state = active; / Now active for recovery etc / - dirty = 1; + ret \|= ARRAY_DIRTY; } } } @@ -339,7 +341,8 @@ a->container->ss->set_disk(a, mdi->disk.raid_disk, mdi->curr_state); check_degraded = 1; - mdi->next_state \|= DS_UNBLOCK; + if (mdi->curr_state & DS_BLOCKED) + mdi->next_state \|= DS_UNBLOCK; if (a->curr_state == read_auto) { a->container->ss->set_array_state(a, 0); a->next_state = active; @@ -417,9 +420,9 @@ if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) { int remove_result; - / the kernel may not be able to immediately remove the - * disk, we can simply wait until the next event to try - * again. + /* The kernel may not be able to immediately remove the + * disk. In that case we wait a little while and + * try again. 
/ remove_result = write_attr("remove", mdi->state_fd); if (remove_result > 0) { @@ -427,7 +430,8 @@ close(mdi->state_fd); close(mdi->recovery_fd); mdi->state_fd = -1; - } + } else + ret \|= ARRAY_BUSY; } if (mdi->next_state & DS_INSYNC) { write_attr("+in_sync", mdi->state_fd); @@ -458,7 +462,7 @@ if (deactivate) a->container = NULL; - return dirty; + return ret; } static struct mdinfo @@ -479,7 +483,7 @@ struct mdinfo victim; for (a = aa; a; a = a->next) { - if (!a->container) + if (!a->container \|\| a->to_remove) continue; victim = find_device(a, failed->disk.major, failed->disk.minor); if (!victim) @@ -539,7 +543,7 @@ / once an array has been deactivated we want to * ask the manager to discard it. / - if (!a->container) { + if (!a->container \|\| a->to_remove) { if (discard_this) { ap = &(ap)->next; continue; @@ -595,7 +599,7 @@ struct timespec ts; ts.tv_sec = 243600; ts.tv_nsec = 0; - if (aap == NULL) { + if (aap == NULL \|\| container->retry_soon) { / just waiting to get O_EXCL access / ts.tv_sec = 0; ts.tv_nsec = 20000000ULL; @@ -610,7 +614,7 @@ #ifdef DEBUG dprint_wake_reasons(&rfds); #endif - + container->retry_soon = 0; } if (update_queue) { @@ -628,7 +632,6 @@ rv = 0; dirty_arrays = 0; for (a = aap; a ; a = a->next) { - int is_dirty; if (a->replaces && !discard_this) { struct active_array *ap; @@ -642,22 +645,24 @@ / FIXME check if device->state_fd need to be cleared?/ signal_manager(); } - if (a->container) { - is_dirty = read_and_act(a); + if (a->container && !a->to_remove) { + int ret = read_and_act(a); rv \|= 1; - dirty_arrays += is_dirty; + dirty_arrays += !!(ret & ARRAY_DIRTY); / when terminating stop manipulating the array after it * is clean, but make sure read_and_act() is given a * chance to handle 'active_idle' / - if (sigterm && !is_dirty) + if (sigterm && !(ret & ARRAY_DIRTY)) a->container = NULL; / stop touching this array / + if (ret & ARRAY_BUSY) + container->retry_soon = 1; } } / propagate failures across container members / for (a = aap; 
a ; a = a->next) { - if (!a->container) + if (!a->container \|\| a->to_remove) continue; for (mdi = a->info.devs ; mdi ; mdi = mdi->next) if (mdi->curr_state & DS_FAULTY)
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/msg.c ^
@@ -207,9 +207,14 @@ int ping_monitor(char devname) { int sfd = connect_monitor(devname); - int err = fping_monitor(sfd); + int err; + + if (sfd >= 0) { + err = fping_monitor(sfd); + close(sfd); + } else + err = -1; - close(sfd); return err; } @@ -281,6 +286,40 @@ return rc; } + +/ check mdmon version if it supports + * array blocking mechanism + / +int check_mdmon_version(char container) +{ + char version = NULL; + int devnum = devname2devnum(container); + + if (!mdmon_running(devnum)) { + / if mdmon is not active we assume that any instance that is + * later started will match the current mdadm version, if this + * assumption is violated we may inadvertantly rebuild an array + * that was meant for reshape, or start rebuild on a spare that + * was to be moved to another container + / + / pass /; + } else { + int ver; + + version = ping_monitor_version(container); + ver = version ? mdadm_version(version) : -1; + free(version); + if (ver < 3002000) { + fprintf(stderr, Name + ": mdmon instance for %s cannot be disabled\n", + container); + return -1; + } + } + + return 0; +} + /* * block_monitor - prevent mdmon spare assignment * @container - container to block @@ -302,34 +341,13 @@ / int block_monitor(char container, const int freeze) { - int devnum = devname2devnum(container); struct mdstat_ent ent, e, e2; struct mdinfo sra = NULL; - char version = NULL; char buf[64]; int rv = 0; - if (!mdmon_running(devnum)) { - / if mdmon is not active we assume that any instance that is - * later started will match the current mdadm version, if this - * assumption is violated we may inadvertantly rebuild an array - * that was meant for reshape, or start rebuild on a spare that - * was to be moved to another container - / - / pass /; - } else { - int ver; - - version = ping_monitor_version(container); - ver = version ? 
mdadm_version(version) : -1; - free(version); - if (ver < 3002000) { - fprintf(stderr, Name - ": mdmon instance for %s cannot be disabled\n", - container); - return -1; - } - } + if (check_mdmon_version(container)) + return -1; ent = mdstat_read(0, 0); if (!ent) { @@ -430,6 +448,8 @@ continue; sysfs_free(sra); sra = sysfs_read(-1, e->devnum, GET_VERSION\|GET_LEVEL); + if (!sra) + continue; if (sra->array.level > 0) to_ping++; if (unblock_subarray(sra, unfreeze)) @@ -467,3 +487,13 @@ close(sfd); return err; } + +/ using takeover operation for grow purposes, mdadm has to be sure + * that mdmon processes all updates, and if necessary it will be closed + * at takeover to raid0 operation + / +void flush_mdmon(char container) +{ + ping_manager(container); + ping_monitor(container); +}
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/msg.h ^
@@ -34,5 +34,6 @@ extern void unblock_monitor(char container, const int unfreeze); extern int fping_monitor(int sock); extern int ping_manager(char devname); +extern void flush_mdmon(char container); #define MSG_MAX_LEN (41024*1024)
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/platform-intel.h ^
@@ -76,6 +76,7 @@ #define IMSM_OROM_ATTR_RAID1E IMSM_OROM_RLC_RAID1E #define IMSM_OROM_ATTR_RAID5 IMSM_OROM_RLC_RAID5 #define IMSM_OROM_ATTR_RAID_CNG IMSM_OROM_RLC_RAID_CNG + #define IMSM_OROM_ATTR_2TB_DISK (1 << 26) #define IMSM_OROM_ATTR_2TB (1 << 29) #define IMSM_OROM_ATTR_PM (1 << 30) #define IMSM_OROM_ATTR_ChecksumVerify (1 << 31) @@ -124,10 +125,11 @@ static inline int imsm_orom_has_chunk(const struct imsm_orom orom, int chunk) { int fs = ffs(chunk); - if (!fs) return 0; fs--; / bit num to bit index / + if (chunk & (chunk-1)) + return 0; / not a power of 2 / return !!(orom->sss & (1 << (fs - 1))); } @@ -167,21 +169,6 @@ return r; } -/* - * imsm_orom_default_chunk - return the largest chunk size supported via orom - * @orom: orom pointer from find_imsm_orom - / -static inline int imsm_orom_default_chunk(const struct imsm_orom orom) -{ - int fs = fls(orom->sss); - - if (!fs) - return 0; - - return min(512, (1 << fs)); -} - - enum sys_dev_type { SYS_DEV_UNKNOWN = 0, SYS_DEV_SAS, @@ -189,7 +176,6 @@ SYS_DEV_MAX }; - struct sys_dev { enum sys_dev_type type; char *path;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/policy.c ^
@@ -195,7 +195,9 @@ int prefix_len; DIR by_path; char symlink[PATH_MAX] = "/dev/disk/by-path/"; + char nm[PATH_MAX]; struct dirent ent; + int rv; by_path = opendir(symlink); if (!by_path) @@ -218,7 +220,17 @@ return strdup(ent->d_name); } closedir(by_path); - return NULL; + /* A NULL path isn't really acceptable - use the devname.. / + sprintf(symlink, "/sys/dev/block/%d:%d", disk->disk.major, disk->disk.minor); + rv = readlink(symlink, nm, sizeof(nm)-1); + if (rv > 0) { + char dname; + nm[rv] = 0; + dname = strrchr(nm, '/'); + if (dname) + return strdup(dname + 1); + } + return strdup("unknown"); } char type_part[] = "part"; @@ -245,13 +257,13 @@ if (rule->name == rule_path) { if (pathok == 0) pathok = -1; - if (fnmatch(rule->value, path, 0) == 0) + if (path && fnmatch(rule->value, path, 0) == 0) pathok = 1; } if (rule->name == rule_type) { if (typeok == 0) typeok = -1; - if (strcmp(rule->value, type) == 0) + if (type && strcmp(rule->value, type) == 0) typeok = 1; } rule = rule->next; @@ -270,7 +282,8 @@ for (r = rule; r ; r = r->next) if (r->name == pol_act \|\| - r->name == pol_domain) + r->name == pol_domain \|\| + r->name == pol_auto) pol_new(pol, r->name, r->value, metadata); } @@ -280,7 +293,10 @@ * if it does, place a pointer to "-pathNN" * in 'part'. 
/ - int l = strlen(path); + int l; + if (!path) + return 0; + l = strlen(path); while (l > 1 && isdigit(path[l-1])) l--; if (l < 5 \|\| strncmp(path+l-5, "-part", 5) != 0) @@ -343,9 +359,6 @@ struct dev_policy pol = NULL; int i; - if (!type) - return NULL; - rules = config_rules; while (rules) { @@ -366,7 +379,7 @@ /* Now add any metadata-specific internal knowledge * about this path / - for (i=0; superlist[i]; i++) + for (i=0; path && superlist[i]; i++) if (superlist[i]->get_disk_controller_domain) { const char d = superlist[i]->get_disk_controller_domain(path); @@ -399,12 +412,8 @@ char type = disk_type(disk); struct dev_policy pol = NULL; - if (!type) - return NULL; if (config_rules_has_path) path = disk_path(disk); - if (!path) - return NULL; pol = path_policy(path, type); @@ -501,6 +510,7 @@ } pr->next = config_rules; config_rules = pr; + va_end(ap); } void policy_free(void) @@ -678,6 +688,8 @@ { struct domainlist *domlist = NULL; + if (!mdi) + return NULL; for (mdi = mdi->devs ; mdi ; mdi = mdi->next) domainlist_add_dev(&domlist, makedev(mdi->disk.major, mdi->disk.minor), @@ -755,8 +767,10 @@ snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path); f = fopen(path, "r"); - if (!f) + if (!f) { + free(id_path); return 0; + } rv = fscanf(f, " %s %x:%x:%x:%x\n", array->metadata, @@ -765,6 +779,7 @@ array->uuid+2, array->uuid+3); fclose(f); + free(id_path); return rv == 5; } @@ -868,7 +883,8 @@ char udev_rule_file[PATH_MAX]; if (rule_name) { - strcpy(udev_rule_file, rule_name); + strncpy(udev_rule_file, rule_name, sizeof(udev_rule_file) - 6); + udev_rule_file[sizeof(udev_rule_file) - 6] = '\0'; strcat(udev_rule_file, ".temp"); fd = creat(udev_rule_file, S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IROTH);
[-] [+]	Added	mdadm-3.2.4.tar.bz2/raid6check.8 ^
@@ -0,0 +1,96 @@ +.\" -- nroff -- +.\" Copyright Piergiorgio Sartor and others. +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" See file COPYING in distribution for details. +.TH RAID6CHECK 8 "" v1.0.0 +.SH NAME +raid6check \- check MD RAID6 device for errors +.I aka +Linux Software RAID + +.SH SYNOPSIS + +.BI raid6check " <raid6 device> <start stripe> <number of stripes>" + +.SH DESCRIPTION +RAID6 devices in which one single component drive has errors can use +the double parity in order to find out which component drive. +The "raid6check" tool checks, for each stripe, the double parity +consistency, reports mismatches and, if possible, which +component drive has the mismatch. +Since it works at stripe level, it can report different drives with +mismatches at different stripes. + +"raid6check" requires a non-degraded RAID6 MD device as first +parameter, a starting stripe (usually 0) and the number of stripes +to be checked. +If this third parameter is also 0, it will check the array up to +the end. + +"raid6check" will start printing information about the RAID6, then +for each stripe, it will report the parity rotation status. +In case of parity mismatches, "raid6check" reports, if possible, +which component drive could be responsible. Otherwise it reports +that it is not possible to find the component drive. + +If the given MD device is not a RAID6, "raid6check" will, of +course, not continue. + +If the RAID6 MD device is degraded, "raid6check" will report +an error and it will not proceed further. + +No write operations are performed on the array or the components. +Furthermore, the checked array can be online and in use during +the operation of "raid6check". 
+ +.SH EXAMPLES + +.B " raid6check /dev/md0 0 0" +.br +This will check /dev/md0 from start to end. + +.B " raid6check /dev/md3 0 1" +.br +This will check the first stripe of /dev/md3. + +.B " raid6check /dev/md1 1000 0" +.br +This will check /dev/md1 from stripe 1000 up to the end. + +.B " raid6check /dev/md127 128 256" +.br +This will check 256 stripes of /dev/md127 starting from stripe 128. + +.B " raid6check /dev/md0 0 0 \| grep -i error > md0_err.log" +.br +This will check /dev/md0 completely and create a log file only +with errors, if any. + +.SH FILES + +"raid6check" directly uses the component drives as found in /dev. +Furthermore, the sysfs interface is needed in order to find out +the RAID6 parameters. + +.SH BUGS +Negative parameters can lead to unexpected results. + +It is not clear what will happen if the RAID6 MD device gets +degraded during the check. + +.PP +The latest version of +.I raid6check +should always be available from +.IP +.B http://www.kernel.org/pub/linux/utils/raid/mdadm/ +.PP +Related man pages: +.PP +.IR mdadm (8), +.IR mdmon (8), +.IR mdadm.conf (5), +.IR md (4).
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/restripe.c ^
@@ -687,6 +687,7 @@ char *stripes = malloc(raid_disks sizeof(char)); char blocks = malloc(raid_disks sizeof(char)); int i; + int rv; int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2); @@ -704,11 +705,8 @@ if (stripe_buf == NULL \|\| stripes == NULL \|\| blocks == NULL \|\| zero == NULL) { - free(stripe_buf); - free(stripes); - free(blocks); - free(zero); - return -2; + rv = -2; + goto abort; } for (i = 0; i < raid_disks; i++) stripes[i] = stripe_buf + i chunk_size; @@ -717,20 +715,26 @@ unsigned long long offset; int disk, qdisk; int syndrome_disks; - if (length < len) - return -3; + if (length < len) { + rv = -3; + goto abort; + } for (i = 0; i < data_disks; i++) { int disk = geo_map(i, start/chunk_size/data_disks, raid_disks, level, layout); if (src_buf == NULL) { /* read from file / - if (lseek64(source, - read_offset, 0) != (off64_t)read_offset) - return -1; + if (lseek64(source, read_offset, 0) != + (off64_t)read_offset) { + rv = -1; + goto abort; + } if (read(source, stripes[disk], - chunk_size) != chunk_size) - return -1; + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } } else { / read from input buffer */ memcpy(stripes[disk], @@ -782,15 +786,27 @@ } for (i=0; i < raid_disks ; i++) if (dest[i] >= 0) { - if (lseek64(dest[i], offsets[i]+offset, 0) < 0) - return -1; - if (write(dest[i], stripes[i], chunk_size) != chunk_size) - return -1; + if (lseek64(dest[i], + offsets[i]+offset, 0) < 0) { + rv = -1; + goto abort; + } + if (write(dest[i], stripes[i], + chunk_size) != chunk_size) { + rv = -1; + goto abort; + } } length -= len; start += len; } - return 0; + rv = 0; + +abort: + free(stripe_buf); + free(stripes); + free(blocks); + return rv; } #ifdef MAIN
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/sha1.c ^
@@ -101,6 +101,7 @@ /* Take yet unprocessed bytes into account. / md5_uint32 bytes = ctx->buflen; size_t pad; + md5_uint32 ptr; /* Now count remaining bytes. / ctx->total[0] += bytes; @@ -111,9 +112,10 @@ memcpy (&ctx->buffer[bytes], fillbuf, pad); / Put the 64-bit file length in bits at the end of the buffer. / - (md5_uint32 ) &ctx->buffer[bytes + pad + 4] = SWAP (ctx->total[0] << 3); - (md5_uint32 ) &ctx->buffer[bytes + pad] = SWAP ((ctx->total[1] << 3) \| - (ctx->total[0] >> 29)); + ptr = (md5_uint32 ) &ctx->buffer[bytes + pad + 4]; + ptr = SWAP (ctx->total[0] << 3); + ptr = (md5_uint32 ) &ctx->buffer[bytes + pad]; + ptr = SWAP ((ctx->total[1] << 3) \| (ctx->total[0] >> 29)); / Process last bytes. */ sha1_process_block (ctx->buffer, bytes + pad + 8, ctx);
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super-ddf.c ^
@@ -1336,6 +1336,7 @@ { struct ddf_super ddf = st->sb; int map_disks = info->array.raid_disks; + __u32 cptr; if (ddf->currentconf) { getinfo_super_ddf_bvd(st, info, map); @@ -1347,8 +1348,9 @@ info->array.level = LEVEL_CONTAINER; info->array.layout = 0; info->array.md_minor = -1; - info->array.ctime = DECADE + __be32_to_cpu((__u32) - (ddf->anchor.guid+16)); + cptr = (__u32 )(ddf->anchor.guid + 16); + info->array.ctime = DECADE + __be32_to_cpu(cptr); + info->array.utime = 0; info->array.chunk_size = 0; info->container_enough = 1; @@ -1374,6 +1376,7 @@ info->recovery_start = MaxSector; info->reshape_active = 0; + info->recovery_blocked = 0; info->name[0] = 0; info->array.major_version = -1; @@ -1406,6 +1409,7 @@ int j; struct dl dl; int map_disks = info->array.raid_disks; + __u32 cptr; memset(info, 0, sizeof(info)); / FIXME this returns BVD info - what if we want SVD ?? / @@ -1415,8 +1419,8 @@ info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl, info->array.raid_disks); info->array.md_minor = -1; - info->array.ctime = DECADE + - __be32_to_cpu((__u32)(vc->conf.guid+16)); + cptr = (__u32 )(vc->conf.guid + 16); + info->array.ctime = DECADE + __be32_to_cpu(cptr); info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp); info->array.chunk_size = 512 << vc->conf.chunk_shift; info->custom_array_size = 0; @@ -1449,6 +1453,7 @@ info->recovery_start = MaxSector; info->resync_start = 0; info->reshape_active = 0; + info->recovery_blocked = 0; if (!(ddf->virt->entries[info->container_member].state & DDF_state_inconsistent) && (ddf->virt->entries[info->container_member].init_state @@ -2190,6 +2195,7 @@ struct phys_disk_entry pde; unsigned int n, i; struct stat stb; + __u32 tptr; if (ddf->currentconf) { add_to_super_ddf_bvd(st, dk, fd, devname); @@ -2218,8 +2224,9 @@ tm = localtime(&now); sprintf(dd->disk.guid, "%8s%04d%02d%02d", T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday); - (__u32)(dd->disk.guid + 16) = random32(); - (__u32)(dd->disk.guid + 20) = random32(); 
+ tptr = (__u32 )(dd->disk.guid + 16); + tptr++ = random32(); + tptr = random32(); do { /* Cannot be bothered finding a CRC of some irrelevant details/ @@ -2319,8 +2326,7 @@ called when creating a container or adding another device to a * container. / - -static unsigned char null_conf[4096+512]; +#define NULL_CONF_SZ 4096 static int __write_init_super_ddf(struct supertype st) { @@ -2333,6 +2339,12 @@ int attempts = 0; int successes = 0; unsigned long long size, sector; + char null_aligned; + + if (posix_memalign((void)&null_aligned, 4096, NULL_CONF_SZ) != 0) { + return -ENOMEM; + } + memset(null_aligned, 0xff, NULL_CONF_SZ); / try to write updated metadata, * if we catch a failure move on to the next disk @@ -2402,14 +2414,11 @@ if (write(fd, &c->conf, conf_size) < 0) break; } else { - char null_aligned = (char)((((unsigned long)null_conf)+511)&~511UL); - if (null_conf[0] != 0xff) - memset(null_conf, 0xff, sizeof(null_conf)); unsigned int togo = conf_size; - while (togo > sizeof(null_conf)-512) { - if (write(fd, null_aligned, sizeof(null_conf)-512) < 0) + while (togo > NULL_CONF_SZ) { + if (write(fd, null_aligned, NULL_CONF_SZ) < 0) break; - togo -= sizeof(null_conf)-512; + togo -= NULL_CONF_SZ; } if (write(fd, null_aligned, togo) < 0) break; @@ -2428,6 +2437,7 @@ continue; successes++; } + free(null_aligned); return attempts != successes; } @@ -2555,7 +2565,7 @@ continue; /* This is bigger than 'size', see if there are enough / cnt = 0; - for (dl2 = dl; dl2 ; dl2=dl2->next) + for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next) if (dl2->esize >= dl->esize) cnt++; if (cnt >= raiddisks) @@ -2965,6 +2975,7 @@ unsigned int j; struct mdinfo this; char ep; + __u32 cptr; if (subarray && (strtoul(subarray, &ep, 10) != vc->vcnum \|\| @@ -2984,8 +2995,8 @@ this->array.md_minor = -1; this->array.major_version = -1; this->array.minor_version = -2; - this->array.ctime = DECADE + - __be32_to_cpu((__u32)(vc->conf.guid+16)); + cptr = (__u32 )(vc->conf.guid + 16); + this->array.ctime = 
DECADE + __be32_to_cpu(cptr); this->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp); this->array.chunk_size = 512 << vc->conf.chunk_shift;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super-gpt.c ^
@@ -76,7 +76,7 @@ free_gpt(st); - if (posix_memalign((void*)&super, 512, 32512) != 0) { + if (posix_memalign((void*)&super, 4096, 32512) != 0) { fprintf(stderr, Name ": %s could not allocate superblock\n", __func__); return 1; @@ -179,8 +179,10 @@ if (!st) return st; - if (strcmp(arg, "gpt") != 0) + if (strcmp(arg, "gpt") != 0) { + free(st); return NULL; + } st->ss = &gpt; st->info = NULL;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super-intel.c ^
@@ -74,39 +74,50 @@ /* Define all supported attributes that have to be accepted by mdadm / -#define MPB_ATTRIB_SUPPORTED MPB_ATTRIB_CHECKSUM_VERIFY \| \ +#define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY \| \ MPB_ATTRIB_2TB \| \ MPB_ATTRIB_2TB_DISK \| \ MPB_ATTRIB_RAID0 \| \ MPB_ATTRIB_RAID1 \| \ MPB_ATTRIB_RAID10 \| \ MPB_ATTRIB_RAID5 \| \ - MPB_ATTRIB_EXP_STRIPE_SIZE + MPB_ATTRIB_EXP_STRIPE_SIZE) + +/ Define attributes that are unused but not harmful / +#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE) #define MPB_SECTOR_CNT 2210 #define IMSM_RESERVED_SECTORS 4096 +#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056 #define SECT_PER_MB_SHIFT 11 / Disk configuration info. / #define IMSM_MAX_DEVICES 255 struct imsm_disk { __u8 serial[MAX_RAID_SERIAL_LEN];/ 0xD8 - 0xE7 ascii serial number / - __u32 total_blocks; / 0xE8 - 0xEB total blocks / + __u32 total_blocks_lo; / 0xE8 - 0xEB total blocks lo / __u32 scsi_id; / 0xEC - 0xEF scsi ID / #define SPARE_DISK __cpu_to_le32(0x01) / Spare / #define CONFIGURED_DISK __cpu_to_le32(0x02) / Member of some RaidDev / #define FAILED_DISK __cpu_to_le32(0x04) / Permanent failure / __u32 status; / 0xF0 - 0xF3 / __u32 owner_cfg_num; / which config 0,1,2... owns this disk / -#define IMSM_DISK_FILLERS 4 - __u32 filler[IMSM_DISK_FILLERS]; / 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion / + __u32 total_blocks_hi; / 0xF4 - 0xF5 total blocks hi / +#define IMSM_DISK_FILLERS 3 + __u32 filler[IMSM_DISK_FILLERS]; / 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion / }; +/ map selector for map managment + / +#define MAP_0 0 +#define MAP_1 1 +#define MAP_X -1 + / RAID map configuration infos. 
/ struct imsm_map { - __u32 pba_of_lba0; / start address of partition / - __u32 blocks_per_member;/ blocks per member / - __u32 num_data_stripes; / number of data stripes / + __u32 pba_of_lba0_lo; / start address of partition / + __u32 blocks_per_member_lo;/ blocks per member / + __u32 num_data_stripes_lo; / number of data stripes / __u16 blocks_per_strip; __u8 map_state; / Normal, Uninitialized, Degraded, Failed / #define IMSM_T_STATE_NORMAL 0 @@ -121,7 +132,10 @@ __u8 num_domains; / number of parity domains / __u8 failed_disk_num; / valid only when state is degraded / __u8 ddf; - __u32 filler[7]; / expansion area / + __u32 pba_of_lba0_hi; + __u32 blocks_per_member_hi; + __u32 num_data_stripes_hi; + __u32 filler[4]; / expansion area / #define IMSM_ORD_REBUILD (1 << 24) __u32 disk_ord_tbl[1]; / disk_ord_tbl[num_members], * top byte contains some flags @@ -230,6 +244,12 @@ #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks / +#define MIGR_REC_BUF_SIZE 512 / size of migr_record i/o buffer / +#define MIGR_REC_POSITION 512 / migr_record position offset on disk, + * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION + / + + #define UNIT_SRC_NORMAL 0 / Source data for curr_migr_unit must * be recovered using srcMap / #define UNIT_SRC_IN_CP_AREA 1 / Source data for curr_migr_unit has @@ -261,6 +281,22 @@ * (for recovered migrations) / } __attribute__ ((__packed__)); +struct md_list { + / usage marker: + * 1: load metadata + * 2: metadata does not match + * 4: already checked + / + int used; + char devname; + int found; + int container; + dev_t st_rdev; + struct md_list next; +}; + +#define pr_vrb(fmt, arg...) 
(void) (verbose && fprintf(stderr, Name fmt, ##arg)) + static __u8 migr_type(struct imsm_dev dev) { if (dev->vol.migr_type == MIGR_VERIFY && @@ -286,7 +322,7 @@ static unsigned int sector_count(__u32 bytes) { - return ((bytes + (512-1)) & (~(512-1))) / 512; + return ROUND_UP(bytes, 512) / 512; } static unsigned int mpb_sectors(struct imsm_super mpb) @@ -321,12 +357,15 @@ void migr_rec_buf; /* buffer for I/O operations / struct migr_record migr_rec; /* migration record / }; + int clean_migration_record_by_mdmon; / when reshape is switched to next + array, it indicates that mdmon is allowed to clean migration + record / size_t len; / size of the 'buf' allocation / void next_buf; /* for realloc'ing buf from the manager / size_t next_len; int updates_pending; / count of pending updates for mdmon / int current_vol; / index of raid device undergoing creation / - __u32 create_offset; / common start for 'current_vol' / + unsigned long long create_offset; / common start for 'current_vol' / __u32 random; / random data for seeding new family numbers / struct intel_dev devlist; struct dl { @@ -341,7 +380,7 @@ struct extent e; / for determining freespace @ create / int raiddisk; / slot to fill in autolayout / enum action action; - } disks; + } disks, current_disk; struct dl disk_mgmt_list; / list of disks to add/remove while mdmon active / struct dl missing; /* disks removed while we weren't looking / @@ -366,6 +405,7 @@ enum imsm_reshape_type { CH_TAKEOVER, CH_MIGRATION, + CH_ARRAY_SIZE, }; / definition of messages passed to imsm_process_update / @@ -379,6 +419,7 @@ update_reshape_migration, update_takeover, update_general_migration_checkpoint, + update_size_change, }; struct imsm_update_activate_spare { @@ -431,6 +472,12 @@ int new_disks[1]; / new_raid_disks - old_raid_disks makedev number / }; +struct imsm_update_size_change { + enum imsm_update_type type; + int subdev; + long long new_size; +}; + struct imsm_update_general_migration_checkpoint { enum imsm_update_type type; 
__u32 curr_migr_unit; @@ -658,21 +705,30 @@ { / A device can have 2 maps if it is in the middle of a migration. * If second_map is: - * 0 - we return the first map - * 1 - we return the second map if it exists, else NULL - * -1 - we return the second map if it exists, else the first + * MAP_0 - we return the first map + * MAP_1 - we return the second map if it exists, else NULL + * MAP_X - we return the second map if it exists, else the first / struct imsm_map map = &dev->vol.map[0]; + struct imsm_map map2 = NULL; - if (second_map == 1 && !dev->vol.migr_state) - return NULL; - else if (second_map == 1 \|\| - (second_map < 0 && dev->vol.migr_state)) { - void ptr = map; + if (dev->vol.migr_state) + map2 = (void )map + sizeof_imsm_map(map); - return ptr + sizeof_imsm_map(map); - } else - return map; + switch (second_map) { + case MAP_0: + break; + case MAP_1: + map = map2; + break; + case MAP_X: + if (map2) + map = map2; + break; + default: + map = NULL; + } + return map; } @@ -682,13 +738,13 @@ static size_t sizeof_imsm_dev(struct imsm_dev dev, int migr_state) { size_t size = sizeof(dev) - sizeof(struct imsm_map) + - sizeof_imsm_map(get_imsm_map(dev, 0)); + sizeof_imsm_map(get_imsm_map(dev, MAP_0)); / migrating means an additional map / if (dev->vol.migr_state) - size += sizeof_imsm_map(get_imsm_map(dev, 1)); + size += sizeof_imsm_map(get_imsm_map(dev, MAP_1)); else if (migr_state) - size += sizeof_imsm_map(get_imsm_map(dev, 0)); + size += sizeof_imsm_map(get_imsm_map(dev, MAP_0)); return size; } @@ -742,9 +798,9 @@ / * for second_map: - * == 0 get first map - * == 1 get second map - * == -1 than get map according to the current migr_state + * == MAP_0 get first map + * == MAP_1 get second map + * == MAP_X than get map according to the current migr_state / static __u32 get_imsm_ord_tbl_ent(struct imsm_dev dev, int slot, @@ -815,7 +871,7 @@ for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev dev = get_imsm_dev(super, i); - struct imsm_map map = 
get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); if (get_imsm_disk_slot(map, dl->index) >= 0) memberships++; @@ -824,13 +880,89 @@ return memberships; } +static __u32 imsm_min_reserved_sectors(struct intel_super super); + +static int split_ull(unsigned long long n, __u32 lo, __u32 hi) +{ + if (lo == 0 \|\| hi == 0) + return 1; + lo = __le32_to_cpu((unsigned)n); + hi = __le32_to_cpu((unsigned)(n >> 32)); + return 0; +} + +static unsigned long long join_u32(__u32 lo, __u32 hi) +{ + return (unsigned long long)__le32_to_cpu(lo) \| + (((unsigned long long)__le32_to_cpu(hi)) << 32); +} + +static unsigned long long total_blocks(struct imsm_disk disk) +{ + if (disk == NULL) + return 0; + return join_u32(disk->total_blocks_lo, disk->total_blocks_hi); +} + +static unsigned long long pba_of_lba0(struct imsm_map map) +{ + if (map == NULL) + return 0; + return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi); +} + +static unsigned long long blocks_per_member(struct imsm_map map) +{ + if (map == NULL) + return 0; + return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi); +} + +#ifndef MDASSEMBLE +static unsigned long long num_data_stripes(struct imsm_map map) +{ + if (map == NULL) + return 0; + return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi); +} + +static void set_total_blocks(struct imsm_disk disk, unsigned long long n) +{ + split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi); +} +#endif + +static void set_pba_of_lba0(struct imsm_map map, unsigned long long n) +{ + split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi); +} + +static void set_blocks_per_member(struct imsm_map map, unsigned long long n) +{ + split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi); +} + +static void set_num_data_stripes(struct imsm_map map, unsigned long long n) +{ + split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi); +} + static struct extent get_extents(struct intel_super super, struct dl dl) { / 
find a list of used extents on the given physical device / struct extent rv, e; int i; int memberships = count_memberships(dl, super); - __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + __u32 reservation; + + / trim the reserved area for spares, so they can join any array + * regardless of whether the OROM has assigned sectors from the + * IMSM_RESERVED_SECTORS region + / + if (dl->index == -1) + reservation = imsm_min_reserved_sectors(super); + else + reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; rv = malloc(sizeof(struct extent) (memberships + 1)); if (!rv) @@ -839,11 +971,11 @@ for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev dev = get_imsm_dev(super, i); - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); if (get_imsm_disk_slot(map, dl->index) >= 0) { - e->start = __le32_to_cpu(map->pba_of_lba0); - e->size = __le32_to_cpu(map->blocks_per_member); + e->start = pba_of_lba0(map); + e->size = blocks_per_member(map); e++; } } @@ -856,10 +988,9 @@ / if (memberships) { struct extent last = &rv[memberships - 1]; - __u32 remainder; + unsigned long long remainder; - remainder = __le32_to_cpu(dl->disk.total_blocks) - - (last->start + last->size); + remainder = total_blocks(&dl->disk) - (last->start + last->size); / round down to 1k block to satisfy precision of the kernel * 'size' interface / @@ -870,7 +1001,7 @@ if (reservation > remainder) reservation = remainder; } - e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation; + e->start = total_blocks(&dl->disk) - reservation; e->size = 0; return rv; } @@ -899,7 +1030,7 @@ for (i = 0; e[i].size; i++) continue; - rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start; + rv = total_blocks(&dl->disk) - e[i].start; free(e); @@ -921,6 +1052,53 @@ return (disk->status & FAILED_DISK) == FAILED_DISK; } +/ try to determine how much space is reserved for metadata from + * the last get_extents() entry on the smallest active disk, + * otherwise 
fallback to the default + / +static __u32 imsm_min_reserved_sectors(struct intel_super super) +{ + struct extent e; + int i; + unsigned long long min_active; + __u32 remainder; + __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + struct dl dl, dl_min = NULL; + + if (!super) + return rv; + + min_active = 0; + for (dl = super->disks; dl; dl = dl->next) { + if (dl->index < 0) + continue; + unsigned long long blocks = total_blocks(&dl->disk); + if (blocks < min_active \|\| min_active == 0) { + dl_min = dl; + min_active = blocks; + } + } + if (!dl_min) + return rv; + + / find last lba used by subarrays on the smallest active disk / + e = get_extents(super, dl_min); + if (!e) + return rv; + for (i = 0; e[i].size; i++) + continue; + + remainder = min_active - e[i].start; + free(e); + + / to give priority to recovery we should not require full + IMSM_RESERVED_SECTORS from the spare / + rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION; + + / if real reservation is smaller use that value / + return (remainder < rv) ? 
remainder : rv; +} + / Return minimum size of a spare that can be used in this array/ static unsigned long long min_acceptable_spare_size_imsm(struct supertype st) { @@ -947,11 +1125,15 @@ if (i > 0) rv = e[i-1].start + e[i-1].size; free(e); + /* add the amount of space needed for metadata / - rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + rv = rv + imsm_min_reserved_sectors(super); + return rv 512; } +static int is_gen_migration(struct imsm_dev dev); + #ifndef MDASSEMBLE static __u64 blocks_per_migr_unit(struct intel_super super, struct imsm_dev dev); @@ -963,8 +1145,8 @@ { __u64 sz; int slot, i; - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map map2 = get_imsm_map(dev, 1); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map map2 = get_imsm_map(dev, MAP_1); __u32 ord; printf("\n"); @@ -980,14 +1162,14 @@ printf("\n"); printf(" Slots : ["); for (i = 0; i < map->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i, 0); + ord = get_imsm_ord_tbl_ent(dev, i, MAP_0); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } printf("]"); if (map2) { printf(" <-- ["); for (i = 0; i < map2->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i, 1); + ord = get_imsm_ord_tbl_ent(dev, i, MAP_1); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } printf("]"); @@ -1001,7 +1183,7 @@ printf("\n"); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { - ord = get_imsm_ord_tbl_ent(dev, slot, -1); + ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? 
" (out-of-sync)" : ""); } else @@ -1011,13 +1193,13 @@ sz += __le32_to_cpu(dev->size_low); printf(" Array Size : %llu%s\n", (unsigned long long)sz, human_size(sz 512)); - sz = __le32_to_cpu(map->blocks_per_member); + sz = blocks_per_member(map); printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz, human_size(sz * 512)); - printf(" Sector Offset : %u\n", - __le32_to_cpu(map->pba_of_lba0)); - printf(" Num Stripes : %u\n", - __le32_to_cpu(map->num_data_stripes)); + printf(" Sector Offset : %llu\n", + pba_of_lba0(map)); + printf(" Num Stripes : %llu\n", + num_data_stripes(map)); printf(" Chunk Size : %u KiB", __le16_to_cpu(map->blocks_per_strip) / 2); if (map2) @@ -1045,40 +1227,44 @@ printf("idle\n"); printf(" Map State : %s", map_state_str[map->map_state]); if (dev->vol.migr_state) { - struct imsm_map map = get_imsm_map(dev, 1); + struct imsm_map map = get_imsm_map(dev, MAP_1); printf(" <-- %s", map_state_str[map->map_state]); - printf("\n Checkpoint : %u (%llu)", - __le32_to_cpu(dev->vol.curr_migr_unit), - (unsigned long long)blocks_per_migr_unit(super, dev)); + printf("\n Checkpoint : %u ", + __le32_to_cpu(dev->vol.curr_migr_unit)); + if ((is_gen_migration(dev)) && ((slot > 1) \|\| (slot < 0))) + printf("(N/A)"); + else + printf("(%llu)", (unsigned long long) + blocks_per_migr_unit(super, dev)); } printf("\n"); printf(" Dirty State : %s\n", dev->vol.dirty ? 
"dirty" : "clean"); } -static void print_imsm_disk(struct imsm_super mpb, int index, __u32 reserved) +static void print_imsm_disk(struct imsm_disk disk, int index, __u32 reserved) { - struct imsm_disk disk = __get_imsm_disk(mpb, index); char str[MAX_RAID_SERIAL_LEN + 1]; __u64 sz; - if (index < 0 \|\| !disk) + if (index < -1 \|\| !disk) return; printf("\n"); snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk%02d Serial : %s\n", index, str); + if (index >= 0) + printf(" Disk%02d Serial : %s\n", index, str); + else + printf(" Disk Serial : %s\n", str); printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", is_configured(disk) ? " active" : "", is_failed(disk) ? " failed" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; + sz = total_blocks(disk) - reserved; printf(" Usable Size : %llu%s\n", (unsigned long long)sz, human_size(sz 512)); } -static int is_gen_migration(struct imsm_dev dev); - void examine_migr_rec_imsm(struct intel_super super) { struct migr_record migr_rec = super->migr_rec; @@ -1087,11 +1273,19 @@ for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev dev = __get_imsm_dev(mpb, i); + struct imsm_map map; + int slot = -1; + if (is_gen_migration(dev) == 0) continue; printf("\nMigration Record Information:"); - if (super->disks->index > 1) { + + / first map under migration / + map = get_imsm_map(dev, MAP_0); + if (map) + slot = get_imsm_disk_slot(map, super->disks->index); + if ((map == NULL) \|\| (slot > 1) \|\| (slot < 0)) { printf(" Empty\n "); printf("Examine one of first two disks in array\n"); break; @@ -1141,11 +1335,14 @@ static int imsm_check_attributes(__u32 attributes) { int ret_val = 1; - __u32 not_supported = (MPB_ATTRIB_SUPPORTED)^0xffffffff; + __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff; + + not_supported &= ~MPB_ATTRIB_IGNORED; not_supported &= attributes; if (not_supported) { - fprintf(stderr, Name "(IMSM): Unsupported attributes : 
%x\n", not_supported); + fprintf(stderr, Name "(IMSM): Unsupported attributes : %x\n", + (unsigned)__le32_to_cpu(not_supported)); if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) { dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n"); not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY; @@ -1248,7 +1445,7 @@ printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(mpb, super->disks->index, reserved); + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); if (super->bbm_log) { struct bbm_log log = super->bbm_log; @@ -1273,28 +1470,12 @@ for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(mpb, i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); } - for (dl = super->disks ; dl; dl = dl->next) { - struct imsm_disk disk; - char str[MAX_RAID_SERIAL_LEN + 1]; - __u64 sz; - - if (dl->index >= 0) - continue; - disk = &dl->disk; - printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk Serial : %s\n", str); - printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", - is_configured(disk) ? " active" : "", - is_failed(disk) ? " failed" : ""); - printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, - human_size(sz 512)); - } + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == -1) + print_imsm_disk(&dl->disk, -1, reserved); examine_migr_rec_imsm(super); } @@ -1518,11 +1699,11 @@ fd2devname(fd, buf); printf(" Port%d : %s", port, buf); if (imsm_read_serial(fd, NULL, (__u8 ) buf) == 0) - printf(" (%s)\n", buf); + printf(" (%.s)\n", MAX_RAID_SERIAL_LEN, buf); else - printf("()\n"); + printf(" ()\n"); + close(fd); } - close(fd); free(path); path = NULL; } @@ -1612,8 +1793,13 @@ imsm_orom_has_chunk(orom, 102416) ? 
" 16M" : "", imsm_orom_has_chunk(orom, 102432) ? " 32M" : "", imsm_orom_has_chunk(orom, 102464) ? " 64M" : ""); + printf(" 2TB volumes :%s supported\n", + (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not"); + printf(" 2TB disks :%s supported\n", + (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not"); printf(" Max Disks : %d\n", orom->tds); - printf(" Max Volumes : %d\n", orom->vpa); + printf(" Max Volumes : %d per array, %d per controller\n", + orom->vpa, orom->vphba); return; } @@ -1789,7 +1975,7 @@ static __u32 migr_strip_blocks_resync(struct imsm_dev dev) { /* migr_strip_size when repairing or initializing parity / - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch (get_imsm_raid_level(map)) { @@ -1807,7 +1993,7 @@ this is different than migr_strip_size_resync(), but it's good * to be compatible / - struct imsm_map map = get_imsm_map(dev, 1); + struct imsm_map map = get_imsm_map(dev, MAP_1); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch (get_imsm_raid_level(map)) { @@ -1826,8 +2012,8 @@ static __u32 num_stripes_per_unit_resync(struct imsm_dev dev) { - struct imsm_map lo = get_imsm_map(dev, 0); - struct imsm_map hi = get_imsm_map(dev, 1); + struct imsm_map lo = get_imsm_map(dev, MAP_0); + struct imsm_map hi = get_imsm_map(dev, MAP_1); __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip); __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip); @@ -1836,11 +2022,11 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev dev) { - struct imsm_map lo = get_imsm_map(dev, 0); + struct imsm_map lo = get_imsm_map(dev, MAP_0); int level = get_imsm_raid_level(lo); if (level == 1 \|\| level == 10) { - struct imsm_map hi = get_imsm_map(dev, 1); + struct imsm_map hi = get_imsm_map(dev, MAP_1); return hi->num_domains; } else @@ -1856,9 +2042,11 @@ switch (get_imsm_raid_level(map)) { case 0: + return map->num_members; + break; case 1: case 10: - return map->num_members; 
+ return map->num_members/2; case 5: return map->num_members - 1; default: @@ -1869,7 +2057,7 @@ static __u32 parity_segment_depth(struct imsm_dev dev) { - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch(get_imsm_raid_level(map)) { @@ -1885,7 +2073,7 @@ static __u32 map_migr_block(struct imsm_dev dev, __u32 block) { - struct imsm_map map = get_imsm_map(dev, 1); + struct imsm_map map = get_imsm_map(dev, MAP_1); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); __u32 strip = block / chunk; @@ -1924,7 +2112,7 @@ case MIGR_VERIFY: case MIGR_REPAIR: case MIGR_INIT: { - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); __u32 stripes_per_unit; __u32 blocks_per_unit; __u32 parity_depth; @@ -1940,7 +2128,7 @@ / stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev, 0); + disks = imsm_num_data_members(dev, MAP_0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -1994,13 +2182,14 @@ unsigned long long dsize; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (read(fd, super->migr_rec_buf, 512) != 512) { + if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != + MIGR_REC_BUF_SIZE) { fprintf(stderr, Name ": Cannot read migr record block: %s\n", strerror(errno)); @@ -2012,6 +2201,19 @@ return ret_val; } +static struct imsm_dev imsm_get_device_during_migration( + struct intel_super super) +{ + + struct intel_dev dv; + + for (dv = super->devlist; dv; dv = dv->next) { + if (is_gen_migration(dv->dev)) + return dv->dev; + } + return NULL; +} + 
/****************************************************************************** * Function: load_imsm_migr_rec * Description: Function reads imsm migration record (it is stored at the last @@ -2022,6 +2224,7 @@ * Returns: * 0 : success * -1 : fail + * -2 : no migration in progress *****************************************************************************/ static int load_imsm_migr_rec(struct intel_super super, struct mdinfo info) { @@ -2030,13 +2233,31 @@ char nm[30]; int retval = -1; int fd = -1; + struct imsm_dev dev; + struct imsm_map map = NULL; + int slot = -1; + + / find map under migration / + dev = imsm_get_device_during_migration(super); + / nothing to load,no migration in progress? + / + if (dev == NULL) + return -2; + map = get_imsm_map(dev, MAP_0); if (info) { for (sd = info->devs ; sd ; sd = sd->next) { + / skip spare and failed disks + / + if (sd->disk.raid_disk < 0) + continue; / read only from one of the first two slots / - if ((sd->disk.raid_disk > 1) \|\| - (sd->disk.raid_disk < 0)) + if (map) + slot = get_imsm_disk_slot(map, + sd->disk.raid_disk); + if ((map == NULL) \|\| (slot > 1) \|\| (slot < 0)) continue; + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); fd = dev_open(nm, O_RDONLY); if (fd >= 0) @@ -2045,8 +2266,14 @@ } if (fd < 0) { for (dl = super->disks; dl; dl = dl->next) { + / skip spare and failed disks + / + if (dl->index < 0) + continue; / read only from one of the first two slots / - if (dl->index > 1) + if (map) + slot = get_imsm_disk_slot(map, dl->index); + if ((map == NULL) \|\| (slot > 1) \|\| (slot < 0)) continue; sprintf(nm, "%d:%d", dl->major, dl->minor); fd = dev_open(nm, O_RDONLY); @@ -2130,23 +2357,45 @@ struct dl sd; int len; struct imsm_update_general_migration_checkpoint u; + struct imsm_dev dev; + struct imsm_map map = NULL; + + / find map under migration / + dev = imsm_get_device_during_migration(super); + / if no migration, write buffer anyway to clear migr_record + * on disk based on first available device + 
/ + if (dev == NULL) + dev = get_imsm_dev(super, super->current_vol < 0 ? 0 : + super->current_vol); + + map = get_imsm_map(dev, MAP_0); for (sd = super->disks ; sd ; sd = sd->next) { + int slot = -1; + + / skip failed and spare devices / + if (sd->index < 0) + continue; / write to 2 first slots only / - if ((sd->index < 0) \|\| (sd->index > 1)) + if (map) + slot = get_imsm_disk_slot(map, sd->index); + if ((map == NULL) \|\| (slot > 1) \|\| (slot < 0)) continue; + sprintf(nm, "%d:%d", sd->major, sd->minor); fd = dev_open(nm, O_RDWR); if (fd < 0) continue; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (write(fd, super->migr_rec_buf, 512) != 512) { + if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != + MIGR_REC_BUF_SIZE) { fprintf(stderr, Name ": Cannot write migr record block: %s\n", strerror(errno)); @@ -2184,24 +2433,73 @@ } #endif / MDASSEMBLE / +/ spare/missing disks activations are not allowe when + * array/container performs reshape operation, because + * all arrays in container works on the same disks set + / +int imsm_reshape_blocks_arrays_changes(struct intel_super super) +{ + int rv = 0; + struct intel_dev i_dev; + struct imsm_dev dev; + + /* check whole container + / + for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) { + dev = i_dev->dev; + if (is_gen_migration(dev)) { + / No repair during any migration in container + / + rv = 1; + break; + } + } + return rv; +} +static unsigned long long imsm_component_size_aligment_check(int level, + int chunk_size, + unsigned long long component_size) +{ + unsigned int component_size_alligment; + + / check component size aligment + / + component_size_alligment = component_size % (chunk_size/512); + + dprintf("imsm_component_size_aligment_check(Level: %i, " + "chunk_size = %i, component_size = %llu), " + 
"component_size_alligment = %u\n", + level, chunk_size, component_size, + component_size_alligment); + + if (component_size_alligment && (level != 1) && (level != UnSet)) { + dprintf("imsm: reported component size alligned from %llu ", + component_size); + component_size -= component_size_alligment; + dprintf("to %llu (%i).\n", + component_size, component_size_alligment); + } + + return component_size; +} + static void getinfo_super_imsm_volume(struct supertype st, struct mdinfo info, char dmap) { struct intel_super super = st->sb; struct migr_record migr_rec = super->migr_rec; struct imsm_dev dev = get_imsm_dev(super, super->current_vol); - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map prev_map = get_imsm_map(dev, 1); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map prev_map = get_imsm_map(dev, MAP_1); struct imsm_map map_to_analyse = map; struct dl dl; char devname; - unsigned int component_size_alligment; int map_disks = info->array.raid_disks; memset(info, 0, sizeof(info)); if (prev_map) map_to_analyse = prev_map; - dl = super->disks; + dl = super->current_disk; info->container_member = super->current_vol; info->array.raid_disks = map->num_members; @@ -2216,7 +2514,9 @@ info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size \|= __le32_to_cpu(dev->size_low); - if (prev_map && map->map_state == prev_map->map_state) { + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); + + if (is_gen_migration(dev)) { info->reshape_active = 1; info->new_level = get_imsm_raid_level(map); info->new_layout = imsm_level_to_layout(info->new_level); @@ -2226,7 +2526,7 @@ / this needs to be applied to every array * in the container. / - info->reshape_active = 2; + info->reshape_active = CONTAINER_RESHAPE; } / We shape information that we give to md might have to be * modify to cope with md's requirement for reshaping arrays. 
@@ -2263,38 +2563,31 @@ info->new_chunk = info->array.chunk_size; info->delta_disks = 0; } - info->disk.major = 0; - info->disk.minor = 0; + if (dl) { info->disk.major = dl->major; info->disk.minor = dl->minor; + info->disk.number = dl->index; + info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse, + dl->index); } - info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); - info->component_size = - __le32_to_cpu(map_to_analyse->blocks_per_member); - - /* check component size aligment - / - component_size_alligment = - info->component_size % (info->array.chunk_size/512); - - if (component_size_alligment && - (info->array.level != 1) && (info->array.level != UnSet)) { - dprintf("imsm: reported component size alligned from %llu ", - info->component_size); - info->component_size -= component_size_alligment; - dprintf("to %llu (%i).\n", - info->component_size, component_size_alligment); - } + info->data_offset = pba_of_lba0(map_to_analyse); + info->component_size = blocks_per_member(map_to_analyse); + + info->component_size = imsm_component_size_aligment_check( + info->array.level, + info->array.chunk_size, + info->component_size); memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; info->reshape_progress = 0; info->resync_start = MaxSector; - if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED \|\| - dev->vol.dirty) { + if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED \|\| + dev->vol.dirty) && + imsm_reshape_blocks_arrays_changes(super) == 0) { info->resync_start = 0; } if (dev->vol.migr_state) { @@ -2326,11 +2619,13 @@ dprintf("IMSM: General Migration checkpoint : %llu " "(%llu) -> read reshape progress : %llu\n", - units, blocks_per_unit, info->reshape_progress); + (unsigned long long)units, + (unsigned long long)blocks_per_unit, + info->reshape_progress); - used_disks = imsm_num_data_members(dev, 1); + used_disks = imsm_num_data_members(dev, MAP_1); if (used_disks > 0) { - array_blocks = map->blocks_per_member 
+ array_blocks = blocks_per_member(map) * used_disks; /* round array size down to closest MB / @@ -2373,7 +2668,7 @@ dmap[i] = 0; if (i < info->array.raid_disks) { struct imsm_disk dsk; - j = get_imsm_disk_idx(dev, i, -1); + j = get_imsm_disk_idx(dev, i, MAP_X); dsk = get_imsm_disk(super, j); if (dsk && (dsk->status & CONFIGURED_DISK)) dmap[i] = 1; @@ -2382,8 +2677,30 @@ } } -static __u8 imsm_check_degraded(struct intel_super super, struct imsm_dev dev, int failed); -static int imsm_count_failed(struct intel_super super, struct imsm_dev dev); +static __u8 imsm_check_degraded(struct intel_super super, struct imsm_dev dev, + int failed, int look_in_map); + +static int imsm_count_failed(struct intel_super super, struct imsm_dev dev, + int look_in_map); + + +#ifndef MDASSEMBLE +static void manage_second_map(struct intel_super super, struct imsm_dev dev) +{ + if (is_gen_migration(dev)) { + int failed; + __u8 map_state; + struct imsm_map map2 = get_imsm_map(dev, MAP_1); + + failed = imsm_count_failed(super, dev, MAP_1); + map_state = imsm_check_degraded(super, dev, failed, MAP_1); + if (map2->map_state != map_state) { + map2->map_state = map_state; + super->updates_pending++; + } + } +} +#endif static struct imsm_disk get_imsm_missing(struct intel_super super, __u8 index) { @@ -2433,6 +2750,7 @@ info->disk.state = 0; info->name[0] = 0; info->recovery_start = MaxSector; + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); / do we have the all the insync disks that we expect? / mpb = super->anchor; @@ -2443,15 +2761,15 @@ struct imsm_map map; __u8 state; - failed = imsm_count_failed(super, dev); - state = imsm_check_degraded(super, dev, failed); - map = get_imsm_map(dev, dev->vol.migr_state); + failed = imsm_count_failed(super, dev, MAP_0); + state = imsm_check_degraded(super, dev, failed, MAP_0); + map = get_imsm_map(dev, MAP_0); /* any newly missing disks? 
* (catches single-degraded vs double-degraded) / for (j = 0; j < map->num_members; j++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); + __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0); __u32 idx = ord_to_idx(ord); if (!(ord & IMSM_ORD_REBUILD) && @@ -2468,7 +2786,17 @@ enough = 0; else / we're normal, or already degraded / enough = 1; - + if (is_gen_migration(dev) && missing) { + / during general migration we need all disks + * that process is running on. + * No new missing disk is allowed. + / + max_enough = -1; + enough = -1; + / no more checks necessary + / + break; + } / in the missing/failed disk case check to see * if at least one array is runnable / @@ -2481,7 +2809,7 @@ __u32 reserved = imsm_reserved_sectors(super, super->disks); disk = &super->disks->disk; - info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved; + info->data_offset = total_blocks(&super->disks->disk) - reserved; info->component_size = reserved; info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0; / we don't change info->disk.raid_disk here because @@ -2592,25 +2920,30 @@ mpb = super->anchor; - if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private) - rv = -1; - else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) { - mpb->orig_family_num = ((__u32 ) info->update_private); - rv = 0; - } else if (strcmp(update, "uuid") == 0) { - __u32 new_family = malloc(sizeof(new_family)); - - /* update orig_family_number with the incoming random - * data, report the new effective uuid, and store the - * new orig_family_num for future updates. - / - if (new_family) { - memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32)); - uuid_from_super_imsm(st, info->uuid); - new_family = mpb->orig_family_num; - info->update_private = new_family; + if (strcmp(update, "uuid") == 0) { + /* We take this to mean that the family_num should be updated. + * However that is much smaller than the uuid so we cannot really + * allow an explicit uuid to be given. 
And it is hard to reliably + * know if one was. + * So if !uuid_set we know the current uuid is random and just used + * the first 'int' and copy it to the other 3 positions. + * Otherwise we require the 4 'int's to be the same as would be the + * case if we are using a random uuid. So an explicit uuid will be + * accepted as long as all for ints are the same... which shouldn't hurt + / + if (!uuid_set) { + info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0]; rv = 0; + } else { + if (info->uuid[0] != info->uuid[1] \|\| + info->uuid[1] != info->uuid[2] \|\| + info->uuid[2] != info->uuid[3]) + rv = -1; + else + rv = 0; } + if (rv == 0) + mpb->orig_family_num = info->uuid[0]; } else if (strcmp(update, "assemble") == 0) rv = 0; else @@ -2778,14 +3111,16 @@ sprintf(path, "/sys/dev/block/%d:%d", major(st.st_rdev), minor(st.st_rdev)); - rv = readlink(path, dname, sizeof(dname)); + rv = readlink(path, dname, sizeof(dname)-1); if (rv <= 0) return; - + dname[rv] = '\0'; nm = strrchr(dname, '/'); - nm++; - snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); + if (nm) { + nm++; + snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); + } } extern int scsi_get_serial(int fd, void buf, size_t buf_len); @@ -2870,7 +3205,6 @@ strncpy((char ) dest, (char ) src, MAX_RAID_SERIAL_LEN); } -#ifndef MDASSEMBLE static struct dl serial_to_dl(__u8 serial, struct intel_super super) { struct dl dl; @@ -2881,7 +3215,6 @@ return dl; } -#endif static struct imsm_disk * __serial_to_disk(__u8 serial, struct imsm_super mpb, int idx) @@ -2978,12 +3311,12 @@ __u8 to_state, int migr_type) { struct imsm_map dest; - struct imsm_map src = get_imsm_map(dev, 0); + struct imsm_map src = get_imsm_map(dev, MAP_0); dev->vol.migr_state = 1; set_migr_type(dev, migr_type); dev->vol.curr_migr_unit = 0; - dest = get_imsm_map(dev, 1); + dest = get_imsm_map(dev, MAP_1); /* duplicate and then set the target end state in map[0] / memcpy(dest, src, sizeof_imsm_map(src)); @@ -3005,10 +3338,12 @@ src->map_state = 
to_state; } -static void end_migration(struct imsm_dev dev, __u8 map_state) +static void end_migration(struct imsm_dev dev, struct intel_super super, + __u8 map_state) { - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map prev = get_imsm_map(dev, dev->vol.migr_state); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map prev = get_imsm_map(dev, dev->vol.migr_state == 0 ? + MAP_0 : MAP_1); int i, j; /* merge any IMSM_ORD_REBUILD bits that were not successfully @@ -3016,19 +3351,31 @@ * * FIXME add support for raid-level-migration / - for (i = 0; i < prev->num_members; i++) - for (j = 0; j < map->num_members; j++) - / during online capacity expansion - * disks position can be changed if takeover is used - / - if (ord_to_idx(map->disk_ord_tbl[j]) == - ord_to_idx(prev->disk_ord_tbl[i])) { - map->disk_ord_tbl[j] \|= prev->disk_ord_tbl[i]; - break; - } + if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) && + (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) { + / when final map state is other than expected + * merge maps (not for migration) + / + int failed; + + for (i = 0; i < prev->num_members; i++) + for (j = 0; j < map->num_members; j++) + / during online capacity expansion + * disks position can be changed + * if takeover is used + / + if (ord_to_idx(map->disk_ord_tbl[j]) == + ord_to_idx(prev->disk_ord_tbl[i])) { + map->disk_ord_tbl[j] \|= + prev->disk_ord_tbl[i]; + break; + } + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); + } dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + set_migr_type(dev, 0); dev->vol.curr_migr_unit = 0; map->map_state = map_state; } @@ -3051,7 +3398,7 @@ len_migr = sizeof_imsm_dev(dev_iter, 1); if (len_migr > len) space_needed += len_migr - len; - + dv = malloc(sizeof(dv)); if (!dv) return 1; @@ -3087,7 +3434,7 @@ super->buf = buf; super->len = len; } - + return 0; } @@ -3128,9 +3475,9 @@ dev_iter->vol.migr_state == 1 && 
dev_iter->vol.migr_type == MIGR_GEN_MIGR) { /* This device is migrating / - map0 = get_imsm_map(dev_iter, 0); - map1 = get_imsm_map(dev_iter, 1); - if (map0->pba_of_lba0 != map1->pba_of_lba0) + map0 = get_imsm_map(dev_iter, MAP_0); + map1 = get_imsm_map(dev_iter, MAP_1); + if (pba_of_lba0(map0) != pba_of_lba0(map1)) / migration optimization area was used / return -1; if (migr_rec->ascending_migr == 0 @@ -3215,12 +3562,13 @@ sectors = mpb_sectors(anchor) - 1; free(anchor); - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); return 2; } + super->clean_migration_record_by_mdmon = 0; if (!sectors) { check_sum = __gen_imsm_checksum(super->anchor); @@ -3275,6 +3623,32 @@ static int read_imsm_migr_rec(int fd, struct intel_super super); +/* clears hi bits in metadata if MPB_ATTRIB_2TB_DISK not set / +static void clear_hi(struct intel_super super) +{ + struct imsm_super mpb = super->anchor; + int i, n; + if (mpb->attributes & MPB_ATTRIB_2TB_DISK) + return; + for (i = 0; i < mpb->num_disks; ++i) { + struct imsm_disk disk = &mpb->disk[i]; + disk->total_blocks_hi = 0; + } + for (i = 0; i < mpb->num_raid_devs; ++i) { + struct imsm_dev dev = get_imsm_dev(super, i); + if (!dev) + return; + for (n = 0; n < 2; ++n) { + struct imsm_map map = get_imsm_map(dev, n); + if (!map) + continue; + map->pba_of_lba0_hi = 0; + map->blocks_per_member_hi = 0; + map->num_data_stripes_hi = 0; + } + } +} + static int load_and_parse_mpb(int fd, struct intel_super super, char devname, int keep_fd) { @@ -3287,7 +3661,7 @@ if (err) return err; err = parse_raid_devices(super); - + clear_hi(super); return err; } @@ -3378,7 +3752,7 @@ if (super) { memset(super, 0, sizeof(super)); super->current_vol = -1; - super->create_offset = ~((__u32 ) 0); + super->create_offset = ~((unsigned long long) 0); } return super; } @@ -3439,7 +3813,6 
@@ return 0; } -#ifndef MDASSEMBLE / find_missing - helper routine for load_super_imsm_all that identifies * disks that have disappeared from the system. This routine relies on * the mpb being uptodate, which it is at load time. @@ -3475,6 +3848,7 @@ return 0; } +#ifndef MDASSEMBLE static struct intel_disk disk_list_get(__u8 serial, struct intel_disk disk_list) { struct intel_disk idisk = disk_list; @@ -3734,6 +4108,8 @@ if (s == champion) continue; + mpb->attributes \|= s->anchor->attributes & MPB_ATTRIB_2TB_DISK; + for (i = 0; i < mpb->num_disks; i++) { struct imsm_disk disk; @@ -3791,67 +4167,33 @@ return champion; } + +static int +get_sra_super_block(int fd, struct intel_super super_list, char devname, int max, int keep_fd); +static int get_super_block(struct intel_super super_list, int devnum, char devname, + int major, int minor, int keep_fd); +static int +get_devlist_super_block(struct md_list devlist, struct intel_super super_list, + int max, int keep_fd); + + static int load_super_imsm_all(struct supertype st, int fd, void sbp, - char devname) + char devname, struct md_list devlist, + int keep_fd) { - struct mdinfo sra; struct intel_super super_list = NULL; struct intel_super super = NULL; - int devnum = fd2devnum(fd); - struct mdinfo sd; - int retry; int err = 0; - int i; - - /* check if 'fd' an opened container / - sra = sysfs_read(fd, 0, GET_LEVEL\|GET_VERSION\|GET_DEVS\|GET_STATE); - if (!sra) - return 1; + int i = 0; - if (sra->array.major_version != -1 \|\| - sra->array.minor_version != -2 \|\| - strcmp(sra->text_version, "imsm") != 0) { - err = 1; + if (fd >= 0) + / 'fd' is an opened container / + err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd); + else + / get super block from devlist devices / + err = get_devlist_super_block(devlist, &super_list, &i, keep_fd); + if (err) goto error; - } - / load all mpbs / - for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { - struct intel_super s = alloc_super(); - char nm[32]; - int dfd; - int 
rv; - - err = 1; - if (!s) - goto error; - s->next = super_list; - super_list = s; - - err = 2; - sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); - dfd = dev_open(nm, O_RDWR); - if (dfd < 0) - goto error; - - rv = find_intel_hba_capability(dfd, s, devname); - /* no orom/efi or non-intel hba of the disk / - if (rv != 0) - goto error; - - err = load_and_parse_mpb(dfd, s, NULL, 1); - - / retry the load if we might have raced against mdmon / - if (err == 3 && mdmon_running(devnum)) - for (retry = 0; retry < 3; retry++) { - usleep(3000); - err = load_and_parse_mpb(dfd, s, NULL, 1); - if (err != 3) - break; - } - if (err) - goto error; - } - / all mpbs enter, maybe one leaves / super = imsm_thunderdome(&super_list, i); if (!super) { @@ -3867,13 +4209,16 @@ / load migration record / err = load_imsm_migr_rec(super, NULL); - if (err) { + if (err == -1) { + / migration is in progress, + * but migr_rec cannot be loaded, + / err = 4; goto error; } / Check migration compatibility / - if (check_mpb_migr_compatibility(super) != 0) { + if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) { fprintf(stderr, Name ": Unsupported migration detected"); if (devname) fprintf(stderr, " on %s\n", devname); @@ -3893,13 +4238,16 @@ super_list = super_list->next; free_imsm(s); } - sysfs_free(sra); + if (err) return err; sbp = super; - st->container_dev = devnum; + if (fd >= 0) + st->container_dev = fd2devnum(fd); + else + st->container_dev = NoMdDev; if (err == 0 && st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; @@ -3908,9 +4256,148 @@ return 0; } + +static int +get_devlist_super_block(struct md_list devlist, struct intel_super super_list, + int max, int keep_fd) +{ + struct md_list tmpdev; + int err = 0; + int i = 0; + + for (i = 0, tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + if (tmpdev->used != 1) + continue; + if (tmpdev->container == 1) { + int lmax = 0; + int fd = dev_open(tmpdev->devname, O_RDONLY\|O_EXCL); + if (fd < 0) { + fprintf(stderr, Name ": 
cannot open device %s: %s\n", + tmpdev->devname, strerror(errno)); + err = 8; + goto error; + } + err = get_sra_super_block(fd, super_list, + tmpdev->devname, &lmax, + keep_fd); + i += lmax; + close(fd); + if (err) { + err = 7; + goto error; + } + } else { + int major = major(tmpdev->st_rdev); + int minor = minor(tmpdev->st_rdev); + err = get_super_block(super_list, + -1, + tmpdev->devname, + major, minor, + keep_fd); + i++; + if (err) { + err = 6; + goto error; + } + } + } + error: + max = i; + return err; +} + +static int get_super_block(struct intel_super *super_list, int devnum, char devname, + int major, int minor, int keep_fd) +{ + struct intel_supers = NULL; + char nm[32]; + int dfd = -1; + int rv; + int err = 0; + int retry; + + s = alloc_super(); + if (!s) { + err = 1; + goto error; + } + + sprintf(nm, "%d:%d", major, minor); + dfd = dev_open(nm, O_RDWR); + if (dfd < 0) { + err = 2; + goto error; + } + + rv = find_intel_hba_capability(dfd, s, devname); + / no orom/efi or non-intel hba of the disk / + if (rv != 0) { + err = 4; + goto error; + } + + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + + / retry the load if we might have raced against mdmon / + if (err == 3 && (devnum != -1) && mdmon_running(devnum)) + for (retry = 0; retry < 3; retry++) { + usleep(3000); + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + if (err != 3) + break; + } + error: + if (!err) { + s->next = super_list; + super_list = s; + } else { + if (s) + free(s); + if (dfd >= 0) + close(dfd); + } + if ((dfd >= 0) && (!keep_fd)) + close(dfd); + return err; + +} + +static int +get_sra_super_block(int fd, struct intel_super super_list, char devname, int max, int keep_fd) +{ + struct mdinfo sra; + int devnum; + struct mdinfo sd; + int err = 0; + int i = 0; + sra = sysfs_read(fd, 0, GET_LEVEL\|GET_VERSION\|GET_DEVS\|GET_STATE); + if (!sra) + return 1; + + if (sra->array.major_version != -1 \|\| + sra->array.minor_version != -2 \|\| + strcmp(sra->text_version, "imsm") != 0) { + err = 
1; + goto error; + } + / load all mpbs / + devnum = fd2devnum(fd); + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { + if (get_super_block(super_list, devnum, devname, + sd->disk.major, sd->disk.minor, keep_fd) != 0) { + err = 7; + goto error; + } + } + error: + sysfs_free(sra); + max = i; + return err; +} + static int load_container_imsm(struct supertype st, int fd, char devname) { - return load_super_imsm_all(st, fd, &st->sb, devname); + return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1); } #endif @@ -3987,22 +4474,13 @@ return info->chunk_size >> 9; } -static __u32 info_to_num_data_stripes(mdu_array_info_t info, int num_domains) -{ - __u32 num_stripes; - - num_stripes = (info->size 2) / info_to_blocks_per_strip(info); - num_stripes /= num_domains; - - return num_stripes; -} - -static __u32 info_to_blocks_per_member(mdu_array_info_t info) +static unsigned long long info_to_blocks_per_member(mdu_array_info_t info, + unsigned long long size) { if (info->level == 1) - return info->size * 2; + return size * 2; else - return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1); + return (size * 2) & ~(info_to_blocks_per_strip(info) - 1); } static void imsm_update_version_info(struct intel_super super) @@ -4016,7 +4494,7 @@ for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (__le32_to_cpu(dev->size_high) > 0) mpb->attributes \|= MPB_ATTRIB_2TB; @@ -4093,7 +4571,7 @@ int i; unsigned long long array_blocks; size_t size_old, size_new; - __u32 num_data_stripes; + unsigned long long num_data_stripes; if (super->orom && mpb->num_raid_devs >= super->orom->vpa) { fprintf(stderr, Name": This imsm-container already has the " @@ -4112,12 +4590,14 @@ fprintf(stderr, Name": could not allocate new mpb\n"); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, + MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name 
": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); free(super); + free(mpb_new); return 0; } memcpy(mpb_new, mpb, size_old); @@ -4128,12 +4608,40 @@ memset(mpb_new + size_old, 0, size_round - size_old); } super->current_vol = idx; - / when creating the first raid device in this container set num_disks - * to zero, i.e. delete this spare and add raid member devices in - * add_to_super_imsm_volume() + + /* handle 'failed_disks' by either: + * a) create dummy disk entries in the table if this the first + * volume in the array. We add them here as this is the only + * opportunity to add them. add_to_super_imsm_volume() + * handles the non-failed disks and continues incrementing + * mpb->num_disks. + * b) validate that 'failed_disks' matches the current number + * of missing disks if the container is populated / - if (super->current_vol == 0) + if (super->current_vol == 0) { mpb->num_disks = 0; + for (i = 0; i < info->failed_disks; i++) { + struct imsm_disk disk; + + mpb->num_disks++; + disk = __get_imsm_disk(mpb, i); + disk->status = CONFIGURED_DISK \| FAILED_DISK; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + snprintf((char ) disk->serial, MAX_RAID_SERIAL_LEN, + "missing:%d", i); + } + find_missing(super); + } else { + int missing = 0; + struct dl d; + + for (d = super->missing; d; d = d->next) + missing++; + if (info->failed_disks > missing) { + fprintf(stderr, Name": unable to add 'missing' disk to container\n"); + return 0; + } + } if (!check_name(super, name, 0)) return 0; @@ -4150,12 +4658,9 @@ } strncpy((char ) dev->volume, name, MAX_RAID_SERIAL_LEN); - if (info->level == 1) - array_blocks = info_to_blocks_per_member(info); - else - array_blocks = calc_array_size(info->level, info->raid_disks, + array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, - info->size2); + size * 2); /* round array size down to closest MB / array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; @@ -4165,15 
+4670,18 @@ vol = &dev->vol; vol->migr_state = 0; set_migr_type(dev, MIGR_INIT); - vol->dirty = 0; + vol->dirty = !info->state; vol->curr_migr_unit = 0; - map = get_imsm_map(dev, 0); - map->pba_of_lba0 = __cpu_to_le32(super->create_offset); - map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); + map = get_imsm_map(dev, MAP_0); + set_pba_of_lba0(map, super->create_offset); + set_blocks_per_member(map, info_to_blocks_per_member(info, size)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; - map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : - IMSM_T_STATE_NORMAL; + if (info->level > 0) + map->map_state = IMSM_T_STATE_UNINITIALIZED; + else + map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED : + IMSM_T_STATE_NORMAL; map->ddf = 1; if (info->level == 1 && info->raid_disks > 2) { @@ -4193,8 +4701,10 @@ else map->num_domains = 1; - num_data_stripes = info_to_num_data_stripes(info, map->num_domains); - map->num_data_stripes = __cpu_to_le32(num_data_stripes); + / info->size is only int so use the 'size' parameter instead / + num_data_stripes = (size 2) / info_to_blocks_per_strip(info); + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { @@ -4248,7 +4758,7 @@ ": %s could not allocate superblock\n", __func__); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); @@ -4281,13 +4791,14 @@ { struct intel_super super = st->sb; struct imsm_super mpb = super->anchor; - struct dl dl; + struct imsm_disk _disk; struct imsm_dev dev; struct imsm_map map; + struct dl dl, df; int slot; dev = get_imsm_dev(super, super->current_vol); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (! 
(dk->state & (1<<MD_DISK_SYNC))) { fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n", @@ -4322,20 +4833,62 @@ /* Check the device has not already been added / slot = get_imsm_disk_slot(map, dl->index); if (slot >= 0 && - (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) { + (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) { fprintf(stderr, Name ": %s has been included in this array twice\n", devname); return 1; } - set_imsm_ord_tbl_ent(map, dk->number, dl->index); + set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index); dl->disk.status = CONFIGURED_DISK; + / update size of 'missing' disks to be at least as large as the + * largest acitve member (we only have dummy missing disks when + * creating the first volume) + / + if (super->current_vol == 0) { + for (df = super->missing; df; df = df->next) { + if (total_blocks(&dl->disk) > total_blocks(&df->disk)) + set_total_blocks(&df->disk, total_blocks(&dl->disk)); + _disk = __get_imsm_disk(mpb, df->index); + _disk = df->disk; + } + } + + /* refresh unset/failed slots to point to valid 'missing' entries / + for (df = super->missing; df; df = df->next) + for (slot = 0; slot < mpb->num_disks; slot++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); + + if ((ord & IMSM_ORD_REBUILD) == 0) + continue; + set_imsm_ord_tbl_ent(map, slot, df->index \| IMSM_ORD_REBUILD); + if (is_gen_migration(dev)) { + struct imsm_map map2 = get_imsm_map(dev, + MAP_1); + int slot2 = get_imsm_disk_slot(map2, df->index); + if ((slot2 < map2->num_members) && + (slot2 >= 0)) { + __u32 ord2 = get_imsm_ord_tbl_ent(dev, + slot2, + MAP_1); + if ((unsigned)df->index == + ord_to_idx(ord2)) + set_imsm_ord_tbl_ent(map2, + slot2, + df->index \| + IMSM_ORD_REBUILD); + } + } + dprintf("set slot:%d to missing disk:%d\n", slot, df->index); + break; + } + /* if we are creating the first raid device update the family number / if (super->current_vol == 0) { __u32 sum; struct imsm_dev _dev = __get_imsm_dev(mpb, 
0); - struct imsm_disk _disk = __get_imsm_disk(mpb, dl->index); + _disk = __get_imsm_disk(mpb, dl->index); if (!_dev \|\| !_disk) { fprintf(stderr, Name ": BUG mpb setup error\n"); return 1; @@ -4347,10 +4900,41 @@ mpb->family_num = __cpu_to_le32(sum); mpb->orig_family_num = mpb->family_num; } - + super->current_disk = dl; return 0; } +/ mark_spare() + * Function marks disk as spare and restores disk serial + * in case it was previously marked as failed by takeover operation + * reruns: + * -1 : critical error + * 0 : disk is marked as spare but serial is not set + * 1 : success + / +int mark_spare(struct dl disk) +{ + __u8 serial[MAX_RAID_SERIAL_LEN]; + int ret_val = -1; + + if (!disk) + return ret_val; + + ret_val = 0; + if (!imsm_read_serial(disk->fd, NULL, serial)) { + /* Restore disk serial number, because takeover marks disk + * as failed and adds to serial ':0' before it becomes + * a spare disk. + / + serialcpy(disk->serial, serial); + serialcpy(disk->disk.serial, serial); + ret_val = 1; + } + disk->disk.status = SPARE_DISK; + disk->index = -1; + + return ret_val; +} static int add_to_super_imsm(struct supertype st, mdu_disk_info_t dk, int fd, char devname) @@ -4388,7 +4972,6 @@ memset(dd, 0, sizeof(dd)); dd->major = major(stb.st_rdev); dd->minor = minor(stb.st_rdev); - dd->index = -1; dd->devname = devname ? 
strdup(devname) : NULL; dd->fd = fd; dd->e = NULL; @@ -4404,8 +4987,12 @@ get_dev_size(fd, NULL, &size); size /= 512; serialcpy(dd->disk.serial, dd->serial); - dd->disk.total_blocks = __cpu_to_le32(size); - dd->disk.status = SPARE_DISK; + set_total_blocks(&dd->disk, size); + if (__le32_to_cpu(dd->disk.total_blocks_hi) > 0) { + struct imsm_super mpb = super->anchor; + mpb->attributes \|= MPB_ATTRIB_2TB_DISK; + } + mark_spare(dd); if (sysfs_disk_to_scsi_id(fd, &id) == 0) dd->disk.scsi_id = __cpu_to_le32(id); else @@ -4448,9 +5035,8 @@ memset(dd, 0, sizeof(dd)); dd->major = dk->major; dd->minor = dk->minor; - dd->index = -1; dd->fd = -1; - dd->disk.status = SPARE_DISK; + mark_spare(dd); dd->action = DISK_REMOVE; dd->next = super->disk_mgmt_list; @@ -4493,6 +5079,9 @@ continue; spare->disk[0] = d->disk; + if (__le32_to_cpu(d->disk.total_blocks_hi) > 0) + spare->attributes \|= MPB_ATTRIB_2TB_DISK; + sum = __gen_imsm_checksum(spare); spare->family_num = __cpu_to_le32(sum); spare->orig_family_num = 0; @@ -4569,25 +5158,35 @@ sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); + if (super->clean_migration_record_by_mdmon) { + clear_migration_record = 1; + super->clean_migration_record_by_mdmon = 0; + } if (clear_migration_record) - memset(super->migr_rec_buf, 0, 512); + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE); / write the mpb for disks that compose raid devices / for (d = super->disks; d ; d = d->next) { - if (d->index < 0) + if (d->index < 0 \|\| is_failed(&d->disk)) continue; - if (store_imsm_mpb(d->fd, mpb)) - fprintf(stderr, "%s: failed for device %d:%d %s\n", - __func__, d->major, d->minor, strerror(errno)); + if (clear_migration_record) { unsigned long long dsize; get_dev_size(d->fd, NULL, &dsize); if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) { - if (write(d->fd, super->migr_rec_buf, 512) != 512) + if (write(d->fd, super->migr_rec_buf, + MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE) perror("Write migr_rec failed"); } } + + if 
(store_imsm_mpb(d->fd, mpb)) + fprintf(stderr, + "%s: failed for device %d:%d (fd: %d)%s\n", + __func__, d->major, d->minor, + d->fd, strerror(errno)); + if (doclose) { close(d->fd); d->fd = -1; @@ -4607,7 +5206,7 @@ struct imsm_update_create_array u; struct intel_super super = st->sb; struct imsm_dev dev = get_imsm_dev(super, dev_idx); - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); struct disk_info inf; struct imsm_disk disk; int i; @@ -4626,7 +5225,7 @@ imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i, -1); + int idx = get_imsm_disk_idx(dev, i, MAP_X); disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); @@ -4766,14 +5365,22 @@ return 0; } close(fd); - if (super->orom && raiddisks > super->orom->tds) { - if (verbose) - fprintf(stderr, Name ": %d exceeds maximum number of" - " platform supported disks: %d\n", - raiddisks, super->orom->tds); - - free_imsm(super); - return 0; + if (super->orom) { + if (raiddisks > super->orom->tds) { + if (verbose) + fprintf(stderr, Name ": %d exceeds maximum number of" + " platform supported disks: %d\n", + raiddisks, super->orom->tds); + free_imsm(super); + return 0; + } + if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 && + (ldsize >> 9) >> 32 > 0) { + if (verbose) + fprintf(stderr, Name ": %s exceeds maximum platform supported size\n", dev); + free_imsm(super); + return 0; + } } freesize = avail_size_imsm(st, ldsize >> 9); @@ -4882,7 +5489,7 @@ if (maxsize < reserve) return 0; - super->create_offset = ~((__u32) 0); + super->create_offset = ~((unsigned long long) 0); if (start + reserve > super->create_offset) return 0; / start overflows create_offset / super->create_offset = start + reserve; @@ -4913,42 +5520,414 @@ } -#define pr_vrb(fmt, arg...) 
(void) (verbose && fprintf(stderr, Name fmt, ##arg)) -/ - * validate volume parameters with OROM/EFI capabilities - / static int -validate_geometry_imsm_orom(struct intel_super super, int level, int layout, - int raiddisks, int chunk, int verbose) +active_arrays_by_format(char name, char* hba, struct md_list *devlist, + int dpa, int verbose) { -#if DEBUG - verbose = 1; + struct mdstat_ent mdstat = mdstat_read(0, 0); + struct mdstat_ent memb = NULL; + int count = 0; + int num = 0; + struct md_list dv = NULL; + int found; + + for (memb = mdstat ; memb ; memb = memb->next) { + if (memb->metadata_version && + (strncmp(memb->metadata_version, "external:", 9) == 0) && + (strcmp(&memb->metadata_version[9], name) == 0) && + !is_subarray(memb->metadata_version+9) && + memb->members) { + struct dev_member dev = memb->members; + int fd = -1; + while(dev && (fd < 0)) { + char path = malloc(strlen(dev->name) + strlen("/dev/") + 1); + if (path) { + num = sprintf(path, "%s%s", "/dev/", dev->name); + if (num > 0) + fd = open(path, O_RDONLY, 0); + if ((num <= 0) \|\| (fd < 0)) { + pr_vrb(": Cannot open %s: %s\n", + dev->name, strerror(errno)); + } + free(path); + } + dev = dev->next; + } + found = 0; + if ((fd >= 0) && disk_attached_to_hba(fd, hba)) { + struct mdstat_ent vol; + for (vol = mdstat ; vol ; vol = vol->next) { + if ((vol->active > 0) && + vol->metadata_version && + is_container_member(vol, memb->dev)) { + found++; + count++; + } + } + if (devlist && (found < dpa)) { + dv = calloc(1, sizeof(dv)); + if (dv == NULL) + fprintf(stderr, Name ": calloc failed\n"); + else { + dv->devname = malloc(strlen(memb->dev) + strlen("/dev/") + 1); + if (dv->devname != NULL) { + sprintf(dv->devname, "%s%s", "/dev/", memb->dev); + dv->found = found; + dv->used = 0; + dv->next = devlist; + devlist = dv; + } else + free(dv); + } + } + } + if (fd >= 0) + close(fd); + } + } + free_mdstat(mdstat); + return count; +} + +#ifdef DEBUG_LOOP +static struct md_list +get_loop_devices(void) +{ + int i; 
+ struct md_list devlist = NULL; + struct md_list dv = NULL; + + for(i = 0; i < 12; i++) { + dv = calloc(1, sizeof(dv)); + if (dv == NULL) { + fprintf(stderr, Name ": calloc failed\n"); + break; + } + dv->devname = malloc(40); + if (dv->devname == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + free(dv); + break; + } + sprintf(dv->devname, "/dev/loop%d", i); + dv->next = devlist; + devlist = dv; + } + return devlist; +} #endif - / validate container capabilities / - if (super->orom && raiddisks > super->orom->tds) { - if (verbose) - fprintf(stderr, Name ": %d exceeds maximum number of" - " platform supported disks: %d\n", - raiddisks, super->orom->tds); + +static struct md_list +get_devices(const char hba_path) +{ + struct md_list devlist = NULL; + struct md_list dv = NULL; + struct dirent ent; + DIR dir; + int err = 0; + +#if DEBUG_LOOP + devlist = get_loop_devices(); + return devlist; +#endif + / scroll through /sys/dev/block looking for devices attached to + * this hba + / + dir = opendir("/sys/dev/block"); + for (ent = dir ? 
readdir(dir) : NULL; ent; ent = readdir(dir)) { + int fd; + char buf[1024]; + int major, minor; + char path = NULL; + if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2) + continue; + path = devt_to_devpath(makedev(major, minor)); + if (!path) + continue; + if (!path_attached_to_hba(path, hba_path)) { + free(path); + path = NULL; + continue; + } + free(path); + path = NULL; + fd = dev_open(ent->d_name, O_RDONLY); + if (fd >= 0) { + fd2devname(fd, buf); + close(fd); + } else { + fprintf(stderr, Name ": cannot open device: %s\n", + ent->d_name); + continue; + } + + + dv = calloc(1, sizeof(dv)); + if (dv == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + err = 1; + break; + } + dv->devname = strdup(buf); + if (dv->devname == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + err = 1; + free(dv); + break; + } + dv->next = devlist; + devlist = dv; + } + if (err) { + while(devlist) { + dv = devlist; + devlist = devlist->next; + free(dv->devname); + free(dv); + } + } + closedir(dir); + return devlist; +} + +static int +count_volumes_list(struct md_list devlist, char homehost, + int verbose, int found) +{ + struct md_list tmpdev; + int count = 0; + struct supertype st = NULL; + + /* first walk the list of devices to find a consistent set + * that match the criterea, if that is possible. + * We flag the ones we like with 'used'. 
+ / + found = 0; + st = match_metadata_desc_imsm("imsm"); + if (st == NULL) { + pr_vrb(": cannot allocate memory for imsm supertype\n"); + return 0; + } + + for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + char devname = tmpdev->devname; + struct stat stb; + struct supertype tst; + int dfd; + if (tmpdev->used > 1) + continue; + tst = dup_super(st); + if (tst == NULL) { + pr_vrb(": cannot allocate memory for imsm supertype\n"); + goto err_1; + } + tmpdev->container = 0; + dfd = dev_open(devname, O_RDONLY\|O_EXCL); + if (dfd < 0) { + dprintf(": cannot open device %s: %s\n", + devname, strerror(errno)); + tmpdev->used = 2; + } else if (fstat(dfd, &stb)< 0) { + /* Impossible! / + dprintf(": fstat failed for %s: %s\n", + devname, strerror(errno)); + tmpdev->used = 2; + } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { + dprintf(": %s is not a block device.\n", + devname); + tmpdev->used = 2; + } else if (must_be_container(dfd)) { + struct supertype cst; + cst = super_by_fd(dfd, NULL); + if (cst == NULL) { + dprintf(": cannot recognize container type %s\n", + devname); + tmpdev->used = 2; + } else if (tst->ss != st->ss) { + dprintf(": non-imsm container - ignore it: %s\n", + devname); + tmpdev->used = 2; + } else if (!tst->ss->load_container \|\| + tst->ss->load_container(tst, dfd, NULL)) + tmpdev->used = 2; + else { + tmpdev->container = 1; + } + if (cst) + cst->ss->free_super(cst); + } else { + tmpdev->st_rdev = stb.st_rdev; + if (tst->ss->load_super(tst,dfd, NULL)) { + dprintf(": no RAID superblock on %s\n", + devname); + tmpdev->used = 2; + } else if (tst->ss->compare_super == NULL) { + dprintf(": Cannot assemble %s metadata on %s\n", + tst->ss->name, devname); + tmpdev->used = 2; + } + } + if (dfd >= 0) + close(dfd); + if (tmpdev->used == 2 \|\| tmpdev->used == 4) { + /* Ignore unrecognised devices during auto-assembly / + goto loop; + } + else { + struct mdinfo info; + tst->ss->getinfo_super(tst, &info, NULL); + + if (st->minor_version == -1) + 
st->minor_version = tst->minor_version; + + if (memcmp(info.uuid, uuid_zero, + sizeof(int[4])) == 0) { + / this is a floating spare. It cannot define + * an array unless there are no more arrays of + * this type to be found. It can be included + * in an array of this type though. + / + tmpdev->used = 3; + goto loop; + } + + if (st->ss != tst->ss \|\| + st->minor_version != tst->minor_version \|\| + st->ss->compare_super(st, tst) != 0) { + / Some mismatch. If exactly one array matches this host, + * we can resolve on that one. + * Or, if we are auto assembling, we just ignore the second + * for now. + / + dprintf(": superblock on %s doesn't match others - assembly aborted\n", + devname); + goto loop; + } + tmpdev->used = 1; + found = 1; + dprintf("found: devname: %s\n", devname); + } + loop: + if (tst) + tst->ss->free_super(tst); + } + if (found != 0) { + int err; + if ((err = load_super_imsm_all(st, -1, &st->sb, NULL, devlist, 0)) == 0) { + struct mdinfo iter, head = st->ss->container_content(st, NULL); + for (iter = head; iter; iter = iter->next) { + dprintf("content->text_version: %s vol\n", + iter->text_version); + if (iter->array.state & (1<<MD_SB_BLOCK_VOLUME)) { + / do not assemble arrays with unsupported + configurations / + dprintf(": Cannot activate member %s.\n", + iter->text_version); + } else + count++; + } + sysfs_free(head); + + } else { + dprintf(" no valid super block on device list: err: %d %p\n", + err, st->sb); + } + } else { + dprintf(" no more devices to examin\n"); + } + + for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + if ((tmpdev->used == 1) && (tmpdev->found)) { + if (count) { + if (count < tmpdev->found) + count = 0; + else + count -= tmpdev->found; + } + } + if (tmpdev->used == 1) + tmpdev->used = 4; + } + err_1: + if (st) + st->ss->free_super(st); + return count; +} + + +static int +count_volumes(char hba, int dpa, int verbose) +{ + struct md_list devlist = NULL; + int count = 0; + int found = 0;; + + devlist = 
get_devices(hba); + / if no intel devices return zero volumes / + if (devlist == NULL) + return 0; + + count = active_arrays_by_format("imsm", hba, &devlist, dpa, verbose); + dprintf(" path: %s active arrays: %d\n", hba, count); + if (devlist == NULL) + return 0; + do { + found = 0; + count += count_volumes_list(devlist, + NULL, + verbose, + &found); + dprintf("found %d count: %d\n", found, count); + } while (found); + + dprintf("path: %s total number of volumes: %d\n", hba, count); + + while(devlist) { + struct md_list dv = devlist; + devlist = devlist->next; + free(dv->devname); + free(dv); + } + return count; +} + +static int imsm_default_chunk(const struct imsm_orom orom) +{ + / up to 512 if the plaform supports it, otherwise the platform max. + * 128 if no platform detected + / + int fs = max(7, orom ? fls(orom->sss) : 0); + + return min(512, (1 << fs)); +} + +static int +validate_geometry_imsm_orom(struct intel_super super, int level, int layout, + int raiddisks, int chunk, unsigned long long size, int verbose) +{ + / check/set platform and metadata limits/defaults / + if (super->orom && raiddisks > super->orom->dpa) { + pr_vrb(": platform supports a maximum of %d disks per array\n", + super->orom->dpa); return 0; } / capabilities of OROM tested - copied from validate_geometry_imsm_volume / - if (super->orom && (!is_raid_level_supported(super->orom, level, - raiddisks))) { + if (!is_raid_level_supported(super->orom, level, raiddisks)) { pr_vrb(": platform does not support raid%d with %d disk%s\n", level, raiddisks, raiddisks > 1 ? 
"s" : ""); return 0; } - if (super->orom && level != 1) { - if (chunk && (chunk == 0 \|\| chunk == UnSet)) - chunk = imsm_orom_default_chunk(super->orom); - else if (chunk && !imsm_orom_has_chunk(super->orom, chunk)) { - pr_vrb(": platform does not support a chunk size of: " - "%d\n", chunk); - return 0; - } + + if (chunk && (chunk == 0 \|\| chunk == UnSet)) + chunk = imsm_default_chunk(super->orom); + + if (super->orom && chunk && !imsm_orom_has_chunk(super->orom, chunk)) { + pr_vrb(": platform does not support a chunk size of: " + "%d\n", chunk); + return 0; } + if (layout != imsm_level_to_layout(level)) { if (level == 5) pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n"); @@ -4959,6 +5938,12 @@ layout, level); return 0; } + + if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 && chunk && + (calc_array_size(level, raiddisks, layout, chunk, size) >> 32) > 0) { + pr_vrb(": platform does not support a volume size over 2TB\n"); + return 0; + } return 1; } @@ -4973,7 +5958,7 @@ { struct stat stb; struct intel_super super = st->sb; - struct imsm_super mpb = super->anchor; + struct imsm_super mpb; struct dl dl; unsigned long long pos = 0; unsigned long long maxsize; @@ -4984,7 +5969,9 @@ if (!super) return 0; - if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) { + mpb = super->anchor; + + if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) { fprintf(stderr, Name ": RAID gemetry validation failed. 
" "Cannot proceed with the action(s).\n"); return 0; @@ -5062,6 +6049,11 @@ fprintf(stderr, Name ": The option-rom requires all member" " disks to be a member of all volumes\n"); return 0; + } else if (super->orom && mpb->num_raid_devs > 0 && + mpb->num_disks != raiddisks) { + fprintf(stderr, Name ": The option-rom requires all member" + " disks to be a member of all volumes\n"); + return 0; } /* retrieve the largest free space block / @@ -5100,19 +6092,44 @@ i += dl->extent_cnt; maxsize = merge_extents(super, i); + + if (!check_env("IMSM_NO_PLATFORM") && + mpb->num_raid_devs > 0 && size && size != maxsize) { + fprintf(stderr, Name ": attempting to create a second " + "volume with size less then remaining space. " + "Aborting...\n"); + return 0; + } + if (maxsize < size \|\| maxsize == 0) { - if (verbose) - fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n", - maxsize, size); + if (verbose) { + if (maxsize == 0) + fprintf(stderr, Name ": no free space" + " left on device. Aborting...\n"); + else + fprintf(stderr, Name ": not enough space" + " to create volume of given size" + " (%llu < %llu). Aborting...\n", + maxsize, size); + } return 0; } freesize = maxsize; + if (super->orom) { + int count = count_volumes(super->hba->path, + super->orom->dpa, verbose); + if (super->orom->vphba <= count) { + pr_vrb(": platform does not support more than %d raid volumes.\n", + super->orom->vphba); + return 0; + } + } return 1; } -static int reserve_space(struct supertype st, int raiddisks, +static int imsm_get_free_size(struct supertype st, int raiddisks, unsigned long long size, int chunk, unsigned long long freesize) { @@ -5174,16 +6191,46 @@ size /= 2 chunk; size = 2 chunk; } + maxsize = size; + } + if (!check_env("IMSM_NO_PLATFORM") && + mpb->num_raid_devs > 0 && size && size != maxsize) { + fprintf(stderr, Name ": attempting to create a second " + "volume with size less then remaining space. 
" + "Aborting...\n"); + return 0; } - cnt = 0; for (dl = super->disks; dl; dl = dl->next) if (dl->e) dl->raiddisk = cnt++; - freesize = size; + freesize = size; + + dprintf("imsm: imsm_get_free_size() returns : %llu\n", size); + + return 1; +} + +static int reserve_space(struct supertype st, int raiddisks, + unsigned long long size, int chunk, + unsigned long long freesize) +{ + struct intel_super super = st->sb; + struct dl dl; + int cnt; + int rv = 0; + + rv = imsm_get_free_size(st, raiddisks, size, chunk, freesize); + if (rv) { + cnt = 0; + for (dl = super->disks; dl; dl = dl->next) + if (dl->e) + dl->raiddisk = cnt++; + rv = 1; + } - return 1; + return rv; } static int validate_geometry_imsm(struct supertype st, int level, int layout, @@ -5207,9 +6254,14 @@ dev, freesize, verbose); } - + if (!dev) { - if (st->sb && freesize) { + if (st->sb) { + struct intel_super super = st->sb; + if (!validate_geometry_imsm_orom(st->sb, level, layout, + raiddisks, chunk, size, + verbose)) + return 0; /* we are being asked to automatically layout a * new volume based on the current contents of * the container. If the the parameters can be @@ -5218,12 +6270,22 @@ * created. add_to_super and getinfo_super * detect when autolayout is in progress. 
/ - if (!validate_geometry_imsm_orom(st->sb, level, layout, - raiddisks, chunk, - verbose)) - return 0; - return reserve_space(st, raiddisks, size, - chunk?chunk:0, freesize); + /* assuming that freesize is always given when array is + created / + if (super->orom && freesize) { + int count; + count = count_volumes(super->hba->path, + super->orom->dpa, verbose); + if (super->orom->vphba <= count) { + pr_vrb(": platform does not support more" + " than %d raid volumes.\n", + super->orom->vphba); + return 0; + } + } + if (freesize) + return reserve_space(st, raiddisks, size, + chunk?chunk:0, freesize); } return 1; } @@ -5270,14 +6332,15 @@ / struct intel_super super; - if (load_super_imsm_all(st, cfd, (void ) &super, NULL) == 0) { + if (load_super_imsm_all(st, cfd, (void ) &super, NULL, NULL, 1) == 0) { st->sb = super; st->container_dev = fd2devnum(cfd); close(cfd); return validate_geometry_imsm_volume(st, level, layout, raiddisks, chunk, size, dev, - freesize, verbose); + freesize, 1) + ? 1 : -1; } } @@ -5298,9 +6361,8 @@ if (level && layout && layout == UnSet) layout = imsm_level_to_layout(level); - if (chunk && (chunk == UnSet \|\| chunk == 0) && - super && super->orom) - chunk = imsm_orom_default_chunk(super->orom); + if (chunk && (chunk == UnSet \|\| chunk == 0)) + chunk = imsm_default_chunk(super->orom); } static void handle_missing(struct intel_super super, struct imsm_dev dev); @@ -5368,10 +6430,8 @@ struct dl d; for (d = super->disks; d; d = d->next) - if (d->index > -2) { - d->index = -1; - d->disk.status = SPARE_DISK; - } + if (d->index > -2) + mark_spare(d); } super->updates_pending++; @@ -5430,6 +6490,7 @@ return 0; } +#endif /* MDASSEMBLE / static int is_gen_migration(struct imsm_dev dev) { @@ -5444,7 +6505,6 @@ return 0; } -#endif /* MDASSEMBLE / static int is_rebuilding(struct imsm_dev dev) { @@ -5456,7 +6516,7 @@ if (migr_type(dev) != MIGR_REBUILD) return 0; - migr_map = get_imsm_map(dev, 1); + migr_map = get_imsm_map(dev, MAP_1); if 
(migr_map->map_state == IMSM_T_STATE_DEGRADED) return 1; @@ -5464,6 +6524,26 @@ return 0; } +#ifndef MDASSEMBLE +static int is_initializing(struct imsm_dev dev) +{ + struct imsm_map migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (migr_type(dev) != MIGR_INIT) + return 0; + + migr_map = get_imsm_map(dev, MAP_1); + + if (migr_map->map_state == IMSM_T_STATE_UNINITIALIZED) + return 1; + + return 0; +} +#endif + static void update_recovery_start(struct intel_super super, struct imsm_dev dev, struct mdinfo array) @@ -5515,20 +6595,24 @@ struct imsm_super mpb = super->anchor; struct mdinfo rest = NULL; unsigned int i; - int bbm_errors = 0; + int sb_errors = 0; struct dl d; int spare_disks = 0; /* do not assemble arrays when not all attributes are supported / if (imsm_check_attributes(mpb->attributes) == 0) { - fprintf(stderr, Name ": IMSM metadata loading not allowed " - "due to attributes incompatibility.\n"); - return NULL; + sb_errors = 1; + fprintf(stderr, Name ": Unsupported attributes in IMSM metadata." + "Arrays activation is blocked.\n"); } / check for bad blocks / - if (imsm_bbm_log_size(super->anchor)) - bbm_errors = 1; + if (imsm_bbm_log_size(super->anchor)) { + fprintf(stderr, Name ": BBM log found in IMSM metadata." 
+ "Arrays activation is blocked.\n"); + sb_errors = 1; + } + / count spare devices, not used in maps / @@ -5541,7 +6625,10 @@ struct imsm_map map; struct imsm_map map2; struct mdinfo this; - int slot, chunk; + int slot; +#ifndef MDASSEMBLE + int chunk; +#endif char ep; if (subarray && @@ -5549,8 +6636,8 @@ continue; dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); - map2 = get_imsm_map(dev, 1); + map = get_imsm_map(dev, MAP_0); + map2 = get_imsm_map(dev, MAP_1); / do not publish arrays that are in the middle of an * unsupported migration @@ -5566,19 +6653,6 @@ * OROM/EFI / - chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; -#ifndef MDASSEMBLE - if (!validate_geometry_imsm_orom(super, - get_imsm_raid_level(map), / RAID level / - imsm_level_to_layout(get_imsm_raid_level(map)), - map->num_members, / raid disks / - &chunk, - 1 / verbose /)) { - fprintf(stderr, Name ": RAID gemetry validation failed. " - "Cannot proceed with the action(s).\n"); - continue; - } -#endif / MDASSEMBLE / this = malloc(sizeof(this)); if (!this) { fprintf(stderr, Name ": failed to allocate %zu bytes\n", @@ -5589,6 +6663,30 @@ super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); this->next = rest; +#ifndef MDASSEMBLE + chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; + /* mdadm does not support all metadata features- set the bit in all arrays state / + if (!validate_geometry_imsm_orom(super, + get_imsm_raid_level(map), / RAID level / + imsm_level_to_layout(get_imsm_raid_level(map)), + map->num_members, / raid disks / + &chunk, join_u32(dev->size_low, dev->size_high), + 1 / verbose /)) { + fprintf(stderr, Name ": IMSM RAID geometry validation" + " failed. 
Array %s activation is blocked.\n", + dev->volume); + this->array.state \|= + (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) \| + (1<<MD_SB_BLOCK_VOLUME); + } +#endif + + / if array has bad blocks, set suitable bit in all arrays state / + if (sb_errors) + this->array.state \|= + (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) \| + (1<<MD_SB_BLOCK_VOLUME); + for (slot = 0 ; slot < map->num_members; slot++) { unsigned long long recovery_start; struct mdinfo info_d; @@ -5598,8 +6696,8 @@ __u32 ord; skip = 0; - idx = get_imsm_disk_idx(dev, slot, 0); - ord = get_imsm_ord_tbl_ent(dev, slot, -1); + idx = get_imsm_disk_idx(dev, slot, MAP_0); + ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; @@ -5662,8 +6760,8 @@ this->array.working_disks++; info_d->events = __le32_to_cpu(mpb->generation_num); - info_d->data_offset = __le32_to_cpu(map->pba_of_lba0); - info_d->component_size = __le32_to_cpu(map->blocks_per_member); + info_d->data_offset = pba_of_lba0(map); + info_d->component_size = blocks_per_member(map); } /* now that the disk list is up-to-date fixup recovery_start / update_recovery_start(super, dev, this); @@ -5677,17 +6775,16 @@ rest = this; } - / if array has bad blocks, set suitable bit in array status / - if (bbm_errors) - rest->array.state \|= (1<<MD_SB_BBM_ERRORS); - return rest; } -static __u8 imsm_check_degraded(struct intel_super super, struct imsm_dev dev, int failed) +static __u8 imsm_check_degraded(struct intel_super super, struct imsm_dev dev, + int failed, int look_in_map) { - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map; + + map = get_imsm_map(dev, look_in_map); if (!failed) return map->map_state == IMSM_T_STATE_UNINITIALIZED ? 
@@ -5715,7 +6812,7 @@ int insync = insync; for (i = 0; i < map->num_members; i++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X); int idx = ord_to_idx(ord); struct imsm_disk disk; @@ -5751,33 +6848,55 @@ return map->map_state; } -static int imsm_count_failed(struct intel_super super, struct imsm_dev dev) +static int imsm_count_failed(struct intel_super super, struct imsm_dev dev, + int look_in_map) { int i; int failed = 0; struct imsm_disk disk; - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map prev = get_imsm_map(dev, dev->vol.migr_state); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map prev = get_imsm_map(dev, MAP_1); + struct imsm_map map_for_loop; __u32 ord; int idx; + int idx_1; /* at the beginning of migration we set IMSM_ORD_REBUILD on * disks that are being rebuilt. New failures are recorded to * map[0]. So we look through all the disks we started with and * see if any failures are still present, or if any new ones * have arrived - * - * FIXME add support for online capacity expansion and - * raid-level-migration / - for (i = 0; i < prev->num_members; i++) { - ord = __le32_to_cpu(prev->disk_ord_tbl[i]); - ord \|= __le32_to_cpu(map->disk_ord_tbl[i]); - idx = ord_to_idx(ord); + map_for_loop = map; + if (prev && (map->num_members < prev->num_members)) + map_for_loop = prev; + + for (i = 0; i < map_for_loop->num_members; i++) { + idx_1 = -255; + / when MAP_X is passed both maps failures are counted + / + if (prev && + ((look_in_map == MAP_1) \|\| (look_in_map == MAP_X)) && + (i < prev->num_members)) { + ord = __le32_to_cpu(prev->disk_ord_tbl[i]); + idx_1 = ord_to_idx(ord); - disk = get_imsm_disk(super, idx); - if (!disk \|\| is_failed(disk) \|\| ord & IMSM_ORD_REBUILD) - failed++; + disk = get_imsm_disk(super, idx_1); + if (!disk \|\| is_failed(disk) \|\| ord & IMSM_ORD_REBUILD) + failed++; + } + if (((look_in_map == MAP_0) \|\| (look_in_map == MAP_X)) && + (i < 
map->num_members)) { + ord = __le32_to_cpu(map->disk_ord_tbl[i]); + idx = ord_to_idx(ord); + + if (idx != idx_1) { + disk = get_imsm_disk(super, idx); + if (!disk \|\| is_failed(disk) \|\| + ord & IMSM_ORD_REBUILD) + failed++; + } + } } return failed; @@ -5789,7 +6908,7 @@ { struct intel_super super = c->sb; struct imsm_super mpb = super->anchor; - + if (atoi(inst) >= mpb->num_raid_devs) { fprintf(stderr, "%s: subarry index %d, out of range\n", __func__, atoi(inst)); @@ -5815,7 +6934,7 @@ if (migr_type(dev) == MIGR_GEN_MIGR) return 0; - migr_map = get_imsm_map(dev, 1); + migr_map = get_imsm_map(dev, MAP_1); if ((migr_map->map_state == IMSM_T_STATE_NORMAL) && (dev->vol.migr_type != MIGR_GEN_MIGR)) @@ -5830,9 +6949,11 @@ __u32 ord; int slot; struct imsm_map map; + char buf[MAX_RAID_SERIAL_LEN+3]; + unsigned int len, shift = 0; /* new failures are always set in map[0] / - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); slot = get_imsm_disk_slot(map, idx); if (slot < 0) @@ -5842,8 +6963,28 @@ if (is_failed(disk) && (ord & IMSM_ORD_REBUILD)) return 0; + memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN); + buf[MAX_RAID_SERIAL_LEN] = '\000'; + strcat(buf, ":0"); + if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) + shift = len - MAX_RAID_SERIAL_LEN + 1; + strncpy((char )disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + disk->status \|= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx \| IMSM_ORD_REBUILD); + /* mark failures in second map if second map exists and this disk + * in this slot. 
+ * This is valid for migration, initialization and rebuild + / + if (dev->vol.migr_state) { + struct imsm_map map2 = get_imsm_map(dev, MAP_1); + int slot2 = get_imsm_disk_slot(map2, idx); + + if ((slot2 < map2->num_members) && + (slot2 >= 0)) + set_imsm_ord_tbl_ent(map2, slot2, + idx \| IMSM_ORD_REBUILD); + } if (map->failed_disk_num == 0xff) map->failed_disk_num = slot; return 1; @@ -5862,25 +7003,32 @@ static void handle_missing(struct intel_super super, struct imsm_dev dev) { - __u8 map_state; struct dl dl; - int failed; if (!super->missing) return; - failed = imsm_count_failed(super, dev); - map_state = imsm_check_degraded(super, dev, failed); dprintf("imsm: mark missing\n"); - end_migration(dev, map_state); + / end process for initialization and rebuild only + / + if (is_gen_migration(dev) == 0) { + __u8 map_state; + int failed; + + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); + + end_migration(dev, super, map_state); + } for (dl = super->missing; dl; dl = dl->next) mark_missing(dev, &dl->disk, dl->index); super->updates_pending++; } -static unsigned long long imsm_set_array_size(struct imsm_dev dev) +static unsigned long long imsm_set_array_size(struct imsm_dev dev, + long long new_size) { - int used_disks = imsm_num_data_members(dev, 0); + int used_disks = imsm_num_data_members(dev, MAP_0); unsigned long long array_blocks; struct imsm_map map; @@ -5897,8 +7045,17 @@ /* set array size in metadata / - map = get_imsm_map(dev, 0); - array_blocks = map->blocks_per_member used_disks; + if (new_size <= 0) { + /* OLCE size change is caused by added disks + / + map = get_imsm_map(dev, MAP_0); + array_blocks = blocks_per_member(map) used_disks; + } else { + /* Online Volume Size Change + * Using available free space + / + array_blocks = new_size; + } / round array size down to closest MB / @@ -5925,7 +7082,7 @@ for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev dev = get_imsm_dev(super, i); - 
struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); struct imsm_map map2; int prev_num_members; @@ -5946,16 +7103,17 @@ map->num_members = prev_disks; dev->vol.migr_state = 1; dev->vol.curr_migr_unit = 0; - dev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(dev, MIGR_GEN_MIGR); for (i = prev_num_members; i < map->num_members; i++) set_imsm_ord_tbl_ent(map, i, i); - map2 = get_imsm_map(dev, 1); + map2 = get_imsm_map(dev, MAP_1); / Copy the current map / memcpy(map2, map, copy_map_size); map2->num_members = prev_num_members; - imsm_set_array_size(dev); + imsm_set_array_size(dev, -1); + super->clean_migration_record_by_mdmon = 1; super->updates_pending++; } } @@ -5970,9 +7128,9 @@ int inst = a->info.container_member; struct intel_super super = a->container->sb; struct imsm_dev dev = get_imsm_dev(super, inst); - struct imsm_map map = get_imsm_map(dev, 0); - int failed = imsm_count_failed(super, dev); - __u8 map_state = imsm_check_degraded(super, dev, failed); + struct imsm_map map = get_imsm_map(dev, MAP_0); + int failed = imsm_count_failed(super, dev, MAP_0); + __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0); __u32 blocks_per_unit; if (dev->vol.migr_state && @@ -5994,12 +7152,14 @@ user action is required to recover process / if (0) { - struct imsm_map map2 = get_imsm_map(dev, 1); - dev->vol.migr_state = 0; - dev->vol.migr_type = 0; - dev->vol.curr_migr_unit = 0; - memcpy(map, map2, sizeof_imsm_map(map2)); - super->updates_pending++; + struct imsm_map map2 = + get_imsm_map(dev, MAP_1); + dev->vol.migr_state = 0; + set_migr_type(dev, 0); + dev->vol.curr_migr_unit = 0; + memcpy(map, map2, + sizeof_imsm_map(map2)); + super->updates_pending++; } } if (a->last_checkpoint >= a->info.component_size) { @@ -6007,10 +7167,10 @@ int used_disks; struct mdinfo mdi; - used_disks = imsm_num_data_members(dev, 0); + used_disks = imsm_num_data_members(dev, MAP_0); if (used_disks > 0) { array_blocks = - map->blocks_per_member * 
+ blocks_per_member(map) * used_disks; /* round array size down to closest MB / @@ -6052,11 +7212,12 @@ / if (is_resyncing(dev)) { dprintf("imsm: mark resync done\n"); - end_migration(dev, map_state); + end_migration(dev, super, map_state); super->updates_pending++; a->last_checkpoint = 0; } - } else if (!is_resyncing(dev) && !failed) { + } else if ((!is_resyncing(dev) && !failed) && + (imsm_reshape_blocks_arrays_changes(super) == 0)) { /* mark the start of the init process if nothing is failed / dprintf("imsm: mark resync start\n"); if (map->map_state == IMSM_T_STATE_UNINITIALIZED) @@ -6113,8 +7274,10 @@ int inst = a->info.container_member; struct intel_super super = a->container->sb; struct imsm_dev dev = get_imsm_dev(super, inst); - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); struct imsm_disk disk; + struct mdinfo mdi; + int recovery_not_finished = 0; int failed; __u32 ord; __u8 map_state; @@ -6128,7 +7291,7 @@ dprintf("imsm: set_disk %d:%x\n", n, state); - ord = get_imsm_ord_tbl_ent(dev, n, -1); + ord = get_imsm_ord_tbl_ent(dev, n, MAP_0); disk = get_imsm_disk(super, ord_to_idx(ord)); / check for new failures / @@ -6139,53 +7302,114 @@ / check if in_sync / if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) { - struct imsm_map migr_map = get_imsm_map(dev, 1); + struct imsm_map migr_map = get_imsm_map(dev, MAP_1); set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); super->updates_pending++; } - failed = imsm_count_failed(super, dev); - map_state = imsm_check_degraded(super, dev, failed); + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); / check if recovery complete, newly degraded, or failed / - if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { - end_migration(dev, map_state); - map = get_imsm_map(dev, 0); - map->failed_disk_num = ~0; - super->updates_pending++; - a->last_checkpoint = 0; - } else if (map_state == 
IMSM_T_STATE_DEGRADED && - map->map_state != map_state && - !dev->vol.migr_state) { - dprintf("imsm: mark degraded\n"); - map->map_state = map_state; - super->updates_pending++; - a->last_checkpoint = 0; - } else if (map_state == IMSM_T_STATE_FAILED && - map->map_state != map_state) { - dprintf("imsm: mark failed\n"); - end_migration(dev, map_state); - super->updates_pending++; - a->last_checkpoint = 0; - } else if (is_gen_migration(dev)) { - dprintf("imsm: Detected General Migration in state: "); - if (map_state == IMSM_T_STATE_NORMAL) { - end_migration(dev, map_state); - map = get_imsm_map(dev, 0); + dprintf("imsm: Detected transition to state "); + switch (map_state) { + case IMSM_T_STATE_NORMAL: / transition to normal state / + dprintf("normal: "); + if (is_rebuilding(dev)) { + dprintf("while rebuilding"); + / check if recovery is really finished / + for (mdi = a->info.devs; mdi ; mdi = mdi->next) + if (mdi->recovery_start != MaxSector) { + recovery_not_finished = 1; + break; + } + if (recovery_not_finished) { + dprintf("\nimsm: Rebuild has not finished yet, " + "state not changed"); + if (a->last_checkpoint < mdi->recovery_start) { + a->last_checkpoint = mdi->recovery_start; + super->updates_pending++; + } + break; + } + end_migration(dev, super, map_state); + map = get_imsm_map(dev, MAP_0); map->failed_disk_num = ~0; - dprintf("normal\n"); - } else { - if (map_state == IMSM_T_STATE_DEGRADED) { - printf("degraded\n"); - end_migration(dev, map_state); - } else { - dprintf("failed\n"); + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + if (is_gen_migration(dev)) { + dprintf("while general migration"); + if (a->last_checkpoint >= a->info.component_size) + end_migration(dev, super, map_state); + else + map->map_state = map_state; + map = get_imsm_map(dev, MAP_0); + map->failed_disk_num = ~0; + super->updates_pending++; + break; + } + break; + case IMSM_T_STATE_DEGRADED: / transition to degraded state / + dprintf("degraded: "); + if 
((map->map_state != map_state) && + !dev->vol.migr_state) { + dprintf("mark degraded"); + map->map_state = map_state; + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + if (is_rebuilding(dev)) { + dprintf("while rebuilding."); + if (map->map_state != map_state) { + dprintf(" Map state change"); + end_migration(dev, super, map_state); + super->updates_pending++; + } + break; + } + if (is_gen_migration(dev)) { + dprintf("while general migration"); + if (a->last_checkpoint >= a->info.component_size) + end_migration(dev, super, map_state); + else { + map->map_state = map_state; + manage_second_map(super, dev); } + super->updates_pending++; + break; + } + if (is_initializing(dev)) { + dprintf("while initialization."); + map->map_state = map_state; + super->updates_pending++; + break; + } + break; + case IMSM_T_STATE_FAILED: / transition to failed state / + dprintf("failed: "); + if (is_gen_migration(dev)) { + dprintf("while general migration"); map->map_state = map_state; + super->updates_pending++; + break; } - super->updates_pending++; + if (map->map_state != map_state) { + dprintf("mark failed"); + end_migration(dev, super, map_state); + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + break; + default: + dprintf("state %i\n", map_state); } + dprintf("\n"); + } static int store_imsm_mpb(int fd, struct imsm_super mpb) @@ -6236,7 +7460,7 @@ static struct dl imsm_readd(struct intel_super super, int idx, struct active_array a) { struct imsm_dev dev = get_imsm_dev(super, a->info.container_member); - int i = get_imsm_disk_idx(dev, idx, -1); + int i = get_imsm_disk_idx(dev, idx, MAP_X); struct dl dl; for (dl = super->disks; dl; dl = dl->next) @@ -6257,7 +7481,7 @@ struct mdinfo additional_test_list) { struct imsm_dev dev = get_imsm_dev(super, a->info.container_member); - int idx = get_imsm_disk_idx(dev, slot, -1); + int idx = get_imsm_disk_idx(dev, slot, MAP_X); struct imsm_super mpb = super->anchor; struct imsm_map map; unsigned long 
long pos; @@ -6319,7 +7543,7 @@ } for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); / check if this disk is already a member of * this array @@ -6330,9 +7554,9 @@ found = 0; j = 0; pos = 0; - array_start = __le32_to_cpu(map->pba_of_lba0); + array_start = pba_of_lba0(map); array_end = array_start + - __le32_to_cpu(map->blocks_per_member) - 1; + blocks_per_member(map) - 1; do { /* check that we can start at pba_of_lba0 with @@ -6375,9 +7599,9 @@ dev2 = get_imsm_dev(cont->sb, dev_idx); if (dev2) { - state = imsm_check_degraded(cont->sb, dev2, failed); + state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0); if (state == IMSM_T_STATE_FAILED) { - map = get_imsm_map(dev2, 0); + map = get_imsm_map(dev2, MAP_0); if (!map) return 1; for (slot = 0; slot < map->num_members; slot++) { @@ -6385,7 +7609,7 @@ * Check if failed disks are deleted from intel * disk list or are marked to be deleted / - idx = get_imsm_disk_idx(dev2, slot, -1); + idx = get_imsm_disk_idx(dev2, slot, MAP_X); idisk = get_imsm_dl_disk(cont->sb, idx); / * Do not rebuild the array if failed disks @@ -6419,7 +7643,7 @@ struct intel_super super = a->container->sb; int inst = a->info.container_member; struct imsm_dev dev = get_imsm_dev(super, inst); - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); int failed = a->info.array.raid_disks; struct mdinfo rv = NULL; struct mdinfo d; @@ -6443,10 +7667,16 @@ dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); - if (dev->vol.migr_state && - dev->vol.migr_type == MIGR_GEN_MIGR) - /* No repair during migration / + if (imsm_reshape_blocks_arrays_changes(super)) + return NULL; + + / Cannot activate another spare if rebuild is in progress already + / + if (is_rebuilding(dev)) { + dprintf("imsm: No spare activation allowed. 
" + "Rebuild in progress already.\n"); return NULL; + } if (a->info.array.level == 4) / No repair for takeovered array @@ -6454,7 +7684,8 @@ / return NULL; - if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) + if (imsm_check_degraded(super, dev, failed, MAP_0) != + IMSM_T_STATE_DEGRADED) return NULL; / @@ -6463,9 +7694,9 @@ * are removed from container. / if (failed) { - dprintf("found failed disks in %s, check if there another" + dprintf("found failed disks in %.s, check if there another" "failed sub-array.\n", - dev->volume); + MAX_RAID_SERIAL_LEN, dev->volume); /* check if states of the other volumes allow for rebuild / for (i = 0; i < super->anchor->num_raid_devs; i++) { if (i != inst) { @@ -6495,9 +7726,9 @@ / dl = imsm_readd(super, i, a); if (!dl) - dl = imsm_add_spare(super, i, a, 0, NULL); + dl = imsm_add_spare(super, i, a, 0, rv); if (!dl) - dl = imsm_add_spare(super, i, a, 1, NULL); + dl = imsm_add_spare(super, i, a, 1, rv); if (!dl) continue; @@ -6525,7 +7756,7 @@ di->disk.minor = dl->minor; di->disk.state = 0; di->recovery_start = 0; - di->data_offset = __le32_to_cpu(map->pba_of_lba0); + di->data_offset = pba_of_lba0(map); di->component_size = a->info.component_size; di->container_member = inst; super->random = random32(); @@ -6534,8 +7765,6 @@ num_spares++; dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, i, di->data_offset); - - break; } if (!rv) @@ -6562,7 +7791,7 @@ } return NULL; } - + mu->space = NULL; mu->space_list = NULL; mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; @@ -6587,15 +7816,15 @@ static int disks_overlap(struct intel_super super, int idx, struct imsm_update_create_array u) { struct imsm_dev dev = get_imsm_dev(super, idx); - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map new_map = get_imsm_map(&u->dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map new_map = get_imsm_map(&u->dev, MAP_0); struct disk_info inf = get_disk_info(u); struct imsm_disk 
disk; int i; int j; for (i = 0; i < map->num_members; i++) { - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, MAP_X)); for (j = 0; j < new_map->num_members; j++) if (serialcmp(disk->serial, inf[j].serial) == 0) return 1; @@ -6708,7 +7937,7 @@ struct imsm_map map; struct imsm_dev new_dev = (struct imsm_dev )space_list; - struct imsm_map migr_map = get_imsm_map(dev, 1); + struct imsm_map migr_map = get_imsm_map(dev, MAP_1); int to_state; struct dl new_disk; @@ -6716,7 +7945,7 @@ return ret_val; space_list = space_list; memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0)); - map = get_imsm_map(new_dev, 0); + map = get_imsm_map(new_dev, MAP_0); if (migr_map) { dprintf("imsm: Error: migration in progress"); return ret_val; @@ -6736,7 +7965,7 @@ migrate(new_dev, super, to_state, MIGR_GEN_MIGR); if (u->new_level > -1) map->raid_level = u->new_level; - migr_map = get_imsm_map(new_dev, 1); + migr_map = get_imsm_map(new_dev, MAP_1); if ((u->new_level == 5) && (migr_map->raid_level == 0)) { int ord = map->num_members - 1; @@ -6786,26 +8015,178 @@ } else goto error_disk_add; -skip_disk_add: - tofree = space_list; - / calculate new size +skip_disk_add: + tofree = space_list; + /* calculate new size + / + imsm_set_array_size(new_dev, -1); + + ret_val = 1; + } + } + + if (tofree) + space_list = tofree; + return ret_val; + +error_disk_add: + dprintf("Error: imsm: Cannot find disk.\n"); + return ret_val; +} + +static int apply_size_change_update(struct imsm_update_size_change u, + struct intel_super super) +{ + struct intel_dev id; + int ret_val = 0; + + dprintf("apply_size_change_update()\n"); + if ((u->subdev < 0) \|\| + (u->subdev > 1)) { + dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev); + return ret_val; + } + + for (id = super->devlist ; id; id = id->next) { + if (id->index == (unsigned)u->subdev) { + struct imsm_dev dev = get_imsm_dev(super, u->subdev); + struct imsm_map map = get_imsm_map(dev, MAP_0); + int 
used_disks = imsm_num_data_members(dev, MAP_0); + unsigned long long blocks_per_member; + + / calculate new size + / + blocks_per_member = u->new_size / used_disks; + dprintf("imsm: apply_size_change_update(size: %llu, " + "blocks per member: %llu)\n", + u->new_size, blocks_per_member); + set_blocks_per_member(map, blocks_per_member); + imsm_set_array_size(dev, u->new_size); + + ret_val = 1; + break; + } + } + + return ret_val; +} + + +static int apply_update_activate_spare(struct imsm_update_activate_spare u, + struct intel_super super, + struct active_array active_array) +{ + struct imsm_super mpb = super->anchor; + struct imsm_dev dev = get_imsm_dev(super, u->array); + struct imsm_map map = get_imsm_map(dev, MAP_0); + struct imsm_map migr_map; + struct active_array a; + struct imsm_disk disk; + __u8 to_state; + struct dl dl; + unsigned int found; + int failed; + int victim; + int i; + int second_map_created = 0; + + for (; u; u = u->next) { + victim = get_imsm_disk_idx(dev, u->slot, MAP_X); + + if (victim < 0) + return 0; + + for (dl = super->disks; dl; dl = dl->next) + if (dl == u->dl) + break; + + if (!dl) { + fprintf(stderr, "error: imsm_activate_spare passed " + "an unknown disk (index: %d)\n", + u->dl->index); + return 0; + } + + / count failures (excluding rebuilds and the victim) + * to determine map[0] state + / + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, + get_imsm_disk_idx(dev, i, MAP_X)); + if (!disk \|\| is_failed(disk)) + failed++; + } + + / adding a pristine spare, assign a new index / + if (dl->index < 0) { + dl->index = super->anchor->num_disks; + super->anchor->num_disks++; + } + disk = &dl->disk; + disk->status \|= CONFIGURED_DISK; + disk->status &= ~SPARE_DISK; + + / mark rebuild / + to_state = imsm_check_degraded(super, dev, failed, MAP_0); + if (!second_map_created) { + second_map_created = 1; + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, super, to_state, 
MIGR_REBUILD); + } else + map->map_state = to_state; + migr_map = get_imsm_map(dev, MAP_1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, + dl->index \| IMSM_ORD_REBUILD); + + / update the family_num to mark a new container + * generation, being careful to record the existing + * family_num in orig_family_num to clean up after + * earlier mdadm versions that neglected to set it. + / + if (mpb->orig_family_num == 0) + mpb->orig_family_num = mpb->family_num; + mpb->family_num += super->random; + + / count arrays using the victim in the metadata / + found = 0; + for (a = active_array; a ; a = a->next) { + dev = get_imsm_dev(super, a->info.container_member); + map = get_imsm_map(dev, MAP_0); + + if (get_imsm_disk_slot(map, victim) >= 0) + found++; + } + + / delete the victim if it is no longer being + * utilized anywhere + / + if (!found) { + struct dl dlp; + + / We know that 'manager' isn't touching anything, + * so it is safe to delete / - imsm_set_array_size(new_dev); + for (dlp = &super->disks; dlp; dlp = &(dlp)->next) + if ((dlp)->index == victim) + break; - ret_val = 1; + /* victim may be on the missing list / + if (!dlp) + for (dlp = &super->missing; dlp; + dlp = &(dlp)->next) + if ((dlp)->index == victim) + break; + imsm_delete(super, dlp, victim); } } - if (tofree) - space_list = tofree; - return ret_val; - -error_disk_add: - dprintf("Error: imsm: Cannot find disk.\n"); - return ret_val; + return 1; } - static int apply_reshape_container_disks_update(struct imsm_update_reshape u, struct intel_super super, void **space_list) @@ -6866,8 +8247,8 @@ newdev = (void)sp; /* Copy the dev, but not (all of) the map / memcpy(newdev, id->dev, sizeof(newdev)); - oldmap = get_imsm_map(id->dev, 0); - newmap = get_imsm_map(newdev, 0); + oldmap = get_imsm_map(id->dev, MAP_0); + newmap = get_imsm_map(newdev, MAP_0); /* Copy the current map / memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); / update one device only @@ -6878,7 +8259,7 
@@ devices_to_reshape--; newdev->vol.migr_state = 1; newdev->vol.curr_migr_unit = 0; - newdev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(newdev, MIGR_GEN_MIGR); newmap->num_members = u->new_raid_disks; for (i = 0; i < delta_disks; i++) { set_imsm_ord_tbl_ent(newmap, @@ -6887,10 +8268,10 @@ } /* New map is correct, now need to save old map / - newmap = get_imsm_map(newdev, 1); + newmap = get_imsm_map(newdev, MAP_1); memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); - imsm_set_array_size(newdev); + imsm_set_array_size(newdev, -1); } sp = (void )id->dev; @@ -6930,11 +8311,12 @@ if (dev == NULL) return 0; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (u->direction == R10_TO_R0) { / Number of failed disks must be half of initial disk number / - if (imsm_count_failed(super, dev) != (map->num_members / 2)) + if (imsm_count_failed(super, dev, MAP_0) != + (map->num_members / 2)) return 0; / iterate through devices to mark removed disks as spare / @@ -6948,8 +8330,7 @@ if (du->index > idx) du->index--; / mark as spare disk / - dm->disk.status = SPARE_DISK; - dm->index = -1; + mark_spare(dm); } } / update map / @@ -6994,7 +8375,7 @@ dev_new = (void )space; memcpy(dev_new, dev, sizeof(dev)); / update new map / - map = get_imsm_map(dev_new, 0); + map = get_imsm_map(dev_new, MAP_0); map->num_members = map->num_members 2; map->map_state = IMSM_T_STATE_DEGRADED; map->num_domains = 2; @@ -7098,101 +8479,16 @@ super->updates_pending++; break; } + case update_size_change: { + struct imsm_update_size_change u = (void )update->buf; + if (apply_size_change_update(u, super)) + super->updates_pending++; + break; + } case update_activate_spare: { struct imsm_update_activate_spare u = (void ) update->buf; - struct imsm_dev dev = get_imsm_dev(super, u->array); - struct imsm_map map = get_imsm_map(dev, 0); - struct imsm_map migr_map; - struct active_array a; - struct imsm_disk disk; - __u8 to_state; - struct dl dl; - unsigned int found; - int failed; - int victim = 
get_imsm_disk_idx(dev, u->slot, -1); - int i; - - for (dl = super->disks; dl; dl = dl->next) - if (dl == u->dl) - break; - - if (!dl) { - fprintf(stderr, "error: imsm_activate_spare passed " - "an unknown disk (index: %d)\n", - u->dl->index); - return; - } - - super->updates_pending++; - /* count failures (excluding rebuilds and the victim) - * to determine map[0] state - / - failed = 0; - for (i = 0; i < map->num_members; i++) { - if (i == u->slot) - continue; - disk = get_imsm_disk(super, - get_imsm_disk_idx(dev, i, -1)); - if (!disk \|\| is_failed(disk)) - failed++; - } - - / adding a pristine spare, assign a new index / - if (dl->index < 0) { - dl->index = super->anchor->num_disks; - super->anchor->num_disks++; - } - disk = &dl->disk; - disk->status \|= CONFIGURED_DISK; - disk->status &= ~SPARE_DISK; - - / mark rebuild / - to_state = imsm_check_degraded(super, dev, failed); - map->map_state = IMSM_T_STATE_DEGRADED; - migrate(dev, super, to_state, MIGR_REBUILD); - migr_map = get_imsm_map(dev, 1); - set_imsm_ord_tbl_ent(map, u->slot, dl->index); - set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index \| IMSM_ORD_REBUILD); - - / update the family_num to mark a new container - * generation, being careful to record the existing - * family_num in orig_family_num to clean up after - * earlier mdadm versions that neglected to set it. 
- / - if (mpb->orig_family_num == 0) - mpb->orig_family_num = mpb->family_num; - mpb->family_num += super->random; - - / count arrays using the victim in the metadata / - found = 0; - for (a = st->arrays; a ; a = a->next) { - dev = get_imsm_dev(super, a->info.container_member); - map = get_imsm_map(dev, 0); - - if (get_imsm_disk_slot(map, victim) >= 0) - found++; - } - - / delete the victim if it is no longer being - * utilized anywhere - / - if (!found) { - struct dl dlp; - - / We know that 'manager' isn't touching anything, - * so it is safe to delete - / - for (dlp = &super->disks; dlp; dlp = &(dlp)->next) - if ((dlp)->index == victim) - break; - - /* victim may be on the missing list / - if (!dlp) - for (dlp = &super->missing; dlp; dlp = &(dlp)->next) - if ((dlp)->index == victim) - break; - imsm_delete(super, dlp, victim); - } + if (apply_update_activate_spare(u, super, st->arrays)) + super->updates_pending++; break; } case update_create_array: { @@ -7229,9 +8525,9 @@ goto create_error; } - new_map = get_imsm_map(&u->dev, 0); - new_start = __le32_to_cpu(new_map->pba_of_lba0); - new_end = new_start + __le32_to_cpu(new_map->blocks_per_member); + new_map = get_imsm_map(&u->dev, MAP_0); + new_start = pba_of_lba0(new_map); + new_end = new_start + blocks_per_member(new_map); inf = get_disk_info(u); / handle activate_spare versus create race: @@ -7240,9 +8536,9 @@ / for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); - start = __le32_to_cpu(map->pba_of_lba0); - end = start + __le32_to_cpu(map->blocks_per_member); + map = get_imsm_map(dev, MAP_0); + start = pba_of_lba0(map); + end = start + blocks_per_member(map); if ((new_start >= start && new_start <= end) \|\| (start >= new_start && start <= new_end)) / overlap /; @@ -7421,7 +8717,7 @@ if (u->direction == R0_TO_R10) { void tail = (void )&update->space_list; struct imsm_dev dev = get_imsm_dev(super, u->subarray); - struct imsm_map map = get_imsm_map(dev, 0); + 
struct imsm_map map = get_imsm_map(dev, MAP_0); int num_members = map->num_members; void space; int size, i; @@ -7553,7 +8849,7 @@ struct imsm_map map; dev = get_imsm_dev(super, u->subdev); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); current_level = map->raid_level; break; } @@ -7583,11 +8879,14 @@ dprintf("New anchor length is %llu\n", (unsigned long long)len); break; } + case update_size_change: { + break; + } case update_create_array: { struct imsm_update_create_array u = (void ) update->buf; struct intel_dev dv; struct imsm_dev dev = &u->dev; - struct imsm_map map = get_imsm_map(dev, 0); + struct imsm_map map = get_imsm_map(dev, MAP_0); struct dl dl; struct disk_info inf; int i; @@ -7672,20 +8971,20 @@ for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); num_members = map->num_members; for (j = 0; j < num_members; j++) { /* update ord entries being careful not to propagate * ord-flags to the first map / - ord = get_imsm_ord_tbl_ent(dev, j, -1); + ord = get_imsm_ord_tbl_ent(dev, j, MAP_X); if (ord_to_idx(ord) <= index) continue; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1)); - map = get_imsm_map(dev, 1); + map = get_imsm_map(dev, MAP_1); if (map) set_imsm_ord_tbl_ent(map, j, ord - 1); } @@ -7701,6 +9000,76 @@ } } #endif / MDASSEMBLE / + +static void close_targets(int targets, int new_disks) +{ + int i; + + if (!targets) + return; + + for (i = 0; i < new_disks; i++) { + if (targets[i] >= 0) { + close(targets[i]); + targets[i] = -1; + } + } +} + +static int imsm_get_allowed_degradation(int level, int raid_disks, + struct intel_super super, + struct imsm_dev dev) +{ + switch (level) { + case 1: + case 10:{ + int ret_val = 0; + struct imsm_map map; + int i; + + ret_val = raid_disks/2; + / check map if all disks pairs not failed + * in both maps + / + map = get_imsm_map(dev, MAP_0); + for (i = 0; i 
< ret_val; i++) { + int degradation = 0; + if (get_imsm_disk(super, i) == NULL) + degradation++; + if (get_imsm_disk(super, i + 1) == NULL) + degradation++; + if (degradation == 2) + return 0; + } + map = get_imsm_map(dev, MAP_1); + / if there is no second map + * result can be returned + / + if (map == NULL) + return ret_val; + / check degradation in second map + / + for (i = 0; i < ret_val; i++) { + int degradation = 0; + if (get_imsm_disk(super, i) == NULL) + degradation++; + if (get_imsm_disk(super, i + 1) == NULL) + degradation++; + if (degradation == 2) + return 0; + } + return ret_val; + } + case 5: + return 1; + case 6: + return 2; + default: + return 0; + } +} + + /****************************************************************************** * Function: open_backup_targets * Description: Function opens file descriptors for all devices given in @@ -7709,13 +9078,21 @@ * info : general array info * raid_disks : number of disks * raid_fds : table of device's file descriptors + * super : intel super for raid10 degradation check + * dev : intel device for raid10 degradation check * Returns: * 0 : success * -1 : fail *****************************************************************************/ -int open_backup_targets(struct mdinfo info, int raid_disks, int raid_fds) +int open_backup_targets(struct mdinfo info, int raid_disks, int raid_fds, + struct intel_super super, struct imsm_dev dev) { struct mdinfo sd; + int i; + int opened = 0; + + for (i = 0; i < raid_disks; i++) + raid_fds[i] = -1; for (sd = info->devs ; sd ; sd = sd->next) { char dn; @@ -7734,8 +9111,19 @@ raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR); if (raid_fds[sd->disk.raid_disk] < 0) { fprintf(stderr, "cannot open component\n"); - return -1; + continue; } + opened++; + } + / check if maximum array degradation level is not exceeded + / + if ((raid_disks - opened) > + imsm_get_allowed_degradation(info->new_level, + raid_disks, + super, dev)) { + fprintf(stderr, "Not enough disks can be 
opened.\n"); + close_targets(raid_fds, raid_disks); + return -2; } return 0; } @@ -7762,8 +9150,8 @@ struct mdinfo sd; char nm[30]; int fd; - struct imsm_map map_dest = get_imsm_map(dev, 0); - struct imsm_map map_src = get_imsm_map(dev, 1); + struct imsm_map map_dest = get_imsm_map(dev, MAP_0); + struct imsm_map map_src = get_imsm_map(dev, MAP_1); unsigned long long num_migr_units; unsigned long long array_blocks; @@ -7775,8 +9163,9 @@ migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE / max(map_dest->blocks_per_strip, map_src->blocks_per_strip); - migr_rec->dest_depth_per_unit = map_dest->blocks_per_strip; - new_data_disks = imsm_num_data_members(dev, 0); + migr_rec->dest_depth_per_unit = + max(map_dest->blocks_per_strip, map_src->blocks_per_strip); + new_data_disks = imsm_num_data_members(dev, MAP_0); migr_rec->blocks_per_unit = __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); migr_rec->dest_depth_per_unit = @@ -7840,12 +9229,12 @@ unsigned long long target_offsets = NULL; int targets = NULL; int i; - struct imsm_map map_dest = get_imsm_map(dev, 0); + struct imsm_map map_dest = get_imsm_map(dev, MAP_0); int new_disks = map_dest->num_members; int dest_layout = 0; int dest_chunk; unsigned long long start; - int data_disks = imsm_num_data_members(dev, 0); + int data_disks = imsm_num_data_members(dev, MAP_0); targets = malloc(new_disks * sizeof(int)); if (!targets) @@ -7868,7 +9257,8 @@ target_offsets[i] -= start/data_disks; } - if (open_backup_targets(info, new_disks, targets)) + if (open_backup_targets(info, new_disks, targets, + super, dev)) goto abort; dest_layout = imsm_level_to_layout(map_dest->raid_level); @@ -7894,9 +9284,7 @@ abort: if (targets) { - for (i = 0; i < new_disks; i++) - if (targets[i] >= 0) - close(targets[i]); + close_targets(targets, new_disks); free(targets); } free(target_offsets); @@ -7985,7 +9373,6 @@ unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); char buffer[20]; int skipped_disks = 0; - int 
max_degradation; err = sysfs_get_str(info, NULL, "array_state", (char )buffer, 20); if (err < 1) @@ -8007,16 +9394,15 @@ if (id == NULL) return 1; - map_dest = get_imsm_map(id->dev, 0); + map_dest = get_imsm_map(id->dev, MAP_0); new_disks = map_dest->num_members; - max_degradation = new_disks - imsm_num_data_members(id->dev, 0); read_offset = (unsigned long long) __le32_to_cpu(migr_rec->ckpt_area_pba) 512; write_offset = ((unsigned long long) __le32_to_cpu(migr_rec->dest_1st_member_lba) + - __le32_to_cpu(map_dest->pba_of_lba0)) * 512; + pba_of_lba0(map_dest)) * 512; unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; if (posix_memalign((void *)&buf, 512, unit_len) != 0) @@ -8025,7 +9411,11 @@ if (!targets) goto abort; - open_backup_targets(info, new_disks, targets); + if (open_backup_targets(info, new_disks, targets, super, id->dev)) { + fprintf(stderr, + Name ": Cannot open some devices belonging to array.\n"); + goto abort; + } for (i = 0; i < new_disks; i++) { if (targets[i] < 0) { @@ -8036,29 +9426,36 @@ fprintf(stderr, Name ": Cannot seek to block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if ((unsigned)read(targets[i], buf, unit_len) != unit_len) { fprintf(stderr, Name ": Cannot read copy area block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if (lseek64(targets[i], write_offset, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if ((unsigned)write(targets[i], buf, unit_len) != unit_len) { fprintf(stderr, Name ": Cannot restore block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } } - if (skipped_disks > max_degradation) { + if (skipped_disks > imsm_get_allowed_degradation(info->new_level, + new_disks, + super, + id->dev)) { fprintf(stderr, Name ": Cannot restore data from backup." 
" Too many failed disks\n"); @@ -8134,7 +9531,8 @@ static int imsm_reshape_is_allowed_on_container(struct supertype st, struct geo_params geo, - int old_raid_disks) + int old_raid_disks, + int direction) { / currently we only support increasing the number of devices * for a container. This increases the number of device for each @@ -8158,6 +9556,12 @@ return ret_val; } + if (direction == ROLLBACK_METADATA_CHANGES) { + dprintf("imsm: Metadata changes rollback is not supported for " + "container operation.\n"); + return ret_val; + } + info = container_content_imsm(st, NULL); for (member = info; member; member = member->next) { int result; @@ -8342,6 +9746,43 @@ return 0; } + +/****************************************************************************** + * function: imsm_create_metadata_update_for_size_change() + * Creates update for IMSM array for array size change. + * + *****************************************************************************/ +static int imsm_create_metadata_update_for_size_change( + struct supertype st, + struct geo_params geo, + struct imsm_update_size_change updatep) +{ + struct intel_super super = st->sb; + int update_memory_size = 0; + struct imsm_update_size_change u = NULL; + + dprintf("imsm_create_metadata_update_for_size_change(enter)" + " New size = %llu\n", geo->size); + + / size of all update data without anchor / + update_memory_size = sizeof(struct imsm_update_size_change); + + u = calloc(1, update_memory_size); + if (u == NULL) { + dprintf("error: cannot get memory for " + "imsm_create_metadata_update_for_size_change\n"); + return 0; + } + u->type = update_size_change; + u->subdev = super->current_vol; + u->new_size = geo->size; + + dprintf("imsm: reshape update preparation : OK\n"); + updatep = u; + + return update_memory_size; +} + /****************************************************************************** * function: imsm_create_metadata_update_for_migration() * Creates update for IMSM array. 
@@ -8382,7 +9823,7 @@ if (dev) { struct imsm_map map; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (map) { int current_chunk_size = __le16_to_cpu(map->blocks_per_strip) / 2; @@ -8441,16 +9882,26 @@ Function: imsm_analyze_change * Description: Function analyze change for single volume * and validate if transition is supported -* Parameters: Geometry parameters, supertype structure +* Parameters: Geometry parameters, supertype structure, +* metadata change direction (apply/rollback) * Returns: Operation type code on success, -1 if fail ***************************************************************************/ enum imsm_reshape_type imsm_analyze_change(struct supertype st, - struct geo_params geo) + struct geo_params geo, + int direction) { struct mdinfo info; int change = -1; int check_devs = 0; int chunk; + /* number of added/removed disks in operation result / + int devNumChange = 0; + / imsm compatible layout value for array geometry verification / + int imsm_layout = -1; + int data_disks; + struct imsm_dev dev; + struct intel_super super; + long long current_size; getinfo_super_imsm_volume(st, &info, NULL); if ((geo->level != info.array.level) && @@ -8468,23 +9919,23 @@ change = -1; goto analyse_change_exit; } + imsm_layout = geo->layout; check_devs = 1; - } - if (geo->level == 10) { + devNumChange = 1; / parity disk added / + } else if (geo->level == 10) { change = CH_TAKEOVER; check_devs = 1; + devNumChange = 2; / two mirrors added / + imsm_layout = 0x102; / imsm supported layout / } break; case 1: - if (geo->level == 0) { - change = CH_TAKEOVER; - check_devs = 1; - } - break; case 10: if (geo->level == 0) { change = CH_TAKEOVER; check_devs = 1; + devNumChange = -(geo->raid_disks/2); + imsm_layout = 0; / imsm raid0 layout / } break; } @@ -8519,8 +9970,11 @@ change = -1; goto analyse_change_exit; } - } else + } else { geo->layout = info.array.layout; + if (imsm_layout == -1) + imsm_layout = info.array.layout; + } if ((geo->chunksize > 0) 
&& (geo->chunksize != UnSet) && (geo->chunksize != info.array.chunk_size)) @@ -8529,10 +9983,89 @@ geo->chunksize = info.array.chunk_size; chunk = geo->chunksize / 1024; + + super = st->sb; + dev = get_imsm_dev(super, super->current_vol); + data_disks = imsm_num_data_members(dev , MAP_0); + / compute current size per disk member + / + current_size = info.custom_array_size / data_disks; + + if (geo->size > 0) { + / align component size + / + geo->size = imsm_component_size_aligment_check( + get_imsm_raid_level(dev->vol.map), + chunk 1024, + geo->size * 2); + } + + if ((current_size != geo->size) && (geo->size >= 0)) { + if (change != -1) { + fprintf(stderr, + Name " Error. Size change should be the only " + "one at a time.\n"); + change = -1; + goto analyse_change_exit; + } + if ((super->current_vol + 1) != super->anchor->num_raid_devs) { + fprintf(stderr, + Name " Error. The last volume in container " + "can be expanded only (%i/%i).\n", + super->current_vol, st->devnum); + goto analyse_change_exit; + } + if (geo->size == 0) { + /* requested size change to the maximum available size + / + unsigned long long freesize; + int rv; + + rv = imsm_get_free_size(st, dev->vol.map->num_members, + 0, chunk, &freesize); + if (rv == 0) { + fprintf(stderr, Name " Error. Cannot find " + "maximum available space.\n"); + change = -1; + goto analyse_change_exit; + } + geo->size = freesize + current_size; + + / align component size + / + geo->size = imsm_component_size_aligment_check( + get_imsm_raid_level(dev->vol.map), + chunk 1024, + geo->size); + } + + if ((direction == ROLLBACK_METADATA_CHANGES)) { + /* accept size for rollback only + / + } else { + / round size due to metadata compatibility + / + geo->size = (geo->size >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dprintf("Prepare update for size change to %llu\n", + geo->size ); + if (current_size >= geo->size) { + fprintf(stderr, + Name " Error. 
Size expanssion is " + "supported only (current size is %llu, " + "requested size /rounded/ is %llu).\n", + current_size, geo->size); + goto analyse_change_exit; + } + } + geo->size = data_disks; + geo->raid_disks = dev->vol.map->num_members; + change = CH_ARRAY_SIZE; + } if (!validate_geometry_imsm(st, geo->level, - geo->layout, - geo->raid_disks, + imsm_layout, + geo->raid_disks + devNumChange, &chunk, geo->size, 0, 0, 1)) @@ -8553,7 +10086,12 @@ } analyse_change_exit: - + if ((direction == ROLLBACK_METADATA_CHANGES) && + ((change == CH_MIGRATION) \|\| (change == CH_TAKEOVER))) { + dprintf("imsm: Metadata changes rollback is not supported for " + "migration and takeover operations.\n"); + change = -1; + } return change; } @@ -8593,7 +10131,7 @@ static int imsm_reshape_super(struct supertype st, long long size, int level, int layout, int chunksize, int raid_disks, int delta_disks, char backup, char dev, - int verbose) + int direction, int verbose) { int ret_val = 1; struct geo_params geo; @@ -8624,7 +10162,7 @@ int old_raid_disks = 0; if (imsm_reshape_is_allowed_on_container( - st, &geo, &old_raid_disks)) { + st, &geo, &old_raid_disks, direction)) { struct imsm_update_reshape u = NULL; int len; @@ -8661,8 +10199,9 @@ dprintf("imsm: info: Volume operation\n"); /* find requested device / while (dev) { - imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum); - if (devnum == geo.dev_id) + if (imsm_find_array_minor_by_subdev( + dev->index, st->container_dev, &devnum) == 0 + && devnum == geo.dev_id) break; dev = dev->next; } @@ -8672,7 +10211,7 @@ goto exit_imsm_reshape_super; } super->current_vol = dev->index; - change = imsm_analyze_change(st, &geo); + change = imsm_analyze_change(st, &geo, direction); switch (change) { case CH_TAKEOVER: ret_val = imsm_takeover(st, &geo); @@ -8697,6 +10236,26 @@ free(u); } break; + case CH_ARRAY_SIZE: { + struct imsm_update_size_change u = NULL; + int len = + imsm_create_metadata_update_for_size_change( + st, &geo, 
&u); + if (len < 1) { + dprintf("imsm: " + "Cannot prepare update\n"); + break; + } + ret_val = 0; + /* update metadata locally / + imsm_update_metadata_locally(st, u, len); + / and possibly remotely / + if (st->update_tail) + append_metadata_update(st, u, len); + else + free(u); + } + break; default: ret_val = 1; } @@ -8891,12 +10450,12 @@ goto abort; } - map_src = get_imsm_map(dev, 1); + map_src = get_imsm_map(dev, MAP_1); if (map_src == NULL) goto abort; - ndata = imsm_num_data_members(dev, 0); - odata = imsm_num_data_members(dev, 1); + ndata = imsm_num_data_members(dev, MAP_0); + odata = imsm_num_data_members(dev, MAP_1); chunk = __le16_to_cpu(map_src->blocks_per_strip) 512; old_data_stripe_length = odata * chunk; @@ -8912,6 +10471,18 @@ "are present in copy area.\n"); goto abort; } + /* Save checkpoint to update migration record for current + * reshape position (in md). It can be farther than current + * reshape position in metadata. + / + if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { + / ignore error == 2, this can mean end of reshape here + / + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_NORMAL, " + "initial save)\n"); + goto abort; + } } / size for data */
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super-mbr.c ^
@@ -169,13 +169,14 @@ static struct supertype match_metadata_desc(char arg) { - struct supertype st = malloc(sizeof(st)); + struct supertype st; - if (!st) - return st; if (strcmp(arg, "mbr") != 0) return NULL; + st = malloc(sizeof(st)); + if (!st) + return st; st->ss = &mbr; st->info = NULL; st->minor_version = 0;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super0.c ^
@@ -114,7 +114,7 @@ c=map_num(pers, sb->level); printf(" Raid Level : %s\n", c?c:"-unknown-"); if ((int)sb->level > 0) { - int ddsks=0; + int ddsks = 0, ddsks_denom = 1; printf(" Used Dev Size : %d%s\n", sb->size, human_size((long long)sb->size<<10)); switch(sb->level) { @@ -122,11 +122,15 @@ case 4: case 5: ddsks = sb->raid_disks-1; break; case 6: ddsks = sb->raid_disks-2; break; - case 10: ddsks = sb->raid_disks / (sb->layout&255) / ((sb->layout>>8)&255); + case 10: ddsks = sb->raid_disks; + ddsks_denom = (sb->layout&255) * ((sb->layout>>8)&255); + } + if (ddsks) { + long long asize = sb->size; + asize = (asize << 10) * ddsks / ddsks_denom; + printf(" Array Size : %llu%s\n", + asize >> 10, human_size(asize)); } - if (ddsks) - printf(" Array Size : %llu%s\n", (unsigned long long)ddsks * sb->size, - human_size(ddsks(long long)sb->size<<10)); } printf(" Raid Devices : %d\n", sb->raid_disks); printf(" Total Devices : %d\n", sb->nr_disks); @@ -360,6 +364,9 @@ info->array.state = sb->state; info->component_size = sb->size2; + if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) + info->bitmap_offset = 8; + info->disk.state = sb->this_disk.state; info->disk.major = sb->this_disk.major; info->disk.minor = sb->this_disk.minor; @@ -387,6 +394,8 @@ } else info->reshape_active = 0; + info->recovery_blocked = info->reshape_active; + sprintf(info->name, "%d", sb->md_minor); /* work_disks is calculated rather than read directly / for (i=0; i < MD_SB_DISKS; i++) @@ -570,6 +579,10 @@ sb->state &= ~(1<<MD_SB_BITMAP_PRESENT); } else if (strcmp(update, "_reshape_progress")==0) sb->reshape_position = info->reshape_progress; + else if (strcmp(update, "writemostly")==0) + sb->state \|= (1<<MD_DISK_WRITEMOSTLY); + else if (strcmp(update, "readwrite")==0) + sb->state &= ~(1<<MD_DISK_WRITEMOSTLY); else rv = -1; @@ -688,6 +701,8 @@ dk->minor = dinfo->minor; dk->raid_disk = dinfo->raid_disk; dk->state = dinfo->state; + / In case our source disk was writemostly, don't copy that bit / + dk->state &= 
~(1<<MD_DISK_WRITEMOSTLY); sb->this_disk = sb->disks[dinfo->number]; sb->sb_csum = calc_sb0_csum(sb); @@ -931,10 +946,10 @@ static struct supertype match_metadata_desc0(char arg) { - struct supertype st = malloc(sizeof(st)); - if (!st) return st; + struct supertype st = calloc(1, sizeof(st)); + if (!st) + return st; - memset(st, 0, sizeof(st)); st->container_dev = NoMdDev; st->ss = &super0; st->info = NULL; @@ -1054,13 +1069,11 @@ int rv = 0; int towrite, n; - char abuf[4096+4096]; - char buf = (char)(((long)(abuf+4096))&~4095L); + void buf; if (!get_dev_size(fd, NULL, &dsize)) return 1; - if (dsize < MD_RESERVED_SECTORS512) return -1; @@ -1071,6 +1084,9 @@ if (lseek64(fd, offset + 4096, 0)< 0LL) return 3; + if (posix_memalign(&buf, 4096, 4096)) + return -ENOMEM; + memset(buf, 0xff, 4096); memcpy(buf, ((char)sb)+MD_SB_BYTES, sizeof(bitmap_super_t)); towrite = 601024; @@ -1089,6 +1105,7 @@ if (towrite) rv = -2; + free(buf); return rv; } @@ -1115,6 +1132,13 @@ { unsigned long long ldsize; int fd; + unsigned int tbmax = 4; + + /* prior to linux 3.1, a but limits usable device size to 2TB. + * It was introduced in 2.6.29, but we won't worry about that detail + / + if (get_linux_version() < 3001000) + tbmax = 2; if (level == LEVEL_CONTAINER) { if (verbose) @@ -1127,9 +1151,10 @@ MD_SB_DISKS); return 0; } - if (size > (0x7fffffffULL<<9)) { + if (size >= tbmax 2ULL102410241024) { if (verbose) - fprintf(stderr, Name ": 0.90 metadata supports at most 2 terrabytes per device\n"); + fprintf(stderr, Name ": 0.90 metadata supports at most " + "%d terabytes per device\n", tbmax); return 0; } if (chunk && chunk == UnSet) @@ -1154,8 +1179,6 @@ if (ldsize < MD_RESERVED_SECTORS * 512) return 0; - if (size > (0x7fffffffULL<<9)) - return 0; *freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9); return 1; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/super1.c ^
@@ -89,6 +89,13 @@ __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty / }; +#define MAX_SB_SIZE 4096 +/ bitmap super size is 256, but we round up to a sector for alignment / +#define BM_SUPER_SIZE 512 +#define MAX_DEVS ((int)(MAX_SB_SIZE - sizeof(struct mdp_superblock_1)) / 2) +#define SUPER1_SIZE (MAX_SB_SIZE + BM_SUPER_SIZE \ + + sizeof(struct misc_dev_info)) + struct misc_dev_info { __u64 device_size; }; @@ -135,35 +142,58 @@ return __cpu_to_le32(csum); } +/ + * Information related to file descriptor used for aligned reads/writes. + * Cache the block size. + / +struct align_fd { + int fd; + int blk_sz; +}; + +static void init_afd(struct align_fd afd, int fd) +{ + afd->fd = fd; + + if (ioctl(afd->fd, BLKSSZGET, &afd->blk_sz) != 0) + afd->blk_sz = 512; +} + static char abuf[4096+4096]; -static int aread(int fd, void buf, int len) +static int aread(struct align_fd afd, void buf, int len) { / aligned read. * On devices with a 4K sector size, we need to read * the full sector and copy relevant bits into * the buffer / - int bsize; + int bsize, iosize; char b; int n; - if (ioctl(fd, BLKSSZGET, &bsize) != 0 \|\| - bsize <= len) - return read(fd, buf, len); - if (bsize > 4096) + + bsize = afd->blk_sz; + + if (!bsize \|\| bsize > 4096 \|\| len > 4096) { + if (!bsize) + fprintf(stderr, "WARNING - aread() called with " + "invalid block size\n"); return -1; - b = (char)(((long)(abuf+4096))&~4095UL); + } + b = ROUND_UP_PTR((char )abuf, 4096); - n = read(fd, b, bsize); + for (iosize = 0; iosize < len; iosize += bsize) + ; + n = read(afd->fd, b, iosize); if (n <= 0) return n; - lseek(fd, len - n, 1); + lseek(afd->fd, len - n, 1); if (n > len) n = len; memcpy(buf, b, n); return n; } -static int awrite(int fd, void buf, int len) +static int awrite(struct align_fd afd, void buf, int len) { / aligned write. * On devices with a 4K sector size, we need to write @@ -171,25 +201,34 @@ * than the write. * The address must be sector-aligned. 
/ - int bsize; + int bsize, iosize; char b; int n; - if (ioctl(fd, BLKSSZGET, &bsize) != 0 \|\| - bsize <= len) - return write(fd, buf, len); - if (bsize > 4096) + + bsize = afd->blk_sz; + if (!bsize \|\| bsize > 4096 \|\| len > 4096) { + if (!bsize) + fprintf(stderr, "WARNING - awrite() called with " + "invalid block size\n"); return -1; - b = (char)(((long)(abuf+4096))&~4095UL); + } + b = ROUND_UP_PTR((char )abuf, 4096); + + for (iosize = 0; iosize < len ; iosize += bsize) + ; + + if (len != iosize) { + n = read(afd->fd, b, iosize); + if (n <= 0) + return n; + lseek(afd->fd, -n, 1); + } - n = read(fd, b, bsize); - if (n <= 0) - return n; - lseek(fd, -n, 1); memcpy(b, buf, len); - n = write(fd, b, bsize); + n = write(afd->fd, b, iosize); if (n <= 0) return n; - lseek(fd, len - n, 1); + lseek(afd->fd, len - n, 1); return len; } @@ -239,7 +278,7 @@ (unsigned long long)__le64_to_cpu(sb->data_size), human_size(__le64_to_cpu(sb->data_size)<<9)); if (__le32_to_cpu(sb->level) > 0) { - int ddsks=0; + int ddsks = 0, ddsks_denom = 1; switch(__le32_to_cpu(sb->level)) { case 1: ddsks=1;break; case 4: @@ -247,13 +286,15 @@ case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break; case 10: layout = __le32_to_cpu(sb->layout); - ddsks = __le32_to_cpu(sb->raid_disks) - / (layout&255) / ((layout>>8)&255); + ddsks = __le32_to_cpu(sb->raid_disks); + ddsks_denom = (layout&255) * ((layout>>8)&255); } - if (ddsks) + if (ddsks) { + long long asize = __le64_to_cpu(sb->size); + asize = (asize << 9) * ddsks / ddsks_denom; printf(" Array Size : %llu%s\n", - ddsks(unsigned long long)__le64_to_cpu(sb->size), - human_size(ddsks__le64_to_cpu(sb->size)<<9)); + asize >> 10, human_size(asize)); + } if (sb->size != sb->data_size) printf(" Used Dev Size : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->size), @@ -313,7 +354,7 @@ printf("\n"); } if (sb->devflags) { - printf(" Flags :"); + printf(" Flags :"); if (sb->devflags & WriteMostly1) printf(" write-mostly"); printf("\n"); @@ -573,17 +614,19 @@ 
info->array.utime = __le64_to_cpu(sb->utime); info->array.chunk_size = __le32_to_cpu(sb->chunksize)512; info->array.state = - (__le64_to_cpu(sb->resync_offset) >= __le64_to_cpu(sb->size)) + (__le64_to_cpu(sb->resync_offset) == MaxSector) ? 1 : 0; info->data_offset = __le64_to_cpu(sb->data_offset); info->component_size = __le64_to_cpu(sb->size); + if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) + info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset); info->disk.major = 0; info->disk.minor = 0; info->disk.number = __le32_to_cpu(sb->dev_number); if (__le32_to_cpu(sb->dev_number) >= __le32_to_cpu(sb->max_dev) \|\| - __le32_to_cpu(sb->max_dev) > 512) + __le32_to_cpu(sb->dev_number) >= MAX_DEVS) role = 0xfffe; else role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]); @@ -600,6 +643,8 @@ info->disk.state = 6; / active and in sync / info->disk.raid_disk = role; } + if (sb->devflags & WriteMostly1) + info->disk.state \|= (1 << MD_DISK_WRITEMOSTLY); info->events = __le64_to_cpu(sb->events); sprintf(info->text_version, "1.%d", st->minor_version); info->safe_mode_delay = 200; @@ -626,6 +671,8 @@ } else info->reshape_active = 0; + info->recovery_blocked = info->reshape_active; + if (map) for (i=0; i<map_disks; i++) map[i] = 0; @@ -762,7 +809,7 @@ if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) { struct bitmap_super_s bm; - bm = (struct bitmap_super_s)(st->sb+1024); + bm = (struct bitmap_super_s)(st->sb+MAX_SB_SIZE); memcpy(bm->uuid, sb->set_uuid, 16); } } else if (strcmp(update, "no-bitmap") == 0) { @@ -794,7 +841,7 @@ __le64_to_cpu(sb->data_offset)) { /* set data_size to device size less data_offset / struct misc_dev_info misc = (struct misc_dev_info) - (st->sb + 1024 + 512); + (st->sb + MAX_SB_SIZE + BM_SUPER_SIZE); printf("Size was %llu\n", (unsigned long long) __le64_to_cpu(sb->data_size)); sb->data_size = __cpu_to_le64( @@ -803,6 +850,10 @@ __le64_to_cpu(sb->data_size)); } else if (strcmp(update, "_reshape_progress")==0) 
sb->reshape_position = __cpu_to_le64(info->reshape_progress); + else if (strcmp(update, "writemostly")==0) + sb->devflags \|= WriteMostly1; + else if (strcmp(update, "readwrite")==0) + sb->devflags &= ~WriteMostly1; else rv = -1; @@ -817,14 +868,14 @@ int spares; int rfd; char defname[10]; + int sbsize; - if (posix_memalign((void)&sb, 512, (1024 + 512 + - sizeof(struct misc_dev_info))) != 0) { + if (posix_memalign((void)&sb, 4096, SUPER1_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate superblock\n", __func__); return 0; } - memset(sb, 0, 1024); + memset(sb, 0, SUPER1_SIZE); st->sb = sb; if (info == NULL) { @@ -833,9 +884,9 @@ } spares = info->working_disks - info->active_disks; - if (info->raid_disks + spares > 384) { + if (info->raid_disks + spares > MAX_DEVS) { fprintf(stderr, Name ": too many devices requested: %d+%d > %d\n", - info->raid_disks , spares, 384); + info->raid_disks , spares, MAX_DEVS); return 0; } @@ -859,7 +910,6 @@ sprintf(defname, "%d", info->md_minor); name = defname; } - memset(sb->set_name, 0, 32); if (homehost && strchr(name, ':')== NULL && strlen(homehost)+1+strlen(name) < 32) { @@ -887,11 +937,11 @@ sb->resync_offset = MaxSector; else sb->resync_offset = 0; - sb->max_dev = __cpu_to_le32((1024- sizeof(struct mdp_superblock_1))/ - sizeof(sb->dev_roles[0])); - memset(sb->pad3, 0, sizeof(sb->pad3)); + sbsize = sizeof(struct mdp_superblock_1) + 2 (info->raid_disks + spares); + sbsize = ROUND_UP(sbsize, 512); + sb->max_dev = __cpu_to_le32((sbsize - sizeof(struct mdp_superblock_1)) / 2); - memset(sb->dev_roles, 0xff, 1024 - sizeof(struct mdp_superblock_1)); + memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1)); return 1; } @@ -919,10 +969,11 @@ rp = 0xfffe; if (dk->number >= (int)__le32_to_cpu(sb->max_dev) && - __le32_to_cpu(sb->max_dev) < 384) + __le32_to_cpu(sb->max_dev) < MAX_DEVS) sb->max_dev = __cpu_to_le32(dk->number+1); sb->dev_number = __cpu_to_le32(dk->number); + sb->devflags = 0; / don't copy another 
disks flags / sb->sb_csum = calc_sb_1_csum(sb); dip = (struct devinfo )&st->info; @@ -945,6 +996,7 @@ { struct mdp_superblock_1 sb = st->sb; unsigned long long sb_offset; + struct align_fd afd; int sbsize; unsigned long long dsize; @@ -956,6 +1008,8 @@ if (dsize < 24) return 2; + init_afd(&afd, fd); + /* * Calculate the position of the superblock. * It is always aligned to a 4K boundary and @@ -992,20 +1046,18 @@ if (lseek64(fd, sb_offset << 9, 0)< 0LL) return 3; - sbsize = sizeof(sb) + 2 __le32_to_cpu(sb->max_dev); - sbsize = (sbsize+511)&(~511UL); + sbsize = ROUND_UP(sizeof(sb) + 2 __le32_to_cpu(sb->max_dev), 512); - if (awrite(fd, sb, sbsize) != sbsize) + if (awrite(&afd, sb, sbsize) != sbsize) return 4; if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) { struct bitmap_super_s bm = (struct bitmap_super_s) - (((char)sb)+1024); + (((char)sb)+MAX_SB_SIZE); if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) { locate_bitmap1(st, fd); - if (awrite(fd, bm, sizeof(bm)) != - sizeof(bm)) - return 5; + if (awrite(&afd, bm, sizeof(bm)) != sizeof(bm)) + return 5; } } fsync(fd); @@ -1042,7 +1094,7 @@ unsigned long long reserved; struct devinfo di; unsigned long long dsize, array_size; - unsigned long long sb_offset; + unsigned long long sb_offset, headroom; for (di = st->info; di && ! 
rv ; di = di->next) { if (di->disk.state == 1) @@ -1055,7 +1107,9 @@ sb->dev_number = __cpu_to_le32(di->disk.number); if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY)) - sb->devflags \|= __cpu_to_le32(WriteMostly1); + sb->devflags \|= WriteMostly1; + else + sb->devflags &= ~WriteMostly1; if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 \|\| read(rfd, sb->device_uuid, 16) != 16) { @@ -1086,13 +1140,16 @@ } free(refst); - if (!get_dev_size(di->fd, NULL, &dsize)) - return 1; + if (!get_dev_size(di->fd, NULL, &dsize)) { + rv = 1; + goto error_out; + } dsize >>= 9; if (dsize < 24) { close(di->fd); - return 2; + rv = 2; + goto error_out; } @@ -1110,6 +1167,14 @@ / work out how much space we left for a bitmap / bm_space = choose_bm_space(array_size); + / We try to leave 0.1% at the start for reshape + * operations, but limit this to 128Meg (0.1% of 10Gig) + * which is plenty for efficient reshapes + / + headroom = 128 1024 * 2; + while (headroom << 10 > array_size) + headroom >>= 1; + switch(st->minor_version) { case 0: sb_offset = dsize; @@ -1132,6 +1197,9 @@ /* force 4K alignment / reserved &= ~7ULL; + if (reserved < headroom) + reserved = headroom; + sb->data_offset = __cpu_to_le64(reserved); sb->data_size = __cpu_to_le64(dsize - reserved); break; @@ -1152,26 +1220,33 @@ / force 4K alignment / reserved &= ~7ULL; + if (reserved < headroom) + reserved = headroom; + sb->data_offset = __cpu_to_le64(reserved); sb->data_size = __cpu_to_le64(dsize - reserved); break; default: - return -EINVAL; + fprintf(stderr, Name ": Failed to write invalid " + "metadata format 1.%i to %s\n", + st->minor_version, di->devname); + rv = -EINVAL; + goto out; } sb->sb_csum = calc_sb_1_csum(sb); rv = store_super1(st, di->fd); - if (rv) - fprintf(stderr, - Name ": failed to write superblock to %s\n", - di->devname); - if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1)) rv = st->ss->write_bitmap(st, di->fd); close(di->fd); di->fd = -1; } +error_out: + if (rv) + fprintf(stderr, Name ": Failed to write 
metadata to %s\n", + di->devname); +out: return rv; } #endif @@ -1194,15 +1269,12 @@ return 1; if (!first) { - if (posix_memalign((void)&first, 512, - 1024 + 512 + - sizeof(struct misc_dev_info)) != 0) { + if (posix_memalign((void)&first, 4096, SUPER1_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate superblock\n", __func__); return 1; } - memcpy(first, second, 1024 + 512 + - sizeof(struct misc_dev_info)); + memcpy(first, second, SUPER1_SIZE); st->sb = first; return 0; } @@ -1227,9 +1299,12 @@ int uuid[4]; struct bitmap_super_s bsb; struct misc_dev_info misc; + struct align_fd afd; free_super1(st); + init_afd(&afd, fd); + if (st->ss == NULL \|\| st->minor_version == -1) { int bestvers = -1; struct supertype tst; @@ -1256,7 +1331,7 @@ int rv; tst.minor_version = bestvers; tst.ss = &super1; - tst.max_devs = 384; + tst.max_devs = MAX_DEVS; rv = load_super1(&tst, fd, devname); if (rv == 0) st = tst; @@ -1309,15 +1384,13 @@ return 1; } - if (posix_memalign((void)&super, 512, - 1024 + 512 + - sizeof(struct misc_dev_info)) != 0) { + if (posix_memalign((void)&super, 4096, SUPER1_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate superblock\n", __func__); return 1; } - if (aread(fd, super, 1024) != 1024) { + if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) { if (devname) fprintf(stderr, Name ": Cannot read superblock on %s\n", devname); @@ -1349,9 +1422,9 @@ } st->sb = super; - bsb = (struct bitmap_super_s )(((char)super)+1024); + bsb = (struct bitmap_super_s )(((char)super)+MAX_SB_SIZE); - misc = (struct misc_dev_info) (((char)super)+1024+512); + misc = (struct misc_dev_info) (((char)super)+MAX_SB_SIZE+BM_SUPER_SIZE); misc->device_size = dsize; /* Now check on the bitmap superblock / @@ -1362,8 +1435,7 @@ should get that written out. 
/ locate_bitmap1(st, fd); - if (aread(fd, ((char)super)+1024, 512) - != 512) + if (aread(&afd, bsb, 512) != 512) goto no_bitmap; uuid_from_super1(st, uuid); @@ -1373,20 +1445,21 @@ return 0; no_bitmap: - super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map) & ~1); + super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map) + & ~MD_FEATURE_BITMAP_OFFSET); return 0; } static struct supertype match_metadata_desc1(char arg) { - struct supertype st = malloc(sizeof(st)); - if (!st) return st; + struct supertype st = calloc(1, sizeof(st)); + if (!st) + return st; - memset(st, 0, sizeof(st)); st->container_dev = NoMdDev; st->ss = &super1; - st->max_devs = 384; + st->max_devs = MAX_DEVS; st->sb = NULL; / leading zeros can be safely ignored. --detail generates them. / while (arg == '0') @@ -1437,7 +1510,7 @@ else if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) { /* hot-add. allow for actual size of bitmap / struct bitmap_super_s bsb; - bsb = (struct bitmap_super_s )(((char)super)+1024); + bsb = (struct bitmap_super_s )(((char)super)+MAX_SB_SIZE); devsize -= bitmap_sectors(bsb); } #endif @@ -1447,12 +1520,13 @@ st->minor_version = 2; if (super == NULL && st->minor_version > 0) { /* haven't committed to a size yet, so allow some - * slack for alignment of data_offset. - * We haven't access to device details so allow - * 1 Meg if bigger than 1Gig + * slack for space for reshape. + * Limit slack to 128M, but aim for about 0.1% / - if (devsize > 102410242) - devsize -= 10242; + unsigned long long headroom = 12810242; + while ((headroom << 10) > devsize) + headroom >>= 1; + devsize -= headroom; } switch(st->minor_version) { case 0: @@ -1475,12 +1549,10 @@ int may_change, int major) { /* - * If not may_change, then this is a 'Grow', and the bitmap - * must fit after the superblock. - * If may_change, then this is create, and we can put the bitmap - * before the superblock if we like, or may move the start. 
- * If !may_change, the bitmap MUST live at offset of 1K, until - * we get a sysfs interface. + * If not may_change, then this is a 'Grow' without sysfs support for + * bitmaps, and the bitmap must fit after the superblock at 1K offset. + * If may_change, then this is create or a Grow with sysfs syupport, + * and we can put the bitmap wherever we like. * * size is in sectors, chunk is in bytes !!! / @@ -1491,16 +1563,20 @@ long offset; unsigned long long chunk = chunkp; int room = 0; + int creating = 0; struct mdp_superblock_1 sb = st->sb; - bitmap_super_t bms = (bitmap_super_t)(((char)sb) + 1024); + bitmap_super_t bms = (bitmap_super_t)(((char)sb) + MAX_SB_SIZE); int uuid[4]; + if (__le64_to_cpu(sb->data_size) == 0) + / Must be creating the array, else data_size would be non-zero / + creating = 1; switch(st->minor_version) { case 0: / either 3K after the superblock (when hot-add), * or some amount of space before. / - if (may_change) { + if (creating) { / We are creating array, so we know how much room has * been left. 
/ @@ -1510,8 +1586,8 @@ room = __le64_to_cpu(sb->super_offset) - __le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->data_size); - / remove '1 \|\|' when we can set offset via sysfs / - if (1 \|\| (room < 32 && + + if (!may_change \|\| (room < 32 && __le32_to_cpu(sb->max_dev) <= 384)) { room = 32; offset = 12; @@ -1522,17 +1598,17 @@ break; case 1: case 2: / between superblock and data / - if (may_change) { + if (creating) { offset = 42; room = choose_bm_space(__le64_to_cpu(sb->size)); } else { room = __le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->super_offset); - if (1 \|\| __le32_to_cpu(sb->max_dev) <= 384) { - room -= 2; + if (!may_change) { + room -= 2; /* Leave 1K for superblock / offset = 2; } else { - room -= 42; + room -= 42; / leave 4K for superblock / offset = 42; } } @@ -1575,9 +1651,10 @@ offset = -room; } - sb->bitmap_offset = __cpu_to_le32(offset); + sb->bitmap_offset = (int32_t)__cpu_to_le32(offset); - sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) \| 1); + sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) + \| MD_FEATURE_BITMAP_OFFSET); memset(bms, 0, sizeof(bms)); bms->magic = __cpu_to_le32(BITMAP_MAGIC); bms->version = __cpu_to_le32(major); @@ -1592,7 +1669,6 @@ return 1; } - static void locate_bitmap1(struct supertype st, int fd) { unsigned long long offset; @@ -1616,16 +1692,21 @@ static int write_bitmap1(struct supertype st, int fd) { struct mdp_superblock_1 sb = st->sb; - bitmap_super_t bms = (bitmap_super_t)(((char)sb)+1024); + bitmap_super_t bms = (bitmap_super_t)(((char)sb)+MAX_SB_SIZE); int rv = 0; - + void buf; int towrite, n; - char buf = (char)(((long)(abuf+4096))&~4095UL); + struct align_fd afd; + + init_afd(&afd, fd); locate_bitmap1(st, fd); + if (posix_memalign(&buf, 4096, 4096)) + return -ENOMEM; + memset(buf, 0xff, 4096); - memcpy(buf, ((char)sb)+1024, sizeof(bitmap_super_t)); + memcpy(buf, (char )bms, sizeof(bitmap_super_t)); towrite = __le64_to_cpu(bms->sync_size) / 
(__le32_to_cpu(bms->chunksize)>>9); towrite = (towrite+7) >> 3; / bits to bytes */ @@ -1635,7 +1716,7 @@ n = towrite; if (n > 4096) n = 4096; - n = write(fd, buf, n); + n = awrite(&afd, buf, n); if (n > 0) towrite -= n; else @@ -1646,6 +1727,7 @@ if (towrite) rv = -2; + free(buf); return rv; }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/sysfs.c ^
@@ -217,6 +217,19 @@ msec = (msec * 1000) / scale; sra->safe_mode_delay = msec; } + if (options & GET_BITMAP_LOCATION) { + strcpy(base, "bitmap/location"); + if (load_sys(fname, buf)) + goto abort; + if (strncmp(buf, "file", 4) == 0) + sra->bitmap_offset = 1; + else if (strncmp(buf, "none", 4) == 0) + sra->bitmap_offset = 0; + else if (buf[0] == '+') + sra->bitmap_offset = strtol(buf+1, NULL, 10); + else + goto abort; + } if (! (options & GET_DEVS)) return sra; @@ -379,7 +392,7 @@ return 0; n = read(fd, fname, sizeof(fname)); close(fd); - if (n == sizeof(fname)) + if (n < 0 \|\| n == sizeof(fname)) return 0; fname[n] = 0; return strtoull(fname, NULL, 10) * 2; @@ -415,6 +428,14 @@ return sysfs_set_str(sra, dev, name, valstr); } +int sysfs_set_num_signed(struct mdinfo sra, struct mdinfo dev, + char name, long long val) +{ + char valstr[50]; + sprintf(valstr, "%lli", val); + return sysfs_set_str(sra, dev, name, valstr); +} + int sysfs_uevent(struct mdinfo sra, char event) { char fname[50]; @@ -470,7 +491,7 @@ lseek(fd, 0, 0); n = read(fd, buf, sizeof(buf)); if (n <= 0) - return -1; + return -2; buf[n] = 0; val = strtoull(buf, &ep, 0); if (ep == buf \|\| (ep != 0 && ep != '\n' && ep != ' ')) @@ -541,8 +562,21 @@ ver[0] = 0; if (info->array.major_version == -1 && info->array.minor_version == -2) { + char buf[1024]; + strcat(strcpy(ver, "external:"), info->text_version); + / meta version might already be set if we are setting + * new geometry for a reshape. In that case we don't + * want to over-write the 'readonly' flag that is + * stored in the metadata version. 
So read the current + * version first, and preserve the flag + / + if (sysfs_get_str(info, NULL, "metadata_version", + buf, 1024) > 0) + if (strlen(buf) >= 9 && buf[9] == '-') + ver[9] = '-'; + if ((vers % 100) < 2 \|\| sysfs_set_str(info, NULL, "metadata_version", ver) < 0) { @@ -606,7 +640,7 @@ memset(nm, 0, sizeof(nm)); sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor); - rv = readlink(dv, nm, sizeof(nm)); + rv = readlink(dv, nm, sizeof(nm)-1); if (rv <= 0) return -1; nm[rv] = '\0'; @@ -709,9 +743,9 @@ / from an open block device, try to retrieve it scsi_id / struct stat st; char path[256]; - char c1, c2; DIR dir; struct dirent de; + int host, bus, target, lun; if (fstat(fd, &st)) return 1; @@ -723,32 +757,22 @@ if (!dir) return 1; - de = readdir(dir); - while (de) { - if (strchr(de->d_name, ':')) + for (de = readdir(dir); de; de = readdir(dir)) { + int count; + + if (de->d_type != DT_DIR) + continue; + + count = sscanf(de->d_name, "%d:%d:%d:%d", &host, &bus, &target, &lun); + if (count == 4) break; - de = readdir(dir); } closedir(dir); if (!de) return 1; - c1 = de->d_name; - c2 = strchr(c1, ':'); - c2 = '\0'; - id = strtol(c1, NULL, 10) << 24; / host / - c1 = c2 + 1; - c2 = strchr(c1, ':'); - c2 = '\0'; - id \|= strtol(c1, NULL, 10) << 16; / bus / - c1 = c2 + 1; - c2 = strchr(c1, ':'); - c2 = '\0'; - id \|= strtol(c1, NULL, 10) << 8; / target / - c1 = c2 + 1; - id \|= strtol(c1, NULL, 10); /* lun / - + id = (host << 24) \| (bus << 16) \| (target << 8) \| (lun << 0); return 0; } @@ -793,6 +817,8 @@ } n = read(fd, buf, sizeof(buf)-1); close(fd); + if (n < 0) + continue; buf[n] = 0; if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 \|\| c != '\n') { @@ -819,7 +845,6 @@ { /* Try to freeze resync/rebuild on this array/container. * Return -1 if the array is busy, - * return -2 container cannot be frozen, * return 0 if this kernel doesn't support 'frozen' * return 1 if it worked. 
/ @@ -829,8 +854,10 @@ return 1; / no sync_action == frozen / if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0) return 0; - if (strcmp(buf, "idle\n") != 0 && - strcmp(buf, "frozen\n") != 0) + if (strcmp(buf, "frozen\n") == 0) + / Already frozen */ + return 0; + if (strcmp(buf, "idle\n") != 0) return -1; if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0) return 0;
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/test ^
@@ -190,9 +190,10 @@ rasize=$[rasize*DEV_ROUND_K*2] fi if [ `/sbin/blockdev --getsize $dev` -eq 0 ]; then sleep 2 ; fi - if [ $rasize -ne `/sbin/blockdev --getsize $dev` ] + _sz=`/sbin/blockdev --getsize $dev` + if [ $rasize -lt $_sz -o $[rasize*4/5] -gt $_sz ] then - echo "ERROR: size is wrong for $dev: $cnt * $dvsize (chunk=$chunk) = $rasize, not `/sbin/blockdev --getsize $dev`" + echo "ERROR: size is wrong for $dev: $cnt * $dvsize (chunk=$chunk) = $rasize, not $_sz" exit 1 fi }
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/00raid1 ^
@@ -6,14 +6,14 @@ mdadm -CR $md0 -l 1 -n2 $dev0 $dev1 check resync check raid1 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 mdadm -S $md0 # now with version-0.90 superblock, spare mdadm -CR $md0 -e0.90 --level=raid1 -n3 -x2 $dev0 missing missing $dev1 $dev2 check recovery check raid1 -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 mdadm -S $md0 # now with no superblock
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/03r5assemV1 ^
@@ -58,6 +58,9 @@ eval $tst ### Now with a missing device +# We don't want the recovery to complete while we are +# messing about here. +echo 1000 > /proc/sys/dev/raid/speed_limit_max mdadm -AR $md1 $dev0 $dev2 $dev3 $dev4 # check state U_U @@ -120,3 +123,4 @@ mdadm -I -c $conf $dev1 mdadm -I -c $conf $dev2 eval $tst +echo 2000 > /proc/sys/dev/raid/speed_limit_max
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-bitmapfile ^
@@ -6,11 +6,11 @@ rm -f $bmf mdadm --create --run $md0 --level=1 -n2 --delay=1 --bitmap $bmf $dev1 $dev2 check wait -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 mdadm -S $md0 mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 dirty1=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -20,7 +20,7 @@ exit 1 fi mdadm $md0 -f $dev1 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 sleep 4 dirty3=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-grow-external ^
@@ -4,7 +4,7 @@ # mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2 check wait -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 bmf=$targetdir/bm rm -f $bmf @@ -14,7 +14,7 @@ sleep 4 dirty2=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 dirty3=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty4=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-grow-internal ^
@@ -4,7 +4,7 @@ # mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2 check wait -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 #mdadm -E $dev1 mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1 \|\| { mdadm -X $dev2 ; exit 1; } @@ -12,7 +12,7 @@ sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty4=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-grow-internal-1 ^
@@ -4,7 +4,7 @@ # mdadm --create --run $md0 -e1 -l 1 -n 2 $dev1 $dev2 check wait -testdev $md0 1 $mdsize1b 1 +testdev $md0 1 $mdsize1b 64 #mdadm -E $dev1 mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1 @@ -12,7 +12,7 @@ sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` -testdev $md0 1 $mdsize1b 1 +testdev $md0 1 $mdsize1b 64 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty4=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-internalbitmap ^
@@ -4,11 +4,11 @@ # mdadm --create -e0.90 --run $md0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2 check wait -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 mdadm -S $md0 mdadm --assemble $md0 $dev1 $dev2 -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 dirty1=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -18,7 +18,7 @@ exit 1 fi mdadm $md0 -f $dev1 -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 sleep 4 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-internalbitmap-v1a ^
@@ -5,11 +5,11 @@ mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2 check wait check bitmap -testdev $md0 1 $mdsize1b 1 +testdev $md0 1 $mdsize1b 64 mdadm -S $md0 mdadm --assemble $md0 $dev1 $dev2 -testdev $md0 1 $mdsize1b 1 +testdev $md0 1 $mdsize1b 64 dirty1=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -19,7 +19,7 @@ exit 1 fi mdadm $md0 -f $dev1 -testdev $md0 1 $mdsize1b 1 +testdev $md0 1 $mdsize1b 64 sleep 4 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-internalbitmap-v1b ^
@@ -5,12 +5,12 @@ mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2 check wait check bitmap -testdev $md0 1 $mdsize11 1 +testdev $md0 1 $mdsize11 64 mdadm -S $md0 mdadm --assemble $md0 $dev1 $dev2 check bitmap -testdev $md0 1 $mdsize11 1 +testdev $md0 1 $mdsize11 64 dirty1=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -20,7 +20,7 @@ exit 1 fi mdadm $md0 -f $dev1 -testdev $md0 1 $mdsize11 1 +testdev $md0 1 $mdsize11 64 sleep 4 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-internalbitmap-v1c ^
@@ -5,11 +5,11 @@ mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk 4 $dev1 $dev2 check wait check bitmap -testdev $md0 1 $mdsize12 1 +testdev $md0 1 $mdsize12 64 mdadm -S $md0 mdadm --assemble $md0 $dev1 $dev2 -testdev $md0 1 $mdsize12 1 +testdev $md0 1 $mdsize12 64 dirty1=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -19,7 +19,7 @@ exit 1 fi mdadm $md0 -f $dev1 -testdev $md0 1 $mdsize12 1 +testdev $md0 1 $mdsize12 64 sleep 4 dirty3=`mdadm -X $dev2 \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-n3-bitmapfile ^
@@ -8,11 +8,11 @@ rm -f $bmf mdadm --create -e0.90 --run $md0 --level=1 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3 check wait -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 mdadm -S $md0 mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3 -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 dirty1=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` sleep 4 dirty2=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` @@ -22,7 +22,7 @@ exit 1 fi mdadm $md0 -f $dev2 -testdev $md0 1 $mdsize0 1 +testdev $md0 1 $mdsize0 64 sleep 4 dirty3=`mdadm -X $bmf \| sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'` if [ $dirty3 -lt 400 ]
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/05r1-re-add ^
@@ -8,7 +8,7 @@ mdadm -CR $md0 -l1 -n2 -binternal --bitmap-chunk=4 -d1 $dev1 $dev2 check resync check wait -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 sleep 4 mdadm $md0 -f $dev2 @@ -21,7 +21,7 @@ mdadm $md0 -f $dev2 sleep 1 mdadm $md0 -r $dev2 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 mdadm $md0 -a $dev2 check wait cmp --ignore-initial=$[16512] --bytes=$[$mdsize01024] $dev1 $dev2
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/06wrmostly ^
@@ -2,13 +2,13 @@ # create a raid1 array with a wrmostly device mdadm -CR $md0 -l1 -n3 $dev0 $dev1 --write-mostly $dev2 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 # unfortunately, we cannot measure if any read requests are going to $dev2 mdadm -S $md0 mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal --bitmap-chunk=4 $dev0 $dev1 --write-mostly $dev2 -testdev $md0 1 $mdsize1a 1 +testdev $md0 1 $mdsize1a 64 mdadm -S $md0
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/07autoassemble ^
@@ -8,8 +8,8 @@ mdadm -Ss mdadm -As -c /dev/null --homehost=testing -vvv -testdev $md1 1 $mdsize1a 1 -testdev $md2 1 $mdsize1a 1 +testdev $md1 1 $mdsize1a 64 +testdev $md2 1 $mdsize1a 64 testdev $md0 2 $mdsize11a 512 mdadm -Ss @@ -19,6 +19,6 @@ mdadm -CR $md0 -l0 -n2 $md1 $dev2 --homehost=testing mdadm -Ss mdadm -As -c /dev/null --homehost=testing -vvv -testdev $md1 1 $mdsize1a 1 +testdev $md1 1 $mdsize1a 64 testdev $md0 1 $[mdsize1a+mdsize11a-1024] 512 mdadm -Ss
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/09imsm-assemble ^
@@ -49,7 +49,7 @@ # reassemble and make sure one of the families falls out mdadm -A $container $dev0 $dev1 $dev2 $dev3 mdadm -IR $container -testdev ${member}_0 1 $size 1 +testdev ${member}_0 1 $size 64 if mdadm --remove $container $dev0 ; then # the dev[23] family won imsm_check_removal $container $dev1 @@ -69,5 +69,5 @@ mdadm -A ${container}2 $dev2 $dev3 --update=uuid mdadm -IR ${container}2 -testdev ${member}_0 1 $size 1 -testdev ${member}_1 1 $size 1 +testdev ${member}_0 1 $size 64 +testdev ${member}_1 1 $size 64
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/09imsm-create-fail-rebuild ^
@@ -42,7 +42,7 @@ chunk=0 mdadm -CR $member1 $dev0 $dev1 -n $num_disks -l $level -z $size imsm_check member $member1 $num_disks $level $size $size $offset $chunk -testdev $member1 1 $size 1 +testdev $member1 1 $size 64 check wait mdadm -Ss
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/tests/10ddf-create ^
@@ -18,7 +18,7 @@ mdadm -CR r5 -l5 -n3 /dev/md/ddf0 testdev /dev/md/r0 5 5000 512 # r0 will use 4608 due to chunk size, so that leaves 28160 for the rest -testdev /dev/md/r1 1 28160 1 +testdev /dev/md/r1 1 28160 64 testdev /dev/md/r5 2 28160 512 dd if=/dev/sda of=/dev/md/r0 \|\| true dd if=/dev/sda of=/dev/md/r1 \|\| true
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/udev-md-raid.rules ^
@@ -2,11 +2,19 @@ SUBSYSTEM!="block", GOTO="md_end" -# handle potential components of arrays -ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}" -ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}" -ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}" -ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}" +# handle potential components of arrays (the ones supported by md) +ENV{ID_FS_TYPE}=="ddf_raid_member\|isw_raid_member\|linux_raid_member", GOTO="md_inc" +GOTO="md_inc_skip" + +LABEL="md_inc" + +# remember you can limit what gets auto/incrementally assembled by +# mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY' +ACTION=="add", RUN+="/sbin/mdadm --incremental $tempnode" +ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}" +ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="/sbin/mdadm -If $name" + +LABEL="md_inc_skip" # handle md arrays ACTION!="add\|change", GOTO="md_end"
[-] [+]	Changed	mdadm-3.2.4.tar.bz2/util.c ^
@@ -32,6 +32,8 @@ #include <dirent.h> #include <signal.h> +int __offroot; + /* * following taken from linux/blkpg.h because they aren't * anywhere else and it isn't safe to #include linux/ * stuff. @@ -146,16 +148,16 @@ { struct utsname name; char cp; - int a,b,c; + int a = 0, b = 0,c = 0; if (uname(&name) <0) return -1; cp = name.release; a = strtoul(cp, &cp, 10); - if (cp != '.') return -1; - b = strtoul(cp+1, &cp, 10); - if (cp != '.') return -1; - c = strtoul(cp+1, NULL, 10); + if (cp == '.') + b = strtoul(cp+1, &cp, 10); + if (cp == '.') + c = strtoul(cp+1, &cp, 10); return (a1000000)+(b1000)+c; } @@ -192,6 +194,7 @@ followed by 'K', 'M', or 'G'. * Without a suffix, K is assumed. * Number returned is in sectors (half-K) + * -1 returned on error. / char c; long long s = strtoll(size, &c, 10); @@ -213,7 +216,7 @@ } } if (c) - s = 0; + s = -1; return s; } @@ -309,10 +312,15 @@ return rv; } -int enough(int level, int raid_disks, int layout, int clean, - char avail, int avail_disks) +int enough(int level, int raid_disks, int layout, int clean, char avail) { int copies, first; + int i; + int avail_disks = 0; + + for (i = 0; i < raid_disks; i++) + avail_disks += !!avail[i]; + switch (level) { case 10: / This is the tricky one - we need to check @@ -363,14 +371,14 @@ struct mdu_array_info_s array; struct mdu_disk_info_s disk; int avail_disks = 0; - int i; + int i, rv; char avail; if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 \|\| array.raid_disks <= 0) return 0; avail = calloc(array.raid_disks, 1); - for (i=0; i < 1024 && array.nr_disks > 0; i++) { + for (i=0; i < MAX_DISKS && array.nr_disks > 0; i++) { disk.number = i; if (ioctl(fd, GET_DISK_INFO, &disk) != 0) continue; @@ -386,9 +394,10 @@ avail[disk.raid_disk] = 1; } / This is used on an active array, so assume it is clean / - return enough(array.level, array.raid_disks, array.layout, - 1, - avail, avail_disks); + rv = enough(array.level, array.raid_disks, array.layout, + 1, avail); + free(avail); + return rv; } @@ 
-535,6 +544,7 @@ struct supertype st = guess_super(fd); if (!st) return 0; + st->ignore_hw_compat = 1; st->ss->load_super(st, fd, name); /* Looks like a raid array .. / fprintf(stderr, Name ": %s appears to be part of a raid array:\n", @@ -639,7 +649,7 @@ We allow upto 2048Megabytes before converting to * gigabytes, as that shows more precision and isn't * too large a number. - * Terrabytes are not yet handled. + * Terabytes are not yet handled. / if (bytes < 50001024) @@ -702,6 +712,14 @@ unsigned long long calc_array_size(int level, int raid_disks, int layout, int chunksize, unsigned long long devsize) { + if (level == 1) + return devsize; + devsize &= ~(unsigned long long)((chunksize>>9)-1); + return get_data_disks(level, layout, raid_disks) * devsize; +} + +int get_data_disks(int level, int layout, int raid_disks) +{ int data_disks = 0; switch (level) { case 0: data_disks = raid_disks; break; @@ -712,8 +730,8 @@ case 10: data_disks = raid_disks / (layout & 255) / ((layout>>8)&255); break; } - devsize &= ~(unsigned long long)((chunksize>>9)-1); - return data_disks * devsize; + + return data_disks; } #if !defined(MDASSEMBLE) \|\| defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) @@ -951,9 +969,10 @@ char dev = verstr+1; subarray = strchr(dev, '/'); - if (subarray) + if (subarray) { subarray++ = '\0'; - subarray = strdup(subarray); + subarray = strdup(subarray); + } container = devname2devnum(dev); if (sra) sysfs_free(sra); @@ -1120,7 +1139,8 @@ { struct GPT gpt; unsigned char empty_gpt_entry[16]= {0}; - struct GPT_part_entry part; + struct GPT_part_entry part; + char buf[512]; unsigned long long curr_part_end; unsigned all_partitions, entry_size; unsigned part_nr; @@ -1144,18 +1164,20 @@ / sanity checks / if (all_partitions > 1024 \|\| - entry_size > 512) + entry_size > sizeof(buf)) return -1; + part = (struct GPT_part_entry )buf; + for (part_nr=0; part_nr < all_partitions; part_nr++) { /* read partition entry / - if (read(fd, &part, entry_size) != 
(ssize_t)entry_size) + if (read(fd, buf, entry_size) != (ssize_t)entry_size) return 0; / is this valid partition? / - if (memcmp(part.type_guid, empty_gpt_entry, 16) != 0) { + if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) { / check the last lba for the current partition / - curr_part_end = __le64_to_cpu(part.ending_lba); + curr_part_end = __le64_to_cpu(part->ending_lba); if (curr_part_end > endofpart) endofpart = curr_part_end; } @@ -1253,7 +1275,7 @@ int d; ioctl(mdfd, GET_ARRAY_INFO, ainf); - for (d = 0 ; d < 1024 ; d++) { + for (d = 0 ; d < MAX_DISKS ; d++) { if (ioctl(mdfd, GET_DISK_INFO, disk) == 0 && (disk->major \|\| disk->minor)) return; @@ -1369,7 +1391,7 @@ if (!quiet) fprintf(stderr, Name ": Couldn't open %s, aborting\n", dev); - return 2; + return -1; } st->devnum = fd2devnum(fd); @@ -1572,7 +1594,7 @@ int start_mdmon(int devnum) { - int i; + int i, skipped; int len; pid_t pid; int status; @@ -1587,7 +1609,7 @@ if (check_env("MDADM_NO_MDMON")) return 0; - len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)); + len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1); if (len > 0) { char sl; pathbuf[len] = 0; @@ -1603,13 +1625,25 @@ switch(fork()) { case 0: /* FIXME yuk. CLOSE_EXEC?? */ - for (i=3; i < 100; i++) - close(i); + skipped = 0; + for (i=3; skipped < 20; i++) + if (close(i) < 0) + skipped++; + else + skipped = 0; + for (i=0; paths[i]; i++) - if (paths[i][0]) - execl(paths[i], "mdmon", - devnum2devname(devnum), - NULL); + if (paths[i][0]) { + if (__offroot) { + execl(paths[i], "mdmon", "--offroot", + devnum2devname(devnum), + NULL); + } else { + execl(paths[i], "mdmon", + devnum2devname(devnum), + NULL); + } + } exit(1); case -1: fprintf(stderr, Name ": cannot run mdmon. 
" "Array remains readonly\n"); @@ -1697,7 +1731,8 @@ if (check_env("MDADM_EXPERIMENTAL")) return 1; else { - fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL enviroment variable has to defined.\n"); + fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL" + " environment variable has to be defined.\n"); return 0; } }