Sophie: drbd-utils-8.3.8.1-1.fc14 x86

drbd-utils-8.3.8.1-1.fc14.x86_64.rpm

#
# drbd.conf example
#
# parameters you _need_ to change are the hostname, device, disk,
# meta-disk, address and port in the "on <hostname> {}" sections.
#
# you ought to know about the protocol, and the various timeouts.
#
# you probably want to set the rate in the syncer sections

#
# NOTE common pitfall:
# rate is given in units of _byte_ not bit
#

#
# increase timeout and maybe ping-int in net{}, if you see
# problems with "connection lost/connection established"
# (or change your setup to reduce network latency; make sure full
#  duplex behaves as such; check average roundtrip times while
#  network is saturated; and so on ...)
#

# Everything inside this skip section is ignored when the file is read;
# the text below is documentation only, not configuration.
skip {
  As you can see, you can also comment chunks of text
  with a 'skip[optional nonsense]{ skipped text }' section.
  This comes in handy, if you just want to comment out
  some 'resource <some name> {...}' section:
  just precede it with 'skip'.

  The basic format of option assignment is
  <option name><linear whitespace><value>;

  It should be obvious from the examples below,
  but if you really care to know the details:

  <option name> :=
        valid options in the respective scope
  <value>  := <num>|<string>|<choice>|...
              depending on the set of allowed values
              for the respective option.
  <num>    := [0-9]+, sometimes with an optional suffix of K,M,G
  <string> := (<name>|\"([^\"\\\n]*|\\.)*\")+
  <name>   := [/_.A-Za-z0-9-]+
}

#
# At most ONE global section is allowed.
# It must precede any resource section.
#
global {
    # By default we load the module with a minor-count of 32. In case you
    # have more devices in your config, the module gets loaded with
    # a minor-count that ensures that you have 10 minors spare.
    # In case 10 spare minors are too few for you, you can set the
    # minor-count explicitly here. ( Note, in contrast to DRBD-0.7 an
    # unused, spare minor has only a very little overhead of allocated
    # memory (a single pointer to be exact). )
    #
    # minor-count 64;

    # The user dialog counts and displays the seconds it waited so
    # far. You might want to disable this if you have the console
    # of your server connected to a serial terminal server with
    # limited logging capacity.
    # The dialog will print the count every 'dialog-refresh' seconds;
    # set it to 0 to disable redrawing completely. [ default = 1 ]
    #
    # dialog-refresh 5; # 5 seconds

    # You might disable one of drbdadm's sanity checks.
    # disable-ip-verification;

    # Participate in DRBD's online usage counter at http://usage.drbd.org
    # Possible options: ask, yes, no. Default is ask. In case you do not
    # know, set it to ask, and follow the on-screen instructions later.
    usage-count yes;
}


#
# The common section can have all the sections a resource can have but
# not the host section (started with the "on" keyword).
# The common section must precede all resources.
# All resources inherit the settings from the common section,
# but settings given in a resource take precedence over the
# common settings.
#

common {
  # Resync rate inherited by every resource that does not set its own.
  # The unit is based on bytes, not bits.
  syncer {
    rate 10M;
  }
}

#
# this need not be r#, you may use phony resource names,
# like "resource web" or "resource mail", too
#

resource r0 {
  # Fully annotated example resource: it walks through the handlers,
  # startup, disk, net and syncer sections and defines the hosts
  # "amd" and "alf".

  # transfer protocol to use.
  # C: write IO is reported as completed, if we know it has
  #    reached _both_ local and remote DISK.
  #    * for critical transactional data.
  # B: write IO is reported as completed, if it has reached
  #    local DISK and remote buffer cache.
  #    * for most cases.
  # A: write IO is reported as completed, if it has reached
  #    local DISK and local tcp send buffer. (see also sndbuf-size)
  #    * for high latency networks
  #
  #**********
  # uhm, benchmarks have shown that C is actually better than B.
  # this note shall disappear, when we are convinced that B is
  # the right choice "for most cases".
  # Until then, always use C unless you have a reason not to.
  #       --lge
  #**********
  #
  protocol C;

  handlers {
    # what should be done in case the node is primary, degraded
    # (=no connection) and has inconsistent data.
    pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

    # The node is currently primary, but lost the after split brain
    # auto recovery procedure. As a consequence it should go away.
    pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

    # In case you have set the on-io-error option to "call-local-io-error",
    # this script will get executed in case of a local IO error. It is
    # expected that this script will cause an immediate failover in the
    # cluster.
    local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";

    # Commands to run in case we need to downgrade the peer's disk
    # state to "Outdated". Should be implemented by the superior
    # communication possibilities of our cluster manager.
    # The provided script uses ssh, and is for demonstration/development
    # purposes.
    # fence-peer "/usr/lib/drbd/outdate-peer.sh on amd 192.168.22.11 192.168.23.11 on alf 192.168.22.12 192.168.23.12";
    #
    # Update: Now there is a solution that relies on heartbeat's
    # communication layers. You should really use this.
    fence-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    # For Pacemaker you might use:
    # fence-peer "/usr/lib/drbd/crm-fence-peer.sh";

    # The node is currently primary, but should become sync target
    # after the negotiating phase. Alert someone about this incident.
    #pri-lost "/usr/lib/drbd/notify-pri-lost.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

    # Notify someone of a split brain immediately, regardless of auto recovery policies.
    #initial-split-brain "/usr/lib/drbd/notify-split-brain.sh root";
    # Notify someone or maybe do something else if DRBD split brained, was not automatically
    # recovered and the resource is now sitting disconnected.
    #split-brain "/usr/lib/drbd/notify-split-brain.sh root";
    # Notify someone in case an online verify run found the backing devices out of sync.
    #out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
    #

    # These two handlers can be used to snapshot sync-target devices
    # for the duration of the resync.
    # The provided script has these options:
    # -p | --percent <reserve space in percent of the original volume. Default: 10%>
    # -a | --additional <snapshot space in KiB. Default: 10 MiB>
    # -n | --disconnect-on-error
    #    By default the script tells DRBD to do the resync no matter
    #    whether taking the snapshot works or not.
    #    If you prefer to drop the connection in case taking the snapshot
    #    fails, use the --disconnect-on-error option.
    # -v | --verbose
    # -- <additional lvcreate options>
    #before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
    #after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
  }

  startup {
    # Wait for connection timeout.
    # The init script blocks the boot process until the resources
    # are connected. This is so when the cluster manager starts later,
    # it does not see a resource with internal split-brain.
    # In case you want to limit the wait time, do it here.
    # Default is 0, which means unlimited. Unit is seconds.
    #
    # wfc-timeout  0;

    # Wait for connection timeout if this node was a degraded cluster.
    # In case a degraded cluster (= cluster with only one node left)
    # is rebooted, this timeout value is used.
    #
    degr-wfc-timeout 120;    # 2 minutes.

    # Wait for connection timeout if the peer node is already outdated.
    # (Do not set this to 0, since that means unlimited)
    #
    outdated-wfc-timeout 2;  # 2 seconds.

    # In case there was a split brain situation the devices will
    # drop their network configuration instead of connecting. Since
    # this means that the network is working, the cluster manager
    # should be able to communicate as well. Therefore the default
    # of DRBD's init script is to terminate in this case. To make
    # it continue waiting in this case, set this option.
    #
    # wait-after-sb;

    # In case you are using DRBD for GFS/OCFS2 you want the
    # startup script to promote it to primary. Node names are also
    # possible instead of "both".
    # become-primary-on both;
  }

  disk {
    # if the lower level device reports io-error you have the choice of
    #  "pass_on"  ->  Report the io-error to the upper layers.
    #                 Primary   -> report it to the mounted file system.
    #                 Secondary -> ignore it.
    #  "call-local-io-error"
    #             ->  Call the script configured by the name "local-io-error".
    #  "detach"   ->  The node drops its backing storage device, and
    #                 continues in diskless mode.
    #
    on-io-error   detach;

    # Controls the fencing policy, default is "dont-care". Before you
    # set any policy you need to make sure that you have a working
    # fence-peer handler. Possible values are:
    #  "dont-care"     -> Never call the fence-peer handler. [ DEFAULT ]
    #  "resource-only" -> Call the fence-peer handler if we are primary and
    #                     lose the connection to the secondary, and also
    #                     when an unconnected secondary wants to become
    #                     primary.
    #  "resource-and-stonith"
    #                  -> Calls the fence-peer handler and freezes local
    #                     IO immediately after loss of connection. This is
    #                     necessary if your heartbeat can STONITH the other
    #                     node.
    # fencing resource-only;

    # In case you only want to use a fraction of the available space
    # you might use the "size" option here.
    #
    # size 10G;

    # In case you are sure that your storage subsystem has battery
    # backed up RAM and you know from measurements that it really honors
    # flush instructions by flushing data out from its non volatile
    # write cache to disk, you have double security. You might then
    # reduce this to single security by disabling disk flushes with
    # this option. It might improve performance in this case.
    # ONLY USE THIS OPTION IF YOU KNOW WHAT YOU ARE DOING.
    # no-disk-flushes;
    # no-md-flushes;

    # In some special circumstances the device mapper stack manages to
    # pass BIOs to DRBD that violate the constraints that are set forth
    # by DRBD's merge_bvec() function and which have more than one bvec.
    # A known example is:
    # phys-disk -> DRBD -> LVM -> Xen -> misaligned partition (63) -> DomU FS
    # Then you might see "bio would need to, but cannot, be split:" in
    # the Dom0's kernel log.
    # The best workaround is to properly align the partition within
    # the VM (E.g. start it at sector 1024). (Costs 480 KiByte of storage)
    # Unfortunately the default of most Linux partitioning tools is
    # to start the first partition at an odd number (63). Therefore
    # most distributions' install helpers for virtual linux machines will
    # end up with misaligned partitions.
    # The second best workaround is to limit DRBD's max bvecs per BIO
    # (= max-bio-bvecs) to 1. (Costs performance).
    # max-bio-bvecs 1;
  }

  net {
    # this is the size of the tcp socket send buffer
    # increase it _carefully_ if you want to use protocol A over a
    # high latency network with reasonable write throughput.
    # defaults to 2*65535; you might try even 1M, but if your kernel or
    # network driver chokes on that, you have been warned.
    # sndbuf-size 512k;

    # timeout       60;    #  6 seconds  (unit = 0.1 seconds)
    # connect-int   10;    # 10 seconds  (unit = 1 second)
    # ping-int      10;    # 10 seconds  (unit = 1 second)
    # ping-timeout   5;    # 500 ms (unit = 0.1 seconds)

    # Maximal number of requests (4K) to be allocated by DRBD.
    # The minimum is hardcoded to 32 (=128 kByte).
    # For high performance installations it might help if you
    # increase that number. These buffers are used to hold
    # datablocks while they are written to disk.
    #
    # max-buffers     2048;

    # When the number of outstanding requests on a standby (secondary)
    # node exceeds unplug-watermark, we start to kick the backing device
    # to start its request processing. This is an advanced tuning
    # parameter to get more performance out of capable storage controllers.
    # Some controllers like to be kicked often, other controllers
    # deliver better performance when they are kicked less frequently.
    # Set it to the value of max-buffers to get the least possible
    # number of run_task_queue_disk() / q->unplug_fn(q) calls.
    #
    # unplug-watermark   128;


    # The highest number of data blocks between two write barriers.
    # If you set this < 10 you might decrease your performance.
    # max-epoch-size  2048;

    # if some block send times out this many times, the peer is
    # considered dead, even if it still answers ping requests.
    # ko-count 4;

    # If you want to use OCFS2/openGFS on top of DRBD enable
    # this option, and only enable it if you are going to use
    # one of these filesystems. Do not enable it for ext2,
    # ext3,reiserFS,XFS,JFS etc...
    # allow-two-primaries;

    # This enables peer authentication. Without this everybody
    # on the network could connect to one of your DRBD nodes with
    # a program that emulates DRBD's protocol and could copy
    # all your data.
    # Specify one of the kernel's digest algorithms, e.g.:
    # md5, sha1, sha256, sha512, wp256, wp384, wp512, michael_mic ...
    # and a shared secret.
    # Authentication is only done once after the TCP connection
    # is established. There are no disadvantages from using authentication,
    # therefore I suggest to enable it in any case.
    # cram-hmac-alg "sha1";
    # shared-secret "FooFunFactory";

    # In case the nodes of your cluster see each other again after
    # a split brain situation in which both nodes were primary
    # at the same time, you have two diverged versions of your data.
    #
    # In case both nodes are secondary you can control DRBD's
    # auto recovery strategy by the "after-sb-0pri" option. The
    # default is to disconnect.
    #    "disconnect" ... No automatic resynchronisation, simply disconnect.
    #    "discard-younger-primary"
    #                     Auto sync from the node that was primary before
    #                     the split brain situation happened.
    #    "discard-older-primary"
    #                     Auto sync from the node that became primary
    #                     as second during the split brain situation.
    #    "discard-least-changes"
    #                     Auto sync from the node that touched more
    #                     blocks during the split brain situation.
    #    "discard-node-NODENAME"
    #                     Auto sync _to_ the named node.
    after-sb-0pri disconnect;

    # If one of the nodes is already primary, then the auto-recovery
    # strategy is controlled by the "after-sb-1pri" option.
    #    "disconnect" ... always disconnect
    #    "consensus"  ... discard the version of the secondary if the outcome
    #                     of the "after-sb-0pri" algorithm would also destroy
    #                     the current secondary's data. Otherwise disconnect.
    #    "violently-as0p" Always take the decision of the "after-sb-0pri"
    #                     algorithm. Even if that causes an erratic change
    #                     of the primary's view of the data.
    #                     This is only useful if you use a 1-node FS (i.e.
    #                     not OCFS2 or GFS) with the allow-two-primaries
    #                     flag, _AND_ you really know what you are doing.
    #                     This is DANGEROUS and MAY CRASH YOUR MACHINE if you
    #                     have a FS mounted on the primary node.
    #    "discard-secondary"
    #                     discard the version of the secondary.
    #    "call-pri-lost-after-sb"  Always honour the outcome of the "after-sb-0pri"
    #                     algorithm. In case it decides that the current
    #                     secondary has the right data, it panics the
    #                     current primary.
    #    "suspend-primary" ???
    after-sb-1pri disconnect;

    # In case both nodes are primary you control DRBD's strategy by
    # the "after-sb-2pri" option.
    #    "disconnect" ... Go to StandAlone mode on both sides.
    #    "violently-as0p" Always take the decision of the "after-sb-0pri".
    #    "call-pri-lost-after-sb" ... Honor the outcome of the "after-sb-0pri"
    #                     algorithm and panic the other node.

    after-sb-2pri disconnect;

    # To solve the cases when the outcome of the resync decision is
    # incompatible with the current role assignment in the cluster.
    #    "disconnect" ... No automatic resynchronisation, simply disconnect.
    #    "violently" .... Sync to the primary node is allowed, violating the
    #                     assumption that data on a block device is stable
    #                     for one of the nodes. DANGEROUS, DO NOT USE.
    #    "call-pri-lost"  Call the "pri-lost" helper program on one of the
    #                     machines. This program is expected to reboot the
    #                     machine. (I.e. make it secondary.)
    rr-conflict disconnect;

    # DRBD-0.7's behaviour is equivalent to
    #   after-sb-0pri discard-younger-primary;
    #   after-sb-1pri consensus;
    #   after-sb-2pri disconnect;

    # DRBD can ensure the data integrity of the user's data on the network
    # by comparing hash values.
    # Note: Normally this is ensured by the 16 bit checksums in the headers
    # of TCP/IP packets. Unfortunately it turned out that GBit NICs with
    # various offloading engines might produce valid checksums for corrupted
    # data. Use this option during your pre-production tests, usually you
    # want to turn it off for production to reduce CPU overhead.
    # Note2: Data blocks that are changed while they are being written to
    # disk (i.e. while the transfer goes on) cause false positives. Known
    # block device users which do so are the swap code and ReiserFS.
    # data-integrity-alg "md5";

    # DRBD usually uses the TCP socket option TCP_CORK to hint to the network
    # stack when it can expect more data, and when it should flush out what it
    # has in its send queue. It turned out that there is at least one network
    # stack that performs worse when one uses this hinting method. Therefore
    # we introduced this option, which disables the setting and clearing of
    # the TCP_CORK socket option by DRBD.
    # no-tcp-cork;
  }

  syncer {
    # Limit the bandwidth used by the resynchronisation process.
    # default unit is kByte/sec; optional suffixes K,M,G are allowed.
    #
    # Even though this is a network setting, the units are based
    # on _byte_ (octet for our french friends) not bit.
    # We are storage guys.
    #
    # Note that on 100Mbit ethernet, you cannot expect more than
    # 12.5 MByte total transfer rate.
    # Consider using GigaBit Ethernet.
    #
    rate 10M;

    # Normally all devices are resynchronized in parallel.
    # To achieve better resynchronisation performance you should resync
    # DRBD resources which have their backing storage on one physical
    # disk sequentially. To express this use the "after" keyword.
    after "r2";

    # Configures the size of the active set. Each extent is 4M,
    # 257 Extents ~> 1GB active set size. In case your syncer
    # runs @ 10MB/sec, a resync after a primary's crash will last
    # 1GB / ( 10MB/sec ) ~ 102 seconds ~ One Minute and 42 Seconds.
    # BTW, the hash algorithm works best if the number of al-extents
    # is prime. (To test the worst case performance use a power of 2)
    al-extents 257;

    # Sets the CPU affinity mask of DRBD's threads. Might be of interest
    # for advanced performance tuning.
    # cpu-mask 15;
  }

  on amd {
    device     /dev/drbd0;
    disk       /dev/hde5;
    address    192.168.22.11:7788;
    flexible-meta-disk  internal;

    # meta-disk is either 'internal' or '/dev/ice/name [idx]'
    #
    # You can use a single block device to store meta-data
    # of multiple DRBDs.
    # E.g. use meta-disk /dev/hde6[0]; and meta-disk /dev/hde6[1];
    # for two different resources. In this case the meta-disk
    # would need to be at least 256 MB in size.
    #
    # 'internal' means that the last 128 MB of the lower device
    # are used to store the meta-data.
    # You must not give an index with 'internal'.
  }

  on alf {
    device    /dev/drbd0;
    disk      /dev/hdc5;
    address   192.168.22.12:7788;
    meta-disk internal;
  }
}

#
# yes, you may also quote the resource name.
# but don't include whitespace, unless you mean it :)
#
resource "r1" {
  protocol C;

  startup {
    wfc-timeout      0;    # 0 = wait without limit (unit: seconds)
    degr-wfc-timeout 120;  # 2 minutes
  }

  disk { on-io-error detach; }

  net {
    # No network options are set here; uncomment to tune.
    # timeout           60;
    # connect-int       10;
    # ping-int          10;
    # max-buffers     2048;
    # max-epoch-size  2048;
  }

  syncer { }

  # device, disk and meta-disk may be given once at resource level
  # instead of being repeated in every "on <host>" section.
  device    /dev/drbd1;
  disk      /dev/hde6;
  meta-disk /dev/somewhere [7];

  # IPv6 addressing example. To use IPv4 in IPv6, i.e. something like
  # [::ffff:192.168.22.11], you additionally have to set
  # disable-ip-verification in the global section.
  on amd {
    address ipv6 [fd0c:39f4:f135:305:230:48ff:fe63:5c9a]:7789;
  }

  on alf {
    address ipv6 [fd0c:39f4:f135:305:230:48ff:fe63:5ebe]:7789;
  }
}

resource r2 {
  protocol C;

  startup {
    wfc-timeout      0;
    degr-wfc-timeout 120;
  }

  disk {
    on-io-error detach;
  }

  net {
    timeout        60;
    connect-int    10;
    ping-int       10;
    max-buffers    2048;
    max-epoch-size 2048;
  }

  # Resync rate for this resource. No "after" dependency is declared
  # here; resync ordering is expressed with the "after" keyword, and
  # r0 declares after "r2".
  syncer {
    rate 4M;
  }

  # Options inside a host section may appear in any of the orders
  # shown below, and values may be quoted or unquoted.
  on amd {
    address   192.168.22.11:7790;
    disk      /dev/hde7;
    device    /dev/drbd2;
    meta-disk "internal";
  }

  on alf {
    device    "/dev/drbd2";
    disk      "/dev/hdc7";
    meta-disk "internal";
    address   192.168.22.12:7790;
  }
}

resource r3 {
  protocol C;

  # The device is the same on both hosts, so it is given once at
  # resource level.
  device   /dev/drbd3;

  on amd {
    disk      /dev/hde8;
    address   192.168.22.11:7791;
    meta-disk internal;
  }

  on alf {
    disk      /dev/hdc8;
    address   192.168.22.12:7791;
    # Meta-data on a separate device, using index 8.
    meta-disk /some/where[8];
  }
}

resource r4 {
  protocol C;

  # Here the device is specified by its minor number rather than by
  # a /dev/drbdX path.
  device   minor 4;

  on amd {
    disk      /dev/hde9;
    address   192.168.22.11:7792;
    meta-disk internal;
  }

  on alf {
    disk      /dev/hdc9;
    address   192.168.22.12:7792;
    # Meta-data on a separate device, using index 9.
    meta-disk /some/where[9];
  }
}

# Lower-level resource between alice and bob; the resource "upper"
# is stacked on top of it.
resource lower-alice-bob {
  protocol C;

  on alice {
    device    /dev/drbd4;
    disk      /dev/hde9;
    address   192.168.23.11:7791;
    meta-disk internal;
  }

  on bob {
    device    /dev/drbd4;
    disk      /dev/hdc9;
    address   192.168.23.12:7791;
    meta-disk /some/where[8];
  }
}

# Lower-level resource between charly and daisy; the resource "upper"
# is stacked on top of it.
resource lower-charly-daisy {
  protocol C;

  on charly {
    device    /dev/drbd4;
    disk      /dev/hde9;
    address   192.168.23.13:7791;
    meta-disk internal;
  }

  on daisy {
    device    /dev/drbd4;
    disk      /dev/hdc9;
    address   192.168.23.14:7791;
    meta-disk /some/where[8];
  }
}

# Stacked resource: it runs on top of the two lower-level resources
# lower-alice-bob and lower-charly-daisy, using protocol A.
resource upper {
  protocol	A;

  # Side stacked on lower-alice-bob. The proxy's inside address is
  # loopback, so the host names after "proxy on" are the hosts of the
  # lower resource: alice and bob. (This previously read "alic", which
  # matches no host defined in this file.)
  stacked-on-top-of lower-alice-bob {
    device	/dev/drbd10;
    address	127.0.0.1:1230;
    proxy on alice bob {
     inside     127.0.0.1:1234;
     outside	192.168.23.21:7791;
    }
  }

  # Side stacked on lower-charly-daisy; same layout, with the proxy
  # declared for the hosts charly and daisy.
  stacked-on-top-of lower-charly-daisy {
    device	/dev/drbd10;
    address	127.0.0.1:1230;
    proxy on charly daisy {
     inside     127.0.0.1:1234;
     outside	192.168.23.22:7791;
    }
  }
}